入门使用适用于 Iceberg 的 OneLake 表 API

OneLake 提供 REST API 终结点,用于与 Microsoft Fabric 中的表进行交互。 此终结点支持 Fabric 中 Apache Iceberg 表的只读元数据操作。 这些操作与 Iceberg REST 目录 API 开放标准(IRC)相兼容。

重要

此功能目前为预览版

先决条件

详细了解适用于 Iceberg 的 OneLake 表 API,并确保查看先决条件信息。

客户端快速入门示例

查看这些示例,了解如何设置现有的 Iceberg REST 目录(IRC)客户端或库,以便与新的 OneLake 表终结点一起使用。

PyIceberg

使用以下示例 Python 代码将 PyIceberg 配置为使用 OneLake 表 API 终结点。 然后,列出数据项中的架构和表。

此代码假定当前已登录的用户可以使用 DefaultAzureCredential 进行身份验证。或者,也可以使用 MSAL Python 库来获取令牌。

from pyiceberg.catalog import load_catalog
from azure.identity import DefaultAzureCredential

# --- Connection settings -------------------------------------------------
# Iceberg base URL at the OneLake table API endpoint.
TABLE_API_URL = "https://onelake.table.fabric.microsoft.com/iceberg"
# Fabric data-item scope, in the form "<workspaceID>/<dataItemID>".
WORKSPACE_ID = "12345678-abcd-4fbd-9e50-3937d8eb1915"
DATA_ITEM_ID = "98765432-dcba-4209-8ac2-0821c7f8bd91"
ACCOUNT_NAME = "onelake"

# Acquire an Entra ID token for the storage audience from the ambient
# (already signed-in) identity.
credential = DefaultAzureCredential()
bearer_token = credential.get_token("https://storage.azure.com/.default").token

# Assemble the PyIceberg catalog configuration for one specific data item.
catalog_config = {
    "uri": TABLE_API_URL,
    "token": bearer_token,
    "warehouse": f"{WORKSPACE_ID}/{DATA_ITEM_ID}",
    "adls.account-name": ACCOUNT_NAME,
    "adls.account-host": f"{ACCOUNT_NAME}.blob.fabric.microsoft.com",
    "adls.credential": credential,
}
catalog = load_catalog("onelake_catalog", **catalog_config)

# Walk the catalog: print every schema, then the tables inside each one.
namespaces = catalog.list_namespaces()
print(namespaces)
for namespace in namespaces:
    print(catalog.list_tables(namespace))

Snowflake

使用以下示例代码在 Snowflake 中创建新的目录链接数据库。该数据库将自动包含所连接的 Fabric 数据项中的所有架构和表。此过程涉及依次创建目录集成、外部卷和数据库三个对象。

-- Create the catalog integration object that points Snowflake at the
-- OneLake table API endpoint for a single Fabric data item.
CREATE OR REPLACE CATALOG INTEGRATION IRC_CATINT
    CATALOG_SOURCE = ICEBERG_REST
    TABLE_FORMAT = ICEBERG
    REST_CONFIG = (
        CATALOG_URI = 'https://onelake.table.fabric.microsoft.com/iceberg' -- Iceberg base URL at the OneLake table endpoint
        CATALOG_NAME = '12345678-abcd-4fbd-9e50-3937d8eb1915/98765432-dcba-4209-8ac2-0821c7f8bd91' -- Fabric data item scope, in the form `workspaceID/dataItemID`
    )
    REST_AUTHENTICATION = (
        TYPE = OAUTH -- Entra auth
        OAUTH_TOKEN_URI = 'https://login.microsoftonline.com/11122233-1122-4138-8485-a47dc5d60435/oauth2/v2.0/token' -- Token endpoint for the Entra tenant ID
        OAUTH_CLIENT_ID = '44332211-aabb-4d12-aef5-de09732c24b1' -- Entra application client ID
        OAUTH_CLIENT_SECRET = '[secret]' -- Entra application client secret value
        OAUTH_ALLOWED_SCOPES = ('https://storage.azure.com/.default') -- Storage token audience
    )
    ENABLED = TRUE
;

-- Create the external volume object that gives Snowflake access to the
-- data item's underlying files in OneLake.
CREATE OR REPLACE EXTERNAL VOLUME IRC_EXVOL
    STORAGE_LOCATIONS =
    (
        (
            NAME = 'IRC_EXVOL'
            STORAGE_PROVIDER = 'AZURE'
            STORAGE_BASE_URL = 'azure://onelake.dfs.fabric.microsoft.com/12345678-abcd-4fbd-9e50-3937d8eb1915/98765432-dcba-4209-8ac2-0821c7f8bd91'
            AZURE_TENANT_ID='11122233-1122-4138-8485-a47dc5d60435' -- Entra tenant id
        )
    )
    -- The OneLake table API endpoint is read-only, so writes are disabled.
    -- (Fix: removed a stray trailing ';' after this statement that produced
    -- an empty statement and a syntax error in Snowflake clients.)
    ALLOW_WRITES = FALSE;

-- Describe the external volume; the output includes the consent URL and the
-- multi-tenant app name needed to grant Snowflake access (see below).
DESC EXTERNAL VOLUME IRC_EXVOL;

DESC EXTERNAL VOLUME 的响应将返回有关外部卷的元数据,包括:

  • AZURE_CONSENT_URL,即租户的权限同意页面链接;如果尚未完成授权,请按照该页面上的步骤进行操作。
  • AZURE_MULTI_TENANT_APP_NAME,它是需要访问数据项的 Snowflake 客户端应用程序的名称。 确保授予它对数据项的访问权限,以便 Snowflake 能够读取表内容。
-- Create a Snowflake catalog linked database
-- Combines the catalog integration (metadata access) and the external
-- volume (file access) created earlier into one queryable database.
CREATE OR REPLACE DATABASE IRC_CATALOG_LINKED
    LINKED_CATALOG = (
        CATALOG = 'IRC_CATINT'
    )
    EXTERNAL_VOLUME = 'IRC_EXVOL'
;

-- Check the status of the catalog link (sync progress and any errors).
SELECT SYSTEM$CATALOG_LINK_STATUS('IRC_CATALOG_LINKED');

-- Example read: query a table surfaced from the linked Fabric data item.
SELECT * FROM IRC_CATALOG_LINKED."dbo"."sentiment";

DuckDB

使用以下示例 Python 代码将 DuckDB 配置为列出数据项中的架构和表。

此代码假定当前已登录的用户可以使用 DefaultAzureCredential 进行身份验证。或者,也可以使用 MSAL Python 库来获取令牌。

import duckdb
from azure.identity import DefaultAzureCredential

# Iceberg API base URL at the OneLake table API endpoint
table_api_url = "https://onelake.table.fabric.microsoft.com/iceberg"

# Entra ID token for the Azure Storage audience, acquired from the
# currently signed-in identity.
credential = DefaultAzureCredential()
token = credential.get_token("https://storage.azure.com/.default").token

# Client configuration options: the warehouse identifies one Fabric data
# item, in the form "<workspaceID>/<dataItemID>".
fabric_workspace_id = "12345678-abcd-4fbd-9e50-3937d8eb1915"
fabric_data_item_id = "98765432-dcba-4209-8ac2-0821c7f8bd91"
warehouse = f"{fabric_workspace_id}/{fabric_data_item_id}"

# Connect to an in-memory DuckDB instance
con = duckdb.connect()

# Install & load the extensions required for Iceberg and Azure access
con.execute("INSTALL iceberg; LOAD iceberg;")
con.execute("INSTALL azure; LOAD azure;")
con.execute("INSTALL httpfs; LOAD httpfs;")

# --- Auth & Catalog ---
# 1) Secret for the Iceberg REST Catalog (use existing bearer token)
# NOTE(review): some DuckDB versions reject prepared-statement parameters
# ('?') inside DDL such as CREATE SECRET — confirm against the DuckDB
# version in use, or interpolate the token into the statement instead.
con.execute("""
CREATE OR REPLACE SECRET onelake_catalog (
TYPE ICEBERG,
TOKEN ?
);
""", [token])

# 2) Secret for ADLS Gen2 / OneLake filesystem access via Azure extension
#    (access token audience must be https://storage.azure.com; account name is 'onelake')
con.execute("""
CREATE OR REPLACE SECRET onelake_storage (
TYPE AZURE,
PROVIDER ACCESS_TOKEN,
ACCESS_TOKEN ?,
ACCOUNT_NAME 'onelake'
);
""", [token])

# 3) Attach the Iceberg REST catalog for the chosen data item
con.execute(f"""
ATTACH '{warehouse}' AS onelake (
TYPE ICEBERG,
SECRET onelake_catalog,
ENDPOINT '{table_api_url}'
);
""")

# --- Explore & Query ---
# Fix: the original called `display(...)`, which is only defined inside
# notebook environments and raises NameError in a plain Python script;
# `print` works everywhere.
print(con.execute("SHOW ALL TABLES").fetchdf())

示例请求和响应

这些示例请求和响应说明了 OneLake 表 API 终结点当前支持的 Iceberg REST Catalog (IRC) 操作的使用。 有关 IRC 的详细信息,请参阅 开放标准规范

对于以下每个操作:

  • <BaseUrl> 是 https://onelake.table.fabric.microsoft.com/iceberg
  • <Warehouse> 是 <Workspace>/<DataItem>,可以是:
    • <WorkspaceID>/<DataItemID>,例如 12345678-abcd-4fbd-9e50-3937d8eb1915/98765432-dcba-4209-8ac2-0821c7f8bd91
    • <WorkspaceName>/<DataItemName>.<DataItemType>,例如 MyWorkspace/MyItem.Lakehouse,只要两个名称不包含特殊字符。
  • <Prefix> 由获取配置的调用返回,其值通常与 <Warehouse> 相同。
  • <Token> 是 Entra ID 在成功身份验证时返回的访问令牌值。

获取配置

列出 Iceberg 目录配置设置。

  • 请求

    GET <BaseUrl>/v1/config?warehouse=<Warehouse>
    Authorization: Bearer <Token>
    
  • 响应

    200 OK
    {
        "defaults": {},
        "endpoints": [
            "GET /v1/{prefix}/namespaces",
            "GET /v1/{prefix}/namespaces/{namespace}",
            "HEAD /v1/{prefix}/namespaces/{namespace}",
            "GET /v1/{prefix}/namespaces/{namespace}/tables",
            "GET /v1/{prefix}/namespaces/{namespace}/tables/{table}",
            "HEAD /v1/{prefix}/namespaces/{namespace}/tables/{table}"
        ],
        "overrides": {
            "prefix": "<Prefix>"
        }
    }
    

    列出模式

    列出 Fabric 数据项中的架构。

    • 请求

      GET <BaseUrl>/v1/<Prefix>/namespaces
      Authorization: Bearer <Token>
      
    • 响应

      200 OK
      {
          "namespaces": [
              [
                  "dbo"
              ]
          ],
          "next-page-token": null
      }
      

    获取架构

    获取给定架构的架构详细信息。

    • 请求

      GET <BaseUrl>/v1/<Prefix>/namespaces/<SchemaName>
      Authorization: Bearer <Token>
      
    • 响应

      200 OK
      {
          "namespace": [
              "dbo"
          ],
          "properties": {
              "location": "d892007b-3216-424a-a339-f3dca61335aa/40ef140a-8542-4f4c-baf2-0f8127fd59c8/Tables/dbo"
          }
      }
      

    列出表格

    列出给定架构中的表。

    • 请求

      GET <BaseUrl>/v1/<Prefix>/namespaces/<SchemaName>/tables
      Authorization: Bearer <Token>
      
    • 响应

      200 OK
      {
          "identifiers": [
              {
                  "namespace": [
                      "dbo"
                  ],
                  "name": "DIM_TestTime"
              },
              {
                  "namespace": [
                      "dbo"
                  ],
                  "name": "DIM_TestTable"
              }
          ],
          "next-page-token": null
      }
      

    获取表

    获取指定表的详细信息。

    • 请求

      GET <BaseUrl>/v1/<Prefix>/namespaces/<SchemaName>/tables/<TableName>
      Authorization: Bearer <Token>
      
    • 响应

      200 OK
      {
          "metadata-location": "abfss://...@onelake.dfs.fabric.microsoft.com/.../Tables/DIM_TestTime/metadata/v3.metadata.json",
          "metadata": {
              "format-version": 2,
              "table-uuid": "...",
              "location": "abfss://...@onelake.dfs.fabric.microsoft.com/.../Tables/DIM_TestTime",
              "last-sequence-number": 2,
              "last-updated-ms": ...,
              "last-column-id": 4,
              "current-schema-id": 0,
              "schemas": [
                  {
                      "type": "struct",
                      "schema-id": 0,
                      "fields": [
                          {
                              "id": 1,
                              "name": "id",
                              "required": false,
                              "type": "int"
                          },
                          {
                              "id": 2,
                              "name": "name",
                              "required": false,
                              "type": "string"
                          },
                          {
                              "id": 3,
                              "name": "age",
                              "required": false,
                              "type": "int"
                          },
                          {
                              "id": 4,
                              "name": "i",
                              "required": false,
                              "type": "boolean"
                          }
                      ]
                  }
              ],
              "default-spec-id": 0,
              "partition-specs": [
                  {
                      "spec-id": 0,
                      "fields": []
                  }
              ],
              "last-partition-id": 999,
              "default-sort-order-id": 0,
              "sort-orders": [
                  {
                      "order-id": 0,
                      "fields": []
                  }
              ],
              "properties": {
                  "schema.name-mapping.default": "[ {\n  \"field-id\" : 1,\n  \"names\" : [ \"id\" ]\n}, {\n  \"field-id\" : 2,\n  \"names\" : [ \"name\" ]\n}, {\n  \"field-id\" : 3,\n  \"names\" : [ \"age\" ]\n}, {\n  \"field-id\" : 4,\n  \"names\" : [ \"i\" ]\n} ]",
                  "write.metadata.delete-after-commit.enabled": "true",
                  "write.data.path": "abfs://...@onelake.dfs.fabric.microsoft.com/.../Tables/DIM_TestTime",
                  "XTABLE_METADATA": "{\"lastInstantSynced\":\"...\",\"instantsToConsiderForNextSync\":[],\"version\":0,\"sourceTableFormat\":\"DELTA\",\"sourceIdentifier\":\"3\"}",
                  "write.parquet.compression-codec": "zstd"
              },
              "current-snapshot-id": ...,
              "refs": {
                  "main": {
                      "snapshot-id": ...,
                      "type": "branch"
                  }
              },
              "snapshots": [
                  {
                      "sequence-number": 2,
                      "snapshot-id": ...,
                      "parent-snapshot-id": ...,
                      "timestamp-ms": ...,
                      "summary": {
                          "operation": "overwrite",
                          "XTABLE_METADATA": "{\"lastInstantSynced\":\"...\",\"instantsToConsiderForNextSync\":[],\"version\":0,\"sourceTableFormat\":\"DELTA\",\"sourceIdentifier\":\"3\"}",
                          "added-data-files": "1",
                          "deleted-data-files": "1",
                          "added-records": "1",
                          "deleted-records": "1",
                          "added-files-size": "2073",
                          "removed-files-size": "2046",
                          "changed-partition-count": "1",
                          "total-records": "6",
                          "total-files-size": "4187",
                          "total-data-files": "2",
                          "total-delete-files": "0",
                          "total-position-deletes": "0",
                          "total-equality-deletes": "0"
                      },
                      "manifest-list": "abfss://...@onelake.dfs.fabric.microsoft.com/.../Tables/DIM_TestTime/metadata/snap-....avro",
                      "schema-id": 0
                  }
              ],
              "statistics": [],
              "snapshot-log": [
                  {
                      "timestamp-ms": ...,
                      "snapshot-id": ...
                  }
              ],
              "metadata-log": [
                  {
                      "timestamp-ms": ...,
                      "metadata-file": "abfss://...@onelake.dfs.fabric.microsoft.com/.../Tables/DIM_TestTime/metadata/v1.metadata.json"
                  },
                  {
                      "timestamp-ms": ...,
                      "metadata-file": "abfss://...@onelake.dfs.fabric.microsoft.com/.../Tables/DIM_TestTime/metadata/v2.metadata.json"
                  }
              ]
          }
      }