The following Python snippet creates an Azure SQL table dataset in Azure Data Factory and then reads the dataset's schema back:
from azure.identity import DefaultAzureCredential
from azure.mgmt.datafactory import DataFactoryManagementClient
from azure.mgmt.datafactory.models import DatasetResource
# --- Configuration: replace the placeholders with your own values -----------
subscription_id = "YOUR_SUBSCRIPTION_ID"
resource_group_name = "YOUR_RESOURCE_GROUP_NAME"
data_factory_name = "YOUR_DATA_FACTORY_NAME"

# Authenticate (env vars / managed identity / az CLI login) and build the
# Data Factory management client.
credential = DefaultAzureCredential()
adf_client = DataFactoryManagementClient(credential, subscription_id)

# Dataset settings.
dataset_name = "your_dataset_name"
# FIX: an ADF dataset cannot hold a connection string directly — credentials
# belong to a *linked service*. The dataset must reference that linked
# service by name; putting "connectionString" in typeProperties is rejected
# by the service.
linked_service_name = "YOUR_AZURE_SQL_LINKED_SERVICE_NAME"
table_name = "YOUR_AZURE_SQL_TABLE_NAME"

dataset = DatasetResource(
    properties={
        "type": "AzureSqlTable",
        "linkedServiceName": {
            "referenceName": linked_service_name,
            "type": "LinkedServiceReference",
        },
        "typeProperties": {
            "tableName": table_name,
        },
    }
)

# Create (or update) the dataset in the Data Factory.
adf_client.datasets.create_or_update(
    resource_group_name, data_factory_name, dataset_name, dataset
)

# Fetch the dataset back. FIX: the SDK deserializes `properties` into a
# typed model (e.g. AzureSqlTableDataset), not a plain dict, so dict
# subscripting like properties["typeProperties"]["schema"] raises TypeError.
# Use attribute access instead; `schema` may be None if ADF has not
# imported/inferred a schema for this dataset yet.
fetched = adf_client.datasets.get(resource_group_name, data_factory_name, dataset_name)
schema = getattr(fetched.properties, "schema", None)

# `schema` is typically a list of {"name": ..., "type": ...} column entries.
print(schema)