Depending on where you want to store the processed data, you need to create a sink dataset.
Define the activities that will be part of your pipeline. This could involve copying data, transforming data... If you're just moving data from Excel to another source, you might use the CopyActivity
.
Create the Pipeline: With datasets and activities defined, create a pipeline that uses these components and then trigger the pipeline run either on-demand or on a schedule.
using Microsoft.Azure.Management.DataFactory;
using Microsoft.Azure.Management.DataFactory.Models;
using System.Security;
string storageLinkedServiceName = "<YourStorageLinkedServiceName>";
string postgreSQLLinkedServiceName = "PostgreSQLLinkedService";
LinkedServiceResource postgreSqlLinkedService = new LinkedServiceResource(
new AzurePostgreSqlLinkedService
{
ConnectionString = new SecureString("<YourConnectionString>")
}
);
client.LinkedServices.CreateOrUpdate(resourceGroup, dataFactoryName, postgreSQLLinkedServiceName, postgreSqlLinkedService);
// Create Excel Dataset for Source Azure Blob
var listOfDataSetElements = GenerateListOfDataElements("<YourDataSourceTableWithColumns>");
string blobDatasetName = "AzureBlobDataSetexcel";
DatasetResource excelDataset = new DatasetResource(
new ExcelDataset
{
LinkedServiceName = new LinkedServiceReference { ReferenceName = storageLinkedServiceName },
FolderPath = new Expression { Value = @"<YourFolderPath>", Type = ExpressionType.String },
Parameters = { { "SheetName", new ParameterSpecification { Type = ParameterType.String } } },
Schema = listOfDataSetElements
}
);
client.Datasets.CreateOrUpdate(resourceGroup, dataFactoryName, blobDatasetName, excelDataset);
string sinkDatasetName = "DatabaseTableDataset";
DatasetResource dbTableDataset = new DatasetResource(
new AzureSqlTableDataset
{
LinkedServiceName = new LinkedServiceReference { ReferenceName = postgreSQLLinkedServiceName },
TableName = "<YourTableName>"
}
);
client.Datasets.CreateOrUpdate(resourceGroup, dataFactoryName, sinkDatasetName, dbTableDataset);
// Define and Create a Copy Activity in a Pipeline
string pipelineName = "ExcelToDbPipeline";
var copyActivity = new CopyActivity
{
Name = "CopyDataFromExcelToDb",
Inputs = new List<DatasetReference> { new DatasetReference { ReferenceName = blobDatasetName } },
Outputs = new List<DatasetReference> { new DatasetReference { ReferenceName = sinkDatasetName } },
Source = new ExcelSource { },
Sink = new SqlSink { }
};
PipelineResource pipeline = new PipelineResource
{
Activities = new List<Activity> { copyActivity }
};
client.Pipelines.CreateOrUpdate(resourceGroup, dataFactoryName, pipelineName, pipeline);
// Run the Pipeline
CreateRunResponse runResponse = client.Pipelines.CreateRunWithHttpMessagesAsync(resourceGroup, dataFactoryName, pipelineName).Result.Body;
string runId = runResponse.RunId;