How to create the Data set, pipeline and run it, for excel format using C sharp for Data factory

Ashish Sharma 0 Reputation points
2024-03-08T08:18:17.3666667+00:00

I want to create and run my pipeline from the repo using C#.

my code is

        string postgreSQLLinkedServiceName = "PostgreSQLLinkedService";

        LinkedServiceResource postgreSqlLinkedService = new LinkedServiceResource(

            new AzurePostgreSqlLinkedService

            {

                ConnectionString = new SecureString(postgreSqlConnString)

            }

        );

        client.LinkedServices.CreateOrUpdate(resourceGroup, dataFactoryName, postgreSQLLinkedServiceName, postgreSqlLinkedService);

        // Create a dataset for source Azure Blob

        var listOfDataSetElements = GenerateListOfDataElements(dataSourceTableWithColumns);

        string blobDatasetName = "AzureBlobDataSetexcel";

        ExcelSource blobDataset = new ExcelSource(

                                                            new ExcelDataset

                                                            {

                                                                LinkedServiceName = new LinkedServiceReference

                                                                {

                                                                    ReferenceName = storageLinkedServiceName

                                                                },

                                                                Folder = $"tagwaye-files/{model.CompanyId}/{model.ProjectId}/DataSources",

                                                                FirstRowAsHeader = model.IsFirstHeaderAsColumn,

                                                                SheetName = model.SheetName,

                                                                Structure = listOfDataSetElements

                                                            }

                                                        );
Azure Data Factory
Azure Data Factory
An Azure service for ingesting, preparing, and transforming data at scale.
9,545 questions
C#
C#
An object-oriented and type-safe programming language that has its roots in the C family of languages and includes support for component-oriented programming.
10,249 questions
0 comments No comments
{count} votes

2 answers

Sort by: Most helpful
  1. Amira Bedhiafi 15,216 Reputation points
    2024-03-10T15:19:53.9066667+00:00

    Depending on where you want to store the processed data, you need to create a sink dataset.

    Define the activities that will be part of your pipeline. This could involve copying data, transforming data... If you're just moving data from Excel to another source, you might use the CopyActivity.

    Create the Pipeline: With datasets and activities defined, create a pipeline that uses these components and then trigger the pipeline run either on-demand or on a schedule.

    
    using Microsoft.Azure.Management.DataFactory;
    
    using Microsoft.Azure.Management.DataFactory.Models;
    
    using System.Security;
    
    
    
    string storageLinkedServiceName = "<YourStorageLinkedServiceName>";
    
    string postgreSQLLinkedServiceName = "PostgreSQLLinkedService";
    
    
    
    LinkedServiceResource postgreSqlLinkedService = new LinkedServiceResource(
    
        new AzurePostgreSqlLinkedService
    
        {
    
            ConnectionString = new SecureString("<YourConnectionString>")
    
        }
    
    );
    
    client.LinkedServices.CreateOrUpdate(resourceGroup, dataFactoryName, postgreSQLLinkedServiceName, postgreSqlLinkedService);
    
    // Create Excel Dataset for Source Azure Blob
    
    var listOfDataSetElements = GenerateListOfDataElements("<YourDataSourceTableWithColumns>");
    
    string blobDatasetName = "AzureBlobDataSetexcel";
    
    DatasetResource excelDataset = new DatasetResource(
    
        new ExcelDataset
    
        {
    
            LinkedServiceName = new LinkedServiceReference { ReferenceName = storageLinkedServiceName },
    
            FolderPath = new Expression { Value = @"<YourFolderPath>", Type = ExpressionType.String },
    
            Parameters = { { "SheetName", new ParameterSpecification { Type = ParameterType.String } } },
    
            Schema = listOfDataSetElements
    
        }
    
    );
    
    client.Datasets.CreateOrUpdate(resourceGroup, dataFactoryName, blobDatasetName, excelDataset);
    
    
    
    string sinkDatasetName = "DatabaseTableDataset";
    
    DatasetResource dbTableDataset = new DatasetResource(
    
        new AzureSqlTableDataset
    
        {
    
            LinkedServiceName = new LinkedServiceReference { ReferenceName = postgreSQLLinkedServiceName },
    
            TableName = "<YourTableName>"
    
        }
    
    );
    
    client.Datasets.CreateOrUpdate(resourceGroup, dataFactoryName, sinkDatasetName, dbTableDataset);
    
    // Define and Create a Copy Activity in a Pipeline
    
    string pipelineName = "ExcelToDbPipeline";
    
    var copyActivity = new CopyActivity
    
    {
    
        Name = "CopyDataFromExcelToDb",
    
        Inputs = new List<DatasetReference> { new DatasetReference { ReferenceName = blobDatasetName } },
    
        Outputs = new List<DatasetReference> { new DatasetReference { ReferenceName = sinkDatasetName } },
    
        Source = new ExcelSource { },
    
        Sink = new SqlSink { }
    
    };
    
    PipelineResource pipeline = new PipelineResource
    
    {
    
        Activities = new List<Activity> { copyActivity }
    
    };
    
    client.Pipelines.CreateOrUpdate(resourceGroup, dataFactoryName, pipelineName, pipeline);
    
    // Run the Pipeline
    
    CreateRunResponse runResponse = client.Pipelines.CreateRunWithHttpMessagesAsync(resourceGroup, dataFactoryName, pipelineName).Result.Body;
    
    string runId = runResponse.RunId;
    
    
    0 comments No comments

  2. ShaikMaheer-MSFT 37,896 Reputation points Microsoft Employee
    2024-03-28T10:06:28.0733333+00:00

    Hi Ashish Sharma,

    Thank you for posting query in Microsoft Q&A Platform.

    Below is the sample code, which may helps you to create and run pipeline in ADF.

    using Microsoft.Azure.Management.DataFactory;
    using Microsoft.Azure.Management.DataFactory.Models;
    using Microsoft.IdentityModel.Clients.ActiveDirectory;
    using Microsoft.Rest;
    using System;
    using System.Collections.Generic;
    using System.Threading.Tasks;
    
    namespace AdfPipeline
    {
        class Program
        {
            static void Main(string[] args)
            {
                // Set the Azure Data Factory properties
                string tenantId = "<your-tenant-id>";
                string clientId = "<your-client-id>";
                string clientSecret = "<your-client-secret>";
                string subscriptionId = "<your-subscription-id>";
                string resourceGroupName = "<your-resource-group-name>";
                string dataFactoryName = "<your-data-factory-name>";
                string pipelineName = "<your-pipeline-name>";
    
                // Authenticate with Azure Active Directory
                var context = new AuthenticationContext($"https://login.microsoftonline.com/{tenantId}");
                var credential = new ClientCredential(clientId, clientSecret);
                var result = context.AcquireTokenAsync("https://management.azure.com/", credential).Result;
                if (result == null)
                {
                    throw new InvalidOperationException("Failed to obtain the JWT token");
                }
    
                // Create the Data Factory management client
                var tokenCredentials = new TokenCredentials(result.AccessToken);
                var dataFactoryManagementClient = new DataFactoryManagementClient(tokenCredentials)
                {
                    SubscriptionId = subscriptionId
                };
    
                // Create the pipeline
                var pipeline = new PipelineResource
                {
                    Activities = new List<Activity>
                    {
                        new CopyActivity
                        {
                            Name = "CopyData",
                            Inputs = new List<DatasetReference>
                            {
                                new DatasetReference
                                {
                                    ReferenceName = "<your-input-dataset-name>"
                                }
                            },
                            Outputs = new List<DatasetReference>
                            {
                                new DatasetReference
                                {
                                    ReferenceName = "<your-output-dataset-name>"
                                }
                            },
                            Source = new BlobSource
                            {
                                // Set the source properties
                            },
                            Sink = new BlobSink
                            {
                                // Set the sink properties
                            },
                            Translator = new TabularTranslator
                            {
                                // Set the translator properties
                            }
                        }
                    }
                };
    
                // Create the pipeline in the Data Factory
                dataFactoryManagementClient.Pipelines.CreateOrUpdate(resourceGroupName, dataFactoryName, pipelineName, pipeline);
    
                // Run