Hi,
We have a pipeline that reads data from Azure Data Explorer (ADX). I want to send this data to Event Hubs. To do that, I use a ForEach activity that iterates over the rows returned from ADX, with a Web activity inside it that sends each row to an event hub. I do it this way because each event must be routed to the correct event hub according to a mapping file. The pipeline JSON is included below.
My concern is this: if I get 700,000 rows in a day, the activity inside the ForEach loop will run 700,000 times. At a cost of $1 per 1,000 activity runs, that would come to $700 every day. Is this estimate correct?
Can you suggest any other approach, such as batching or something similar, to limit the cost?
Pipeline Code:
{
  "name": "lookup",
  "properties": {
    "activities": [
      {
        "name": "f_FanFailureRCA",
        "type": "Lookup",
        "dependsOn": [
          {
            "activity": "Read Evenhub Mapping",
            "dependencyConditions": ["Succeeded"]
          }
        ],
        "policy": {
          "timeout": "0.12:00:00",
          "retry": 0,
          "retryIntervalInSeconds": 30,
          "secureOutput": false,
          "secureInput": false
        },
        "userProperties": [],
        "typeProperties": {
          "source": {
            "type": "AzureDataExplorerSource",
            "query": {
              "value": "f_FanFailureRCA(datetime(@{addMinutes(pipeline().parameters.StartDateTime, -05)}), datetime(@{pipeline().parameters.StartDateTime}))| extend SentByAsaTime = now(), RootCauseFailureReason = pack_dictionary(\"DeviceNames\", RootCauseFailureReason.DeviceNames,\"FanInfo\", RootCauseFailureReason.FanInfo,\"EventTimestamp\", RootCauseFailureReason.EventTimestamp,\"GS_RCA_TIMESTAMP\", RootCauseFailureReason.GS_RCA_TIMESTAMP,\"LastOutlierCounterTimeStamp\", now()) | project DeviceName, Region, RootCauseFailureReason, RootCause, SentByAsaTime",
              "type": "Expression"
            },
            "queryTimeout": "00:10:00",
            "noTruncation": true
          },
          "dataset": {
            "referenceName": "DS_Azdhbackup",
            "type": "DatasetReference",
            "parameters": {
              "KustoDatabaseName": {
                "value": "@pipeline().parameters.KustoSourceDatabaseName",
                "type": "Expression"
              }
            }
          },
          "firstRowOnly": false
        }
      },
      {
        "name": "ForEachErrors",
        "type": "ForEach",
        "dependsOn": [
          {
            "activity": "f_FanFailureRCA",
            "dependencyConditions": ["Succeeded"]
          }
        ],
        "userProperties": [],
        "typeProperties": {
          "items": {
            "value": "@activity('f_FanFailureRCA').output.value",
            "type": "Expression"
          },
          "activities": [
            {
              "name": "Send to Event Hub Map",
              "type": "WebActivity",
              "dependsOn": [],
              "policy": {
                "timeout": "0.12:00:00",
                "retry": 0,
                "retryIntervalInSeconds": 30,
                "secureOutput": false,
                "secureInput": false
              },
              "userProperties": [],
              "typeProperties": {
                "url": {
                  "value": "@concat('https://',activity('Read Evenhub Mapping').output.StreamingSources.EventHubSources[item().Region].Namespace, '.servicebus.windows.net/',activity('Read Evenhub Mapping').output.StreamingSources.EventHubSources[item().Region].EventhubName,'/messages')",
                  "type": "Expression"
                },
                "method": "POST",
                "body": {
                  "value": "@item()",
                  "type": "Expression"
                },
                "authentication": {
                  "type": "MSI",
                  "resource": "https://eventhubs.azure.net"
                }
              }
            }
          ]
        }
      },
      {
        "name": "Read Evenhub Mapping",
        "type": "WebActivity",
        "dependsOn": [],
        "policy": {
          "timeout": "0.12:00:00",
          "retry": 0,
          "retryIntervalInSeconds": 30,
          "secureOutput": false,
          "secureInput": false
        },
        "userProperties": [],
        "typeProperties": {
          "url": "https://abcd.blob.core.windows.net/datafactory-mappings/eventhub-adf.json",
          "method": "GET",
          "headers": {
            "x-ms-version": "2022-11-02"
          },
          "authentication": {
            "type": "MSI",
            "resource": "https://storage.azure.com"
          }
        }
      }
    ],
    "parameters": {
      "StartDateTime": {
        "type": "string",
        "defaultValue": "2023-05-05 07:46:40.1401677"
      },
      "KustoSourceDatabaseName": {
        "type": "string",
        "defaultValue": "azdhsd"
      },
      "EventHubMapping": {
        "type": "object",
        "defaultValue": {
          "values": {
            "EventHub": "https://test.servicebus.windows.net/test/messages"
          }
        }
      }
    },
    "variables": {
      "Mapping": {
        "type": "String"
      }
    },
    "folder": {
      "name": "Testing"
    },
    "annotations": [],
    "lastPublishTime": "2023-05-25T03:44:32Z"
  },
  "type": "Microsoft.DataFactory/factories/pipelines"
}