Hello @Manoj Sharma,
Welcome to the MS Q&A platform.
Here is a video tutorial explaining how to process files based on the last modified datetime.
One modification is needed:
On the Get Metadata2 activity, filter the file names using the expression @startsWith(item().name, 'Delta')
so that only the files whose names start with 'Delta' are processed.
Pipeline JSON:
{
"name": "Pipeline2",
"properties": {
"activities": [
{
"name": "get-files-metadata",
"type": "GetMetadata",
"dependsOn": [],
"policy": {
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"dataset": {
"referenceName": "ds_gen2_csv",
"type": "DatasetReference"
},
"fieldList": [
"childItems"
],
"storeSettings": {
"type": "AzureBlobFSReadSettings",
"enablePartitionDiscovery": false
},
"formatSettings": {
"type": "DelimitedTextReadSettings"
}
},
"linkedServiceName": {
"referenceName": "bhargava-synapse2-WorkspaceDefaultStorage",
"type": "LinkedServiceReference"
}
},
{
"name": "filter-delta-files",
"type": "ForEach",
"dependsOn": [
{
"activity": "get-files-metadata",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"items": {
"value": "@activity('get-files-metadata').output.childItems",
"type": "Expression"
},
"isSequential": true,
"activities": [
{
"name": "Get Metadata1",
"type": "GetMetadata",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"dataset": {
"referenceName": "gen2_getmetadata_ds2",
"type": "DatasetReference"
},
"fieldList": [
"lastModified",
"itemName"
],
"storeSettings": {
"type": "AzureBlobFSReadSettings",
"enablePartitionDiscovery": false
},
"formatSettings": {
"type": "DelimitedTextReadSettings"
}
}
},
{
"name": "If Condition1",
"type": "IfCondition",
"dependsOn": [
{
"activity": "Get Metadata1",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"expression": {
"value": "@greater(activity('Get Metadata1').output.lastModified,variables('refdatetime'))",
"type": "Expression"
},
"ifTrueActivities": [
{
"name": "Set variable1",
"type": "SetVariable",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"variableName": "refdatetime",
"value": {
"value": "@activity('Get Metadata1').output.lastModified",
"type": "Expression"
}
}
},
{
"name": "Set variable2",
"type": "SetVariable",
"dependsOn": [
{
"activity": "Set variable1",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"variableName": "latestfile",
"value": {
"value": "@activity('Get Metadata1').output.itemName",
"type": "Expression"
}
}
}
]
}
}
]
},
"activities": [
{
"name": "extract-last-modified-datetime",
"type": "SetVariable",
"dependsOn": [
{
"activity": "filter-delta-files",
"dependencyConditions": [
"Succeeded"
]
}
],
"typeProperties": {
"variableName": "lastModifiedDatetime",
"value": {
"value": "@substring(item().name, 16, 14)",
"type": "Expression"
}
}
},
{
"name": "process-delta-file",
"type": "Copy",
"dependsOn": [
{
"activity": "extract-last-modified-datetime",
"dependencyConditions": [
"Succeeded"
]
}
],
"inputs": [
{
"referenceName": "ds_gen2_csv",
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "ds_gen2_csv",
"type": "DatasetReference"
}
],
"typeProperties": {
"source": {
"type": "DelimitedText",
"storeSettings": {
"type": "AzureBlobStorageReadSettings",
"recursive": true,
"wildcardFileName": {
"value": "@concat('Delta_*_', variables('lastModifiedDatetime'), '.csv')",
"type": "Expression"
}
}
},
"sink": {
"type": "DelimitedText",
"storeSettings": {
"type": "AzureBlobStorageWriteSettings"
}
}
}
}
]
},
{
"name": "Copy data1",
"type": "Copy",
"dependsOn": [
{
"activity": "filter-delta-files",
"dependencyConditions": [
"Succeeded"
]
}
],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "DelimitedTextSource",
"storeSettings": {
"type": "AzureBlobFSReadSettings",
"recursive": true,
"enablePartitionDiscovery": false
},
"formatSettings": {
"type": "DelimitedTextReadSettings"
}
},
"sink": {
"type": "DelimitedTextSink",
"storeSettings": {
"type": "AzureBlobFSWriteSettings"
},
"formatSettings": {
"type": "DelimitedTextWriteSettings",
"quoteAllText": true,
"fileExtension": ".txt"
}
},
"enableStaging": false,
"translator": {
"type": "TabularTranslator",
"typeConversion": true,
"typeConversionSettings": {
"allowDataTruncation": true,
"treatBooleanAsNumber": false
}
}
},
"inputs": [
{
"referenceName": "gen2_getmetadata_ds2",
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "DelimitedText6",
"type": "DatasetReference",
"parameters": {
"filename": {
"value": "@variables('latestfile')",
"type": "Expression"
}
}
}
]
}
],
"variables": {
"latestfile": {
"type": "String"
},
"refdatetime": {
"type": "String",
"defaultValue": "1900-01-01 00:00:00"
}
},
"annotations": []
}
}
I hope this helps. Please let me know if you have any further questions.