Skip to content

Commit

Permalink
Updating pipeline: dataset_processing
Browse files (browse the repository at this point in the history)
Adding dataset: workspace
Updating dataset: preprocessed_dataset_sink
  • Loading branch information
mahdikhashan committed Dec 29, 2024
1 parent a46ca8d commit ea2423a
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 1 deletion.
3 changes: 3 additions & 0 deletions dataset/preprocessed_dataset_sink.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
"location": {
"type": "AzureBlobStorageLocation",
"container": "sink"
},
"compression": {
"type": "TarGZip"
}
}
}
Expand Down
17 changes: 17 additions & 0 deletions dataset/workspace.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"name": "workspace",
"properties": {
"linkedServiceName": {
"referenceName": "Dataset",
"type": "LinkedServiceReference"
},
"annotations": [],
"type": "Binary",
"typeProperties": {
"location": {
"type": "AzureBlobStorageLocation",
"container": "workspace"
}
}
}
}
121 changes: 120 additions & 1 deletion pipeline/dataset_processing.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
"sink": {
"type": "BinarySink",
"storeSettings": {
- "type": "AzureBlobStorageWriteSettings"
+ "type": "AzureBlobStorageWriteSettings",
+ "copyBehavior": "FlattenHierarchy"
}
},
"enableStaging": false
Expand All @@ -40,6 +41,119 @@
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "workspace",
"type": "DatasetReference"
}
]
},
{
"name": "preprocess_dataset",
"type": "AzureFunctionActivity",
"dependsOn": [
{
"activity": "unzip",
"dependencyConditions": [
"Succeeded"
]
}
],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": []
},
{
"name": "unzip",
"type": "AzureFunctionActivity",
"dependsOn": [
{
"activity": "duplicate_data",
"dependencyConditions": [
"Succeeded"
]
}
],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": []
},
{
"name": "compress",
"type": "AzureFunctionActivity",
"dependsOn": [
{
"activity": "preprocess_dataset",
"dependencyConditions": [
"Succeeded"
]
}
],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": []
},
{
"name": "Copy data1",
"type": "Copy",
"dependsOn": [
{
"activity": "compress",
"dependencyConditions": [
"Succeeded"
]
}
],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "BinarySource",
"storeSettings": {
"type": "AzureBlobStorageReadSettings",
"recursive": true,
"deleteFilesAfterCompletion": false
},
"formatSettings": {
"type": "BinaryReadSettings"
}
},
"sink": {
"type": "BinarySink",
"storeSettings": {
"type": "AzureBlobStorageWriteSettings",
"copyBehavior": "FlattenHierarchy"
}
},
"enableStaging": false
},
"inputs": [
{
"referenceName": "workspace",
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "preprocessed_dataset_sink",
Expand All @@ -48,6 +162,11 @@
]
}
],
"variables": {
"file": {
"type": "Array"
}
},
"annotations": []
}
}

0 comments on commit ea2423a

Please sign in to comment.