Using the ADF collection reference, we transform the XML into JSON format. While this works for multiple records, it fails with a single XML record.
The error is:
ErrorCode=UserErrorInvalidPathForCollectionReferenceNode,'Type=Microsoft.DataTransfer.Common.Shared.HybridDeliveryException,Message=Invalid json path '$['file-data']['files']['row']' for collection reference node
A sample file with two rows that works is below:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<file-data xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <files>
        <row record="01">
            <FilePath>/MYDIRECTORY/MYTest/</FilePath>
            <ValidatedBy>Joe Blogg</ValidatedBy>
        </row>
        <row record="02">
            <FilePath>/MYDIRECTORY/MYTest01/</FilePath>
            <ValidatedBy>Jose</ValidatedBy>
        </row>
    </files>
</file-data>
This single-row file does not work:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<file-data xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <files>
        <row record="01">
            <FilePath>/MYDIRECTORY/MYTest/</FilePath>
            <ValidatedBy>Joe Blogg</ValidatedBy>
        </row>
    </files>
</file-data>
The required sample output:
{"FilePath":"/MYDIRECTORY/MYTest/","ValidatedBy":"Joe Blogg"}
This has also been reported here: Azure Data Factory - Collection reference failing when only 1 record.
Any suggestions?
When the collection reference is defined as $['file-data']['files']['row'] and there is only one row, ADF treats the node as an object rather than an array, which is likely the reason for the above error. To get your required output, follow the procedure below:
Go to the mapping, leave the collection reference empty, enable the advanced editor, and define the columns as below:
FilePath --> $['file-data']['files']['row']['FilePath']
ValidatedBy --> $['file-data']['files']['row']['ValidatedBy']
Then you will get the JSON as shown below:
{"FilePath":"/MYDIRECTORY/MYTest/","ValidatedBy":"Joe Blogg"}
Use a lookup activity to read the file, and count the rows in the file using @length(activity('Lookup1').output.value[0]['file-data'].files.row).
After reading the file, add an If Condition activity after the lookup activity with the condition:
@greater(length(activity('Lookup1').output.value[0]['file-data'].files.row),1)
This checks whether the row count is greater than 1. If true, add a copy activity with the required source and the mapping below:
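This is the multi-row translator, excerpted from the full pipeline JSON at the end of this answer; with multiple rows, the collection reference works as usual:

"translator": {
    "type": "TabularTranslator",
    "mappings": [
        {
            "source": { "path": "['FilePath']" },
            "sink": { "path": "FilePath" }
        },
        {
            "source": { "path": "['ValidatedBy']" },
            "sink": { "path": "ValidatedBy" }
        }
    ],
    "collectionReference": "$['file-data']['files']['row']"
}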
If false, add a copy activity with the required source and the mapping below:
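This is the single-row translator, also excerpted from the pipeline JSON below: full JSON paths in each column mapping and an empty collection reference, exactly the quick fix described above:

"translator": {
    "type": "TabularTranslator",
    "mappings": [
        {
            "source": { "path": "$['file-data']['files']['row']['FilePath']" },
            "sink": { "path": "FilePath" }
        },
        {
            "source": { "path": "$['file-data']['files']['row']['ValidatedBy']" },
            "sink": { "path": "ValidatedBy" }
        }
    ],
    "collectionReference": ""
}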
The pipeline will then behave according to the row count. Here is the pipeline JSON for your requirement:
{
    "name": "pipeline4",
    "properties": {
        "activities": [
            {
                "name": "Lookup1",
                "type": "Lookup",
                "dependsOn": [],
                "policy": {
                    "timeout": "0.12:00:00",
                    "retry": 0,
                    "retryIntervalInSeconds": 30,
                    "secureOutput": false,
                    "secureInput": false
                },
                "userProperties": [],
                "typeProperties": {
                    "source": {
                        "type": "XmlSource",
                        "storeSettings": {
                            "type": "AzureBlobFSReadSettings",
                            "recursive": true,
                            "enablePartitionDiscovery": false
                        },
                        "formatSettings": {
                            "type": "XmlReadSettings",
                            "validationMode": "none",
                            "namespaces": true
                        }
                    },
                    "dataset": {
                        "referenceName": "Xmlmulr",
                        "type": "DatasetReference"
                    },
                    "firstRowOnly": false
                }
            },
            {
                "name": "If Condition1",
                "type": "IfCondition",
                "dependsOn": [
                    {
                        "activity": "Lookup1",
                        "dependencyConditions": [
                            "Succeeded"
                        ]
                    }
                ],
                "userProperties": [],
                "typeProperties": {
                    "expression": {
                        "value": "@greater(length(activity('Lookup1').output.value[0]['file-data'].files.row),1)",
                        "type": "Expression"
                    },
                    "ifFalseActivities": [
                        {
                            "name": "Copy data1_copy1",
                            "type": "Copy",
                            "dependsOn": [],
                            "policy": {
                                "timeout": "0.12:00:00",
                                "retry": 0,
                                "retryIntervalInSeconds": 30,
                                "secureOutput": false,
                                "secureInput": false
                            },
                            "userProperties": [],
                            "typeProperties": {
                                "source": {
                                    "type": "XmlSource",
                                    "storeSettings": {
                                        "type": "AzureBlobFSReadSettings",
                                        "recursive": true,
                                        "enablePartitionDiscovery": false
                                    },
                                    "formatSettings": {
                                        "type": "XmlReadSettings",
                                        "validationMode": "none",
                                        "namespaces": true
                                    }
                                },
                                "sink": {
                                    "type": "JsonSink",
                                    "storeSettings": {
                                        "type": "AzureBlobFSWriteSettings"
                                    },
                                    "formatSettings": {
                                        "type": "JsonWriteSettings"
                                    }
                                },
                                "enableStaging": false,
                                "translator": {
                                    "type": "TabularTranslator",
                                    "mappings": [
                                        {
                                            "source": {
                                                "path": "$['file-data']['files']['row']['FilePath']"
                                            },
                                            "sink": {
                                                "path": "FilePath"
                                            }
                                        },
                                        {
                                            "source": {
                                                "path": "$['file-data']['files']['row']['ValidatedBy']"
                                            },
                                            "sink": {
                                                "path": "ValidatedBy"
                                            }
                                        }
                                    ],
                                    "collectionReference": ""
                                }
                            },
                            "inputs": [
                                {
                                    "referenceName": "Xml1",
                                    "type": "DatasetReference"
                                }
                            ],
                            "outputs": [
                                {
                                    "referenceName": "Json1",
                                    "type": "DatasetReference"
                                }
                            ]
                        }
                    ],
                    "ifTrueActivities": [
                        {
                            "name": "Copy data1",
                            "type": "Copy",
                            "dependsOn": [],
                            "policy": {
                                "timeout": "0.12:00:00",
                                "retry": 0,
                                "retryIntervalInSeconds": 30,
                                "secureOutput": false,
                                "secureInput": false
                            },
                            "userProperties": [],
                            "typeProperties": {
                                "source": {
                                    "type": "XmlSource",
                                    "storeSettings": {
                                        "type": "AzureBlobFSReadSettings",
                                        "recursive": true,
                                        "enablePartitionDiscovery": false
                                    },
                                    "formatSettings": {
                                        "type": "XmlReadSettings",
                                        "validationMode": "none",
                                        "namespaces": true
                                    }
                                },
                                "sink": {
                                    "type": "JsonSink",
                                    "storeSettings": {
                                        "type": "AzureBlobFSWriteSettings"
                                    },
                                    "formatSettings": {
                                        "type": "JsonWriteSettings"
                                    }
                                },
                                "enableStaging": false,
                                "translator": {
                                    "type": "TabularTranslator",
                                    "mappings": [
                                        {
                                            "source": {
                                                "path": "['FilePath']"
                                            },
                                            "sink": {
                                                "path": "FilePath"
                                            }
                                        },
                                        {
                                            "source": {
                                                "path": "['ValidatedBy']"
                                            },
                                            "sink": {
                                                "path": "ValidatedBy"
                                            }
                                        }
                                    ],
                                    "collectionReference": "$['file-data']['files']['row']"
                                }
                            },
                            "inputs": [
                                {
                                    "referenceName": "Xmlmulr",
                                    "type": "DatasetReference"
                                }
                            ],
                            "outputs": [
                                {
                                    "referenceName": "Json1",
                                    "type": "DatasetReference"
                                }
                            ]
                        }
                    ]
                }
            }
        ],
        "variables": {
            "rowcount": {
                "type": "Integer"
            },
            "row": {
                "type": "String"
            },
            "r1": {
                "type": "Array"
            }
        },
        "annotations": []
    }
}