I have an Azure append blob (sharing.json) with content-type: application/json. I am trying to ingest this into a Kusto database with Azure Data Factory (ADF), but the ingestion always fails. I get the following error in the ADF output:
"errors": [
{
"Code": 23302,
"Message": "ErrorCode=KustoWriteFailed,'Type=Microsoft.DataTransfer.Common.Shared.HybridDeliveryException,Message=Write to Kusto failed with following error: 'An error occurred for source: 'DataReader'. Error: '''.,Source=Microsoft.DataTransfer.Runtime.KustoConnector,''Type=Kusto.Ingest.Exceptions.IngestClientException,Message=An error occurred for source: 'DataReader'. Error: '',Source=Kusto.Ingest,'",
"EventType": 0,
"Category": 5,
"Data": {},
"MsgId": null,
"ExceptionType": null,
"Source": null,
"StackTrace": null,
"InnerEventInfos": []
}
]
I tried getting help from ChatGPT and other online resources, but no luck so far.
This is my ADF activity config:
{
"name": "CopyPipeline_k0h",
"properties": {
"activities": [
{
"name": "Copy_k0h",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 3,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [
{
"name": "Source",
"value": "sil-xms-load-max-data//sharing.json"
},
{
"name": "Destination",
"value": "AggregatedSharingTest_v1"
}
],
"typeProperties": {
"source": {
"type": "JsonSource",
"storeSettings": {
"type": "AzureBlobStorageReadSettings",
"recursive": true,
"enablePartitionDiscovery": false
},
"formatSettings": {
"type": "JsonReadSettings"
}
},
"sink": {
"type": "AzureDataExplorerSink",
"ingestionMappingName": "",
"additionalProperties": {
"tags": "drop-by:loadtest",
"format": "multijson"
}
},
"enableStaging": false,
"validateDataConsistency": false,
"logSettings": {
"enableCopyActivityLog": true,
"copyActivityLogSettings": {
"logLevel": "Info",
"enableReliableLogging": true
},
"logLocationSettings": {
"linkedServiceName": {
"referenceName": "LoadTestBlob",
"type": "LinkedServiceReference"
},
"path": "debug-logs"
}
},
"translator": {
"type": "TabularTranslator",
"mappings": [
{
"source": {
"path": "$['deviceId']"
},
"sink": {
"name": "deviceId",
"type": "String"
}
},
{
"source": {
"path": "$['tenant']"
},
"sink": {
"name": "tenant",
"type": "String"
}
},
{
"source": {
"path": "$['tagsSerialNo']"
},
"sink": {
"name": "tagsSerialNo",
"type": "String"
}
},
{
"source": {
"path": "$['metricSum']"
},
"sink": {
"name": "metricSum",
"type": "Int64"
}
},
{
"source": {
"path": "$['metricCount']"
},
"sink": {
"name": "metricCount",
"type": "Int64"
}
},
{
"source": {
"path": "$['notMetricCount']"
},
"sink": {
"name": "notMetricCount",
"type": "Int64"
}
},
{
"source": {
"path": "$['timestamp']"
},
"sink": {
"name": "timestamp",
"type": "DateTime"
}
}
],
"collectionReference": ""
}
},
"inputs": [
{
"referenceName": "SourceDataset_k0h",
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "DestinationDataset_k0h",
"type": "DatasetReference"
}
]
}
],
"annotations": [],
"lastPublishTime": "2023-04-18T11:30:35Z"
},
"type": "Microsoft.DataFactory/factories/pipelines"
}
This is the destination dataset config on ADF:
{
"name": "DestinationDataset_k0h",
"properties": {
"linkedServiceName": {
"referenceName": "LoadTestDump",
"type": "LinkedServiceReference"
},
"annotations": [],
"type": "AzureDataExplorerTable",
"schema": [
{
"name": "deviceId",
"type": "string"
},
{
"name": "tenant",
"type": "string"
},
{
"name": "tagsSerialNo",
"type": "string"
},
{
"name": "metricSum",
"type": "long"
},
{
"name": "metricCount",
"type": "long"
},
{
"name": "notMetricCount",
"type": "long"
},
{
"name": "timestamp",
"type": "datetime"
}
],
"typeProperties": {
"table": "AggregatedSharingTest_v1"
}
},
"type": "Microsoft.DataFactory/factories/datasets"
}
This is the Azure blob storage config on ADF:
{
"name": "SourceDataset_k0h",
"properties": {
"linkedServiceName": {
"referenceName": "LoadTestBlob",
"type": "LinkedServiceReference"
},
"annotations": [],
"type": "Json",
"typeProperties": {
"location": {
"type": "AzureBlobStorageLocation",
"fileName": "sharing.json",
"container": "sil-xms-load-max-data"
}
},
"schema": {
"type": "object",
"properties": {
"deviceId": {
"type": "string"
},
"tenant": {
"type": "string"
},
"tagsSerialNo": {
"type": "string"
},
"metricSum": {
"type": "integer"
},
"metricCount": {
"type": "integer"
},
"notMetricCount": {
"type": "integer"
},
"timestamp": {
"type": "string"
}
}
}
},
"type": "Microsoft.DataFactory/factories/datasets"
}
I have tested both the source and destination connections on the Azure portal and they look good. I am not sure what exactly is going wrong, since the pipeline runs and the run details show data read and data written, but the data is never available in the Kusto table for querying, and the pipeline eventually fails with the above error.
I tried with your input JSON from storage account and your pipeline JSON and ended up with same error.
In your case, the reason for this error is the additionalProperties setting in the copy activity sink.
When I removed the additionalProperties setting, I was able to copy the data successfully.
I have 4 rows of data in the Kusto table, and you can see the two rows inserted from the source by the copy activity after removing additionalProperties.
Data in target table:
This is my Pipeline JSON for your reference:
{
"name": "pipeline2",
"properties": {
"activities": [
{
"name": "Copy data1",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [
{
"name": "Source",
"value": "data//myjson.json"
},
{
"name": "Destination",
"value": "table1"
}
],
"typeProperties": {
"source": {
"type": "JsonSource",
"storeSettings": {
"type": "AzureBlobFSReadSettings",
"recursive": true,
"enablePartitionDiscovery": false
},
"formatSettings": {
"type": "JsonReadSettings"
}
},
"sink": {
"type": "AzureDataExplorerSink",
"ingestionMappingName": ""
},
"enableStaging": false,
"logSettings": {
"enableCopyActivityLog": true,
"copyActivityLogSettings": {
"logLevel": "Info",
"enableReliableLogging": true
},
"logLocationSettings": {
"linkedServiceName": {
"referenceName": "AzureDataLakeStorage2",
"type": "LinkedServiceReference"
},
"path": "data/debug-logs"
}
},
"translator": {
"type": "TabularTranslator",
"mappings": [
{
"source": {
"path": "$['deviceId']"
},
"sink": {
"name": "deviceId",
"type": "String"
}
},
{
"source": {
"path": "$['tenant']"
},
"sink": {
"name": "tenant",
"type": "Guid"
}
},
{
"source": {
"path": "$['tagsSerialNo']"
},
"sink": {
"name": "tagsSerialNo",
"type": "String"
}
},
{
"source": {
"path": "$['metricSum']"
},
"sink": {
"name": "metricSum",
"type": "Int64"
}
},
{
"source": {
"path": "$['metricCount']"
},
"sink": {
"name": "metricCount",
"type": "Int64"
}
},
{
"source": {
"path": "$['notMetricCount']"
},
"sink": {
"name": "notMetricCount",
"type": "Int64"
}
},
{
"source": {
"path": "$['timestamp']"
},
"sink": {
"name": "timestamp",
"type": "DateTime"
}
}
],
"collectionReference": ""
}
},
"inputs": [
{
"referenceName": "Json1",
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "AzureDataExplorerTable1",
"type": "DatasetReference"
}
]
}
],
"annotations": []
}
}