Search code examples
Tags: azure, azure-blob-storage, azure-data-factory, azure-data-explorer, azureportal

Error while ingesting data from an Azure append blob into a Kusto database using Azure Data Factory


I have an Azure append blob (sharing.json) with content-type application/json. I am trying to ingest it into a Kusto database with Azure Data Factory (ADF), but the ingestion always fails. I get the following error in the ADF output:

"errors": [
        {
            "Code": 23302,
            "Message": "ErrorCode=KustoWriteFailed,'Type=Microsoft.DataTransfer.Common.Shared.HybridDeliveryException,Message=Write to Kusto failed with following error: 'An error occurred for source: 'DataReader'. Error: '''.,Source=Microsoft.DataTransfer.Runtime.KustoConnector,''Type=Kusto.Ingest.Exceptions.IngestClientException,Message=An error occurred for source: 'DataReader'. Error: '',Source=Kusto.Ingest,'",
            "EventType": 0,
            "Category": 5,
            "Data": {},
            "MsgId": null,
            "ExceptionType": null,
            "Source": null,
            "StackTrace": null,
            "InnerEventInfos": []
        }
    ]

I tried getting help from ChatGPT and other online resources, but no luck so far.

This is my ADF activity config:

{
    "name": "CopyPipeline_k0h",
    "properties": {
        "activities": [
            {
                "name": "Copy_k0h",
                "type": "Copy",
                "dependsOn": [],
                "policy": {
                    "timeout": "0.12:00:00",
                    "retry": 3,
                    "retryIntervalInSeconds": 30,
                    "secureOutput": false,
                    "secureInput": false
                },
                "userProperties": [
                    {
                        "name": "Source",
                        "value": "sil-xms-load-max-data//sharing.json"
                    },
                    {
                        "name": "Destination",
                        "value": "AggregatedSharingTest_v1"
                    }
                ],
                "typeProperties": {
                    "source": {
                        "type": "JsonSource",
                        "storeSettings": {
                            "type": "AzureBlobStorageReadSettings",
                            "recursive": true,
                            "enablePartitionDiscovery": false
                        },
                        "formatSettings": {
                            "type": "JsonReadSettings"
                        }
                    },
                    "sink": {
                        "type": "AzureDataExplorerSink",
                        "ingestionMappingName": "",
                        "additionalProperties": {
                            "tags": "drop-by:loadtest",
                            "format": "multijson"
                        }
                    },
                    "enableStaging": false,
                    "validateDataConsistency": false,
                    "logSettings": {
                        "enableCopyActivityLog": true,
                        "copyActivityLogSettings": {
                            "logLevel": "Info",
                            "enableReliableLogging": true
                        },
                        "logLocationSettings": {
                            "linkedServiceName": {
                                "referenceName": "LoadTestBlob",
                                "type": "LinkedServiceReference"
                            },
                            "path": "debug-logs"
                        }
                    },
                    "translator": {
                        "type": "TabularTranslator",
                        "mappings": [
                            {
                                "source": {
                                    "path": "$['deviceId']"
                                },
                                "sink": {
                                    "name": "deviceId",
                                    "type": "String"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['tenant']"
                                },
                                "sink": {
                                    "name": "tenant",
                                    "type": "String"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['tagsSerialNo']"
                                },
                                "sink": {
                                    "name": "tagsSerialNo",
                                    "type": "String"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['metricSum']"
                                },
                                "sink": {
                                    "name": "metricSum",
                                    "type": "Int64"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['metricCount']"
                                },
                                "sink": {
                                    "name": "metricCount",
                                    "type": "Int64"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['notMetricCount']"
                                },
                                "sink": {
                                    "name": "notMetricCount",
                                    "type": "Int64"
                                }
                            },
                            {
                                "source": {
                                    "path": "$['timestamp']"
                                },
                                "sink": {
                                    "name": "timestamp",
                                    "type": "DateTime"
                                }
                            }
                        ],
                        "collectionReference": ""
                    }
                },
                "inputs": [
                    {
                        "referenceName": "SourceDataset_k0h",
                        "type": "DatasetReference"
                    }
                ],
                "outputs": [
                    {
                        "referenceName": "DestinationDataset_k0h",
                        "type": "DatasetReference"
                    }
                ]
            }
        ],
        "annotations": [],
        "lastPublishTime": "2023-04-18T11:30:35Z"
    },
    "type": "Microsoft.DataFactory/factories/pipelines"
}

This is the destination dataset config on ADF:

{
    "name": "DestinationDataset_k0h",
    "properties": {
        "linkedServiceName": {
            "referenceName": "LoadTestDump",
            "type": "LinkedServiceReference"
        },
        "annotations": [],
        "type": "AzureDataExplorerTable",
        "schema": [
            {
                "name": "deviceId",
                "type": "string"
            },
            {
                "name": "tenant",
                "type": "string"
            },
            {
                "name": "tagsSerialNo",
                "type": "string"
            },
            {
                "name": "metricSum",
                "type": "long"
            },
            {
                "name": "metricCount",
                "type": "long"
            },
            {
                "name": "notMetricCount",
                "type": "long"
            },
            {
                "name": "timestamp",
                "type": "datetime"
            }
        ],
        "typeProperties": {
            "table": "AggregatedSharingTest_v1"
        }
    },
    "type": "Microsoft.DataFactory/factories/datasets"
}

This is the Azure blob storage config on ADF:

{
    "name": "SourceDataset_k0h",
    "properties": {
        "linkedServiceName": {
            "referenceName": "LoadTestBlob",
            "type": "LinkedServiceReference"
        },
        "annotations": [],
        "type": "Json",
        "typeProperties": {
            "location": {
                "type": "AzureBlobStorageLocation",
                "fileName": "sharing.json",
                "container": "sil-xms-load-max-data"
            }
        },
        "schema": {
            "type": "object",
            "properties": {
                "deviceId": {
                    "type": "string"
                },
                "tenant": {
                    "type": "string"
                },
                "tagsSerialNo": {
                    "type": "string"
                },
                "metricSum": {
                    "type": "integer"
                },
                "metricCount": {
                    "type": "integer"
                },
                "notMetricCount": {
                    "type": "integer"
                },
                "timestamp": {
                    "type": "string"
                }
            }
        }
    },
    "type": "Microsoft.DataFactory/factories/datasets"
}

I have tested both the source and destination connections in the Azure portal and they look good. I am not sure what exactly is going wrong: the pipeline runs, and the run details show data read and data written, but the data never becomes available in the Kusto table for querying, and the run eventually fails with the error above.


Solution

  • I tried with your input JSON from the storage account and your pipeline JSON, and ended up with the same error.

    enter image description here

    In your case, the reason for this error is additionalProperties in the copy activity sink.

    When I removed additionalProperties, I was able to copy the data successfully.

    enter image description here

    I have 4 rows of data in the Kusto table, and you can see the two rows inserted from the source by the copy activity after removing additionalProperties.

    enter image description here

    Data in target table:

    enter image description here

    This is my Pipeline JSON for your reference:

    {
        "name": "pipeline2",
        "properties": {
            "activities": [
                {
                    "name": "Copy data1",
                    "type": "Copy",
                    "dependsOn": [],
                    "policy": {
                        "timeout": "0.12:00:00",
                        "retry": 0,
                        "retryIntervalInSeconds": 30,
                        "secureOutput": false,
                        "secureInput": false
                    },
                    "userProperties": [
                        {
                            "name": "Source",
                            "value": "data//myjson.json"
                        },
                        {
                            "name": "Destination",
                            "value": "table1"
                        }
                    ],
                    "typeProperties": {
                        "source": {
                            "type": "JsonSource",
                            "storeSettings": {
                                "type": "AzureBlobFSReadSettings",
                                "recursive": true,
                                "enablePartitionDiscovery": false
                            },
                            "formatSettings": {
                                "type": "JsonReadSettings"
                            }
                        },
                        "sink": {
                            "type": "AzureDataExplorerSink",
                            "ingestionMappingName": ""
                        },
                        "enableStaging": false,
                        "logSettings": {
                            "enableCopyActivityLog": true,
                            "copyActivityLogSettings": {
                                "logLevel": "Info",
                                "enableReliableLogging": true
                            },
                            "logLocationSettings": {
                                "linkedServiceName": {
                                    "referenceName": "AzureDataLakeStorage2",
                                    "type": "LinkedServiceReference"
                                },
                                "path": "data/debug-logs"
                            }
                        },
                        "translator": {
                            "type": "TabularTranslator",
                            "mappings": [
                                {
                                    "source": {
                                        "path": "$['deviceId']"
                                    },
                                    "sink": {
                                        "name": "deviceId",
                                        "type": "String"
                                    }
                                },
                                {
                                    "source": {
                                        "path": "$['tenant']"
                                    },
                                    "sink": {
                                        "name": "tenant",
                                        "type": "Guid"
                                    }
                                },
                                {
                                    "source": {
                                        "path": "$['tagsSerialNo']"
                                    },
                                    "sink": {
                                        "name": "tagsSerialNo",
                                        "type": "String"
                                    }
                                },
                                {
                                    "source": {
                                        "path": "$['metricSum']"
                                    },
                                    "sink": {
                                        "name": "metricSum",
                                        "type": "Int64"
                                    }
                                },
                                {
                                    "source": {
                                        "path": "$['metricCount']"
                                    },
                                    "sink": {
                                        "name": "metricCount",
                                        "type": "Int64"
                                    }
                                },
                                {
                                    "source": {
                                        "path": "$['notMetricCount']"
                                    },
                                    "sink": {
                                        "name": "notMetricCount",
                                        "type": "Int64"
                                    }
                                },
                                {
                                    "source": {
                                        "path": "$['timestamp']"
                                    },
                                    "sink": {
                                        "name": "timestamp",
                                        "type": "DateTime"
                                    }
                                }
                            ],
                            "collectionReference": ""
                        }
                    },
                    "inputs": [
                        {
                            "referenceName": "Json1",
                            "type": "DatasetReference"
                        }
                    ],
                    "outputs": [
                        {
                            "referenceName": "AzureDataExplorerTable1",
                            "type": "DatasetReference"
                        }
                    ]
                }
            ],
            "annotations": []
        }
    }