I have an existing datasource in Druid. I am trying to delete some records by reindexing the data with a filter and overwriting the existing data. If the dataSource within ioConfig is my_datasource and the dataSource within the dataSchema is other_datasource, it works just fine and other_datasource shows the expected result. But when both dataSources (ioConfig and dataSchema) are the same, the existing data does not change according to the filter applied. Here is the configuration sample:
{
  "type": "index_parallel",
  "spec": {
    "dataSchema": {
      "dataSource": "my_datasource",
      "timestampSpec": {
        "column": "RecordDate",
        "format": "yyyy-MM-dd"
      },
      "dimensionsSpec": {
        "dimensions": ["RecordDate", "Column1", "Column2"]
      },
      "metricsSpec": [],
      "granularitySpec": {
        "type": "uniform",
        "queryGranularity": "none",
        "segmentGranularity": "day",
        "rollup": false
      },
      "transformSpec": {
        "filter": {
          "type": "not",
          "field": {
            "type": "expression",
            "expression": "RecordDate >='1997-02-01' && RecordDate<='1997-02-28'"
          }
        },
        "transforms": []
      }
    },
    "ioConfig": {
      "type": "index_parallel",
      "inputSource": {
        "type": "druid",
        "dataSource": "my_datasource",
        "interval": "1970-01-01/2021-12-26"
      },
      "appendToExisting": false
    },
    "tuningConfig": {
      "type": "index_parallel",
      "partitionsSpec": {
        "type": "dynamic"
      },
      "maxNumConcurrentSubTasks": 4
    }
  }
}
What am I missing here? Is there a better way of achieving what I am trying to do? Appreciate your help. Thank you.
I got it to work by adding "dropExisting": true to the ioConfig (together with explicit "intervals" in the granularitySpec). Also, I moved the filter from the transformSpec to the inputSource within the ioConfig block. Here is the complete config.
{
  "type": "index_parallel",
  "spec": {
    "dataSchema": {
      "dataSource": "my_datasource",
      "timestampSpec": {
        "column": "RecordDate",
        "format": "yyyy-MM-dd"
      },
      "dimensionsSpec": {
        "dimensions": ["RecordDate", "Column1", "Column2"]
      },
      "metricsSpec": [],
      "granularitySpec": {
        "type": "uniform",
        "queryGranularity": "none",
        "segmentGranularity": "day",
        "rollup": false,
        "intervals": ["1970-01-01/2021-12-27"]
      },
      "transformSpec": {
        "transforms": []
      }
    },
    "ioConfig": {
      "type": "index_parallel",
      "inputSource": {
        "type": "druid",
        "dataSource": "my_datasource",
        "interval": "1970-01-01/2021-12-26",
        "filter": {
          "type": "not",
          "field": {
            "type": "expression",
            "expression": "RecordDate >='1997-02-01' && RecordDate<='1997-02-28'"
          }
        }
      },
      "appendToExisting": false,
      "dropExisting": true
    },
    "tuningConfig": {
      "type": "index_parallel",
      "partitionsSpec": {
        "type": "dynamic"
      },
      "maxNumConcurrentSubTasks": 4
    }
  }
}