When I run the following command:
aws glue create-crawler --debug --cli-input-json '{
"Name": "crawler",
"Role": "arn:...",
"DatabaseName": "db",
"Description": "table crawler",
"Targets": {
"CatalogTargets": [{
"DatabaseName": "db",
"Tables": ["tab"]
}]
},
"SchemaChangePolicy": {
"UpdateBehavior": "LOG",
"DeleteBehavior": "LOG"
},
"RecrawlPolicy": {
"RecrawlBehavior": "CRAWL_EVERYTHING"
},
"Configuration": {
"Version": 1.0,
"CrawlerOutput": {
"Partitions": { "AddOrUpdateBehavior": "InheritFromTable" }
},
"Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas" }
},
"Schedule": "Cron(1 * * * ? *)"
}'
it fails with the following error:
Parameter validation failed:
Invalid type for parameter Configuration, value: {'Version': 1.0, 'CrawlerOutput': {'Partitions': {'AddOrUpdateBehavior': 'InheritFromTable'}}, 'Grouping': {'TableGroupingPolicy': 'CombineCompatibleSchemas'}}, type: <class 'dict'>, valid types: <class 'str'>
I'm getting the format from here, and if I remove Configuration it works. I tried all kinds of quoting options to turn this dictionary into a single string, but everything fails. I would love some help spotting the issue.
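The error says that Configuration must be a string, not a dictionary, so it cannot be given as a nested JSON object inside --cli-input-json. The solution is to take Configuration out of the input JSON and pass it as a separate --configuration argument to the same command, where the whole JSON text is supplied as one quoted string: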
aws glue create-crawler --configuration '{
"Version": 1.0,
"CrawlerOutput": {
"Partitions": { "AddOrUpdateBehavior": "InheritFromTable" }
},
"Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas" }
}' --debug --cli-input-json '{
"Name": "crawler",
"Role": "arn:...",
"DatabaseName": "db",
"Description": "table crawler",
"Targets": {
"CatalogTargets": [{
"DatabaseName": "db",
"Tables": ["tab"]
}]
},
"SchemaChangePolicy": {
"UpdateBehavior": "LOG",
"DeleteBehavior": "LOG"
},
"RecrawlPolicy": {
"RecrawlBehavior": "CRAWL_EVERYTHING"
},
"Schedule": "Cron(1 * * * ? *)"
}'