Is there a way to create a temporary job cluster with a custom Docker image in Azure Databricks? I can only find information on creating regular clusters with Databricks Container Services (custom Docker images).
The job definition JSON that I want to send to the azuredatabricks.net/api/2.0/jobs/create API looks like the following:
{
  "databricks_pool_name": "test",
  "job_settings": {
    "name": "job-test",
    "new_cluster": {
      "num_workers": 1,
      "spark_version": "7.3.x-scala2.12",
      "instance_pool_id": "<INSTANCE_POOL_PLACEHOLDER>",
      "docker_image": {
        "url": "<ACR_HOST_NAME>",
        "basic_auth": {
          "username": "<ACR_USER>",
          "password": "<ACR_TOKEN>"
        }
      }
    },
    "max_concurrent_runs": 1,
    "max_retries": 0,
    "schedule": {
      "quartz_cron_expression": "0 0 0 2 * ?",
      "timezone_id": "UTC"
    },
    "spark_python_task": {
      "python_file": "dbfs:/poc.py"
    },
    "timeout_seconds": 5400
  }
}
The structure of your JSON is incorrect. If you look at the documentation for the Jobs API, you will see that you need to send only the content of your job_settings field:
{
  "name": "job-test",
  "new_cluster": {
    "num_workers": 1,
    "spark_version": "7.3.x-scala2.12",
    "instance_pool_id": "<INSTANCE_POOL_PLACEHOLDER>",
    "docker_image": {
      "url": "<ACR_HOST_NAME>",
      "basic_auth": {
        "username": "<ACR_USER>",
        "password": "<ACR_TOKEN>"
      }
    }
  },
  "max_concurrent_runs": 1,
  "max_retries": 0,
  "schedule": {
    "quartz_cron_expression": "0 0 0 2 * ?",
    "timezone_id": "UTC"
  },
  "spark_python_task": {
    "python_file": "dbfs:/poc.py"
  },
  "timeout_seconds": 5400
}