将自定义 Docker 镜像与 Databricks 作业 API 一起使用
Custom Docker Image with Databricks jobs API
有没有办法在 Azure Databricks 中使用自定义 Docker 镜像创建临时作业集群?
我只能找到有关使用 Docker 服务创建普通集群的信息。
我想发送到 azuredatabricks.net/api/2.0/jobs/create API 的作业定义 json 如下所示:
{
"databricks_pool_name": "test",
"job_settings": {
"name": "job-test",
"new_cluster": {
"num_workers": 1,
"spark_version": "7.3.x-scala2.12",
"instance_pool_id": "<INSTANCE_POOL_PLACEHOLDER>",
"docker_image": {
"url": "<ACR_HOST_NAME>",
"basic_auth": {
"username": "<ACR_USER>",
"password": "<ACR_TOKEN>"
}
}
},
"max_concurrent_runs": 1,
"max_retries": 0,
"schedule": {
"quartz_cron_expression": "0 0 0 2 * ?",
"timezone_id": "UTC"
},
"spark_python_task": {
"python_file": "dbfs:/poc.py"
},
"timeout_seconds": 5400
}
}
您的 JSON 结构不正确 —— 如果您查看 Jobs API 的文档,您会发现您只需要发送 job_settings
字段的内容:
{
"name": "job-test",
"new_cluster": {
"num_workers": 1,
"spark_version": "7.3.x-scala2.12",
"instance_pool_id": "<INSTANCE_POOL_PLACEHOLDER>",
"docker_image": {
"url": "<ACR_HOST_NAME>",
"basic_auth": {
"username": "<ACR_USER>",
"password": "<ACR_TOKEN>"
}
}
},
"max_concurrent_runs": 1,
"max_retries": 0,
"schedule": {
"quartz_cron_expression": "0 0 0 2 * ?",
"timezone_id": "UTC"
},
"spark_python_task": {
"python_file": "dbfs:/poc.py"
},
"timeout_seconds": 5400
}
有没有办法在 Azure Databricks 中使用自定义 Docker 镜像创建临时作业集群? 我只能找到有关使用 Docker 服务创建普通集群的信息。
我想发送到 azuredatabricks.net/api/2.0/jobs/create API 的作业定义 json 如下所示:
{
"databricks_pool_name": "test",
"job_settings": {
"name": "job-test",
"new_cluster": {
"num_workers": 1,
"spark_version": "7.3.x-scala2.12",
"instance_pool_id": "<INSTANCE_POOL_PLACEHOLDER>",
"docker_image": {
"url": "<ACR_HOST_NAME>",
"basic_auth": {
"username": "<ACR_USER>",
"password": "<ACR_TOKEN>"
}
}
},
"max_concurrent_runs": 1,
"max_retries": 0,
"schedule": {
"quartz_cron_expression": "0 0 0 2 * ?",
"timezone_id": "UTC"
},
"spark_python_task": {
"python_file": "dbfs:/poc.py"
},
"timeout_seconds": 5400
}
}
您的 JSON 结构不正确 —— 如果您查看 Jobs API 的文档,您会发现您只需要发送 job_settings
字段的内容:
{
"name": "job-test",
"new_cluster": {
"num_workers": 1,
"spark_version": "7.3.x-scala2.12",
"instance_pool_id": "<INSTANCE_POOL_PLACEHOLDER>",
"docker_image": {
"url": "<ACR_HOST_NAME>",
"basic_auth": {
"username": "<ACR_USER>",
"password": "<ACR_TOKEN>"
}
}
},
"max_concurrent_runs": 1,
"max_retries": 0,
"schedule": {
"quartz_cron_expression": "0 0 0 2 * ?",
"timezone_id": "UTC"
},
"spark_python_task": {
"python_file": "dbfs:/poc.py"
},
"timeout_seconds": 5400
}