Spark + Docker-Compose:非法执行程序位置格式

Spark + Docker-Compose: Illegal executor location format

将 Spark 1.6.0 与 docker-compose 和 submit 作业一起使用时,驱动程序抛出错误: Illegal executor location format: executor_dockerdevenv_sparkWorker_1.bridge_0

来自: https://github.com/apache/spark/blob/d83c2f9f0b08d6d5d369d9fae04cdb15448e7f0d/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala#L70

我了解到“_”是执行者 ID 的保留字符。

要解决此问题,只需在 docker-compose.yml 中使用 container_name: "spark-worker" 重命名您的 spark worker(避免容器名称中出现下划线):
#
# Spark master
#
# Standalone Spark master. All Spark-internal ports are pinned via
# SPARK_MASTER_OPTS so they can be exposed deterministically below.
#
sparkMaster:
  image: mesosphere/spark:1.6.0
  # -i binds the master to the hostname set below so workers/drivers can
  # reach it as spark://spark-master:7077.
  command: ./bin/spark-class org.apache.spark.deploy.master.Master -i spark-master
  hostname: "spark-master"
  environment:
    SERVICE_NAME: "spark-master"
    SPARK_USER: "root"
    SPARK_MASTER_OPTS: "-Dspark.driver.port=7001 -Dspark.fileserver.port=7002 -Dspark.broadcast.port=7003 -Dspark.replClassServer.port=7004 -Dspark.blockManager.port=7005 -Dspark.executor.port=7006 -Dspark.ui.port=4040 -Dspark.broadcast.factory=org.apache.spark.broadcast.HttpBroadcastFactory"
    # Quoted: environment values should be strings; a plain 7077 is parsed
    # as a YAML integer, not the string docker-compose passes to the process.
    SPARK_MASTER_PORT: "7077"
    SPARK_MASTER_WEBUI_PORT: "8080"
  dns:
  # First DNS is the local DNS for all services; second is Google DNS for
  # external resolution.
  - 172.17.42.1
  - 8.8.8.8
  # Quoted so each port stays a string rather than a YAML integer.
  expose:
  - "8080"
  - "7077"
  - "8888"
  - "8081"
  - "4040"
  - "7001"
  - "7002"
  - "7003"
  - "7004"
  - "7005"
  - "7006"
  dns_search: service.consul
  volumes:
  - ./spark/spark-master:/opt/spark/dist/logs/
  # Host-published ports: Spark UI (4040), REST (6066), master RPC (7077),
  # master web UI (8080). Quoted to avoid YAML sexagesimal parsing.
  ports:
  - "4040:4040"
  - "6066:6066"
  - "7077:7077"
  - "8080:8080"

#
# Spark worker
#
# Registers with the master at spark://spark-master:7077.
#
sparkWorker:
  image: mesosphere/spark:1.6.0
  command: ./bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
  hostname: "spark-worker"
  # This is the actual fix for "Illegal executor location format":
  # without it, compose names the container "<project>_sparkWorker_1",
  # and Spark rejects executor IDs containing "_". An explicit
  # underscore-free container_name avoids that.
  container_name: "spark-worker"
  environment:
    SERVICE_NAME: "spark-worker"
    SPARK_USER: "root"
    SPARK_WORKER_OPTS: "-Dspark.driver.port=7001 -Dspark.fileserver.port=7002 -Dspark.broadcast.port=7003 -Dspark.replClassServer.port=7004 -Dspark.blockManager.port=7005 -Dspark.executor.port=7006 -Dspark.ui.port=4040 -Dspark.broadcast.factory=org.apache.spark.broadcast.HttpBroadcastFactory"
    # Quoted: environment values should be strings; plain numbers are
    # parsed as YAML integers.
    SPARK_WORKER_PORT: "8888"
    SPARK_WORKER_WEBUI_PORT: "8081"
  dns:
  # First DNS is the local DNS for all services; second is Google DNS for
  # external resolution.
  - 172.17.42.1
  - 8.8.8.8
  # Quoted so each port stays a string rather than a YAML integer.
  expose:
  - "8080"
  - "7077"
  - "8888"
  - "8081"
  - "4040"
  - "7001"
  - "7002"
  - "7003"
  - "7004"
  - "7005"
  - "7006"
  dns_search: service.consul
  links:
  - sparkMaster
  volumes:
  - ./spark/spark-worker:/opt/spark/dist/logs/
  # Worker web UI published to the host.
  ports:
  - "8081:8081"

#
# Spark Driver
#
# Builds the application image from ./my-driver and submits the job to the
# standalone master via spark-submit.
#
spark-driver:
  build: ./my-driver
  hostname: "spark-driver"
  environment:
    SERVICE_NAME: "spark-driver"
    SPARK_USER: "root"
  dns:
  # First DNS is the local DNS for all services
  - 172.17.42.1
  # Second DNS is Google DNS to allow external calls if needed
  - 8.8.8.8
  dns_search: service.consul
  # Pins driver/fileserver/UI ports so they match the ports exposed below.
  # NOTE(review): "/my-packaged-ap.jar" looks like a typo for
  # "my-packaged-app.jar" — confirm against the image contents.
  command: ./bin/spark-submit --master spark://spark-master:7077 --class my.Class --conf spark.driver.port=7001 --conf spark.fileserver.port=7003 --conf spark.ui.port=4040 /my-packaged-ap.jar
  expose:
  - 8080
  - 7077
  - 8888
  - 8081
  - 4040
  - 7001
  - 7002
  - 7003
  - 7004
  - 7005
  - 7006