Mac M1 上的 Apache Airflow 安装问题

Apache Airflow installation issue on Mac M1

我正在尝试在 docker 中设置 Apache Airflow 以进行本地开发。我之前在基于英特尔的 Mac 上成功运行过。

我参照了他们的官方文档(official documentation)。当我尝试使用 docker compose up 运行服务时,我看到以下回溯。

# docker-compose.yaml
# Local-development Airflow stack (CeleryExecutor): Postgres metadata DB,
# Redis broker, webserver, scheduler, Celery worker, one-shot init, Flower UI.
---
version: '3'
x-airflow-common:
  # Shared base configuration merged into every Airflow service via
  # `<<: *airflow-common` (shallow YAML 1.1 merge key).
  &airflow-common
  image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:2.1.0}
  environment:
    &airflow-common-env
    AIRFLOW__CORE__EXECUTOR: CeleryExecutor
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__RESULT_BACKEND: db+postgresql://airflow:airflow@postgres/airflow
    AIRFLOW__CELERY__BROKER_URL: redis://:@redis:6379/0
    # Empty Fernet key: Airflow generates one at startup; connections/variables
    # written by one container are then unreadable by others. Fine for a local
    # sandbox; set a fixed key for anything beyond that.
    AIRFLOW__CORE__FERNET_KEY: ''
    # Env-var values are deliberately quoted strings ('true'), not YAML booleans:
    # container environments only carry strings.
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
    AIRFLOW__CORE__LOAD_EXAMPLES: 'true'
    AIRFLOW__API__AUTH_BACKEND: 'airflow.api.auth.backend.basic_auth'
  volumes:
    - ./dags:/opt/airflow/dags
    - ./logs:/opt/airflow/logs
    - ./plugins:/opt/airflow/plugins
  # Run as host UID/GID so files created in the bind mounts stay writable.
  user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-50000}"
  depends_on:
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy

services:
  postgres:
    image: postgres:13
# Uncomment on Apple Silicon if the native arm64 image misbehaves:
#    platform: linux/amd64
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
    volumes:
      - postgres-db-volume:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "airflow"]
      interval: 5s
      retries: 5
    restart: always

  redis:
    # NOTE(review): `latest` is an unpinned tag — consider pinning (e.g.
    # redis:6.2) for reproducible local environments.
    image: redis:latest
    ports:
      # Port mappings quoted: unquoted `HOST:CONTAINER` digit pairs can hit
      # YAML 1.1 implicit-typing traps, so Compose docs recommend strings.
      - "6379:6379"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 30s
      retries: 50
    restart: always

  airflow-webserver:
    <<: *airflow-common
    command: webserver
    ports:
      - "8080:8080"
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  airflow-scheduler:
    <<: *airflow-common
    command: scheduler
    healthcheck:
      # $$ escapes Compose interpolation so the shell sees ${HOSTNAME}.
      test: ["CMD-SHELL", 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"']
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  airflow-worker:
    <<: *airflow-common
    command: celery worker
    healthcheck:
      test:
        - "CMD-SHELL"
        - 'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"'
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

  airflow-init:
    # One-shot bootstrap: upgrades the metadata DB and creates the admin user.
    <<: *airflow-common
    command: version
    environment:
      # Explicit keys below override the merged-in common env (shallow merge).
      <<: *airflow-common-env
      _AIRFLOW_DB_UPGRADE: 'true'
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
      _AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow}

  flower:
    # Celery monitoring UI.
    <<: *airflow-common
    command: celery flower
    ports:
      - "5555:5555"
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:5555/"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always

volumes:
  postgres-db-volume:

以下是 docker compose up 的回溯:
[+] Running 7/7
 ⠿ Container test_redis_1              Recreated                                                                                                                                                                                         1.2s
 ⠿ Container test_postgres_1           Recreated                                                                                                                                                                                         0.2s
 ⠿ Container test_airflow-worker_1     Created                                                                                                                                                                                           0.1s
 ⠿ Container test_airflow-init_1       Recreated                                                                                                                                                                                         0.2s
 ⠿ Container test_airflow-webserver_1  Created                                                                                                                                                                                           0.1s
 ⠿ Container test_flower_1             Created                                                                                                                                                                                           0.1s
 ⠿ Container test_airflow-scheduler_1  Created                                                                                                                                                                                           0.1s
Attaching to airflow-init_1, airflow-scheduler_1, airflow-webserver_1, airflow-worker_1, flower_1, postgres_1, redis_1
postgres_1           | 
postgres_1           | PostgreSQL Database directory appears to contain a database; Skipping initialization
postgres_1           | 
postgres_1           | 2021-06-05 19:35:20.148 UTC [1] LOG:  starting PostgreSQL 13.3 (Debian 13.3-1.pgdg100+1) on aarch64-unknown-linux-gnu, compiled by gcc (Debian 8.3.0-6) 8.3.0, 64-bit
postgres_1           | 2021-06-05 19:35:20.148 UTC [1] LOG:  listening on IPv4 address "0.0.0.0", port 5432
postgres_1           | 2021-06-05 19:35:20.148 UTC [1] LOG:  listening on IPv6 address "::", port 5432
postgres_1           | 2021-06-05 19:35:20.150 UTC [1] LOG:  listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432"
postgres_1           | 2021-06-05 19:35:20.154 UTC [27] LOG:  database system was shut down at 2021-06-05 19:35:16 UTC
postgres_1           | 2021-06-05 19:35:20.158 UTC [1] LOG:  database system is ready to accept connections
redis_1              | 1:C 05 Jun 2021 19:35:21.647 # oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo
redis_1              | 1:C 05 Jun 2021 19:35:21.647 # Redis version=6.2.4, bits=64, commit=00000000, modified=0, pid=1, just started
redis_1              | 1:C 05 Jun 2021 19:35:21.647 # Warning: no config file specified, using the default config. In order to specify a config file use redis-server /path/to/redis.conf
redis_1              | 1:M 05 Jun 2021 19:35:21.647 * monotonic clock: POSIX clock_gettime
redis_1              | 1:M 05 Jun 2021 19:35:21.647 * Running mode=standalone, port=6379.
redis_1              | 1:M 05 Jun 2021 19:35:21.647 # Server initialized
redis_1              | 1:M 05 Jun 2021 19:35:21.649 * Ready to accept connections
airflow-init_1       | BACKEND=postgresql+psycopg2
airflow-init_1       | DB_HOST=postgres
airflow-init_1       | DB_PORT=5432
airflow-init_1       | 
airflow-scheduler_1  | BACKEND=postgresql+psycopg2
airflow-scheduler_1  | DB_HOST=postgres
airflow-scheduler_1  | DB_PORT=5432
airflow-scheduler_1  | 
airflow-scheduler_1  | BACKEND=postgresql+psycopg2
airflow-scheduler_1  | DB_HOST=postgres
airflow-scheduler_1  | DB_PORT=5432
airflow-scheduler_1  | 
airflow-worker_1     | BACKEND=postgresql+psycopg2
airflow-worker_1     | DB_HOST=postgres
airflow-worker_1     | DB_PORT=5432
flower_1             | BACKEND=postgresql+psycopg2
flower_1             | DB_HOST=postgres
flower_1             | DB_PORT=5432
airflow-worker_1     | 
airflow-worker_1     | BACKEND=postgresql+psycopg2
airflow-worker_1     | DB_HOST=postgres
airflow-worker_1     | DB_PORT=5432
flower_1             | 
flower_1             | BACKEND=postgresql+psycopg2
flower_1             | DB_HOST=postgres
flower_1             | DB_PORT=5432
airflow-webserver_1  | BACKEND=postgresql+psycopg2
airflow-webserver_1  | DB_HOST=postgres
airflow-webserver_1  | DB_PORT=5432
airflow-worker_1     | 
flower_1             | 
airflow-webserver_1  | 
airflow-init_1       | DB: postgresql+psycopg2://airflow:***@postgres/airflow
airflow-init_1       | [2021-06-05 19:35:59,163] {db.py:695} INFO - Creating tables
airflow-init_1       | INFO  [alembic.runtime.migration] Context impl PostgresqlImpl.
airflow-init_1       | INFO  [alembic.runtime.migration] Will assume transactional DDL.
flower_1             | [2021-06-05 19:36:08,037] {command.py:137} INFO - Visit me at http://0.0.0.0:5555
flower_1             | [2021-06-05 19:36:08,359] {command.py:142} INFO - Broker: redis://redis:6379/0
flower_1             | [2021-06-05 19:36:08,391] {command.py:145} INFO - Registered tasks: 
flower_1             | ['airflow.executors.celery_executor.execute_command',
flower_1             |  'celery.accumulate',
flower_1             |  'celery.backend_cleanup',
flower_1             |  'celery.chain',
flower_1             |  'celery.chord',
flower_1             |  'celery.chord_unlock',
flower_1             |  'celery.chunks',
flower_1             |  'celery.group',
flower_1             |  'celery.map',
flower_1             |  'celery.starmap']
flower_1             | [2021-06-05 19:36:08,666] {mixins.py:229} INFO - Connected to redis://redis:6379/0
flower_1             | [2021-06-05 19:36:11,593] {inspector.py:42} WARNING - Inspect method scheduled failed
flower_1             | [2021-06-05 19:36:11,609] {inspector.py:42} WARNING - Inspect method conf failed
flower_1             | [2021-06-05 19:36:11,617] {inspector.py:42} WARNING - Inspect method reserved failed
flower_1             | [2021-06-05 19:36:11,619] {inspector.py:42} WARNING - Inspect method registered failed
flower_1             | [2021-06-05 19:36:11,655] {inspector.py:42} WARNING - Inspect method active_queues failed
flower_1             | [2021-06-05 19:36:11,659] {inspector.py:42} WARNING - Inspect method stats failed
flower_1             | [2021-06-05 19:36:11,662] {inspector.py:42} WARNING - Inspect method revoked failed
flower_1             | [2021-06-05 19:36:11,664] {inspector.py:42} WARNING - Inspect method active failed
airflow-scheduler_1  |   ____________       _____________
airflow-scheduler_1  |  ____    |__( )_________  __/__  /________      __
airflow-scheduler_1  | ____  /| |_  /__  ___/_  /_ __  /_  __ \_ | /| / /
airflow-scheduler_1  | ___  ___ |  / _  /   _  __/ _  / / /_/ /_ |/ |/ /
airflow-scheduler_1  |  _/_/  |_/_/  /_/    /_/    /_/  \____/____/|__/
airflow-scheduler_1  | [2021-06-05 19:36:19,515] {scheduler_job.py:1253} INFO - Starting the scheduler
airflow-scheduler_1  | [2021-06-05 19:36:19,527] {scheduler_job.py:1258} INFO - Processing each file at most -1 times
airflow-scheduler_1  | [2021-06-05 19:36:19,588] {dag_processing.py:254} INFO - Launched DagFileProcessorManager with pid: 107
airflow-scheduler_1  | [2021-06-05 19:36:19,604] {scheduler_job.py:1822} INFO - Resetting orphaned tasks for active dag runs
airflow-scheduler_1  | [2021-06-05 19:36:19,657] {settings.py:52} INFO - Configured default timezone Timezone('UTC')
airflow-worker_1     | Starting flask
airflow-worker_1     |  * Serving Flask app "airflow.utils.serve_logs" (lazy loading)
airflow-worker_1     |  * Environment: production
airflow-worker_1     |    WARNING: This is a development server. Do not use it in a production deployment.
airflow-worker_1     |    Use a production WSGI server instead.
airflow-worker_1     |  * Debug mode: off
airflow-worker_1     | [2021-06-05 19:36:28,740] {_internal.py:113} INFO -  * Running on http://0.0.0.0:8793/ (Press CTRL+C to quit)
airflow-init_1       | Upgrades done
airflow-worker_1     | /home/airflow/.local/lib/python3.6/site-packages/celery/platforms.py:801 RuntimeWarning: You're running the worker with superuser privileges: this is
airflow-worker_1     | absolutely not recommended!
airflow-worker_1     | 
airflow-worker_1     | Please specify a different user using the --uid option.
airflow-worker_1     | 
airflow-worker_1     | User information: uid=501 euid=501 gid=0 egid=0
airflow-worker_1     | 
airflow-webserver_1  |   ____________       _____________
airflow-webserver_1  |  ____    |__( )_________  __/__  /________      __
airflow-webserver_1  | ____  /| |_  /__  ___/_  /_ __  /_  __ \_ | /| / /
airflow-webserver_1  | ___  ___ |  / _  /   _  __/ _  / / /_/ /_ |/ |/ /
airflow-webserver_1  |  _/_/  |_/_/  /_/    /_/    /_/  \____/____/|__/
airflow-webserver_1  | [2021-06-05 19:36:32,432] {dagbag.py:487} INFO - Filling up the DagBag from /dev/null
postgres_1           | 2021-06-05 19:36:38.577 UTC [150] LOG:  unexpected EOF on client connection with an open transaction
airflow-worker_1     | [2021-06-05 19:36:39,670: INFO/MainProcess] Connected to redis://redis:6379/0
airflow-worker_1     | [2021-06-05 19:36:39,819: INFO/MainProcess] mingle: searching for neighbors
airflow-webserver_1 exited with code 137
airflow-worker_1     | [2021-06-05 19:36:41,084: INFO/MainProcess] mingle: all alone
airflow-worker_1     | [2021-06-05 19:36:41,196: INFO/MainProcess] celery@a2177ac7a506 ready.
postgres_1           | 2021-06-05 19:36:43.263 UTC [131] LOG:  unexpected EOF on client connection with an open transaction
airflow-worker_1     | [2021-06-05 19:36:43,502: INFO/MainProcess] Events of group {task} enabled by remote.
postgres_1           | 2021-06-05 19:36:48.829 UTC [132] LOG:  unexpected EOF on client connection with an open transaction
flower_1 exited with code 137
airflow-worker_1 exited with code 137

然后进入循环重试。作为实验,当我尝试逐一运行各服务时,我观察到一旦启动 airflow-worker,所有其他服务就开始失败:airflow-webserver 和 worker 立即退出(exit code 137),并尝试重启但无法成功。

我遇到过同样的问题。退出码 137 表示容器因内存不足被 OOM 终止;在 Docker 偏好设置中增加资源分配为我解决了这个问题:CPU 2 → 4,内存 2GB → 6GB,交换空间 1GB → 2GB。