无法在 Docker Swarm 中发现其他 Neo4J 因果集群实例

Unable to discover other Neo4J causal cluster instances in Docker Swarm

我运行的是一个稍作修改的演示 docker-compose 文件(取自此处,感谢 GraphAware 团队)。

我使用 docker-compose up 成功运行起了一个因果集群。但是,使用 docker swarm 时我无法得到同样的结果。

两种方式使用的 compose 文件是相同的:

# Compose file (format 3.3): three Neo4j 3.3.4 Enterprise services in CORE
# mode, all attached to one overlay network and listing each other as the
# initial discovery members on port 5000.
version: "3.3"

services:

  # Core member 1; data and logs are mounted from host paths under /srv/neo4j.
  neo-1:
    image: "neo4j:3.3.4-enterprise"
    environment:
      NEO4J_AUTH: "neo4j/blah"
      NEO4J_dbms_mode: "CORE"
      NEO4J_ACCEPT_LICENSE_AGREEMENT: "yes"
      NEO4J_causalClustering_expectedCoreClusterSize: "3"
      NEO4J_causalClustering_initialDiscoveryMembers: "neo-1:5000,neo-2:5000,neo-3:5000"
      NEO4J_dbms_connector_http_listen__address: ":7474"
      NEO4J_dbms_connector_https_listen__address: ":6477"
      NEO4J_dbms_connector_bolt_listen__address: ":7687"
    volumes:
      - "/srv/neo4j/neo4j-core1/data:/data"
      - "/srv/neo4j/neo4j-core1/logs:/logs"
    networks:
      - neonet

  # Core member 2; same settings as neo-1 apart from its own host paths.
  neo-2:
    image: "neo4j:3.3.4-enterprise"
    environment:
      NEO4J_AUTH: "neo4j/blah"
      NEO4J_dbms_mode: "CORE"
      NEO4J_ACCEPT_LICENSE_AGREEMENT: "yes"
      NEO4J_causalClustering_expectedCoreClusterSize: "3"
      NEO4J_causalClustering_initialDiscoveryMembers: "neo-1:5000,neo-2:5000,neo-3:5000"
      NEO4J_dbms_connector_http_listen__address: ":7474"
      NEO4J_dbms_connector_https_listen__address: ":6477"
      NEO4J_dbms_connector_bolt_listen__address: ":7687"
    volumes:
      - "/srv/neo4j/neo4j-core2/data:/data"
      - "/srv/neo4j/neo4j-core2/logs:/logs"
    networks:
      - neonet

  # Core member 3; same settings as neo-1 apart from its own host paths.
  neo-3:
    image: "neo4j:3.3.4-enterprise"
    environment:
      NEO4J_AUTH: "neo4j/blah"
      NEO4J_dbms_mode: "CORE"
      NEO4J_ACCEPT_LICENSE_AGREEMENT: "yes"
      NEO4J_causalClustering_expectedCoreClusterSize: "3"
      NEO4J_causalClustering_initialDiscoveryMembers: "neo-1:5000,neo-2:5000,neo-3:5000"
      NEO4J_dbms_connector_http_listen__address: ":7474"
      NEO4J_dbms_connector_https_listen__address: ":6477"
      NEO4J_dbms_connector_bolt_listen__address: ":7687"
    volumes:
      - "/srv/neo4j/neo4j-core3/data:/data"
      - "/srv/neo4j/neo4j-core3/logs:/logs"
    networks:
      - neonet

networks:
  # Attachable overlay network with an explicitly configured /24 subnet.
  neonet:
    driver: overlay
    attachable: true
    ipam:
      config:
        - subnet: "10.161.0.0/24"

……只是在使用 docker-compose up 时,我既没有指定 overlay 网络的细节,也没有指定 deploy 的细节。两个集群都运行在同一台机器上。

如果我通过 shell 进入由独立 docker-compose 启动的容器,IP 地址看起来没问题,端口 5000 也可以用 curl 访问;而对 swarm 部署的容器执行相同的操作(curl ip:5000)则会得到“拒绝连接”(connection refused)。

运行 netstat -ntlp 给出:

/var/lib/neo4j # netstat -ntlp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address           Foreign Address         State       PID/Program name
tcp        0      0 10.161.0.166:5000       0.0.0.0:*               LISTEN      -
tcp        0      0 127.0.0.11:44137        0.0.0.0:*               LISTEN      -
tcp        0      0 0.0.0.0:7000            0.0.0.0:*               LISTEN      -

结果显示,端口 5000 监听在一个不属于本机任何接口的 IP 地址上(ifconfig 输出如下):

eth0      Link encap:Ethernet  HWaddr 02:42:0A:A1:00:A7
          inet addr:10.161.0.167  Bcast:10.161.0.255  Mask:255.255.255.0
          UP BROADCAST RUNNING MULTICAST  MTU:1450  Metric:1
          RX packets:119 errors:0 dropped:0 overruns:0 frame:0
          TX packets:119 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:0
          RX bytes:7110 (6.9 KiB)  TX bytes:7110 (6.9 KiB)

eth1      Link encap:Ethernet  HWaddr 02:42:AC:12:00:06
          inet addr:172.18.0.6  Bcast:172.18.255.255  Mask:255.255.0.0
          UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
          RX packets:8 errors:0 dropped:0 overruns:0 frame:0
          TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:0
          RX bytes:648 (648.0 B)  TX bytes:0 (0.0 B)

lo        Link encap:Local Loopback
          inet addr:127.0.0.1  Mask:255.0.0.0
          UP LOOPBACK RUNNING  MTU:65536  Metric:1
          RX packets:58 errors:0 dropped:0 overruns:0 frame:0
          TX packets:58 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1
          RX bytes:3604 (3.5 KiB)  TX bytes:3604 (3.5 KiB)

..如您所见,有 2 个接口,我的 neonet 网络,以及(我假设)docker 的 ingress.

此外,neo4j 已经通过配置被指示在所有接口上侦听发现(discovery)端口:

causal_clustering.transaction_listen_address=0.0.0.0:6000
causal_clustering.transaction_advertised_address=2a9e1683a92e:6000
causal_clustering.raft_listen_address=0.0.0.0:7000
causal_clustering.raft_advertised_address=2a9e1683a92e:7000
causal_clustering.initial_discovery_members=neo1:5000,neo2:5000,neo3:5000
causal_clustering.expected_core_cluster_size=3
causal_clustering.discovery_listen_address=0.0.0.0:5000
causal_clustering.discovery_advertised_address=2a9e1683a92e:5000
EDITION=enterprise
ACCEPT.LICENSE.AGREEMENT=yes

……但不知何故,它却决定只在某个特定 IP 上侦听——对端口 5000 是这样,而顺便一提,对端口 7000 却不是。

我没有网络方面的背景,但在一个没有绑定到本机任何接口的 IP 上监听,看起来并不正确。

如何让Neo4J绑定所有接口?或者至少是有效的?

事实证明需要做多处修复,其中最核心的是设置 deploy.endpoint_mode: dnsrr,以阻止 docker 为服务创建虚拟 IP。最终,我可以正常工作的 swarm 文件如下所示。

“可以正常工作”指的是:一个多节点的 Neo4j 因果集群(仅包含核心实例)正常运行;使用 Neo4J OGM v3 客户端通过连接 URL bolt+routing://neo-1:7687 访问时 100% 正常。我还没有足够的勇气去尝试初始连接的故障转移;因此(最初)neo-1 是一个单点故障(SPOF)。

# Swarm stack file (format 3.3): three Neo4j 3.3.4 Enterprise CORE services.
# Each service advertises its own service name for discovery (5000),
# transaction (6000), raft (7000) and bolt (7687) traffic, and uses
# endpoint_mode dnsrr instead of the default virtual-IP endpoint.
version: "3.3"

networks:
  neonet:
    driver: overlay

# Named volumes referenced by all three services.
# NOTE(review): this presumably relies on the volumes being node-local and on
# each service being pinned to a different node via its placement constraint,
# so that every core gets its own store - confirm for the volume driver in use.
volumes:
  neo-data:
  neo-logs:

services:

  # Core member 1, constrained to nodes labelled neodb == 1.
  neo-1:
    image: "neo4j:3.3.4-enterprise"
    networks:
      - neonet
    deploy:
      mode: global
      endpoint_mode: dnsrr
      placement:
        constraints:
          - "node.labels.neodb == 1"
    volumes:
      - "neo-data:/data"
      - "neo-logs:/var/lib/neo4j/logs"
    environment:
      NEO4J_AUTH: "neo4j/blah"
      NEO4J_causalClustering_discoveryAdvertisedAddress: "neo-1:5000"
      NEO4J_causalClustering_transactionAdvertisedAddress: "neo-1:6000"
      NEO4J_causalClustering_raftAdvertisedAddress: "neo-1:7000"
      NEO4J_causalClustering_expectedCoreClusterSize: "3"
      NEO4J_causalClustering_initialDiscoveryMembers: "neo-1:5000,neo-2:5000,neo-3:5000"
      NEO4J_dbms_connectors_default__advertised__address: "neo-1"
      NEO4J_dbms_connector_bolt_advertised__address: ":7687"
      NEO4J_ACCEPT_LICENSE_AGREEMENT: "yes"
      NEO4J_dbms_mode: "CORE"

  # Core member 2, constrained to nodes labelled neodb == 2.
  neo-2:
    image: "neo4j:3.3.4-enterprise"
    networks:
      - neonet
    deploy:
      mode: global
      endpoint_mode: dnsrr
      placement:
        constraints:
          - "node.labels.neodb == 2"
    volumes:
      - "neo-data:/data"
      - "neo-logs:/var/lib/neo4j/logs"
    environment:
      NEO4J_AUTH: "neo4j/blah"
      NEO4J_causalClustering_discoveryAdvertisedAddress: "neo-2:5000"
      NEO4J_causalClustering_transactionAdvertisedAddress: "neo-2:6000"
      NEO4J_causalClustering_raftAdvertisedAddress: "neo-2:7000"
      NEO4J_causalClustering_expectedCoreClusterSize: "3"
      NEO4J_causalClustering_initialDiscoveryMembers: "neo-1:5000,neo-2:5000,neo-3:5000"
      NEO4J_dbms_connectors_default__advertised__address: "neo-2"
      NEO4J_dbms_connector_bolt_advertised__address: ":7687"
      NEO4J_ACCEPT_LICENSE_AGREEMENT: "yes"
      NEO4J_dbms_mode: "CORE"

  # Core member 3, constrained to nodes labelled neodb == 3.
  neo-3:
    image: "neo4j:3.3.4-enterprise"
    networks:
      - neonet
    deploy:
      mode: global
      endpoint_mode: dnsrr
      placement:
        constraints:
          - "node.labels.neodb == 3"
    volumes:
      - "neo-data:/data"
      - "neo-logs:/var/lib/neo4j/logs"
    environment:
      NEO4J_AUTH: "neo4j/blah"
      NEO4J_causalClustering_discoveryAdvertisedAddress: "neo-3:5000"
      NEO4J_causalClustering_transactionAdvertisedAddress: "neo-3:6000"
      NEO4J_causalClustering_raftAdvertisedAddress: "neo-3:7000"
      NEO4J_causalClustering_expectedCoreClusterSize: "3"
      NEO4J_causalClustering_initialDiscoveryMembers: "neo-1:5000,neo-2:5000,neo-3:5000"
      NEO4J_dbms_connectors_default__advertised__address: "neo-3"
      NEO4J_dbms_connector_bolt_advertised__address: ":7687"
      NEO4J_ACCEPT_LICENSE_AGREEMENT: "yes"
      NEO4J_dbms_mode: "CORE"

我很确定这样写过于冗长;如今可能已经有了只需声明一个服务(带多个副本)的解决方案。