两个服务无法通过 swarm overlay 看到对方
Two services cannot see each other through a swarm overlay
我觉得这很简单,但我想不通。我在同一主机上的单个节点群中有两个服务,consul 和 traefik。
> docker service ls
ID NAME MODE REPLICAS IMAGE PORTS
3g1obv9l7a9q consul_consul replicated 1/1 progrium/consul:latest
ogdnlfe1v8qx proxy_proxy global 1/1 traefik:alpine *:80->80/tcp, *:443->443/tcp
> docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
090f1ed90972 progrium/consul:latest "/bin/start -server …" 12 minutes ago Up 12 minutes 53/tcp, 53/udp, 8300-8302/tcp, 8400/tcp, 8500/tcp, 8301-8302/udp consul_consul.1.o0j8kijns4lag6odmwkvikexv
20f03023d511 traefik:alpine "/entrypoint.sh -c /…" 12 minutes ago Up 12 minutes 80/tcp
两个容器都可以访问 "consul" 覆盖网络,它是这样创建的。
> docker network create --driver overlay --attachable consul
ypdmdyx2ulqt8l8glejfn2t25
Traefik 抱怨无法联系到 consul。
time="2019-03-18T18:58:08Z" level=error msg="Load config error: Get http://consul:8500/v1/kv/traefik?consistent=&recurse=&wait=30000ms: dial tcp 10.0.2.2:8500: connect: connection refused, retrying in 7.492175404s"
我可以进入 traefik 容器并确认我无法通过覆盖网络到达 consul,尽管它是可 ping 通的。
> docker exec -it 20f03023d511 ash
/ # nslookup consul
Name: consul
Address 1: 10.0.2.2
/ # curl consul:8500
curl: (7) Failed to connect to consul port 8500: Connection refused
# ping consul
PING consul (10.0.2.2): 56 data bytes
64 bytes from 10.0.2.2: seq=0 ttl=64 time=0.085 ms
但是,如果我看得更深一点,我会发现它们是相连的,只是覆盖网络出于某种原因没有将流量传输到实际目的地。如果我直接转到实际的 consul ip,它就可以了。
/ # nslookup tasks.consul
Name: tasks.consul
Address 1: 10.0.2.3 0327c8e1bdd7.consul
/ # curl tasks.consul:8500
<a href="/ui/">Moved Permanently</a>.
我可以解决这个问题,从技术上讲,consul 只会有一个副本 运行,但我想知道为什么在我深入研究之前数据没有首先路由.我想不出还有什么可以尝试的。这是与此设置相关的各种信息。
> docker --version
Docker version 18.09.2, build 6247962
> docker network ls
NETWORK ID NAME DRIVER SCOPE
cee3cdfe1194 bridge bridge local
ypdmdyx2ulqt consul overlay swarm
5469e4538c2d docker_gwbridge bridge local
5fd928ea1e31 host host local
9v22k03pg9sl ingress overlay swarm
> docker network inspect consul
[
{
"Name": "consul",
"Id": "ypdmdyx2ulqt8l8glejfn2t25",
"Created": "2019-03-18T14:44:27.213690506-04:00",
"Scope": "swarm",
"Driver": "overlay",
"EnableIPv6": false,
"IPAM": {
"Driver": "default",
"Options": null,
"Config": [
{
"Subnet": "10.0.2.0/24",
"Gateway": "10.0.2.1"
}
]
},
"Internal": false,
"Attachable": true,
"Ingress": false,
"ConfigFrom": {
"Network": ""
},
"ConfigOnly": false,
"Containers": {
"0327c8e1bdd7ebb5a7871d16cf12df03240996f9e590509984783715a4c09193": {
"Name": "consul_consul.1.8v4bshotrco8fv3sclwx61106",
"EndpointID": "ae9d5ef1d19b67e297ebf40f6db410c33e4e3c0266c56e539e696be3ed4c81a5",
"MacAddress": "02:42:0a:00:02:03",
"IPv4Address": "10.0.2.3/24",
"IPv6Address": ""
},
"c21f5dfa93a2f43b747aedc64a343d94d6c1c2e6558d81bd4a52e2ba4b5fa90f": {
"Name": "proxy_proxy.sb6oindhmfukq4gcne6ynb2o2.4zvco02we58i3ulbyrsw1b2ok",
"EndpointID": "7596a208e0b05ba688f318814e24a2a1a3401765ed53ca421bf61c73e65c235a",
"MacAddress": "02:42:0a:00:02:06",
"IPv4Address": "10.0.2.6/24",
"IPv6Address": ""
},
"lb-consul": {
"Name": "consul-endpoint",
"EndpointID": "23e74716ef54f3fb6537b305176b790b4bc4132dda55f20588d7ce4ca71d7372",
"MacAddress": "02:42:0a:00:02:04",
"IPv4Address": "10.0.2.4/24",
"IPv6Address": ""
}
},
"Options": {
"com.docker.network.driver.overlay.vxlanid_list": "4099"
},
"Labels": {},
"Peers": [
{
"Name": "e11b9bd30b31",
"IP": "10.8.0.1"
}
]
}
]
> cat consul/docker-compose.yml
version: '3.1'
services:
consul:
image: progrium/consul
command: -server -bootstrap
networks:
- consul
volumes:
- consul:/data
deploy:
labels:
- "traefik.enable=false"
networks:
consul:
external: true
> cat proxy/docker-compose.yml
version: '3.3'
services:
proxy:
image: traefik:alpine
command: -c /traefik.toml
networks:
# We need an external proxy network and the consul network
# - proxy
- consul
ports:
# Send HTTP and HTTPS traffic to the proxy service
- 80:80
- 443:443
configs:
- traefik.toml
volumes:
- /var/run/docker.sock:/var/run/docker.sock
deploy:
# Deploy the service to all nodes that match our constraints
mode: global
placement:
constraints:
- "node.role==manager"
- "node.labels.proxy==true"
labels:
# Traefik uses labels to configure routing to your services
# Change the domain to your own
- "traefik.frontend.rule=Host:proxy.mcwebsite.net"
# Route traffic to the web interface hosted on port 8080 in the container
- "traefik.port=8080"
# Name the backend (not required here)
- "traefik.backend=traefik"
# Manually set entrypoints (not required here)
- "traefik.frontend.entryPoints=http,https"
configs:
# Traefik configuration file
traefik.toml:
file: ./traefik.toml
# This service will be using two external networks
networks:
# proxy:
# external: true
consul:
external: true
我的内核中禁用了两个可选的内核配置 CONFIG_IP_VS_PROTO_TCP 和 CONFIG_IP_VS_PROTO_UDP,您猜对了,它们启用了 tcp 和 udp 负载平衡。
我希望我能比我早四个小时检查一下。
我觉得这很简单,但我想不通。我在同一主机上的单个节点群中有两个服务,consul 和 traefik。
> docker service ls
ID NAME MODE REPLICAS IMAGE PORTS
3g1obv9l7a9q consul_consul replicated 1/1 progrium/consul:latest
ogdnlfe1v8qx proxy_proxy global 1/1 traefik:alpine *:80->80/tcp, *:443->443/tcp
> docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
090f1ed90972 progrium/consul:latest "/bin/start -server …" 12 minutes ago Up 12 minutes 53/tcp, 53/udp, 8300-8302/tcp, 8400/tcp, 8500/tcp, 8301-8302/udp consul_consul.1.o0j8kijns4lag6odmwkvikexv
20f03023d511 traefik:alpine "/entrypoint.sh -c /…" 12 minutes ago Up 12 minutes 80/tcp
两个容器都可以访问 "consul" 覆盖网络,它是这样创建的。
> docker network create --driver overlay --attachable consul
ypdmdyx2ulqt8l8glejfn2t25
Traefik 抱怨无法联系到 consul。
time="2019-03-18T18:58:08Z" level=error msg="Load config error: Get http://consul:8500/v1/kv/traefik?consistent=&recurse=&wait=30000ms: dial tcp 10.0.2.2:8500: connect: connection refused, retrying in 7.492175404s"
我可以进入 traefik 容器并确认我无法通过覆盖网络到达 consul,尽管它是可 ping 通的。
> docker exec -it 20f03023d511 ash
/ # nslookup consul
Name: consul
Address 1: 10.0.2.2
/ # curl consul:8500
curl: (7) Failed to connect to consul port 8500: Connection refused
# ping consul
PING consul (10.0.2.2): 56 data bytes
64 bytes from 10.0.2.2: seq=0 ttl=64 time=0.085 ms
但是,如果我看得更深一点,我会发现它们是相连的,只是覆盖网络出于某种原因没有将流量传输到实际目的地。如果我直接转到实际的 consul ip,它就可以了。
/ # nslookup tasks.consul
Name: tasks.consul
Address 1: 10.0.2.3 0327c8e1bdd7.consul
/ # curl tasks.consul:8500
<a href="/ui/">Moved Permanently</a>.
我可以解决这个问题,从技术上讲,consul 只会有一个副本 运行,但我想知道为什么在我深入研究之前数据没有首先路由.我想不出还有什么可以尝试的。这是与此设置相关的各种信息。
> docker --version
Docker version 18.09.2, build 6247962
> docker network ls
NETWORK ID NAME DRIVER SCOPE
cee3cdfe1194 bridge bridge local
ypdmdyx2ulqt consul overlay swarm
5469e4538c2d docker_gwbridge bridge local
5fd928ea1e31 host host local
9v22k03pg9sl ingress overlay swarm
> docker network inspect consul
[
{
"Name": "consul",
"Id": "ypdmdyx2ulqt8l8glejfn2t25",
"Created": "2019-03-18T14:44:27.213690506-04:00",
"Scope": "swarm",
"Driver": "overlay",
"EnableIPv6": false,
"IPAM": {
"Driver": "default",
"Options": null,
"Config": [
{
"Subnet": "10.0.2.0/24",
"Gateway": "10.0.2.1"
}
]
},
"Internal": false,
"Attachable": true,
"Ingress": false,
"ConfigFrom": {
"Network": ""
},
"ConfigOnly": false,
"Containers": {
"0327c8e1bdd7ebb5a7871d16cf12df03240996f9e590509984783715a4c09193": {
"Name": "consul_consul.1.8v4bshotrco8fv3sclwx61106",
"EndpointID": "ae9d5ef1d19b67e297ebf40f6db410c33e4e3c0266c56e539e696be3ed4c81a5",
"MacAddress": "02:42:0a:00:02:03",
"IPv4Address": "10.0.2.3/24",
"IPv6Address": ""
},
"c21f5dfa93a2f43b747aedc64a343d94d6c1c2e6558d81bd4a52e2ba4b5fa90f": {
"Name": "proxy_proxy.sb6oindhmfukq4gcne6ynb2o2.4zvco02we58i3ulbyrsw1b2ok",
"EndpointID": "7596a208e0b05ba688f318814e24a2a1a3401765ed53ca421bf61c73e65c235a",
"MacAddress": "02:42:0a:00:02:06",
"IPv4Address": "10.0.2.6/24",
"IPv6Address": ""
},
"lb-consul": {
"Name": "consul-endpoint",
"EndpointID": "23e74716ef54f3fb6537b305176b790b4bc4132dda55f20588d7ce4ca71d7372",
"MacAddress": "02:42:0a:00:02:04",
"IPv4Address": "10.0.2.4/24",
"IPv6Address": ""
}
},
"Options": {
"com.docker.network.driver.overlay.vxlanid_list": "4099"
},
"Labels": {},
"Peers": [
{
"Name": "e11b9bd30b31",
"IP": "10.8.0.1"
}
]
}
]
> cat consul/docker-compose.yml
version: '3.1'
services:
consul:
image: progrium/consul
command: -server -bootstrap
networks:
- consul
volumes:
- consul:/data
deploy:
labels:
- "traefik.enable=false"
networks:
consul:
external: true
> cat proxy/docker-compose.yml
version: '3.3'
services:
proxy:
image: traefik:alpine
command: -c /traefik.toml
networks:
# We need an external proxy network and the consul network
# - proxy
- consul
ports:
# Send HTTP and HTTPS traffic to the proxy service
- 80:80
- 443:443
configs:
- traefik.toml
volumes:
- /var/run/docker.sock:/var/run/docker.sock
deploy:
# Deploy the service to all nodes that match our constraints
mode: global
placement:
constraints:
- "node.role==manager"
- "node.labels.proxy==true"
labels:
# Traefik uses labels to configure routing to your services
# Change the domain to your own
- "traefik.frontend.rule=Host:proxy.mcwebsite.net"
# Route traffic to the web interface hosted on port 8080 in the container
- "traefik.port=8080"
# Name the backend (not required here)
- "traefik.backend=traefik"
# Manually set entrypoints (not required here)
- "traefik.frontend.entryPoints=http,https"
configs:
# Traefik configuration file
traefik.toml:
file: ./traefik.toml
# This service will be using two external networks
networks:
# proxy:
# external: true
consul:
external: true
我的内核中禁用了两个可选的内核配置 CONFIG_IP_VS_PROTO_TCP 和 CONFIG_IP_VS_PROTO_UDP,您猜对了,它们启用了 tcp 和 udp 负载平衡。
我希望我能比我早四个小时检查一下。