I'm setting up a multi-node Apache Kafka cluster, but it seems my Zookeeper instances are having trouble communicating with each other.
I'm deploying the cluster as a Docker Swarm stack. I'm aware that, according to Confluent's docs, networking through swarm overlay drivers is not supported. So I'm publishing the ports on all interfaces. However, my Zookeeper instances keep throwing this exception:
EndOfStreamException: Unable to read additional data from client, it probably closed the socket: address = /127.0.0.1:{some_random_port}, session = 0x100fc7d362c0002
When I run `zookeeper-shell localhost:12181 ls /brokers/ids` against my Zookeeper instances, it seems that some of them manage to connect to a Kafka broker or two, but the instances are not aware of the brokers connected to other Zookeeper instances.
Here's my partial setup:
# Environment shared by the three ZooKeeper services below; each service
# merges it in with `<<: *zookeeper-envs` and then adds its own client port
# and server id.
x-zookeeper-envs: &zookeeper-envs
  ZOOKEEPER_TICK_TIME: 2000
  # Three server entries, one naming each zookeeper-N service defined below.
  # NOTE(review): these entries name ports 12888/13888 (22888/23888,
  # 32888/33888), while the `ports:` mappings below forward those host ports
  # to container ports 2888/3888 - confirm which ports the process binds.
  ZOOKEEPER_SERVER_1: zookeeper-1:12888:13888;12181
  ZOOKEEPER_SERVER_2: zookeeper-2:22888:23888;22181
  ZOOKEEPER_SERVER_3: zookeeper-3:32888:33888;32181

services:
  zookeeper-1:
    image: confluentinc/cp-zookeeper:7.1.1
    # The `network` anchor is not defined in this partial excerpt.
    <<: *network
    # host:container port mappings, quoted so they are always read as strings.
    ports:
      - '12181:12181'
      - '12888:2888'
      - '13888:3888'
    deploy:
      placement:
        # Schedule this service only on the node labelled worker=1.
        constraints: [node.labels.worker==1]
    environment:
      <<: *zookeeper-envs
      ZOOKEEPER_CLIENT_PORT: 12181
      ZOOKEEPER_SERVER_ID: 1

  zookeeper-2:
    image: confluentinc/cp-zookeeper:7.1.1
    # The `network` anchor is not defined in this partial excerpt.
    <<: *network
    ports:
      - '22181:22181'
      - '22888:2888'
      - '23888:3888'
    deploy:
      placement:
        # Schedule this service only on the node labelled worker=2.
        constraints: [node.labels.worker==2]
    environment:
      <<: *zookeeper-envs
      ZOOKEEPER_CLIENT_PORT: 22181
      ZOOKEEPER_SERVER_ID: 2

  zookeeper-3:
    image: confluentinc/cp-zookeeper:7.1.1
    # The `network` anchor is not defined in this partial excerpt.
    <<: *network
    ports:
      - '32181:32181'
      - '32888:2888'
      - '33888:3888'
    deploy:
      placement:
        # Schedule this service only on the node labelled worker=3.
        constraints: [node.labels.worker==3]
    environment:
      <<: *zookeeper-envs
      ZOOKEEPER_CLIENT_PORT: 32181
      ZOOKEEPER_SERVER_ID: 3
I have also tried setting the Zookeeper environment variables in this format: `ZOOKEEPER_SERVER_1: zookeeper-1:12888:13888::1;12181`, but to no avail.
I managed to get it running by using the `zookeeper` image instead of `confluentinc/cp-zookeeper`. Here's my setup, in case anyone else is struggling with setting up multi-node Zookeeper/Kafka on Swarm.
version: '3.7'

# Environment shared by the three ZooKeeper services; each service merges it
# in with `<<: *zookeeper-envs` and adds its own ZOO_MY_ID.
x-zookeeper-envs: &zookeeper-envs
  ZOO_TICK_TIME: 2000
  # Folded scalar: the three lines are joined with single spaces into one
  # space-separated string, one `server.N=` entry per zookeeper-N service.
  # NOTE(review): the entries name ports 12181/12888/13888 (and the 2xxxx /
  # 3xxxx equivalents), while the `ports:` mappings below forward those host
  # ports to container ports 2181/2888/3888 - confirm which path is used.
  ZOO_SERVERS: >-
    server.1=zookeeper-1:12888:13888;12181
    server.2=zookeeper-2:22888:23888;22181
    server.3=zookeeper-3:32888:33888;32181

# Environment shared by the three Kafka brokers; each broker merges it in with
# `<<: *kafka-envs` and adds its broker id and advertised listeners.
x-kafka-envs: &kafka-envs
  # Addresses use the same host:port pairs as the client ports listed in
  # ZOO_SERVERS above.
  KAFKA_ZOOKEEPER_CONNECT: zookeeper-1:12181,zookeeper-2:22181,zookeeper-3:32181
  # Declares the two listener names used in each broker's
  # KAFKA_ADVERTISED_LISTENERS.
  KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
  KAFKA_METRIC_REPORTERS: io.confluent.metrics.reporter.ConfluentMetricsReporter
  KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 3
  KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
  KAFKA_CONFLUENT_LICENSE_TOPIC_REPLICATION_FACTOR: 3
  KAFKA_CONFLUENT_BALANCER_TOPIC_REPLICATION_FACTOR: 3
  KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
  KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 3
  KAFKA_JMX_PORT: 9101
  KAFKA_JMX_HOSTNAME: localhost
  KAFKA_CONFLUENT_SCHEMA_REGISTRY_URL: http://schema-registry:8081
  # Default value; every broker below overrides this key with its own address.
  CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka-1:29092,kafka-2:29093,kafka-3:29094
  CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 3
  CONFLUENT_METRICS_ENABLE: 'true'
  CONFLUENT_SUPPORT_CUSTOMER_ID: 'anonymous'

services:
  zookeeper-1:
    image: zookeeper
    hostname: zookeeper-1
    # The `network` anchor is not defined in this excerpt.
    <<: *network
    # host:container port mappings, quoted so they are always read as strings.
    ports:
      - '12181:2181'
      - '12888:2888'
      - '13888:3888'
    deploy:
      placement:
        # Schedule this service only on the node labelled worker=1.
        constraints: [node.labels.worker==1]
    environment:
      <<: *zookeeper-envs
      # Matches the `server.1=` entry in ZOO_SERVERS.
      ZOO_MY_ID: 1

  zookeeper-2:
    image: zookeeper
    hostname: zookeeper-2
    # The `network` anchor is not defined in this excerpt.
    <<: *network
    ports:
      - '22181:2181'
      - '22888:2888'
      - '23888:3888'
    deploy:
      placement:
        # Schedule this service only on the node labelled worker=2.
        constraints: [node.labels.worker==2]
    environment:
      <<: *zookeeper-envs
      # Matches the `server.2=` entry in ZOO_SERVERS.
      ZOO_MY_ID: 2

  zookeeper-3:
    image: zookeeper
    hostname: zookeeper-3
    # The `network` anchor is not defined in this excerpt.
    <<: *network
    ports:
      - '32181:2181'
      - '32888:2888'
      - '33888:3888'
    deploy:
      placement:
        # Schedule this service only on the node labelled worker=3.
        constraints: [node.labels.worker==3]
    environment:
      <<: *zookeeper-envs
      # Matches the `server.3=` entry in ZOO_SERVERS.
      ZOO_MY_ID: 3

  kafka-1:
    image: confluentinc/cp-kafka
    deploy:
      placement:
        # Schedule this broker only on the node labelled worker=1.
        constraints: [node.labels.worker==1]
    ports:
      - '9092:9092'
      - '29092:29092'
    environment:
      <<: *kafka-envs
      KAFKA_BROKER_ID: 1
      # Listener names correspond to KAFKA_LISTENER_SECURITY_PROTOCOL_MAP.
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:29092,PLAINTEXT_HOST://localhost:9092
      # Explicit key: takes precedence over the value merged from kafka-envs.
      CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka-1:29092

  kafka-2:
    image: confluentinc/cp-kafka
    deploy:
      placement:
        # Schedule this broker only on the node labelled worker=2.
        constraints: [node.labels.worker==2]
    ports:
      - '9093:9093'
      - '29093:29093'
    environment:
      <<: *kafka-envs
      KAFKA_BROKER_ID: 2
      # Listener names correspond to KAFKA_LISTENER_SECURITY_PROTOCOL_MAP.
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:29093,PLAINTEXT_HOST://localhost:9093
      # Explicit key: takes precedence over the value merged from kafka-envs.
      CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka-2:29093

  kafka-3:
    image: confluentinc/cp-kafka
    deploy:
      placement:
        # Schedule this broker only on the node labelled worker=3.
        constraints: [node.labels.worker==3]
    ports:
      - '9094:9094'
      - '29094:29094'
    environment:
      <<: *kafka-envs
      KAFKA_BROKER_ID: 3
      # Listener names correspond to KAFKA_LISTENER_SECURITY_PROTOCOL_MAP.
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:29094,PLAINTEXT_HOST://localhost:9094
      # Explicit key: takes precedence over the value merged from kafka-envs.
      CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka-3:29094