Search code examples
Tags: docker, apache-zookeeper, docker-swarm

Multi-node Zookeeper on Docker Swarm


I'm setting up a multi-node Apache Kafka cluster, but it seems my Zookeeper instances are having trouble communicating with each other.

I'm deploying the cluster as a Docker Swarm stack. I'm aware that, according to Confluent's docs, networking through swarm overlay drivers is not supported. So I'm publishing the ports on all interfaces. However, my Zookeeper instances keep throwing this exception:

EndOfStreamException: Unable to read additional data from client, it probably closed the socket: address = /127.0.0.1:{some_random_port}, session = 0x100fc7d362c0002

When I try to execute zookeeper-shell localhost:12181 ls /brokers/ids against my Zookeeper instances, it seems that some of my Zookeeper instances manage to connect to a Kafka broker or two, but the instances are not aware of the brokers connected to other Zookeeper instances.

Here's my partial setup:

    # Environment entries shared by every ZooKeeper service below; each service
    # pulls them in through the `<<: *zookeeper-envs` merge key.
    # NOTE(review): the per-server ZOOKEEPER_SERVER_<n> names should be checked
    # against the cp-zookeeper image docs, which appear to expect one combined
    # ZOOKEEPER_SERVERS list instead - confirm.
    x-zookeeper-envs: &zookeeper-envs
      ZOOKEEPER_TICK_TIME: '2000'
      ZOOKEEPER_SERVER_1: 'zookeeper-1:12888:13888;12181'
      ZOOKEEPER_SERVER_2: 'zookeeper-2:22888:23888;22181'
      ZOOKEEPER_SERVER_3: 'zookeeper-3:32888:33888;32181'
    
    services:
      # ZooKeeper server 1, constrained to the node labelled worker==1.
      zookeeper-1:
        image: confluentinc/cp-zookeeper:7.1.1
        # NOTE(review): the `network` anchor is not defined in this excerpt.
        <<: *network
        # Published (host) port : container port.
        ports:
          - '12181:12181'
          - '12888:2888'
          - '13888:3888'
        deploy:
          placement:
            constraints:
              - node.labels.worker==1
        environment:
          <<: *zookeeper-envs
          ZOOKEEPER_CLIENT_PORT: '12181'
          ZOOKEEPER_SERVER_ID: '1'
    
      # ZooKeeper server 2, constrained to the node labelled worker==2.
      zookeeper-2:
        image: confluentinc/cp-zookeeper:7.1.1
        # NOTE(review): the `network` anchor is not defined in this excerpt.
        <<: *network
        # Published (host) port : container port.
        ports:
          - '22181:22181'
          - '22888:2888'
          - '23888:3888'
        deploy:
          placement:
            constraints:
              - node.labels.worker==2
        environment:
          <<: *zookeeper-envs
          ZOOKEEPER_CLIENT_PORT: '22181'
          ZOOKEEPER_SERVER_ID: '2'
    
      # ZooKeeper server 3, constrained to the node labelled worker==3.
      zookeeper-3:
        image: confluentinc/cp-zookeeper:7.1.1
        # NOTE(review): the `network` anchor is not defined in this excerpt.
        <<: *network
        # Published (host) port : container port.
        ports:
          - '32181:32181'
          - '32888:2888'
          - '33888:3888'
        deploy:
          placement:
            constraints:
              - node.labels.worker==3
        environment:
          <<: *zookeeper-envs
          ZOOKEEPER_CLIENT_PORT: '32181'
          ZOOKEEPER_SERVER_ID: '3'

I have also tried setting the Zookeeper environment variables in this format: ZOOKEEPER_SERVER_1: zookeeper-1:12888:13888::1;12181, but to no avail.


Solution

  • I managed to get it running by using the official zookeeper image instead of confluentinc/cp-zookeeper. Here's my setup, in case anyone else is struggling to set up a multi-node ZooKeeper/Kafka cluster on Swarm.

    version: '3.7'

    # Environment entries shared by the three ZooKeeper services, which merge
    # them in with `<<: *zookeeper-envs`.
    x-zookeeper-envs: &zookeeper-envs
      ZOO_TICK_TIME: '2000'
      # One `server.<id>=...` entry per ensemble member. The `>-` folded scalar
      # joins the lines with single spaces, giving one space-separated string.
      ZOO_SERVERS: >-
        server.1=zookeeper-1:12888:13888;12181
        server.2=zookeeper-2:22888:23888;22181
        server.3=zookeeper-3:32888:33888;32181
    
    # Broker environment shared by kafka-1, kafka-2 and kafka-3. Each service
    # merges this in and then adds its own broker id and advertised listeners.
    x-kafka-envs: &kafka-envs
      # Client endpoints of the three ZooKeeper services.
      KAFKA_ZOOKEEPER_CONNECT: 'zookeeper-1:12181,zookeeper-2:22181,zookeeper-3:32181'
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: 'PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT'
      KAFKA_METRIC_REPORTERS: 'io.confluent.metrics.reporter.ConfluentMetricsReporter'
      # Replication factors of 3 line up with the three brokers in this stack.
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: '3'
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: '0'
      KAFKA_CONFLUENT_LICENSE_TOPIC_REPLICATION_FACTOR: '3'
      KAFKA_CONFLUENT_BALANCER_TOPIC_REPLICATION_FACTOR: '3'
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: '1'
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: '3'
      KAFKA_JMX_PORT: '9101'
      KAFKA_JMX_HOSTNAME: 'localhost'
      # NOTE(review): no `schema-registry` service is defined in this excerpt.
      KAFKA_CONFLUENT_SCHEMA_REGISTRY_URL: 'http://schema-registry:8081'
      # Default only: every kafka-N service overrides this with its own address.
      CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: 'kafka-1:29092,kafka-2:29093,kafka-3:29094'
      CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: '3'
      CONFLUENT_METRICS_ENABLE: 'true'
      CONFLUENT_SUPPORT_CUSTOMER_ID: 'anonymous'
    
    services:
      # ZooKeeper ensemble member 1, constrained to the node labelled worker==1.
      zookeeper-1:
        image: zookeeper
        hostname: zookeeper-1
        # NOTE(review): the `network` anchor is not defined in this excerpt; it
        # has to be declared elsewhere in the file.
        <<: *network
        # The server.1 entry in ZOO_SERVERS assigns this server quorum port
        # 12888, election port 13888 and client port 12181, so the published
        # ports must target those container ports. The image defaults
        # (2888/3888/2181) are not in use with this ZOO_SERVERS value.
        ports:
          - '12181:12181'
          - '12888:12888'
          - '13888:13888'
        deploy:
          placement:
            constraints:
              - node.labels.worker==1
        environment:
          <<: *zookeeper-envs
          # Must match this server's `server.<id>` entry in ZOO_SERVERS.
          ZOO_MY_ID: 1
    
      # ZooKeeper ensemble member 2, constrained to the node labelled worker==2.
      zookeeper-2:
        image: zookeeper
        hostname: zookeeper-2
        # NOTE(review): the `network` anchor is not defined in this excerpt; it
        # has to be declared elsewhere in the file.
        <<: *network
        # The server.2 entry in ZOO_SERVERS assigns this server quorum port
        # 22888, election port 23888 and client port 22181, so the published
        # ports must target those container ports. The image defaults
        # (2888/3888/2181) are not in use with this ZOO_SERVERS value.
        ports:
          - '22181:22181'
          - '22888:22888'
          - '23888:23888'
        deploy:
          placement:
            constraints:
              - node.labels.worker==2
        environment:
          <<: *zookeeper-envs
          # Must match this server's `server.<id>` entry in ZOO_SERVERS.
          ZOO_MY_ID: 2
    
      # ZooKeeper ensemble member 3, constrained to the node labelled worker==3.
      zookeeper-3:
        image: zookeeper
        hostname: zookeeper-3
        # NOTE(review): the `network` anchor is not defined in this excerpt; it
        # has to be declared elsewhere in the file.
        <<: *network
        # The server.3 entry in ZOO_SERVERS assigns this server quorum port
        # 32888, election port 33888 and client port 32181, so the published
        # ports must target those container ports. The image defaults
        # (2888/3888/2181) are not in use with this ZOO_SERVERS value.
        ports:
          - '32181:32181'
          - '32888:32888'
          - '33888:33888'
        deploy:
          placement:
            constraints:
              - node.labels.worker==3
        environment:
          <<: *zookeeper-envs
          # Must match this server's `server.<id>` entry in ZOO_SERVERS.
          ZOO_MY_ID: 3
    
      # Kafka broker 1, constrained to the node labelled worker==1.
      kafka-1:
        # Pinned rather than left on the floating `latest` tag: this stack
        # relies on KAFKA_ZOOKEEPER_CONNECT, and newer Confluent Platform
        # releases no longer support ZooKeeper mode. 7.1.1 is the Confluent
        # Platform release the question on this page was using.
        image: confluentinc/cp-kafka:7.1.1
        deploy:
          placement:
            constraints:
              - node.labels.worker==1
        # Quoted so the mappings are always read as strings.
        ports:
          - '9092:9092'
          - '29092:29092'
        environment:
          <<: *kafka-envs
          KAFKA_BROKER_ID: 1
          KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-1:29092,PLAINTEXT_HOST://localhost:9092
          # Overrides the shared default from the kafka-envs anchor.
          CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka-1:29092
    
      # Kafka broker 2, constrained to the node labelled worker==2.
      kafka-2:
        # Pinned rather than left on the floating `latest` tag: this stack
        # relies on KAFKA_ZOOKEEPER_CONNECT, and newer Confluent Platform
        # releases no longer support ZooKeeper mode. 7.1.1 is the Confluent
        # Platform release the question on this page was using.
        image: confluentinc/cp-kafka:7.1.1
        deploy:
          placement:
            constraints:
              - node.labels.worker==2
        # Quoted so the mappings are always read as strings.
        ports:
          - '9093:9093'
          - '29093:29093'
        environment:
          <<: *kafka-envs
          KAFKA_BROKER_ID: 2
          KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-2:29093,PLAINTEXT_HOST://localhost:9093
          # Overrides the shared default from the kafka-envs anchor.
          CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka-2:29093
    
      # Kafka broker 3, constrained to the node labelled worker==3.
      kafka-3:
        # Pinned rather than left on the floating `latest` tag: this stack
        # relies on KAFKA_ZOOKEEPER_CONNECT, and newer Confluent Platform
        # releases no longer support ZooKeeper mode. 7.1.1 is the Confluent
        # Platform release the question on this page was using.
        image: confluentinc/cp-kafka:7.1.1
        deploy:
          placement:
            constraints:
              - node.labels.worker==3
        # Quoted so the mappings are always read as strings.
        ports:
          - '9094:9094'
          - '29094:29094'
        environment:
          <<: *kafka-envs
          KAFKA_BROKER_ID: 3
          KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-3:29094,PLAINTEXT_HOST://localhost:9094
          # Overrides the shared default from the kafka-envs anchor.
          CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka-3:29094