Search code examples
Tags: go, redis, go-redis, redis-stack-server

go-redis always gets "dial tcp: i/o timeout"


I have built a Redis cluster of 6 nodes, listening on ports 7000-7005, using Docker. Each node uses a redis.conf like this:

# Listen on all network interfaces of the container.
bind 0.0.0.0
# Run this instance as a Redis Cluster node.
cluster-enabled yes
# File in which the node stores its own view of the cluster state.
cluster-config-file nodes.conf
# Time in milliseconds a node may be unreachable before it is considered failing.
cluster-node-timeout 5000
# Disable protected mode so that non-loopback clients are accepted.
protected-mode no

The `docker-compose.yaml` file:

# Six Redis nodes (client ports 7000-7005), each with a static IP on a custom
# bridge network, plus a one-shot container that forms the cluster.
version: "3.9"
services:
  redis-node-1:
    container_name: redis-node-1
    image: redis/redis-stack-server:latest
    volumes:
      # The same redis.conf is mounted into every node; data is kept per node.
      - ./build/redis/redis.conf:/usr/local/etc/redis/redis.conf
      - ./data/redis-node-1:/data
    ports:
      # Client port, and client port + 10000 (the default Redis Cluster bus port).
      - "7000:7000"
      - "17000:17000"
    # Replaces the image's default entrypoint with a plain redis-server call;
    # --port selects this node's client port.
    entrypoint: [ "redis-server", "/usr/local/etc/redis/redis.conf", --port,"7000" ]
    networks:
      redis_cluster_network:
        # Static address used by the cluster-creator command below.
        ipv4_address: 173.18.0.5

  # Nodes 2-6 follow the same pattern as redis-node-1 with their own port,
  # data directory and static IP.
  redis-node-2:
    container_name: redis-node-2
    image: redis/redis-stack-server:latest
    volumes:
      - ./build/redis/redis.conf:/usr/local/etc/redis/redis.conf
      - ./data/redis-node-2:/data
    ports:
      - "7001:7001"
      - "17001:17001"
    entrypoint: [ "redis-server", "/usr/local/etc/redis/redis.conf", --port,"7001" ]
    networks:
      redis_cluster_network:
        ipv4_address: 173.18.0.6

  redis-node-3:
    container_name: redis-node-3
    image: redis/redis-stack-server:latest
    volumes:
      - ./build/redis/redis.conf:/usr/local/etc/redis/redis.conf
      - ./data/redis-node-3:/data
    ports:
      - "7002:7002"
      - "17002:17002"
    entrypoint: [ "redis-server", "/usr/local/etc/redis/redis.conf", --port,"7002" ]
    networks:
      redis_cluster_network:
        ipv4_address: 173.18.0.7

  redis-node-4:
    container_name: redis-node-4
    image: redis/redis-stack-server:latest
    volumes:
      - ./build/redis/redis.conf:/usr/local/etc/redis/redis.conf
      - ./data/redis-node-4:/data
    ports:
      - "7003:7003"
      - "17003:17003"
    entrypoint: [ "redis-server", "/usr/local/etc/redis/redis.conf", --port,"7003" ]
    # Short-form depends_on only orders container start; it does not wait for
    # the listed nodes to accept connections.
    depends_on:
      - redis-node-1
      - redis-node-2
      - redis-node-3
    networks:
      redis_cluster_network:
        ipv4_address: 173.18.0.8

  redis-node-5:
    container_name: redis-node-5
    image: redis/redis-stack-server:latest
    volumes:
      - ./build/redis/redis.conf:/usr/local/etc/redis/redis.conf
      - ./data/redis-node-5:/data
    ports:
      - "7004:7004"
      - "17004:17004"
    entrypoint: [ "redis-server", "/usr/local/etc/redis/redis.conf", --port,"7004" ]
    depends_on:
      - redis-node-1
      - redis-node-2
      - redis-node-3
    networks:
      redis_cluster_network:
        ipv4_address: 173.18.0.9

  redis-node-6:
    container_name: redis-node-6
    image: redis/redis-stack-server:latest
    volumes:
      - ./build/redis/redis.conf:/usr/local/etc/redis/redis.conf
      - ./data/redis-node-6:/data
    ports:
      - "7005:7005"
      - "17005:17005"
    entrypoint: [ "redis-server", "/usr/local/etc/redis/redis.conf", --port,"7005" ]
    depends_on:
      - redis-node-1
      - redis-node-2
      - redis-node-3
    networks:
      redis_cluster_network:
        ipv4_address: 173.18.0.10

  # One-shot helper that joins the six nodes into a cluster.
  redis-cluster-creator:
    container_name: redis-cluster-creator
    image: redis/redis-stack-server:latest
    # The nodes are addressed by their static container IPs, so those IPs are
    # what the cluster records for each node. --cluster-replicas 1 requests one
    # replica per master; --cluster-yes skips the interactive confirmation.
    # NOTE(review): -p 7002 looks redundant because every node address is
    # listed explicitly — confirm.
    command: 'redis-cli -p 7002 --cluster create 173.18.0.5:7000 173.18.0.6:7001 173.18.0.7:7002 173.18.0.8:7003 173.18.0.9:7004 173.18.0.10:7005 --cluster-replicas 1 --cluster-yes'
    # NOTE(review): start order only; the nodes may not be ready when this
    # command runs — confirm whether a healthcheck is needed.
    depends_on:
      - redis-node-1
      - redis-node-2
      - redis-node-3
      - redis-node-4
      - redis-node-5
      - redis-node-6
    networks:
      redis_cluster_network:
        ipv4_address: 173.18.0.11
networks:
  redis_cluster_network:
    driver: bridge
    ipam:
      driver: default
      config:
          # NOTE(review): 173.18.0.0/16 lies outside the RFC 1918 private
          # ranges (172.16.0.0/12 is the private one) — confirm this is intended.
          - subnet: 173.18.0.0/16

But when I try to connect with go-redis by using:

// Build a cluster-aware client from the application's cache configuration.
redisClient := redis.NewClusterClient(&redis.ClusterOptions{
    // Seed node addresses — presumably host:port strings; confirm against cfg.
    Addrs:         cfg.RedisCache.Address,
    Password:      cfg.RedisCache.Password,
    PoolSize:      cfg.RedisCache.PoolSize,
    MaxRetries:    cfg.RedisCache.MaxRetries,
    // Allow read-only commands to be served by replica nodes.
    ReadOnly:      true,
    // Route read-only commands to a random master or replica node.
    RouteRandomly: true,
})
// Ping every node known to the client; err receives the first failure, if any.
err = redisClient.ForEachShard(ctx, func(ctx context.Context, shard *redis.Client) error {
    return shard.Ping(ctx).Err()
})
// Treat an unreachable node as fatal (apiLogger.Fatal presumably terminates
// the process — confirm against the logger implementation).
if err != nil {
    apiLogger.Fatal(err)
}

I always get an error from one of the nodes, such as: dial tcp 173.18.0.7:7000: i/o timeout

Have I missed some configuration?

I have read the go-redis guide. It says that I should wait for all Redis nodes to be running before the app starts in docker-compose. However, I use docker-compose only for Redis; my app is not included. I'm sure that all Redis nodes have finished starting, yet the error still happens.


Solution

  • There are too many moving parts. The network part for example is totally unnecessary to get this running:

    version: "3.9"

    # Template shared by all six Redis nodes; each service merges it in and
    # only adds its own container name and data volume.
    x-vars:
      redis-shared-config: &redis-shared-config
        image: redis/redis-stack-server:latest
        entrypoint:
          - "redis-server"
          - "/usr/local/etc/redis/redis.conf"
          - "--port"
          - "6379"
        configs:
          - source: redis-conf
            target: /usr/local/etc/redis/redis.conf
        # A node counts as healthy once it answers a PING from redis-cli.
        healthcheck:
          test:
            - "CMD"
            - "redis-cli"
            - "ping"
          interval: 5s
          timeout: 5s
          retries: 5

    # The single redis.conf handed to every node.
    configs:
      redis-conf:
        file: ./build/redis/redis.conf

    # One named data volume per node.
    volumes:
      redis-data-1:
      redis-data-2:
      redis-data-3:
      redis-data-4:
      redis-data-5:
      redis-data-6:

    services:
      redis-node-1:
        <<: *redis-shared-config
        container_name: redis-1
        volumes:
          - redis-data-1:/data

      redis-node-2:
        <<: *redis-shared-config
        container_name: redis-2
        volumes:
          - redis-data-2:/data

      redis-node-3:
        <<: *redis-shared-config
        container_name: redis-3
        volumes:
          - redis-data-3:/data

      redis-node-4:
        <<: *redis-shared-config
        container_name: redis-4
        volumes:
          - redis-data-4:/data

      redis-node-5:
        <<: *redis-shared-config
        container_name: redis-5
        volumes:
          - redis-data-5:/data

      redis-node-6:
        <<: *redis-shared-config
        container_name: redis-6
        volumes:
          - redis-data-6:/data

      redis-cluster-creator:
        image: redis/redis-stack-server:latest
        container_name: redis-cluster-creator
        # Wait until every node passes its healthcheck before touching the cluster.
        depends_on:
          redis-node-1:
            condition: service_healthy
          redis-node-2:
            condition: service_healthy
          redis-node-3:
            condition: service_healthy
          redis-node-4:
            condition: service_healthy
          redis-node-5:
            condition: service_healthy
          redis-node-6:
            condition: service_healthy
        # Idempotent cluster bootstrap, so the app can depend on this service
        # completing successfully on every `up`:
        #   - `--cluster check` succeeds when the cluster already exists; the
        #     shell short-circuits at `||` and the service exits with 0.
        #   - Otherwise the check fails and `--cluster create` runs, exiting
        #     with 0 once the cluster has been set up.
        command: |
          bash -c "redis-cli --cluster check redis-node-1:6379 || (echo Setting up redis cluster; redis-cli --cluster create redis-node-1:6379 redis-node-2:6379 redis-node-3:6379 redis-node-4:6379 redis-node-5:6379 redis-node-6:6379 --cluster-replicas 1 --cluster-yes)"

      app:
        build:
          context: ./build/app
        image: myapp:latest
        platform: linux/amd64
        # The app reaches the nodes by service name on the Compose network.
        environment:
          - MY_APP_REDIS_ADDRESSES=redis-node-1:6379,redis-node-2:6379,redis-node-3:6379,redis-node-4:6379,redis-node-5:6379,redis-node-6:6379
        # Start only after the cluster has been created and all nodes are healthy.
        depends_on:
          redis-cluster-creator:
            condition: service_completed_successfully
          redis-node-1:
            condition: service_healthy
          redis-node-2:
            condition: service_healthy
          redis-node-3:
            condition: service_healthy
          redis-node-4:
            condition: service_healthy
          redis-node-5:
            condition: service_healthy
          redis-node-6:
            condition: service_healthy
    

    I put the app in here not for mere convenience, but as a necessity: You instructed redis to bind to 0.0.0.0, which roughly translates to

    Every IP address you can get a hold of.

    Now, here is what happens under the hood, as far as I understand it: You contact one of the cluster nodes and ask for a key (everything is behind a key in redis), and you are redirected to the master of the shard holding said value. But the cluster only knows the internal IP of said master — in the example you have given, 173.18.0.7. However, that IP range is most likely not reachable from your network (those are PUBLIC IPs), let alone from the host machine, unless you happen to work for, or be a customer of, Mediacom:

    $ whois 173.18.0.7
    % IANA WHOIS server
    % for more information on IANA, visit http://www.iana.org
    % This query returned 1 object
    
    refer:        whois.arin.net
    
    inetnum:      173.0.0.0 - 173.255.255.255
    organisation: ARIN
    status:       ALLOCATED
    
    whois:        whois.arin.net
    
    changed:      2008-02
    source:       IANA
    
    # whois.arin.net
    
    NetRange:       173.16.0.0 - 173.31.255.255
    CIDR:           173.16.0.0/12
    NetName:        MEDIACOM-RESIDENTIAL-CUST
    NetHandle:      NET-173-16-0-0-1
    Parent:         NET173 (NET-173-0-0-0-0)
    NetType:        Direct Allocation
    OriginAS:       
    Organization:   Mediacom Communications Corp (MCC-244)
    RegDate:        2008-05-19
    Updated:        2012-02-24
    Ref:            https://rdap.arin.net/registry/ip/173.16.0.0
    
    
    OrgName:        Mediacom Communications Corp
    OrgId:          MCC-244
    Address:        1 Mediacom Way
    City:           Mediacom Park
    StateProv:      NY
    PostalCode:     10918
    Country:        US
    RegDate:        2008-02-05
    Updated:        2018-08-29
    Comment:        For abuse issues contact [email protected]
    Ref:            https://rdap.arin.net/registry/entity/MCC-244
    
    <skipped for brevity>
    

    Hence, the most probable culprit is the rather weird network config.

    You can find the docker compose as well as a small Go application for testing and an accompanying Dockerfile at https://github.com/mwmahlberg/redis-78155633.