Search code examples
docker-compose consul hashicorp-vault

HashiCorp Vault error: "check unable to talk with Consul backend"


I have a 3-node Consul cluster running on Docker with the following config:

{
  "bootstrap_expect": 3,
  "client_addr": "0.0.0.0",
  "datacenter": "Datacenter1",
  "data_dir": "/consul/data",
  "domain": "consul",
  "enable_script_checks": true,
  "dns_config": {
    "enable_truncate": true,
    "only_passing": true
  },
  "enable_syslog": false,
  "encrypt": "myfinetoken==",
  "leave_on_terminate": true,
  "log_level": "INFO",
  "rejoin_after_leave": true,
  "server": true,
  "ui": true,
  "connect": {
    "enabled": true
  },
  "start_join": [
    "consul-server-1",
    "consul-server-2",
    "consul-server-3"
  ]
}

In addition, I have a Consul client agent that Vault talks to, with the following config:

{
  "server": false,
  "datacenter": "Datacenter1",
  "data_dir": "/consul/data",
  "client_addr": "0.0.0.0",
  "bind_addr": "0.0.0.0",
  "encrypt": "myfinetoken==",
  "log_level": "INFO",
  "enable_script_checks": true,
  "enable_syslog": false,
  "leave_on_terminate": true,
  "connect": {
    "enabled": true
  },
  "retry_join": [
    "consul-server-1",
    "consul-server-2",
    "consul-server-3"
  ]
}

Vault itself runs in a container with the following storage configuration:

  "storage": {
    "consul": {
      "address": "consul-client-1:8500",
      "path": "vault/"
    }
  },
  "ui": true
}

The Consul cluster runs fine, even with the joined client: I can use the GUI and the API without problems. But when I start the Vault server (one node with one corresponding Consul client), I receive the following error in Vault:

2022-09-12T10:37:41.303Z [WARN]  service_registration.consul: reconcile unable to talk with Consul backend: error="service registration failed: Unexpected response code: 400 (Invalid service address)"
2022-09-12T10:37:42.160862000Z 2022-09-12T10:37:42.160Z [WARN]  service_registration.consul: check unable to talk with Consul backend: error="Unexpected response code: 404 (Unknown check ID \"vault:0.0.0.0:8200:vault-sealed-check\". Ensure that the check ID is passed, not the check name.)"

The Consul client logs:

2022-09-12T10:42:49.231Z [ERROR] agent.http: Request error: method=PUT url=/v1/agent/check/fail/vault:0.0.0.0:8200:vault-sealed-check?note=Vault+Sealed from=172.27.0.7:49160 error="Unknown check ID "vault:0.0.0.0:8200:vault-sealed-check". Ensure that the check ID is passed, not the check name."
2022-09-12T10:42:50.236301300Z 2022-09-12T10:42:50.235Z [ERROR] agent.http: Request error: method=PUT url=/v1/agent/check/fail/vault:0.0.0.0:8200:vault-sealed-check?note=Vault+Sealed from=172.27.0.7:49160 error="Unknown check ID "vault:0.0.0.0:8200:vault-sealed-check". Ensure that the check ID is passed, not the check name."

Any help is kindly appreciated.


Solution

  • The Raft storage backend is working so far. I am using the following docker-compose service definition:

    # Single Vault server node built from ./vault/Dockerfile.
    vault-server-1:
        build:
          context: ./vault
          dockerfile: Dockerfile
        ports:
          # 8200 = client API listener, 8201 = server-to-server cluster port.
          - "8200:8200"
          - "8201:8201"
        command: server -config=/vault/config/vault-config-server-1.hcl
        environment:
          # Address the vault CLI inside the container uses to reach the server.
          VAULT_ADDR: "http://vault-server-1:8200"
          # Address advertised to clients for redirects. It must point at the
          # API listener (same scheme and port as VAULT_ADDR), not at the
          # cluster port 8201.
          VAULT_API_ADDR: "http://vault-server-1:8200"
        cap_add:
          - IPC_LOCK
        volumes:
          - raft-data:/var/raftdata
        # NOTE(review): with raft storage this dependency looks like a leftover
        # from the Consul-backed setup - confirm before removing.
        depends_on:
          - consul-client-1
    

    Alongside it, the Vault config:

    # Integrated (raft) storage: data lives in the local directory below and
    # this node identifies itself to the raft cluster as "raft_node_1".
    storage "raft" {
      path = "/var/raftdata"
      node_id = "raft_node_1"
    }
    
    # Address advertised to other Vault servers for cluster traffic. This must
    # be the cluster port (8201, published by the compose service), not the
    # API port 8200.
    cluster_addr = "https://vault-server-1:8201"
    

    The Docker image is built with:

    # Vault server image with curl and a raft data directory baked in.
    FROM vault:1.11.3
    
    # --no-cache avoids leaving the apk package index in the image layer.
    RUN apk add --no-cache curl
    
    # Raft data directory (matches the storage "raft" path). Give it to the
    # vault user instead of making it world-writable.
    RUN mkdir -p /var/raftdata \
        && chown vault:vault /var/raftdata
    
    COPY ./config/vault-config.json /vault/config/vault-config.json
    COPY ./config/vault-config-server-1.hcl /vault/config/vault-config-server-1.hcl
    

    Building on this solution, I now have a version suitable for a dockerized production Vault; see https://github.com/ahmetkaftan/docker-vault

    Full setup

    dockerfile

    # Production Vault image: ships the server config and its TLS material.
    FROM vault:1.11.9
    LABEL maintainer="[email protected]"
    
    # COPY rather than ADD: these are plain local files/directories, so ADD's
    # archive-extraction and URL features are not wanted.
    COPY --chown=vault:vault config.hcl /vault/config/config.hcl
    COPY --chown=vault:vault certs /vault/config/certs
    
    # add Digicert root certificate, so alpine will be able to validate vaults certificate chain https://de.ssl-tools.net/subjects/253b2763b69868d3e868968efbcc68c6c444d411
    COPY digicert_68f22b1a6298f7da191e6149ed8de0efff54ad8c.pem /usr/local/share/ca-certificates/
    RUN update-ca-certificates
    

    config.hcl

    # Vault server configuration: TLS listener, raft storage, telemetry.

    # API listener on all interfaces, port 8200, served over TLS with the
    # certificate and key that the image places under /vault/config/certs.
    listener "tcp" {
      address     = "0.0.0.0:8200"
      tls_cert_file = "/vault/config/certs/cert.pem"
      tls_key_file  = "/vault/config/certs/key.pem"
    }
    
    # Integrated (raft) storage under /vault/file. No node_id is set here; the
    # compose file supplies it through the VAULT_RAFT_NODE_ID variable.
    storage "raft" {
      path = "/vault/file"
    }
    
    # based on: https://developer.hashicorp.com/vault/tutorials/monitoring/monitor-telemetry-grafana-prometheus
    
    telemetry {
      disable_hostname = true
      prometheus_retention_time = "12h"
    }
    
    # NOTE(review): mlock is disabled here although the compose service still
    # adds the IPC_LOCK capability - confirm which of the two is intended.
    disable_mlock = "true"
    

    docker-compose.yml

    # Compose file for a single Vault node using raft storage.
    # Expects NODE_ID, VAULT_DNS and DEPLOYMENT_ENV to be set in the environment.
    version: "3.4"
    
    services:
      vault:
        image: docker.acme.de/acme/vault:1.11.9.1
        restart: always
        environment:
          # https://developer.hashicorp.com/vault/docs/configuration/storage/raft#node_id
          - VAULT_RAFT_NODE_ID=${NODE_ID}
          # https://developer.hashicorp.com/vault/docs/configuration#ui
          - VAULT_UI=true
          # https://developer.hashicorp.com/vault/docs/configuration#api_addr
          - VAULT_API_ADDR=https://${VAULT_DNS}:8200
          # https://developer.hashicorp.com/vault/docs/configuration#cluster_addr
          - VAULT_CLUSTER_ADDR=https://${VAULT_DNS}:8201
          - VAULT_LOG_LEVEL=warn
        ports:
          # Quoted so the HOST:CONTAINER pairs are always read as strings.
          - "8200:8200"
          - "8201:8201"
        volumes:
          # Raft data directory (storage path in config.hcl) kept on the host.
          - /var/lib/vault-${DEPLOYMENT_ENV}/file:/vault/file
        cap_add:
          - IPC_LOCK
        command: vault server --config=/vault/config/config.hcl
        healthcheck:
          # Healthy when the first response header line reports HTTP 200.
          # standbyok=true makes a standby node count as healthy too, see
          # https://developer.hashicorp.com/vault/api-docs/system/health#429
          # "$$" is Compose's escape for a literal "$" passed to awk.
          # The folded scalar joins the lines below into one shell command.
          test: >-
            wget -qS https://${VAULT_DNS}:8200/v1/sys/health?standbyok=true 2>&1
            | awk 'NR==1{print $$2}'
            | grep 200
          interval: 30s
          timeout: 3s
          retries: 50
          start_period: 30s