Search code examples
postgresqldockerdocker-network

Docker network: Connect to Postgres DB from DataHub or OpenMetadata locally


I try to run DataHub (https://datahub.io/) and / or OpenMetaData (https://open-metadata.org/) locally for testing. Both are used via docker-compose files.

For OpenMetaData I used:

version: "3.9"
volumes:
  ingestion-volume-dag-airflow:
  ingestion-volume-dags:
  ingestion-volume-tmp:
services:
  mysql:
    container_name: openmetadata_mysql
    image: openmetadata/db:0.13.0
    restart: always
    environment:
      MYSQL_ROOT_PASSWORD: password
    expose:
      - 3306
    volumes:
     - ./docker-volume/db-data:/var/lib/mysql
    networks:
      - app_net
    healthcheck:
      test: mysql --user=root --password=$$MYSQL_ROOT_PASSWORD --silent --execute "use openmetadata_db"
      interval: 15s
      timeout: 10s
      retries: 10

  elasticsearch:
    container_name: openmetadata_elasticsearch
    image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
    environment:
      - discovery.type=single-node
      - ES_JAVA_OPTS=-Xms1024m -Xmx1024m
    networks:
      - app_net
    ports:
      - "9200:9200"
      - "9300:9300"

  openmetadata-server:
    container_name: openmetadata_server
    restart: always
    image: openmetadata/server:0.13.0
    environment:
      # OpenMetadata Server Authentication Configuration
      AUTHORIZER_CLASS_NAME: ${AUTHORIZER_CLASS_NAME:-org.openmetadata.service.security.DefaultAuthorizer}
      AUTHORIZER_REQUEST_FILTER: ${AUTHORIZER_REQUEST_FILTER:-org.openmetadata.service.security.JwtFilter}
      AUTHORIZER_ADMIN_PRINCIPALS: ${AUTHORIZER_ADMIN_PRINCIPALS:-[admin]}
      AUTHORIZER_ALLOWED_REGISTRATION_DOMAIN: ${AUTHORIZER_ALLOWED_REGISTRATION_DOMAIN:-["all"]}
      AUTHORIZER_INGESTION_PRINCIPALS: ${AUTHORIZER_INGESTION_PRINCIPALS:-[ingestion-bot]}
      AUTHORIZER_PRINCIPAL_DOMAIN: ${AUTHORIZER_PRINCIPAL_DOMAIN:-"openmetadata.org"}
      AUTHORIZER_ENFORCE_PRINCIPAL_DOMAIN: ${AUTHORIZER_ENFORCE_PRINCIPAL_DOMAIN:-false}
      AUTHORIZER_ENABLE_SECURE_SOCKET: ${AUTHORIZER_ENABLE_SECURE_SOCKET:-false}
      AUTHENTICATION_PROVIDER: ${AUTHENTICATION_PROVIDER:-basic}
      CUSTOM_OIDC_AUTHENTICATION_PROVIDER_NAME: ${CUSTOM_OIDC_AUTHENTICATION_PROVIDER_NAME:-""}
      AUTHENTICATION_PUBLIC_KEYS: ${AUTHENTICATION_PUBLIC_KEYS:-[http://localhost:8585/api/v1/config/jwks]}
      AUTHENTICATION_AUTHORITY: ${AUTHENTICATION_AUTHORITY:-https://accounts.google.com}
      AUTHENTICATION_CLIENT_ID: ${AUTHENTICATION_CLIENT_ID:-""}
      AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""}
      AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
      AUTHENTICATION_ENABLE_SELF_SIGNUP: ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
      # JWT Configuration
      RSA_PUBLIC_KEY_FILE_PATH: ${RSA_PUBLIC_KEY_FILE_PATH:-"./conf/public_key.der"}
      RSA_PRIVATE_KEY_FILE_PATH: ${RSA_PRIVATE_KEY_FILE_PATH:-"./conf/private_key.der"}
      JWT_ISSUER: ${JWT_ISSUER:-"open-metadata.org"}
      JWT_KEY_ID: ${JWT_KEY_ID:-"Gb389a-9f76-gdjs-a92j-0242bk94356"}
      # OpenMetadata Server Airflow Configuration
      AIRFLOW_HOST: ${AIRFLOW_HOST:-http://ingestion:8080}
      SERVER_HOST_API_URL: ${SERVER_HOST_API_URL:-http://openmetadata-server:8585/api}
      AIRFLOW_AUTH_PROVIDER: ${AIRFLOW_AUTH_PROVIDER:-no-auth}
      # OpenMetadata Airflow Azure SSO Configuration
      OM_AUTH_AIRFLOW_AZURE_CLIENT_SECRET: ${OM_AUTH_AIRFLOW_AZURE_CLIENT_SECRET:-""}
      OM_AUTH_AIRFLOW_AZURE_AUTHORITY_URL: ${OM_AUTH_AIRFLOW_AZURE_AUTHORITY_URL:-""}
      OM_AUTH_AIRFLOW_AZURE_SCOPES: ${OM_AUTH_AIRFLOW_AZURE_SCOPES:-[]}
      OM_AUTH_AIRFLOW_AZURE_CLIENT_ID: ${OM_AUTH_AIRFLOW_AZURE_CLIENT_ID:-""}
      # OpenMetadata Airflow Google SSO Configuration
      OM_AUTH_AIRFLOW_GOOGLE_SECRET_KEY_PATH: ${OM_AUTH_AIRFLOW_GOOGLE_SECRET_KEY_PATH:- ""}
      OM_AUTH_AIRFLOW_GOOGLE_AUDIENCE: ${OM_AUTH_AIRFLOW_GOOGLE_AUDIENCE:-"https://www.googleapis.com/oauth2/v4/token"}
      # OpenMetadata Airflow Okta SSO Configuration
      OM_AUTH_AIRFLOW_OKTA_CLIENT_ID: ${OM_AUTH_AIRFLOW_OKTA_CLIENT_ID:-""}
      OM_AUTH_AIRFLOW_OKTA_ORGANIZATION_URL: ${OM_AUTH_AIRFLOW_OKTA_ORGANIZATION_URL:-""}
      OM_AUTH_AIRFLOW_OKTA_PRIVATE_KEY: ${OM_AUTH_AIRFLOW_OKTA_PRIVATE_KEY:-""}
      OM_AUTH_AIRFLOW_OKTA_SA_EMAIL: ${OM_AUTH_AIRFLOW_OKTA_SA_EMAIL:-""}
      OM_AUTH_AIRFLOW_OKTA_SCOPES: ${OM_AUTH_AIRFLOW_OKTA_SCOPES:-[]}
      # OpenMetadata Airflow Auth0 SSO Configuration
      OM_AUTH_AIRFLOW_AUTH0_CLIENT_ID: ${OM_AUTH_AIRFLOW_AUTH0_CLIENT_ID:-""}
      OM_AUTH_AIRFLOW_AUTH0_CLIENT_SECRET: ${OM_AUTH_AIRFLOW_AUTH0_CLIENT_SECRET:-""}
      OM_AUTH_AIRFLOW_AUTH0_DOMAIN_URL: ${OM_AUTH_AIRFLOW_AUTH0_DOMAIN_URL:-""}
      # OpenMetadata Airflow Custom OIDC SSO Configuration
      OM_AUTH_AIRFLOW_CUSTOM_OIDC_CLIENT_ID: ${OM_AUTH_AIRFLOW_CUSTOM_OIDC_CLIENT_ID:-""}
      OM_AUTH_AIRFLOW_CUSTOM_OIDC_SECRET_KEY: ${OM_AUTH_AIRFLOW_CUSTOM_OIDC_SECRET_KEY:-""}
      OM_AUTH_AIRFLOW_CUSTOM_OIDC_TOKEN_ENDPOINT_URL: ${OM_AUTH_AIRFLOW_CUSTOM_OIDC_TOKEN_ENDPOINT_URL:-""}
      # OpenMetadata Airflow JWT Token Configuration
      OM_AUTH_JWT_TOKEN: ${OM_AUTH_JWT_TOKEN:-""}
      # Database configuration for MySQL
      DB_DRIVER_CLASS: ${DB_DRIVER_CLASS:-com.mysql.cj.jdbc.Driver}
      DB_SCHEME: ${DB_SCHEME:-mysql}
      DB_USE_SSL: ${DB_USE_SSL:-false}
      DB_USER: ${DB_USER:-openmetadata_user}
      DB_USER_PASSWORD: ${DB_USER_PASSWORD:-openmetadata_password}
      DB_HOST: ${DB_HOST:-mysql}
      DB_PORT: ${DB_PORT:-3306}
      OM_DATABASE: ${OM_DATABASE:-openmetadata_db}
      # Airflow SSL Configurations
      AIRFLOW_VERIFY_SSL: ${AIRFLOW_VERIFY_SSL:-"no-ssl"}
      AIRFLOW_SSL_CERT_PATH: ${AIRFLOW_SSL_CERT_PATH:-""}
      # ElasticSearch Configurations
      ELASTICSEARCH_HOST: ${ELASTICSEARCH_HOST:- elasticsearch}
      ELASTICSEARCH_PORT: ${ELASTICSEARCH_PORT:-9200}
      ELASTICSEARCH_SCHEME: ${ELASTICSEARCH_SCHEME:-http}
      ELASTICSEARCH_USER: ${ELASTICSEARCH_USER:-""}
      ELASTICSEARCH_PASSWORD: ${ELASTICSEARCH_PASSWORD:-""}
      # Heap OPTS Configurations
      OPENMETADATA_HEAP_OPTS: ${OPENMETADATA_HEAP_OPTS:--Xmx1G -Xms1G}

    expose:
      - 8585
      - 8586
    ports:
      - "8585:8585"
      - "8586:8586"
    depends_on:
      elasticsearch:
        condition: service_started
      mysql:
        condition: service_healthy
    networks:
      - app_net
    healthcheck:
      test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]

  ingestion:
    container_name: openmetadata_ingestion
    image: openmetadata/ingestion:0.13.0
    depends_on:
      elasticsearch:
        condition: service_started
      mysql:
        condition: service_healthy
      openmetadata-server:
        condition: service_healthy
    environment:
      AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
      AIRFLOW__CORE__EXECUTOR: LocalExecutor
      AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
      DB_HOST: ${AIRFLOW_DB_HOST:-mysql}
      DB_PORT: ${AIRFLOW_DB_PORT:-3306}
      AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
      AIRFLOW_DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+pymysql}
      DB_USER: ${AIRFLOW_DB_USER:-airflow_user}
      DB_PASSWORD: ${AIRFLOW_DB_PASSWORD:-airflow_pass}
    entrypoint: /bin/bash
    command:
      - "/opt/airflow/ingestion_dependency.sh"
    expose:
      - 8080
    ports:
      - "8080:8080"
    networks:
      - app_net
    volumes:
      - ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
      - ingestion-volume-dags:/opt/airflow/dags
      - ingestion-volume-tmp:/tmp


networks:
  app_net:
    ipam:
      driver: default
      config:
        - subnet: "172.16.240.0/24"

and for DataHub I followed the steps described here: https://datahubproject.io/docs/quickstart/

Everything runs locally on my Mac.

Now my problem - I have also a local postgres up and running. I can access the database via PGAdmin.

version: '3.8'
services:
  db:
    container_name: pg_container
    image: postgres
    restart: always
    environment:
      POSTGRES_USER: admin
      POSTGRES_PASSWORD: admin
      POSTGRES_DB: phd_test
    ports:
      - "5432:5432"
  pgadmin:
    container_name: pgadmin4_container
    image: dpage/pgadmin4
    restart: always
    environment:
      PGADMIN_DEFAULT_EMAIL: [email protected]
      PGADMIN_DEFAULT_PASSWORD: admin
    ports:
      - "5050:80"

In both cases DataHub & OpenMetaData I can not connect to this local Postgres DB. In both cases I get errors like for example:

'sqlalchemy.exc.OperationalError: (psycopg2.OperationalError) connection to server at "127.0.0.1", port 5432 failed: Connection refused\n'
       '\tIs the server running on that host and accepting TCP/IP connections?\n'
       '\n'
       '(Background on this error at: https://sqlalche.me/e/14/e3q8)\n',
       "2022-12-04 13:58:08.971818 [exec_id=9e2a2163-24e0-4afb-8d2d-cb1b3033bc91] INFO: Failed to execute 'datahub ingest'",

I also tried different endpoints within DataHub and OpenMetadata for the Postgres like

  • 127.0.0.1:5432
  • 0.0.0.0:5432
  • pg_container:5432 (Name of the postgres container - this works in PG Admin)
  • 172.25.0.2:5432 (Ip Address of the pg_container)

Does anybody else have a local connection between one of these tools up and running? As the errors are similar in both tools I think that it could be a docker network error, so that the postgres container can not be seen by other containers (like DataHub or OpenMetaData).


Solution

  • Maybe you figured it out, but the problem here is that the open metadata is running in a docker container; thus, reference to localhost in open metadata refers to the container where open metadata is running and not to the host machine of the open metadata docker. If you are running Postgres locally you need to access the host's localhost and not the container's localhost. You can refer to host's localhost via host.docker.internal.

    Similarly, if you are using a docker container for Postgres and not exposing the port to your local network you need to find out the host of that container which is usually the id. For this, you can run docker network ls to see all the containers in docker bridge network and then inspect the container to see the IP address and container id's using docker inspect <NETWORK ID>.