I have an error message:
python3 load_data.py
Traceback (most recent call last):
File "/Users/sarahlenet/Desktop/MLOPS/Repo_MLOPS_data_atelier/src/scripts/data/load_data.py", line 113, in <module>
insert_tsunamis_data('../../../data/tsunamis.csv')
File "/Users/sarahlenet/Desktop/MLOPS/Repo_MLOPS_data_atelier/src/scripts/data/load_data.py", line 19, in insert_tsunamis_data
conn = psycopg2.connect(**DB_PARAMS)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/lib/python3.11/site-packages/psycopg2/__init__.py", line 122, in connect
conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
psycopg2.OperationalError: connection to server at "172.25.0.2", port 5432 failed: Operation timed out
Is the server running on that host and accepting TCP/IP connections?
I have a load_data.py file that is this one:
import pandas as pd
import psycopg2
from psycopg2 import sql
# Database connection parameters
DB_PARAMS = {
'dbname': 'tsunamis',
'user': 'my_user',
'password': 'my_password',
'host': '172.25.0.2', # or your PostgreSQL server address
'port': 5432
}
def insert_tsunamis_data(csv_file):
# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file)
# Create a connection to the database
conn = psycopg2.connect(**DB_PARAMS)
cursor = conn.cursor()
# Prepare the SQL insert statement for the tsunamis table
insert_query = sql.SQL("""
INSERT INTO tsunamis (
type, x, y, fid, id, tsevent_id, year, month, day, date_str,
arr_day, arr_hour, arr_min, trav_hours, trav_mins, period,
first_moti, latitude, longitude, location_n, area,
country, region_cod, runup_ht, runup_ht_r, runup_hori,
type_measu, type_mea_1, damage_mil, damage_amo, damage_des,
deaths, deaths_amo, deaths_des, injuries, injuries_a,
injuries_d, houses_des, houses_amo, houses_d_1, houses_dam,
houses_d_2, houses_d_3, comments, doubtful, dist_from,
event_regi, event_re_1, pred_trav, pred_trav1, url,
tsevent_ur, hour, minute, second, region, cause_code,
cause, event_vali, event_va_1, eq_mag_unk, eq_mag_mb,
eq_mag_ms, eq_mag_mw, eq_mag_ml, eq_mag_mfa, eq_magnitu,
eq_magni_1, eq_depth, max_event_, ts_mt_abe, ts_mt_ii,
ts_intensi, missing, missing_am, missing_de, warning_st,
num_runup, id_runup, has_ref, id_ref, slides_url,
num_slides, map_slide_, map_eq_id, damage_m_1, damage_a_1,
damage_tot, houses_d_4, houses_a_1, houses_tot,
deaths_tot, deaths_a_1, deaths_t_1, injuries_t, injuries_1,
injuries_2, missing_to, missing__1, missing__2, map_vol_id,
houses_d_5, houses_d_6, houses_d_7, num_deposi
) VALUES (%s)
""")
# Insert each row from the DataFrame
for index, row in df.iterrows():
cursor.execute(insert_query, tuple(row))
# Commit the transaction
conn.commit()
cursor.close()
conn.close()
def insert_gdp_data(csv_file):
# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file)
# Create a connection to the database
conn = psycopg2.connect(**DB_PARAMS)
cursor = conn.cursor()
# Prepare the SQL insert statement for the gdp table
insert_query = sql.SQL("""
INSERT INTO gdp (
type, country_name, country_code, indicator_name, indicator_code,
"1960", "1961", "1962", "1963", "1964", "1965", "1966", "1967", "1968", "1969",
"1970", "1971", "1972", "1973", "1974", "1975", "1976", "1977", "1978", "1979",
"1980", "1981", "1982", "1983", "1984", "1985", "1986", "1987", "1988", "1989",
"1990", "1991", "1992", "1993", "1994", "1995", "1996", "1997", "1998", "1999",
"2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019",
"2020", "2021", "2022", "2023"
) VALUES (%s)
""")
# Insert each row from the DataFrame
for index, row in df.iterrows():
cursor.execute(insert_query, tuple(row))
# Commit the transaction
conn.commit()
cursor.close()
conn.close()
def insert_population_data(csv_file):
# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file)
# Create a connection to the database
conn = psycopg2.connect(**DB_PARAMS)
cursor = conn.cursor()
# Prepare the SQL insert statement for the population table
insert_query = sql.SQL("""
INSERT INTO population (
type, country, density, density_mi, population, area
) VALUES (%s)
""")
# Insert each row from the DataFrame
for index, row in df.iterrows():
cursor.execute(insert_query, tuple(row))
# Commit the transaction
conn.commit()
cursor.close()
conn.close()
# Example usage:
insert_tsunamis_data('../../../data/tsunamis.csv')
insert_gdp_data('../../../data/gdp.csv')
insert_population_data('../../../data/population.csv')
and a docker-compose that is this one:
# ''' TODO: change the credentials and names'''
version: '3.8'
services:
postgres:
image: postgres:13 # Version de PostgreSQL
container_name: postgres_container
environment:
POSTGRES_USER: admin
POSTGRES_PASSWORD: admin
POSTGRES_DB: postgres
volumes:
- postgres_data:/var/lib/postgresql/data
- ../src/scripts/data/init.sql:/docker-entrypoint-initdb.d/init.sql # Montez le script
ports:
- "5432:5432"
pgadmin:
image: dpage/pgadmin4:latest # Image officielle de pgAdmin 4
container_name: pgadmin_container
environment:
PGADMIN_DEFAULT_EMAIL: [email protected] # Email de connexion à pgAdmin
PGADMIN_DEFAULT_PASSWORD: admin # Mot de passe pour pgAdmin
ports:
- "8080:80" # Expose l'interface pgAdmin sur le port 8080
depends_on:
- postgres
volumes:
- pgadmin_data:/var/lib/pgadmin
volumes:
postgres_data:
pgadmin_data:
I tried to correct pg_hba.conf and add all accesses to 0.0.0.0/0.
Assuming you're running the container locally, update your python script to use the correct connection details:
DB_PARAMS = {
'dbname': 'tsunamis',
'user': 'admin',
'password': 'admin',
'host': 'localhost',
'port': 5432
}
And update the Dockerfile to use the correct database:
environment:
POSTGRES_USER: admin
POSTGRES_PASSWORD: admin
POSTGRES_DB: tsunamis