This code enters data to a sql table that already exists for data from the previous day (09/10/23), the primary key in this table is a column called "id". When trying to enter yesterdays data and previous days for example changing the api parameters to 06/10/23-09/10/23 it will not enter any of the data as the data for 09/10/23 already exists. I have tried multiple solutions already on here and none work. I basically need a loop that can iterate through rows and if the primary key already exists in the table then carry on and ignore integrity errors and if it's not in the table insert the entry. Any help would be much appreciated.
### Libraries ###
import json
import requests
import pandas as pd
from sqlalchemy import create_engine, exc
from datetime import datetime, timedelta
import pyodbc
### API Call ###
# Calculate the date for previous day
previous_day = datetime.now() - timedelta(days=1)
date_from = previous_day.strftime('%Y-%m-%dT00:00:00')
date_to = previous_day.strftime('%Y-%m-%dT23:59:59')
# API Credentials
url = f"XXXX"
payload = {}
headers = {'Authorization': 'XXXX'}
# API Request
response = requests.request("GET", url, headers=headers, data=payload)
# Check if the request was successful
if response.status_code == 200:
# Parse the JSON response into a DataFrame
data = response.json()
# Extract relevant data from dictionaries within dictionaries
df = pd.json_normalize(data['data'])
# Modify 'charging_periods' column
def extract_tariff_id(charging_periods):
if isinstance(charging_periods, list) and len(charging_periods) > 0:
# Get the first dictionary in the list (if it exists)
first_entry = charging_periods[0]
# Extract 'tariff_id' from the first dictionary
tariff_id = first_entry.get('tariff_id', 'Unknown Tariff ID')
return tariff_id
else:
return 'Unknown Tariff ID'
# Apply the modification to 'charging_periods' and rename the column
df['tariff_id'] = df['charging_periods'].apply(extract_tariff_id)
# Drop the original 'charging_periods' column
df.drop(columns=['charging_periods'], inplace=True)
### Azure DB ###
# Parameters
server = 'XXXX'
database = 'XXXX'
username = 'XXXX'
password = 'XXXX'
driver = 'XXXX'
# Create a connection to the Azure SQL Database
conn = pyodbc.connect(f'DRIVER={{XXXX}};SERVER={server};DATABASE={database};UID={username};PWD={password}')
# Create a cursor
cursor = conn.cursor()
# Engine (Mini SQL Enviroment)
engine = create_engine(f"mssql+pyodbc:///?odbc_connect={f'DRIVER={{XXXX}};SERVER={server};DATABASE={database};UID={username};PWD={password}'}")
# Insert to SQL
df.to_sql('TABLE_NAME', engine, if_exists='append', index=False, schema='ev')
# Commit the changes and close the connection
conn.commit()
conn.close()
# Print tabular format and data types
print('Success!')
print(df)
print(df.info())
else:
print(f"API request failed with status code: {response.status_code}")
# Remove Duplicates
for i in range(len(df)):
try:
df.iloc[i:i+1].to_sql(name="TABLE_NAME",if_exists='append',con = Engine)
except exc.IntegrityError as e:
pass
Needed to remove periods in title:
#Remove periods from column names
df.columns = df.columns.str.replace("[.]","_", regex=True)
Then to remove duplicates did the following:
#Engine (Mini SQL Enviroment)
engine = create_engine(f"mssql+pyodbc:///?odbc_connect={f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server};DATABASE={database};UID={username};PWD={password}'}")
try:
# Insert to SQL
df.to_sql('table_name', engine, if_exists='append', index=False, schema='blah')
except:
# Remove Duplicates
for index, row in df.iterrows():
# Insert the row into the database
columns = ", ".join(df.columns)
values = ", ".join([f"'{row[col]}'" for col in df.columns])
statement = f"INSERT INTO blah.table_name({columns}) values({values})"
status = row["status"]
statement_2 = f'SELECT id FROM blah.table_name WHERE id = {row["id"]}'
with conn as connection:
with conn.cursor() as cursor:
cursor.execute(statement_2)
row = cursor.fetchone()
if not row and status !=
"variable_messing_script":
# cursor.execute(statement)
try:
cursor.execute(statement)
except:
print(statement)