PostGres SQL will not accept data which is in violation of primary key. To ignore the duplicate data, I have this code:
import pandas as pd
import psycopg2
import os
import matplotlib
from sqlalchemy import create_engine
from tqdm import tqdm_notebook
from pandas_datareader import data as web
import datetime
from dateutil.relativedelta import relativedelta
db_database = os.environ.get('123')
engine = create_engine('postgresql://postgres:{}@localhost:5433/stockdata'.format(123))
def import_data(Symbol):
df = web.DataReader(Symbol, 'yahoo',start=datetime.datetime.now()-relativedelta(days=3), end= datetime.datetime.now())
insert_init = """INSERT INTO stockprices
(Symbol, Date, Volume, Open, Close, High, Low)
VALUES
"""
vals = ",".join(["""('{}','{}','{}','{}','{}','{}','{}')""".format(
Symbol,
Date,
row.High,
row.Low,
row.Open,
row.Close,
row.Volume,
) for Date, row in df.iterrows()])
insert_end ="""ON CONFLICT (Symbol, Date) DO UPDATE
SET
Volume = EXCLUDED.Volume,
Open = EXCLUDED.Open,
Close = EXCLUDED.Close,
Low = EXCLUDED.Low,
High = EXCLUDED.High
"""
query = insert_init + vals + insert_end
engine.execute(query)
import_data('aapl')
I am getting this error:
ProgrammingError: (psycopg2.errors.UndefinedColumn) column "symbol" of relation "stockprices" does not exist
LINE 2: (Symbol,Date, Volume, Open, Close, H...
^
[SQL: INSERT INTO stockprices
Could you please advise as to what does this error mean? I got rid of all the double quotes as advised in the comment.
I had used this code to create the table:
def create_price_table(symbol):
print(symbol)
df = web.DataReader(symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=7), end= datetime.datetime.now())
df['Symbol'] = symbol
df.to_sql(name = "stockprices", con = engine, if_exists='append', index = True)
return 'daily prices table created'
create_price_table('amzn')
Also as was mentioned in the comment. I used this to check the table name:
SELECT table_name
FROM information_schema.tables
WHERE table_schema='public'
AND table_type='BASE TABLE';
I changed the code as suggested in the comment, now the column name is in small case. Below is the code: import pandas as pd import psycopg2 import os import matplotlib from sqlalchemy import create_engine from tqdm import tqdm_notebook from pandas_datareader import data as web import datetime from dateutil.relativedelta import relativedelta
db_database = os.environ.get('123')
engine = create_engine('postgresql://postgres:{}@localhost:5433/stockdata'.format(123))
def create_price_table(symbol):
print(symbol)
df = web.DataReader(symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=7), end= datetime.datetime.now())
df['symbol'] = symbol
df = df.rename(columns= {'Open':'open'})
df = df.rename(columns= {'Close':'close'})
df = df.rename(columns= {'High':'high'})
df = df.rename(columns= {'Low':'low'})
df = df.rename(columns= {'Volume':'volume'})
df = df.rename(columns= {'Adj Close':'adj_close'})
df.index.name ='date'
df.to_sql(name = "stockprices", con = engine, if_exists='append', index = True)
return 'daily prices table created'
# create_price_table('amzn')
def import_data(Symbol):
df = web.DataReader(Symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=3), end= datetime.datetime.now())
insert_init = """INSERT INTO stockprices
(symbol, date, volume, open, close, high, low)
VALUES
"""
vals = ",".join(["""('{}','{}','{}','{}','{}','{}','{}')""".format(
Symbol,
Date,
row.High,
row.Low,
row.Open,
row.Close,
row.Volume,
) for Date, row in df.iterrows()])
insert_end ="""ON CONFLICT (Symbol, Date) DO UPDATE
SET
Volume = EXCLUDED.Volume,
Open = EXCLUDED.Open,
Close = EXCLUDED.Close,
Low = EXCLUDED.Low,
High = EXCLUDED.High
"""
query = insert_init + vals + insert_end
engine.execute(query)
import_data('aapl')
This code however is producing a new error:
DataError: (psycopg2.errors.InvalidTextRepresentation) invalid input syntax for type bigint: "166.14999389648438"
LINE 4: ('aapl','2022-02-23 00:00:00','166.14999...
^
Per my comment you have two issues:
select 166.14999389648438::bigint; 166
Though as you see it gets truncated.
It will not work if entered as a string:
ERROR: invalid input syntax for type bigint: "166.14999389648438"
Which is what you are doing. This leads to the second issue below.
Warning
Never, never, NEVER use Python string concatenation (+) or string parameters interpolation (%) to pass variables to a SQL query string. Not even at gunpoint.
For the purposes of this question the important part is that using parameter passing will result in proper type adaptation.