Search code examples
pythonpandasdataframesqlalchemypymysql

List object to DataFrame to MySQL DB on Azure


I use Selenium and Pandas to scrape a table from a website that results in a list object. I then try to convert the list into a DataFrame to write it to MySQL.

When I print the result of my scrape it's in a nice tabular format with clear row numbers and column headers but when I do 'len' the result is 1.

I have tried a bunch of methods and could really use some help.

    from selenium import webdriver
from selenium.webdriver.chrome.options import Options
#import csv
from bs4 import BeautifulSoup
import pandas as pd
from pandas import DataFrame
import time
from sqlalchemy import create_engine

# base url
baseurl = "https://bitcoincharts.com/charts/bitstampUSD#igDailyztgSzm1g10zm2g25zv"



# selenium arguments
options = Options()
options.add_experimental_option("excludeSwitches", ["enable-logging"])
options.headless = True
options.add_argument("--window-size=1920,1200")
driver = webdriver.Chrome(options=options, executable_path="C:/Users/mande/OneDrive/Knowledge/Python/chromedriver.exe")

# navigates to website
driver.get(baseurl)

# clicks "show raw data"
rawdata = driver.find_element_by_xpath("/html/body/div[5]/div/div[2]/a").click()
print("Sleeping 10 seconds")
time.sleep(10)
print("Continue")


soup = BeautifulSoup(driver.page_source, 'lxml')
tables = soup.find(id='chart_table')
df = pd.read_html(str(tables), header=0)
print(df[0])

driver.quit()

print(type(df))
print(len(df))

df2 = pd.DataFrame([df])
df2.columns = ['Rownumber', 'Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume (BTC)', 'Volume (USD)', 'Weighted Price']

engine = create_engine("mysql+pymysql://user:password@hostname/dbname"
                       .format(user="user",
                               pw="password",
                               db="dbname"))

DataFrame.to_sql(df2, name='bitcoin',  con = engine, if_exists='append', chunksize=10000, index=False)


console:
ValueError: Length mismatch: Expected axis has 1 element, new values have 9 elements

Solution

  • Replacing these two lines:

    df2 = pd.DataFrame([df])
    df2.columns = ['Rownumber', 'Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume (BTC)', 'Volume (USD)', 'Weighted Price']
    

    with

    df2 = df[0]
    

    fixes it for me.