I use Selenium and Pandas to scrape a table from a website that results in a list object. I then try to convert the list into a DataFrame to write it to MySQL.
When I print the result of my scrape it's in a nice tabular format with clear row numbers and column headers, but when I call `len()` on it the result is 1.
I have tried a bunch of methods and could really use some help.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
#import csv
from bs4 import BeautifulSoup
import pandas as pd
from pandas import DataFrame
import time
from sqlalchemy import create_engine
# Base URL of the chart page holding the price table.
baseurl = "https://bitcoincharts.com/charts/bitstampUSD#igDailyztgSzm1g10zm2g25zv"

# Selenium launch options: headless Chrome with a fixed window size so the
# page lays out the full table.
options = Options()
options.add_experimental_option("excludeSwitches", ["enable-logging"])
options.headless = True
options.add_argument("--window-size=1920,1200")
driver = webdriver.Chrome(options=options, executable_path="C:/Users/mande/OneDrive/Knowledge/Python/chromedriver.exe")

# Navigate to the page and click "show raw data" so the table is injected
# into the DOM.  click() returns None, so there is nothing to bind here.
driver.get(baseurl)
driver.find_element_by_xpath("/html/body/div[5]/div/div[2]/a").click()

# Give the page time to render the raw-data table before reading the DOM.
print("Sleeping 10 seconds")
time.sleep(10)
print("Continue")

soup = BeautifulSoup(driver.page_source, 'lxml')
tables = soup.find(id='chart_table')
# NOTE: pd.read_html always returns a *list* of DataFrames (one per <table>
# found), which is why len(df) is 1 even though printing df[0] looks tabular.
df = pd.read_html(str(tables), header=0)
print(df[0])
driver.quit()
print(type(df))
print(len(df))

# Take the single parsed table out of the list.  The previous
# pd.DataFrame([df]) wrapped the whole list into a 1x1 frame, which is what
# triggered "ValueError: Length mismatch: Expected axis has 1 element, new
# values have 9 elements" when 9 column names were assigned.  header=0 above
# already gave the frame its column names from the table's header row.
df2 = df[0]

# The URL template needs {placeholders} for .format() to substitute the
# credentials; the original string had none, so .format() was a no-op and the
# literal "user:password@hostname/dbname" was used verbatim.
engine = create_engine("mysql+pymysql://{user}:{pw}@hostname/{db}"
                       .format(user="user",
                               pw="password",
                               db="dbname"))

# Append the scraped rows to the `bitcoin` table; call to_sql as a bound
# method on df2 rather than through the unbound DataFrame.to_sql form.
df2.to_sql(name='bitcoin', con=engine, if_exists='append', chunksize=10000, index=False)
console:
ValueError: Length mismatch: Expected axis has 1 element, new values have 9 elements
Replacing these two lines:
df2 = pd.DataFrame([df])
df2.columns = ['Rownumber', 'Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume (BTC)', 'Volume (USD)', 'Weighted Price']
with
df2 = df[0]
fixes it for me. The reason is that pd.read_html always returns a *list* of DataFrames (one per HTML table it finds), which is why len(df) was 1 — the actual table is the list's first element, and wrapping that list in pd.DataFrame([df]) produced a one-column frame that could not take nine column names. Since you passed header=0, df[0] already has its column names taken from the table's header row.