I have been trying to get the following code to work, to no avail. I have looked everywhere and I just cannot see what I am doing wrong. I am fairly new to Python, so here is my code:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.reader.excel import load_workbook, InvalidFileException
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup
from models import *
import os # File I/O
import xlrd
import time
import shutil
import glob
import configparser
# Load settings from config.ini in the current working directory.
config_parser = configparser.ConfigParser()
config_parser.read("config.ini")

# Var
# Three-letter month abbreviations, January through December.
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
# Absolute path of the folder Chrome downloads into (see fetch_data).
download_dir = os.path.abspath('./downloads/')
# Supervisor ids the report form is submitted for.
ids = ['xxxxxxx']
# Snapshot, taken at import time, of the .html files already downloaded.
filelist = glob.glob(download_dir + '/*.html')
def store_data(download_dir):
    """Parse every file under *download_dir* and insert its rows as ``Charge`` records.

    The database connection is opened before the walk and is always closed
    afterwards, even when parsing or inserting raises.

    Args:
        download_dir: Directory tree to scan with ``os.walk``.
    """
    print('Setting up ' + DATABASE)
    database.connect(reuse_if_open=True)
    # if os.path.isfile(DATABASE):
    # print('Database already exists, removing all rows before moving forward')
    # Charge.delete().execute()
    try:
        for root, _dirs, files in os.walk(download_dir):
            for file_name in files:
                print('Parsing {0}'.format(file_name))
                # Join against the directory os.walk is currently visiting so
                # files in sub-directories resolve to their real location.
                path = os.path.abspath(os.path.join(root, file_name))
                # The part of the file name before the first '.' is the owner.
                _store_report(path, file_name.split('.')[0])
    finally:
        database.close()


def _store_report(path, owner):
    """Insert one ``Charge`` row for each data row of the report at *path*."""
    with open(path, 'r') as html:
        file_markup = html.read()
    if file_markup == '':
        print('ERROR: File was not read')
        return
    print('Parsing HTML...')
    soup = BeautifulSoup(file_markup, 'html.parser')
    print('HTML parsed')
    table = soup.find('table')  # , attrs={'class':'lineItemsTable'}
    if table is None:
        print('ERROR: No table found in {0}'.format(path))
        return
    rows = table.find_all('tr')
    # rows[18] is treated as the header row; data rows start at index 19.
    for row in rows[19:]:
        cells = [ele.text.strip() for ele in row.find_all('td')]
        if len(cells) <= 42:
            # Charge.create below reads cells up to index 42; a shorter row
            # cannot be a data row, so report it instead of raising IndexError.
            print('Skipping row with {0} cells'.format(len(cells)))
            continue
        Charge.create(pmt_id=cells[1],
                      prism_id=cells[2],
                      owner=owner,
                      date=cells[11],
                      reg_hours=float(cells[17]),
                      ot_hours=float(cells[18]),
                      rate=int(cells[42]),
                      resource=cells[14],
                      pmt_status=cells[24],
                      resource_status=cells[15])
def load_home_page(driver):
    """Open the CMPM main page and click its three setup inputs in order.

    Args:
        driver: Selenium WebDriver used to load the page and find the inputs.
    """
    driver.get('https://intra.att.com/cmpm/main.cfm')
    # Clicked strictly in this order: Show Options, Enable Link, successOK.
    click_sequence = (
        'input[value="Show Options"]',
        'input[value="Enable Link"]',
        'input[name="successOK"]',
    )
    for selector in click_sequence:
        driver.find_element_by_css_selector(selector).click()
def type_supervisor_id(driver, supervisor):
    """Replace the contents of the ``sattuid`` input with *supervisor*.

    Args:
        driver: Selenium WebDriver showing the form.
        supervisor: Text typed into the field after it has been cleared.
    """
    sattuid_field = driver.find_element_by_css_selector('input[name="sattuid"]')
    # Clear first so the new id does not get appended to a previous one.
    sattuid_field.clear()
    sattuid_field.send_keys(supervisor)
def select_date(driver, date):
    """Choose the start date in the ``startdate`` drop-down.

    The previous version looped over the module-level ``months`` list,
    shadowing *date*, and selected the same hard-coded option on every
    pass. The option is now selected once.

    Args:
        driver: Selenium WebDriver showing the form.
        date: Visible option text to select, e.g. ``'Jan 2018'``. Any
            non-string value (fetch_data passes the ``months`` list) falls
            back to ``'Jan 2018'``, the option that was always selected before.
    """
    option_text = date if isinstance(date, str) else 'Jan 2018'
    start_date = Select(driver.find_element_by_name('startdate'))
    start_date.select_by_visible_text(option_text)
def results_display(driver,results):
    """Click the ``DontDisplay`` list, send Ctrl+A, then click the right-arrow image.

    Args:
        driver: Selenium WebDriver showing the form.
        results: Accepted for interface compatibility; not used by the body.
    """
    dont_display = driver.find_element_by_css_selector('select[name="DontDisplay"]')
    dont_display.click()

    # Ctrl+A with the list focused; presumably selects every option -- confirm.
    select_all = ActionChains(driver)
    select_all.key_down(Keys.CONTROL)
    select_all.send_keys('a')
    select_all.key_up(Keys.CONTROL)
    select_all.perform()

    right_arrow = driver.find_element_by_css_selector('img[src="/cmpmrptstatic/images/right.jpg"]')
    right_arrow.click()
def fetch_data():
    """Drive Chrome through the CMPM site and download one report per id in ``ids``.

    Chrome is configured to download into ``download_dir``. For each id the
    form is filled in and submitted, then any downloaded file whose name
    starts with ``XL`` is renamed to ``<id>.html``. The browser is always
    closed, even when a step raises.

    NOTE(review): the pasted source had lost its indentation. The
    window-handle loop is assumed to contain only the switch call, and the
    final wait is assumed to run once per id -- confirm against the original.
    """
    opts = webdriver.ChromeOptions()
    print('Download Directory: {0}'.format(download_dir))
    prefs = {'download.default_directory': download_dir}
    opts.add_experimental_option('prefs', prefs)
    print('Opening Chrome')
    driver = webdriver.Chrome(chrome_options=opts)
    try:
        print('Authenicating')
        load_home_page(driver)
        time.sleep(2)
        print('Load CMPM home')
        print('Opening CMPM Datamart reports')
        print('elem clicked')
        print('Attemting to switch to frame 0')
        driver.switch_to_frame('main')
        driver.find_element_by_css_selector('button[name="btndm"]').click()
        print('New window should be opening')
        wait_time = 60
        try:
            # Iterating every handle leaves the driver on the last one listed.
            for handle in driver.window_handles:
                driver.switch_to_window(handle)
            print('Waiting for window to load, waiting {0} seconds'.format(wait_time))
            # until() returns the located element, so no second lookup is needed.
            elem = WebDriverWait(driver, wait_time).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'button[name="btnCDH006"]')))
            elem.click()
            print('Found elem: {0}'.format(elem))
        except Exception as exc:
            # Still best-effort, but report what failed instead of hiding it.
            print('Something went wrong: {0!r}'.format(exc))
        for supervisor_id in ids:
            print('Fililng out form for: ' + supervisor_id)
            type_supervisor_id(driver, supervisor_id)
            select_date(driver, months)
            # results_display does not use its second argument.
            results_display(driver, results_display)
            driver.find_element_by_css_selector('button[name="btnSubmit"]').click()
            print('Sleeping for 5s')
            time.sleep(5)
            for root, _dirs, files in os.walk(download_dir):
                for file_name in files:
                    if file_name.startswith('XL'):
                        print('Renaming {0} to {1}'.format(file_name, supervisor_id))
                        # Source path uses the directory being walked so a file
                        # in a sub-directory is found where it actually is.
                        os.rename(
                            os.path.abspath(os.path.join(root, file_name)),
                            os.path.abspath(os.path.join(download_dir, supervisor_id + '.html')))
            print('Waiting for window to load, waiting {0} seconds'.format(wait_time))
            WebDriverWait(driver, wait_time).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'button[name="btnSubmit"]')))
        print('All Data from CMPM has been downloaded')
    finally:
        # Close Chrome on every exit path so a failure does not leak a browser.
        driver.quit()
def clr_dir():
    """Delete every ``*.html`` file directly inside ``download_dir``.

    The directory is scanned when the function is called, so files
    downloaded after import are included and files already removed are not
    attempted again. Prints a notice and returns when nothing matches.
    """
    html_files = glob.glob(download_dir + '/*.html')
    if not html_files:
        # The old test compared the directory path itself to '.html', which
        # could never be true, so this branch was unreachable.
        print(download_dir + ' is empty')
        return
    print('download_dir is not empty! Will now attempt to delete all files')
    for path in html_files:
        os.remove(path)
    print('All files have been removed from ' + download_dir)
#clr_dir() --- WORKS ---
#fetch_data() --- WORKS ---
# Parse the folder defined by download_dir; the literal '/download' named a
# different, root-level path, so os.walk found nothing to parse.
store_data(download_dir)
Here is the error I keep getting when I run this:
C:\Users>python prism.py
Traceback (most recent call last):
  File "prism.py", line 111, in <module>
    fetch_data()
TypeError: fetch_data() missing 1 required positional argument: 'download_dir'
Any thoughts?
Updated with the complete code for this module!
Thank you for the answer. Now I am getting this error:
C:\Users\daeyiele\Documents\NetBeansProjects\BudgetHome>python cmpm.py
Setting up C:\Users\daeyiele\Documents\NetBeansProjects\BudgetHome\db\budget.db
Traceback (most recent call last):
File "cmpm.py", line 162, in <module>
store_data('/downloads')
File "cmpm.py", line 34, in store_data
database.connect()
File "C:\Users\daeyiele\AppData\Local\Programs\Python\Python36-32\lib\site-packages\peewee.py", line 2439, in connect
self._state.set_connection(self._connect())
File "C:\Users\daeyiele\AppData\Local\Programs\Python\Python36-32\lib\site-packages\peewee.py", line 2666, in _connect
**self.connect_params)
TypeError: 'threadlocals' is an invalid keyword argument for this function
Any thoughts, or should I start a new thread?
You have defined a function, `fetch_data()`, that takes one argument:
def fetch_data(download_dir):
...
The last line of the script calls `fetch_data()`, but with no `download_dir` argument:
fetch_data()
That is your problem. You need to tell `fetch_data()` what the `download_dir` is. For example, you could hard-code it like this:
fetch_data('/home/daeyiele/Downloads')