python, python-3.x, beautifulsoup, peewee

Getting constant TypeError: fetch_data() missing 1 required positional argument: 'download_dir'


I have been trying to get the following code to work, to no avail. I have looked everywhere and I just can't see what I am doing wrong. I am fairly new to Python, so here is my code:

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC 
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.reader.excel import load_workbook, InvalidFileException
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup 
from models import *

import os # File I/O
import xlrd
import time
import shutil
import glob
import configparser
config_parser = configparser.ConfigParser()
config_parser.read("config.ini")

#Var
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
download_dir = os.path.abspath('./downloads/')
ids = ['xxxxxxx']
filelist = glob.glob(download_dir + '/*.html')

def store_data(download_dir):
    print('Setting up ' + DATABASE)
    database.connect(reuse_if_open=True)
    # if os.path.isfile(DATABASE):
    #   print('Database already exists, removing all rows before moving forward')
    #   Charge.delete().execute()

    for root, dir, files in os.walk(download_dir):
        for file in files:
            print('Parsing {0}'.format(file))
            file_markup = ''
            with open(os.path.abspath(os.path.join(download_dir, file)), 'r') as html:
                file_markup = html.read()
            if file_markup == '':
                print('ERROR: File was not read')
                continue

            print('Parsing HTML...'.format(file))
            soup = BeautifulSoup(file_markup, 'html.parser')
            print('HTML parsed')

            data = []
            table = soup.find('table') #, attrs={'class':'lineItemsTable'}

            # First 56 tr's are headings
            rows = table.find_all('tr') # 18th row is header row 
            cols = rows[18].find_all('td')
            cols = [ele.text.strip() for ele in cols]

            for row in rows[19:]:
                d = row.find_all('td')
                d = [ele.text.strip() for ele in d]
                data.append([ele for ele in d if ele]) # Get rid of empty values 
                Charge.create(pmt_id=(d[1]),
                    prism_id=(d[2]),
                    owner=file.split('.')[0],
                    date=d[11],
                    reg_hours=float(d[17]),
                    ot_hours=float(d[18]),
                    rate=int(d[42]),
                    resource=(d[14]),
                    pmt_status=(d[24]),
                    resource_status=(d[15]))
    database.close()

def load_home_page(driver):
    driver.get('https://intra.att.com/cmpm/main.cfm')
    elem = driver.find_element_by_css_selector('input[value="Show Options"]')
    elem.click()
    elem = driver.find_element_by_css_selector('input[value="Enable Link"]')
    elem.click()
    elem = driver.find_element_by_css_selector('input[name="successOK"]')
    elem.click()

def type_supervisor_id(driver, supervisor):
    elem = driver.find_element_by_css_selector('input[name="sattuid"]')
    elem.clear()
    elem.send_keys(supervisor)

def select_date(driver, date):      
    for date in months:     
        select = Select(driver.find_element_by_name('startdate'))
        select.select_by_visible_text('Jan 2018')

def results_display(driver,results):
    elem = driver.find_element_by_css_selector('select[name="DontDisplay"]')
    elem.click()
    ActionChains(driver).key_down(Keys.CONTROL).send_keys('a').key_up(Keys.CONTROL).perform()
    driver.find_element_by_css_selector('img[src="/cmpmrptstatic/images/right.jpg"]').click()

def fetch_data():
    opts = webdriver.ChromeOptions()
    print('Download Directory: {0}'.format(download_dir))
    prefs = {'download.default_directory' : download_dir}
    opts.add_experimental_option('prefs', prefs)    
    print('Opening Chrome')
    driver = webdriver.Chrome(chrome_options=opts)
    print('Authenticating')
    load_home_page(driver)
    time.sleep(2)
    print('Load CMPM home')
    print('Opening CMPM Datamart reports')
    print('elem clicked')
    print('Attempting to switch to frame 0')
    driver.switch_to_frame('main')
    driver.find_element_by_css_selector('button[name="btndm"]').click()

    print('New window should be opening')
    wait_time = 60
    try:
        for handle in driver.window_handles:
            driver.switch_to_window(handle)
        print('Waiting for window to load, waiting {0} seconds'.format(wait_time))
        elem = WebDriverWait(driver, wait_time).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'button[name="btnCDH006"]')))
        elem = driver.find_element_by_css_selector('button[name="btnCDH006"]')
        elem.click()
        print('Found elem: {0}'.format(elem))
    except Exception:
        print('Something went wrong')
    for id in ids:
        print('Filling out form for: ' + id)
        type_supervisor_id(driver, id)
        select_date(driver, months)
        results_display(driver,results_display)
    driver.find_element_by_css_selector('button[name="btnSubmit"]').click()
    print('Sleeping for 5s')
    time.sleep(5)
    for root, dir, files in os.walk(download_dir):
        for file in files:
            if file[:2] == 'XL':
                print('Renaming {0} to {1}'.format(file, id))
                os.rename(os.path.abspath(os.path.join(download_dir, file)), os.path.abspath(os.path.join(download_dir, id+'.html')))
    print('Waiting for window to load, waiting {0} seconds'.format(wait_time))
    elem = WebDriverWait(driver, wait_time).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'button[name="btnSubmit"]')))
    print('All Data from CMPM has been downloaded')
    driver.quit()

def clr_dir():
    if download_dir == '.html':
        print(download_dir + ' is empty')
    else:
        print('download_dir is not empty! Will now attempt to delete all files')
        for file in filelist:
            os.remove(file)
        print('All files have been removed from ' + download_dir)
#clr_dir()          --- WORKS ---
#fetch_data()       --- WORKS ---
store_data('/download')

Here is the error I keep getting when I run this:

C:\Users>python prism.py
Traceback (most recent call last):
  File "prism.py", line 111, in <module>
    fetch_data()
TypeError: fetch_data() missing 1 required positional argument: 'download_dir'

Any thoughts?

Updated with the complete code for this module!

Thank you for the answer. Now I am getting this error:

C:\Users\daeyiele\Documents\NetBeansProjects\BudgetHome>python cmpm.py
Setting up C:\Users\daeyiele\Documents\NetBeansProjects\BudgetHome\db\budget.db
Traceback (most recent call last):
  File "cmpm.py", line 162, in <module>
    store_data('/downloads')
  File "cmpm.py", line 34, in store_data
    database.connect()
  File "C:\Users\daeyiele\AppData\Local\Programs\Python\Python36-32\lib\site-packages\peewee.py", line 2439, in connect
    self._state.set_connection(self._connect())
  File "C:\Users\daeyiele\AppData\Local\Programs\Python\Python36-32\lib\site-packages\peewee.py", line 2666, in _connect
    **self.connect_params)
TypeError: 'threadlocals' is an invalid keyword argument for this function

Any thoughts, or should I start a new thread?


Solution

  • You have defined a function, fetch_data(), that takes one argument:

    def fetch_data(download_dir):
        ...
    

    The last line of the script calls fetch_data(), but with no download_dir argument:

    fetch_data()
    

    That is your problem: you need to tell fetch_data() what the download_dir is. For example, you could hard-code it like this:

    fetch_data('/home/daeyiele/Downloads')
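
    Since your script already defines a module-level download_dir near the top (download_dir = os.path.abspath('./downloads/')), a simpler fix, assuming that is the directory you want Chrome to download into, is to pass that existing variable through:

    fetch_data(download_dir)

    You could also give the parameter a default value so that a bare fetch_data() call keeps working:

    def fetch_data(download_dir=os.path.abspath('./downloads/')):
        ...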