Search code examples
list selenium web dropdown screen-scraping

Selenium web scraping under dropdown list


Selenium web scraping :

  1. Dropdown list change
  2. Tried to scrape the results shown after the change
  3. The scraping failed

Code:

'''
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
import time

# Asker's attempt: open the Gastite locator page, pick entries from the
# "state" dropdown, and print details for each result link.
# NOTE(review): this starts Chrome via a hard-coded chromedriver path using
# `executable_path`; Selenium 4 deprecates that argument in favour of a
# `Service` object (which is imported above but never used) -- confirm the
# installed Selenium version.
driver=webdriver.Chrome(executable_path=r'C:\Program Files\Python39\chromedriver.exe')
driver.maximize_window()
driver.get("https://www.gastite.com/locator/?cats=109")

# range(1,3,1) yields i = 1 and 2, so only two dropdown options are tried.
for i in range(1,3,1):
    # Re-locate the <select name="state"> on every pass and choose option i.
    state=driver.find_element(By.NAME, 'state')
    stateDD=Select(state)
    stateDD.select_by_index(i)
    # NOTE(review): the element is located but the result is discarded --
    # nothing is clicked or submitted here. Presumably this is the form's
    # submit button and a `.click()` was intended; confirm against the page.
    driver.find_element(By.XPATH,'//*[@id="content"]/div[3]/form/input[2]')
    # Fixed 2-second pause before reading the results.
    time.sleep(2)
    # NOTE(review): the `find_element(s)_by_*` helpers used from here on are
    # the Selenium 3 API, while the lines above use the `By` style; the
    # helpers were removed in later Selenium 4 releases -- confirm version.
    lists=driver.find_elements_by_css_selector("div.repcontent > a")
    #print(lists)
    # NOTE(review): the loop variable `list` shadows the builtin `list`.
    for list in lists:
        # NOTE(review): 'namelink company_title' contains a space, i.e. two
        # class names; class-name lookup expects a single class, so this
        # call is a likely failure point -- verify.
        company=list.find_element_by_class_name('namelink company_title').text
        address=list.find_element_by_class_name('address').text
        address1=list.find_element_by_class_name('address2').text
        # The next two lookups are identical, so `tel` and `fax` both receive
        # the text of the first <span> found under the element.
        tel=list.find_element_by_tag_name('span').text
        fax=list.find_element_by_tag_name('span').text
        # `href` is read from the matched <a> element itself.
        web=list.get_attribute('href')
        print(company, address, address1, tel, fax, web)
    
'''

Solution

  • As an example, I select one state from the dropdown list; the rest follows your attempt.

    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait 
    from selenium.webdriver.support import expected_conditions as EC
    
    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service
    from webdriver_manager.chrome import ChromeDriverManager
    from selenium.webdriver.support.select import Select
    from bs4 import BeautifulSoup
    
    # Scrape the Gastite locator results for one state (value "AL") and
    # collect one dict per result card into `data`.
    url = 'https://www.gastite.com/locator/?cats=109'
    result_cards_selector = '#resultscontainer > ul > li > div.repcontent'
    
    
    def _text(parent, selector):
        """Return the stripped text of the first match, or '' if there is none."""
        node = parent.select_one(selector)
        return node.text.strip() if node is not None else ''
    
    
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        driver.maximize_window()
        # The page must be loaded before anything can be located; without this
        # call the wait below can only time out on a blank tab.
        driver.get(url)
    
        state_dropdown = WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located((By.XPATH, "//select[@name='state']"))
        )
        Select(state_dropdown).select_by_value("AL")
        # NOTE(review): if the page does not refresh its results automatically
        # when the dropdown changes, the search form has to be submitted (and
        # the new results awaited) before reading page_source -- confirm
        # against the live page.
    
        soup = BeautifulSoup(driver.page_source, 'lxml')
    finally:
        # Always close the browser, even if a wait times out.
        driver.quit()
    
    data = []
    for card in soup.select(result_cards_selector):
        company = _text(card, 'h3.company_title')
        print(company)
    
        # select() returns a list of tags, so index into it rather than
        # reading `.contents` on the list (which raises AttributeError).
        # NOTE(review): assumes the first <span> is the phone number and the
        # second is the fax number -- verify against the page markup.
        spans = card.select('span')
        phone = spans[0].text.strip() if len(spans) > 0 else ''
        fax = spans[1].text.strip() if len(spans) > 1 else ''
    
        data.append({
            'company': company,
            'address': _text(card, 'div.address'),
            'address2': _text(card, 'div.address2'),
            'phone': phone,
            'fax': fax,
        })
    
    print(data)