I have the following code that scrapes some information I need from a website. However, there are 61 pages I need to go through and scrape the same data that requires me to click on the 'Next' button to go to the next page with the url
remaining the same.
I know it is possible to use driver.find_element_by_link_text('Next').click()
to go to the next page but I am not sure how to include this in my code.
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
driver = webdriver.Chrome()
driver.get('https://mspotrace.org.my/Sccs_list')
time.sleep(20)
# Get list of elements
elements = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.XPATH, "//a[@title='View on Map']")))
# Loop through element popups and pull details of facilities into DF
pos = 0
df = pd.DataFrame(columns=['facility_name','other_details'])
for element in elements:
try:
data = []
element.click()
time.sleep(10)
facility_name = driver.find_element_by_xpath('//h4[@class="modal-title"]').text
other_details = driver.find_element_by_xpath('//div[@class="modal-body"]').text
time.sleep(5)
data.append(facility_name)
data.append(other_details)
df.loc[pos] = data
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[aria-label='Close'] > span"))).click() # close popup window
print("Scraping info for",facility_name,"")
time.sleep(15)
pos+=1
except Exception:
alert = driver.switch_to.alert
print("No geo location information")
alert.accept()
pass
print(df)
Answering to your question, "I don't know how I would put it in my code"
Counter iii is used to repeat your existing code 60 times. I cannot test the entire code, but I tested the loops.
For the sake of simplicity, in the code below I removed the element scraping so I could focus the test on repeating the clicks in the Next button, which is your question.
If you are going to test on your side, ensure you replace
print('your stuff would stay here!')
with the actual element scraping block that you have in your original code.
Hope it helps!
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
driver = webdriver.Chrome()
driver.get('https://mspotrace.org.my/Sccs_list')
time.sleep(20)
# Get list of elements
elements = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.XPATH, "//a[@title='View on Map']")))
# Loop through element popups and pull details of facilities into DF
pos = 0
df = pd.DataFrame(columns=['facility_name','other_details'])
for iii in range(1,60):
for element in elements:
print('your stuff would stay here!')
#click next
btnNext = driver.find_element(By.XPATH,'//*[@id="dTable_next"]/a')
driver.execute_script("arguments[0].scrollIntoView();", btnNext)
driver.execute_script("arguments[0].click();", btnNext)
time.sleep(5)
#print current df. You may want to store it and print in the end only?
print(df)
# Get list of elements again
elements = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.XPATH, "//a[@title='View on Map']")))
# Resetting vars again
pos = 0
df = pd.DataFrame(columns=['facility_name','other_details'])