I need to extract information from a website. The information on this website sits inside the following markup:
<div class="accordion-block__question">
<div class="accordion-block__text">Server</div></div>
...
<div class="block__col"><b>Country</b></div>
Running
# Attempt to read the "Country" entry from the page and record it in `country`;
# any failure is recorded as the string "Error".
try:
# Country
# NOTE: text() only looks at the div's own text nodes. In the markup shown
# above, "Country" sits inside the nested <b>, so this predicate does not match
# that div and the lookup falls through to the except branch.
c=driver.find_element_by_xpath("//div[contains(@class,'block__col') and contains(text(),'Country')]").get_attribute('textContent')
country.append(c)
# Bare except: every failure (including the lookup not matching) becomes "Error".
except:
country.append("Error")
I get a DataFrame that contains only errors. I'm interested in all the fields (though fixing this for just one would be great), including the Trustscore (a number), but I don't know whether it is possible to get it. I'm using Selenium with the Chrome WebDriver. The website is https://www.scamadviser.com/check-website.
CODE
This is the entire code:
def _scrape_field(wait, label):
    """Return the text displayed next to ``label`` on the current page.

    Args:
        wait: A ``WebDriverWait`` bound to the driver showing the page.
        label: Text of the bold label to look for (e.g. ``'Country'``).

    Returns:
        The ``innerText`` of the cell following the label, or the string
        ``"Error"`` when the element cannot be found or read in time.
    """
    # Function-scope import: selenium is already a dependency of this file
    # (``webdriver`` / ``WebDriverWait``); this only adds the exception base.
    from selenium.common.exceptions import WebDriverException

    # The label text lives in a <b> nested inside the ``block__col`` div, so
    # the predicate tests the <b> child instead of the div's own text().
    # NOTE(review): the value is assumed to be in the following sibling <div>
    # of the label cell -- confirm against the live page markup.
    xpath = (
        "//div[contains(@class,'block__col')]"
        "[b[contains(text(),'" + label + "')]]"
        "/following-sibling::div"
    )
    try:
        # ``until`` keeps retrying while the element is not yet present and
        # raises TimeoutException (a WebDriverException) when time runs out.
        cell = wait.until(lambda drv: drv.find_element("xpath", xpath))
        return cell.get_attribute('innerText')
    except WebDriverException:
        return "Error"


def scam(df):
    """Look up every unique URL of ``df`` on ScamAdviser and collect fields.

    Args:
        df: DataFrame with a ``'URL'`` column holding the sites to check.

    Returns:
        A new DataFrame with one row per unique URL and the columns
        ``'URL'``, ``'Trustscore'``, ``'Country'`` and ``'ISP'``. A field that
        could not be read is reported as ``"Error"``. ``'Trustscore'`` is
        ``None`` for every row because no locator for it is known yet.
    """
    chrome_options = webdriver.ChromeOptions()
    query = df['URL'].unique().tolist()
    trust = []
    country = []
    isp_country = []

    driver = webdriver.Chrome('mypath', chrome_options=chrome_options)
    try:
        wait = WebDriverWait(driver, 30)
        for x in query:
            driver.get('https://www.scamadviser.com/check-website/' + x)
            # Exactly one append per list per URL keeps all columns the same
            # length, which pd.DataFrame requires.
            trust.append(None)  # TODO: trust score is not scraped yet
            country.append(_scrape_field(wait, 'Country'))
            isp_country.append(_scrape_field(wait, 'ISP'))
    finally:
        # Always close the browser, even if a page load raises.
        driver.quit()

    # Create dataframe
    columns = {
        'URL': query,
        'Trustscore': trust,
        'Country': country,
        'ISP': isp_country,
    }
    return pd.DataFrame(columns)
You can try for example with df['URL'] equal to
stackoverflow.com
gitHub.com
You are looking for innerText, not textContent.
Code :
# Read the "Country" entry via innerText and record it in `country`;
# a failed lookup is recorded as "Error".
try:
    # Country
    c = driver.find_element_by_xpath("//div[contains(@class,'block__col') and contains(text(),'Country')]").get_attribute('innerText')
    print(c)
    country.append(c)
except Exception:
    # `Exception` rather than a bare except, so Ctrl-C / SystemExit still
    # propagate instead of being recorded as a scraping error.
    country.append("Error")
Update 1:
In case the locator you are already using is correct, try scrolling the element first:
# Scroll the matched element's own content to the bottom.
# NOTE: the script has no `return`, so execute_script() yields None here --
# do not chain .get_attribute(...) onto this call; query the element separately.
driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", driver.find_element_by_xpath("//div[contains(@class,'block__col') and contains(text(),'Country')]"))
Alternatively, try either of the two options with this XPath:
//div[contains(@class,'block__col')]/b[text()='Country']
Update 2:
# Locate the "Country" and "ISP" labels through their nested <b>, scroll each
# into view, and record their innerText. Both values are read before anything
# is appended so `country` and `isp_country` always grow together.
try:
    wait = WebDriverWait(driver, 30)  # missing trustscore
    # Country
    time.sleep(2)
    ele = driver.find_element_by_xpath("//div[contains(@class,'block__col')]/b[text()='Country']")
    driver.execute_script("arguments[0].scrollIntoView(true);", ele)
    c = ele.get_attribute('innerText')
    time.sleep(2)
    # ISP country
    ic = driver.find_element_by_xpath("//div[contains(@class,'block__col')]/b[text()='ISP']")
    # Scroll the ISP element itself (previously the Country element was
    # scrolled a second time here).
    driver.execute_script("arguments[0].scrollIntoView(true);", ic)
    isp = ic.get_attribute('innerText')
except Exception:
    # Either lookup failing marks both fields for this URL as "Error".
    c = isp = "Error"
country.append(c)
isp_country.append(isp)
Update 3:
To get the Country name under Company data, use this XPath:
//div[text()='Company data']/../following-sibling::div/descendant::b[text()='Country']/../following-sibling::div
Also, make sure of a few things before using this XPath.
Code :-
# Bring the "Company data" section into view, then read the Country value
# listed under it.
driver.maximize_window()
time.sleep(2)
driver.execute_script("window.scrollTo(0, 1000)")
time.sleep(2)
# Wait until the "Company data" heading is visible, then scroll its content.
driver.execute_script(
    "arguments[0].scrollTop = arguments[0].scrollHeight",
    WebDriverWait(driver, 20).until(
        EC.visibility_of_element_located((By.XPATH, "//div[text()='Company data']"))
    ),
)
# now use the mentioned xpath.
# (A stray backtick after the variable name made this line a syntax error.)
company_data_country_name = WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located(
        (By.XPATH, "//div[text()='Company data']/../following-sibling::div/descendant::b[text()='Country']/../following-sibling::div")
    )
)
print(company_data_country_name.text)