python selenium getting urls from google search results

I am trying to get the first 10 URLs from Google search results with Selenium. I know there is something other than innerHTML that will give me just the text inside the cite tags.

Here is the code:

#open google
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.keys import Keys

# Configure Chrome: non-headless, maximized, with the usual automation hints turned off.
chrome_options = Options()
chrome_options.headless = False
chrome_options.add_argument("start-maximized")
# options.add_experimental_option("detach", True)
chrome_options.add_argument("--no-sandbox")
# Experimental options are stored one value per option name, so calling
# add_experimental_option("excludeSwitches", ...) twice keeps only the last
# list. Pass both switches in a single call so neither is lost.
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--disable-blink-features=AutomationControlled')

driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=chrome_options)
driver.get('https://www.google.com/')

#paste - write name
#var_inp=input('Write the name to search:')
var_inp='python google search'
# type the query into the search box (name="q") and submit it with RETURN
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, "q"))).send_keys(var_inp+Keys.RETURN)
# wait (up to 10 s) until <cite> elements are present on the results page
res_lst=[]
res=WebDriverWait(driver,10).until(EC.presence_of_all_elements_located((By.TAG_NAME,'cite')))
print(len(res))
for r in res:
    # innerHTML is the raw markup of the element, so nested <span> tags are included
    print(r.get_attribute('innerHTML'))

#take email addresses from company
#send email

The result is below:

https://github.com<span class="dyjrff qzEoUe" role="text"> › opsdisk</span>
https://blog.apilayer.com<span class="dyjrff qzEoUe" role="text"> › h...</span>
https://blog.apilayer.com<span class="dyjrff qzEoUe" role="text"> › h...</span>

I want to get rid of the <span ...> part, as I need only the URLs. I could strip it with a regular expression, but I am looking for something like get_attribute('TEXT') that will easily give the result directly.

Solution

The best way to get the value of the node is to use the JavaScript executor and read the text of the node's firstChild, which is the text node that holds the URL and comes before the nested <span>.

# Start Chrome with the options configured earlier and open Google.
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=chrome_options)
driver.get('https://www.google.com/')

#paste - write name
#var_inp=input('Write the name to search:')
var_inp='python google search'
# type the query into the search box (name="q") and submit it with RETURN
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, "q"))).send_keys(var_inp+Keys.RETURN)
# wait (up to 10 s) until <cite> elements are present on the results page
res_lst=[]
res=WebDriverWait(driver,10).until(EC.presence_of_all_elements_located((By.TAG_NAME,'cite')))
print(len(res))
for r in res:
    # Read only the first child node of <cite> (the text before the nested
    # <span>). firstChild is null for an empty element, so guard against it
    # instead of letting the script throw and abort the loop.
    url = driver.execute_script(
        "var node = arguments[0].firstChild; return node ? node.textContent : '';", r)
    res_lst.append(url)
    print(url)

Output:

27
https://pypi.org
https://pypi.org
https://www.geeksforgeeks.org
https://www.geeksforgeeks.org
https://stackoverflow.com
https://stackoverflow.com
https://www.geeksforgeeks.org
https://www.geeksforgeeks.org
https://www.geeksforgeeks.org
https://www.geeksforgeeks.org
https://www.jcchouinard.com
https://www.jcchouinard.com
https://www.educative.io
https://www.educative.io
https://python-googlesearch.readthedocs.io
https://python-googlesearch.readthedocs.io
https://medium.com
https://medium.com
https://medium.com
https://medium.com
https://github.com
https://github.com
https://github.com
https://github.com