Search code examples
pythonselenium-webdriverweb-scrapingbeautifulsoupwebdriver

How can I scrape a mobile number and email from hidden HTML tags?


I am working on a small project to scrape some data from a website. Everything seems to work well, except that I can't scrape the mobile number: in some cases I get a blank output, and in other cases I get the full HTML tags with the mobile number inside them.

I want to scrape the phone number along with the other data. All of the other data is scraped correctly; only the mobile phone number is missing. Here is the output I get:

Name: Klinik Seeschau AG Address: Bernrainstrasse 17, 8280 Kreuzlingen Phone:

Here is my code:

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time

# Start the Chrome session; the functions below use this module-level driver.
driver = webdriver.Chrome()

# Open the English landing page of local.ch.
driver.get("https://www.local.ch/en/")

def search_query(query):
    """Type *query* into the search box and submit it.

    Locates the input whose ``name`` attribute is ``"what"``, clears it,
    sends the query and then RETURN, pausing three seconds after each step.
    """
    pause_seconds = 3
    box = driver.find_element("name", "what")
    box.clear()
    time.sleep(pause_seconds)
    # The query text and the RETURN key are sent the same way, each
    # followed by the same pause.
    for keys in (query, Keys.RETURN):
        box.send_keys(keys)
        time.sleep(pause_seconds)

def source():
    """Read the current page source and parse it with BeautifulSoup.

    The parsed tree is not returned or stored; the function pauses three
    seconds after reading the source and again after parsing it.
    """
    html = driver.page_source
    time.sleep(3)
    # Parse with the built-in "html.parser" backend; the result is discarded.
    BeautifulSoup(html, "html.parser")
    time.sleep(3)

def datasearch():
    """Print the text of the ``search-header-results`` element, then pause 2 s."""
    header = driver.find_element(By.CLASS_NAME, "search-header-results")
    print(f"there's {header.text}\n")
    time.sleep(2)

def data_scrape():
    """Print the name, address and phone text of every result card.

    Each card is looked up by its CSS classes; the three fields are read
    from child elements of the card and printed as one record.
    """
    card_selector = (
        ".js-entry-card-container.row"
        ".lui-margin-vertical-xs.lui-sm-margin-vertical-m"
    )
    for card in driver.find_elements(By.CSS_SELECTOR, card_selector):
        title_el = card.find_element(
            By.CSS_SELECTOR, ".lui-margin-vertical-zero.card-info-title"
        )
        name = title_el.text
        address_el = card.find_element(By.CSS_SELECTOR, ".card-info-address")
        addre = address_el.text
        # NOTE(review): the visible text of this element was observed to be
        # empty for some cards, which leaves the "Phone:" field blank.
        phone_el = card.find_element(By.CLASS_NAME, "lui-sm-margin-left-xxs")
        phone = phone_el.text
        print(f"Name: {name}\nAddress: {addre}\n Phone: {phone}\n")



# Run the scrape: search, parse, print the result count, print each card.
# The try/finally guarantees the browser is closed even when one of the
# steps raises (e.g. an element is not found), so no Chrome process leaks.
try:
    search_query("Clinique")
    source()
    datasearch()
    data_scrape()
    # Short pause before the browser window is closed.
    time.sleep(2)
finally:
    driver.quit()


Solution

  • Here is one way to get that information, based on your existing code:

    from bs4 import BeautifulSoup
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.common.by import By
    import time
    
    # Start the Chrome session; the functions below use this module-level driver.
    driver = webdriver.Chrome()

    # Open the English landing page of local.ch.
    driver.get("https://www.local.ch/en/")
    
    def search_query(query):
        """Type *query* into the search box and submit it.

        Locates the input whose ``name`` attribute is ``"what"``, clears it,
        sends the query and then RETURN, pausing three seconds after each step.
        """
        pause_seconds = 3
        box = driver.find_element("name", "what")
        box.clear()
        time.sleep(pause_seconds)
        # The query text and the RETURN key are sent the same way, each
        # followed by the same pause.
        for keys in (query, Keys.RETURN):
            box.send_keys(keys)
            time.sleep(pause_seconds)
    
    def source():
        """Parse the browser's current page source and return the result.

        Previously the parsed tree was bound to a local name and discarded,
        so the call had no usable outcome. Returning it is backward
        compatible: callers that ignore the return value behave as before.

        Returns:
            The BeautifulSoup tree built from ``driver.page_source`` with the
            ``html.parser`` backend.
        """
        source_code = driver.page_source
        # Pauses are kept so the script's overall timing is unchanged.
        time.sleep(3)
        soup = BeautifulSoup(source_code, "html.parser")
        time.sleep(3)
        return soup
    
    def datasearch():
        """Print the text of the ``search-header-results`` element, then pause 2 s."""
        header = driver.find_element(By.CLASS_NAME, "search-header-results")
        print(f"there's {header.text}\n")
        time.sleep(2)
    
    def data_scrape():
        """Print the name, address and phone number of every result card.

        The phone number is taken from the ``href`` of the card's
        ``<a title="Call">`` link (the part after ``tel:``) instead of from
        visible text. Cards without such a link, or whose link has no
        ``tel:`` target, are printed with ``Phone: None``.
        """
        card_selector = (
            ".js-entry-card-container.row"
            ".lui-margin-vertical-xs.lui-sm-margin-vertical-m"
        )
        for component in driver.find_elements(By.CSS_SELECTOR, card_selector):
            name = component.find_element(
                By.CSS_SELECTOR, ".lui-margin-vertical-zero.card-info-title"
            ).text
            addre = component.find_element(By.CSS_SELECTOR, ".card-info-address").text
            # find_elements returns an empty list when nothing matches, whereas
            # find_element raises NoSuchElementException, so a truthiness
            # check on find_element can never reach its "else" branch.
            call_links = component.find_elements(By.XPATH, './/a[@title="Call"]')
            phone = None
            if call_links:
                href = call_links[0].get_attribute("href") or ""
                # partition never raises; an href without "tel:" yields "",
                # which is reported as None rather than crashing the loop.
                phone = href.partition("tel:")[2] or None
            print(f"Name: {name}\nAddress: {addre}\n Phone: {phone}\n")
    
    
    
    # Run the scrape: search, parse, print the result count, print each card.
    # The try/finally guarantees the browser is closed even when one of the
    # steps raises (e.g. an element is not found), so no Chrome process leaks.
    try:
        search_query("Clinique")
        source()
        datasearch()
        data_scrape()
        # Short pause before the browser window is closed.
        time.sleep(2)
    finally:
        driver.quit()
    

    Result in terminal:

    there's 2013 results for Clinique in Switzerland, in French
    
    Name: HerzKlinik Hirslanden
    Address: Witellikerstrasse 40, 8008 Zürich
     Phone: +41443879700
    
    Name: Berner Klinik Montana - Clinique Bernoise Montana
    Address: Impasse Palace Bellevue 1, 3963 Crans-Montana
     Phone: +41274855288
    
    Name: PZM Psychiatriezentrum Münsingen AG
    Address: Hunzigenallee 1, 3110 Münsingen
     Phone: +41317208111
    
    Name: Spitalzentrum Biel AG
    Address: Vogelsang 84, 2502 Biel/Bienne
     Phone: +41323242424
    
    Name: LipoFilling
    Address: Dammstrasse 29, 8702 Zollikon
     Phone: +41443971717
    
    Name: Adipositas und StoffwechselZentrum Zürich
    Address: Witellikerstrasse 36, 8008 Zürich
     Phone: +41443874000
    
    Name: Maison Tóā - Clinique esthétique
    Address: Voie du Chariot 6, 1003 Lausanne
     Phone: +41217917070
    
    Name: Klinik Seeschau AG
    Address: Bernrainstrasse 17, 8280 Kreuzlingen
     Phone: +41716775353
    
    Name: Clinica Holistica Engiadina SA
    Address: Plaz 40, 7542 Susch
     Phone: +41813002030
    
    Name: Kantonsspital Baselland Liestal
    Address: Rheinstrasse 26, 4410 Liestal
     Phone: +41619252525
    

    You can find Selenium documentation here.