I am working on a small project to scrape some data from a website. Everything seems to work well, except that I can't scrape the mobile number: in some cases I get blank output, and in other cases I get the full HTML tags together with the phone number.
I want to scrape the phone number along with the other data. All of the data is scraped correctly except the phone number. Here is the output I get:
Name: Klinik Seeschau AG Address: Bernrainstrasse 17, 8280 Kreuzlingen Phone:
Here is my code:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
# Start a Chrome session and load the English local.ch start page.
driver = webdriver.Chrome()
driver.get("https://www.local.ch/en/")
# Searching for "Clinic"
def search_query(query):
    """Type *query* into the search box named "what" and submit it.

    A fixed three-second pause follows clearing the box, typing the query
    and pressing Enter.
    """
    box = driver.find_element(By.NAME, "what")
    box.clear()
    time.sleep(3)
    # Send the query text first, then Enter, pausing after each.
    for keys in (query, Keys.RETURN):
        box.send_keys(keys)
        time.sleep(3)
# extract the source code
def source():
    """Read the current page source and parse it with BeautifulSoup.

    Sleeps three seconds after reading the source and again after parsing.
    The parsed tree is neither stored nor returned.
    """
    html = driver.page_source
    time.sleep(3)
    # The parse result is dropped; nothing in this function reads it.
    BeautifulSoup(html, "html.parser")
    time.sleep(3)
# Extracting the data
def datasearch():
    """Print the text of the search-results header, then wait two seconds."""
    data = driver.find_element(By.CLASS_NAME, "search-header-results").text
    print(f"there's {data}\n")
    time.sleep(2)
# Get the phone_numbers elements
def data_scrape():
    """Print the name, address and phone text of every result card.

    The phone value is the visible text of the first element inside the
    card that carries the class ``lui-sm-margin-left-xxs``.
    """
    card_selector = ".js-entry-card-container.row.lui-margin-vertical-xs.lui-sm-margin-vertical-m"
    for card in driver.find_elements(By.CSS_SELECTOR, card_selector):
        title_el = card.find_element(By.CSS_SELECTOR, ".lui-margin-vertical-zero.card-info-title")
        name = title_el.text
        address_el = card.find_element(By.CSS_SELECTOR, ".card-info-address")
        addre = address_el.text
        phone_el = card.find_element(By.CLASS_NAME, "lui-sm-margin-left-xxs")
        phone = phone_el.text
        print(f"Name: {name}\nAddress: {addre}\n Phone: {phone}\n")
# Search for "Clinique", run each scraping step in order, pause two seconds
# and close the browser.
search_query("Clinique")
for step in (source, datasearch, data_scrape):
    step()
time.sleep(2)
driver.quit()
Here is one way to get that information, based on your existing code:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
# Launch Chrome and open the English local.ch landing page.
driver = webdriver.Chrome()
driver.get("https://www.local.ch/en/")
# Searching for "Clinic"
def search_query(query):
    """Submit *query* through the search field whose name is "what".

    The field is cleared, the query is typed and Enter is pressed, with the
    same fixed pause after each of the three actions.
    """
    pause = 3  # seconds to wait after each action
    field = driver.find_element(By.NAME, "what")

    field.clear()
    time.sleep(pause)

    field.send_keys(query)
    time.sleep(pause)

    field.send_keys(Keys.RETURN)
    time.sleep(pause)
# extract the source code
def source():
    """Parse the current page with BeautifulSoup and return the tree.

    The parsed tree used to be bound to a local and thrown away, so the call
    produced nothing a caller could use. It is now returned; callers that
    ignore the return value behave exactly as before.

    Returns:
        BeautifulSoup: the page source parsed with the "html.parser" backend.
    """
    source_code = driver.page_source
    # NOTE(review): the fixed pauses are kept because the rest of the script
    # presumably relies on this delay for the results page to finish
    # rendering - confirm before removing them.
    time.sleep(3)
    soup = BeautifulSoup(source_code, "html.parser")
    time.sleep(3)
    return soup
# Extracting the data
def datasearch():
    """Print the search-results header text and pause for two seconds."""
    results_header = driver.find_element(By.CLASS_NAME, "search-header-results")
    data = results_header.text
    print(f"there's {data}\n")
    time.sleep(2)
# Get the phone_numbers elements
def data_scrape():
    """Print the name, address and phone number of every result card.

    The phone number is taken from the ``href`` of the card's "Call" link
    (``tel:+41...``), not from the visible text. A card with no "Call" link,
    or whose link is not a ``tel:`` URL, is printed with ``Phone: None``.
    """
    card_selector = ".js-entry-card-container.row.lui-margin-vertical-xs.lui-sm-margin-vertical-m"
    for component in driver.find_elements(By.CSS_SELECTOR, card_selector):
        name = component.find_element(By.CSS_SELECTOR, ".lui-margin-vertical-zero.card-info-title").text
        addre = component.find_element(By.CSS_SELECTOR, ".card-info-address").text

        # find_element raises NoSuchElementException when nothing matches, so
        # it can never drive an "if found else None" check. find_elements
        # returns an empty list instead, and the lookup runs only once.
        call_links = component.find_elements(By.XPATH, './/a[@title="Call"]')
        phone = None
        if call_links:
            href = call_links[0].get_attribute("href") or ""
            # "tel:+41443879700" -> "+41443879700"; stays None when the href
            # carries no "tel:" prefix.
            phone = href.partition("tel:")[2] or None

        print(f"Name: {name}\nAddress: {addre}\n Phone: {phone}\n")
# Run the whole scrape. The finally clause closes the browser even when one
# of the steps raises, so no Chrome/chromedriver process is left running.
try:
    search_query("Clinique")
    source()
    datasearch()
    data_scrape()
    # Keep the window open for two seconds before shutting down.
    time.sleep(2)
finally:
    driver.quit()
Result in terminal:
there's 2013 results for Clinique in Switzerland, in French
Name: HerzKlinik Hirslanden
Address: Witellikerstrasse 40, 8008 Zürich
Phone: +41443879700
Name: Berner Klinik Montana - Clinique Bernoise Montana
Address: Impasse Palace Bellevue 1, 3963 Crans-Montana
Phone: +41274855288
Name: PZM Psychiatriezentrum Münsingen AG
Address: Hunzigenallee 1, 3110 Münsingen
Phone: +41317208111
Name: Spitalzentrum Biel AG
Address: Vogelsang 84, 2502 Biel/Bienne
Phone: +41323242424
Name: LipoFilling
Address: Dammstrasse 29, 8702 Zollikon
Phone: +41443971717
Name: Adipositas und StoffwechselZentrum Zürich
Address: Witellikerstrasse 36, 8008 Zürich
Phone: +41443874000
Name: Maison Tóā - Clinique esthétique
Address: Voie du Chariot 6, 1003 Lausanne
Phone: +41217917070
Name: Klinik Seeschau AG
Address: Bernrainstrasse 17, 8280 Kreuzlingen
Phone: +41716775353
Name: Clinica Holistica Engiadina SA
Address: Plaz 40, 7542 Susch
Phone: +41813002030
Name: Kantonsspital Baselland Liestal
Address: Rheinstrasse 26, 4410 Liestal
Phone: +41619252525
You can find the Selenium documentation at https://www.selenium.dev/documentation/.