I am a beginner in Python and have started with web scraping. I want to extract data from a tourist site: I need the names of the hotels, the arrangements available in each hotel, and the price. I got stuck on the list of arrangements: each hotel can have several arrangements, but my code doesn't work and I don't know why. My code and the output it produces are below; if any of you can help me, thank you in advance.
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
# Scrape hotel names and their available arrangements ("pensions") from the
# tunisiebooking.com search results for a fixed destination and date.
PATH = "C:\\Users\\marketing2\\Documents\\chromedriver.exe"
driver = webdriver.Chrome(PATH)
try:
    driver.get('https://tn.tunisiebooking.com/')
    wait = WebDriverWait(driver, 20)

    # Fill in the search form (destination, dates, number of rooms) via JavaScript.
    script = "document.getElementById('ville_des').value ='Sousse';document.getElementById('depart').value ='05/08/2021';document.getElementById('checkin').value ='05/08/2021';document.getElementById('select_ch').value = '1';"
    driver.execute_script(script)

    # Click the search button and give the results page time to load.
    btn_rechercher = driver.find_element_by_id('boutonr')
    btn_rechercher.click()
    sleep(10)

    # Click the "details" button and give the page time to update.
    btn_plus = driver.find_element_by_id('plus_res')
    btn_plus.click()
    sleep(10)

    # Absolute XPath of one hotel card; the card index is filled in per hotel.
    # NOTE(review): absolute XPaths break as soon as the page layout changes.
    hotel_card_xpath = (
        '/html/body/div[6]/div[2]/div[1]/div/div[2]/div/div[4]/div[{index}]'
    )

    hotels = []
    for v in range(1, 5):  # the first four hotel cards
        card_xpath = hotel_card_xpath.format(index=v)

        hotel = driver.find_element_by_xpath(
            card_xpath + '/div/div[3]/div[1]/div[1]/span/a/h3'
        ).get_attribute('innerHTML')

        # Look up the arrangements inside THIS hotel's card (the card index
        # was previously hard-coded to 1, so every hotel got the first
        # hotel's arrangements). find_elements returns every matching <u>,
        # so a hotel may have any number of arrangements, including none.
        pension_elements = driver.find_elements_by_xpath(
            card_xpath + '/div/div[3]/div[3]/div[1]/div[1]/form/div[1]/div/u'
        )

        # Build a fresh list per hotel; a single shared list would make every
        # tuple point at the same, ever-growing list of arrangements.
        pensions = [
            element.get_attribute('innerHTML') for element in pension_elements
        ]
        hotels.append((hotel, pensions))

    print(hotels)
finally:
    # Always close the browser and stop chromedriver, even on failure.
    driver.quit()
You can try the following:
#!/usr/bin/env python
# coding: utf-8
import json
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
# Search tunisiebooking.com for hotels and print, for each hotel found on the
# results page, its name, its available arrangements and its price as JSON.

# create path and start webdriver
PATH = "/mnt/sdc/Work/scripts/Test/chromedriver"
driver = webdriver.Chrome(PATH)
try:
    # first get website
    driver.get('https://tn.tunisiebooking.com/')
    wait = WebDriverWait(driver, 20)

    # params to select
    params = {
        'destination': 'Sousse',
        'date_from': '05/08/2021',
        'date_to': '05/08/2021',
        'bedroom': '1',
    }

    # select destination
    destination_select = Select(driver.find_element_by_id('ville_des'))
    destination_select.select_by_value(params['destination'])

    # select bedroom
    bedroom_select = Select(driver.find_element_by_id('select_ch'))
    bedroom_select.select_by_value(params['bedroom'])

    # select dates: the date inputs are set directly through JavaScript
    script = f"document.getElementById('depart').value ='{params['date_from']}';"
    script += f"document.getElementById('checkin').value ='{params['date_to']}';"
    driver.execute_script(script)

    # submit form and give the results page time to load
    form = driver.find_element_by_id('hotel_recherch_moteur')
    form.submit()
    sleep(5)

    # ------------------------------------------------------------------------
    # get list of all hotels: every matching element is one hotel card
    hotels_list = []
    hotels_objects = driver.find_elements_by_xpath(
        '//div[contains(@class, "enveloppe_produit")]'
    )
    for hotel_obj in hotels_objects:
        # get price object; all lookups below are relative to this card
        price_object = hotel_obj.find_element_by_xpath(
            './/div[@class="monaieprix"]'
        )
        price_value = price_object.find_element_by_xpath(
            './/div[1]'
        ).text.replace('\n', '')

        # get title data
        title_data = hotel_obj.find_element_by_xpath(
            './/span[contains(@class, "tittre_hotel")]'
        )

        # get arrangements: find_elements returns every match, so a hotel
        # may have any number of arrangements
        arrangements_obj = hotel_obj.find_elements_by_xpath(
            './/div[contains(@class, "angle")]//u'
        )
        arrangements = [ao.text for ao in arrangements_obj]

        # create new object; price_value is already a string
        hotels_list.append({
            'name': title_data.find_element_by_xpath('.//a//h3').text,
            'arrangements': arrangements,
            'price': price_value,
        })
finally:
    # Always close the browser and stop chromedriver, even on failure.
    driver.quit()

# ----------------------------------------------------------------------------
for hotel in hotels_list:
    print(json.dumps(hotel, indent=4))
{
"name": "El Mouradi Palace",
"arrangements": [
"Petit dejeuner",
"Demi pension plus",
"All inclusive soft"
],
"price": "67"
}
{
"name": "KANTA",
"arrangements": [
"Petit dejeuner",
"Demi pension",
"All inclusive soft"
],
"price": "43"
}
...
If this helped you, please mark the answer as correct.