Search code examples
python selenium loops web-scraping nested-loops

How can I display nested loops using Python?


I am a beginner in Python and have started with web scraping. I want to extract data from a tourist site: I need the names of the hotels, the arrangements available in each hotel, and the price. I got stuck on the list of arrangements: each hotel can have several arrangements, but my code does not collect them correctly and I don't know why. Below are my code and the output it produces; if any of you can help me, thank you in advance.

from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait

# Scrape hotel names and their arrangements ("pensions") from the
# tunisiebooking.com search results using Selenium and absolute XPaths.

# Location of the chromedriver executable handed to webdriver.Chrome.
PATH = "C:\\Users\\marketing2\\Documents\\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get('https://tn.tunisiebooking.com/')
# Explicit-wait helper with a 20-second timeout; it is not used anywhere in
# the lines shown here (fixed sleep() calls are used instead).
wait = WebDriverWait(driver, 20)

# JavaScript that fills the search form by assigning the field values
# directly: destination, the two date fields and the 'select_ch' field.
script = "document.getElementById('ville_des').value ='Sousse';document.getElementById('depart').value ='05/08/2021';document.getElementById('checkin').value ='05/08/2021';document.getElementById('select_ch').value = '1';"

# Run the form-filling script in the page.
driver.execute_script(script)

# Click the search button (element id 'boutonr').
btn_rechercher = driver.find_element_by_id('boutonr')
btn_rechercher.click()
# Fixed 10-second pause; presumably gives the result page time to load.
sleep(10)

# Click the details button (element id 'plus_res').
btn_plus = driver.find_element_by_id('plus_res')
btn_plus.click()
# Fixed 10-second pause; presumably gives the extra results time to load.
sleep(10)

# Collect hotel names and arrangements by absolute XPath.
# hotels receives (hotel_name, pensions) tuples; pensions is ONE list that is
# created once here and shared by every tuple appended below.
hotels=[]
pensions=[]
# v runs over 1..4 and selects the v-th result container in the hotel XPath.
for v in range(1, 5):
        hotel = driver.find_element_by_xpath('/html/body/div[6]/div[2]/div[1]/div/div[2]/div/div[4]/div[' + str(v) + ']/div/div[3]/div[1]/div[1]/span/a/h3').get_attribute('innerHTML')
        # j runs over 1..2 only, so at most two arrangements are looked up.
        # NOTE(review): this XPath hard-codes '.../div[4]/div[1]/...' instead
        # of using v, so every pass reads the same (first) result container.
        for j in range (1,3):
            pension= driver.find_element_by_xpath('/html/body/div[6]/div[2]/div[1]/div/div[2]/div/div[4]/div[1]/div/div[3]/div[3]/div[1]/div[1]/form/div[1]/div[' + str(j) + ']/u').get_attribute('innerHTML')
        # NOTE(review): this append sits outside the inner loop, so only the
        # last value assigned to `pension` (j == 2) is stored per hotel.
        pensions.append((pension))
        # NOTE(review): the same `pensions` list object is attached to every
        # hotel, so all tuples end up showing the full accumulated list.
        hotels.append((hotel,pensions))
        

print(hotels)

Solution

  • You can try this:

    #!/usr/bin/env python
    # coding: utf-8
    import json
    from time import sleep
    from selenium import webdriver
    from selenium.webdriver.support.ui import WebDriverWait, Select
    
    
    # Scrape name, arrangements and price for every hotel returned by a
    # tunisiebooking.com search, then print each hotel as indented JSON.

    # create path and start webdriver
    PATH = "/mnt/sdc/Work/scripts/Test/chromedriver"
    driver = webdriver.Chrome(PATH)

    # params to select
    params = {
        'destination': 'Sousse',
        'date_from': '05/08/2021',
        'date_to': '05/08/2021',
        'bedroom': '1'
    }

    # one dict per hotel: {'name': ..., 'arrangements': [...], 'price': ...}
    hotels_list = []

    # Everything that talks to the browser lives in the try block so the
    # browser is closed in `finally` even when an element lookup raises.
    try:
        # first get website
        driver.get('https://tn.tunisiebooking.com/')
        # explicit-wait helper (20 s timeout); not used below
        wait = WebDriverWait(driver, 20)

        # select destination
        destination_select = Select(driver.find_element_by_id('ville_des'))
        destination_select.select_by_value(params['destination'])

        # select bedroom
        bedroom_select = Select(driver.find_element_by_id('select_ch'))
        bedroom_select.select_by_value(params['bedroom'])

        # select dates: the two date inputs are filled by assigning their
        # values through JavaScript
        script = f"document.getElementById('depart').value ='{params['date_from']}';"
        script += f"document.getElementById('checkin').value ='{params['date_to']}';"
        driver.execute_script(script)

        # submit form
        form = driver.find_element_by_id('hotel_recherch_moteur')
        form.submit()
        # fixed pause before reading the result page
        sleep(5)

        # ------------------------------------------------------------------------
        # get list of all hotels: each result card is a div whose class
        # contains "enveloppe_produit"
        hotels_objects = driver.find_elements_by_xpath(
            '//div[contains(@class, "enveloppe_produit")]'
        )
        for hotel_obj in hotels_objects:
            # get price object; the price text is read from its first child
            # div, with newlines stripped
            price_object = hotel_obj.find_element_by_xpath(
                './/div[@class="monaieprix"]'
            )
            price_value = price_object.find_element_by_xpath(
                './/div[1]'
            ).text.replace('\n', '')

            # get title data
            title_data = hotel_obj.find_element_by_xpath(
                './/span[contains(@class, "tittre_hotel")]'
            )

            # get arrangements: the XPath is relative to this card ('.//'),
            # so only this hotel's arrangements are collected, however many
            # there are
            arrangements_obj = hotel_obj.find_elements_by_xpath(
                './/div[contains(@class, "angle")]//u'
            )
            arrangements = [ao.text for ao in arrangements_obj]

            # create new object (all text is extracted here, while the
            # browser session is still open)
            hotels_list.append({
                'name': title_data.find_element_by_xpath('.//a//h3').text,
                'arrangements': arrangements,
                'price': price_value
            })
    finally:
        # release the browser and the chromedriver process
        driver.quit()

    # ----------------------------------------------------------------------------
    for hotel in hotels_list:
        print(json.dumps(hotel, indent=4))
    
    {
        "name": "El Mouradi Palace",
        "arrangements": [
            "Petit dejeuner",
            "Demi pension plus",
            "All inclusive soft"
        ],
        "price": "67"
    }
    {
        "name": "KANTA",
        "arrangements": [
            "Petit dejeuner",
            "Demi pension",
            "All inclusive soft"
        ],
        "price": "43"
    }
    ...
    

    If this helped you, please mark the answer as correct.