Search code examples
python web-scraping beautifulsoup location-href

Get href with Python using BeautifulSoup's find


I'm trying to scrape a real estate website, but can't figure out how to get the URL linked to each property.

Here's the code I have right now:

I'm still new to coding. I searched other similar topics but couldn't find an answer that perfectly fits my question.


import time

import pandas as pd
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Walk through the immoweb search-result pages and save the title, address,
# price, surface and description of every listing to "immo_a.csv".

options = Options()
options.add_argument("window-size=1400,600")

# Present a randomly chosen user agent to the site.
ua = UserAgent()
user_agent = ua.random
print(user_agent)
options.add_argument(f'user-agent={user_agent}')

driver = webdriver.Chrome('/Users/raduulea/Documents/chromedriver', options=options)

# One list per output column; entry i of every list describes listing i.
Title = []
address = []
price = []
surface = []
desc = []
page = 2  # number of the next results page to request (page 1 is the start URL)

try:
    driver.get('https://www.immoweb.be/fr/recherche/appartement/a-vendre')

    while True:
        # Fixed pause so the page can finish loading before its source is read.
        time.sleep(10)
        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')

        # Any element whose class is one of the three result sizes.
        results = soup.find_all(True, {"class": ["result-xl", "result-l","result-m"]})
        for result in results:
            Title.append(result.find("div", {"class":"title-bar-left"}).get_text().strip())
            # Explicit "class" key: a bare set is only treated as a class
            # filter by BeautifulSoup's non-dict fallback.
            address.append(result.find("span", {"class": "result-adress"}).get_text().strip())
            price.append(result.find("div", {"class": ["xl-price rangePrice", "l-price rangePrice", "m-price rangePrice", "xl-price-promotion rangePrice"]}).get_text().strip())
            surface.append(result.find("div", {"class": ["xl-surface-ch", "l-surface-ch", "m-surface-ch"]}).get_text().strip())
            desc.append(result.find("div", {"class": ["xl-desc", "l-desc", "m-desc"]}).get_text().strip())

        # Stop once the current page no longer offers a "next" link.
        # NOTE(review): find_elements_by_css_selector is the Selenium 3 API;
        # confirm the installed Selenium version still provides it.
        if not driver.find_elements_by_css_selector("a.next"):
            break

        url = "https://www.immoweb.be/fr/recherche/appartement/a-vendre/?page={}".format(page)
        driver.get(url)
        page += 1
finally:
    # Always close the browser, even when scraping fails part-way through.
    driver.quit()

df = pd.DataFrame({"Title": Title, "Address": address, "Price:": price, "Surface": surface, "Description": desc})
df.to_csv("immo_a.csv")

Here's the HTML from which I can get the link:

<a href="https://www.immoweb.be/fr/annonce/immeuble-a-appartements/a-vendre/hoboken/2660/id8135041" title="Immeuble à appartements de 2 façades à vendre à 2660 Hoboken au prix de 545.000 € - (8135041)" target="IWEB_MAIN" xpath="1"></a>

Thanks in advance for your help! :)


Solution

  • Try this:

    from selenium import webdriver
    import time
    from bs4 import BeautifulSoup
    from bs4.element import Tag
    import pandas as pd
    import traceback
    from selenium.webdriver.chrome.options import Options
    from fake_useragent import UserAgent

    # Scrape every page of the immoweb apartment search results and write one
    # CSV row per listing: title, address, price, surface, description and the
    # URL of the listing's detail page.

    options = Options()
    options.add_argument("window-size=1400,600")

    # Present a randomly chosen user agent to the site.
    ua = UserAgent()
    user_agent = ua.random

    options.add_argument(f'user-agent={user_agent}')
    driver = webdriver.Chrome('/Users/raduulea/Documents/chromedriver', options=options)

    # One list per output column; entry i of every list describes listing i.
    title = []
    address = []
    price = []
    surface = []
    desc = []
    link = []

    page = 2  # number of the next results page to request (page 1 is the start URL)

    try:
        driver.get('https://www.immoweb.be/fr/recherche/appartement/a-vendre')

        while True:
            # Fixed pause so the page can finish loading before its source is read.
            time.sleep(4)

            # Re-parse on every iteration: each pass handles the page that
            # was just loaded, not a stale copy of page 1.
            soup = BeautifulSoup(driver.page_source, 'lxml')
            companies = soup.find("div", {"id": "result"})
            if companies is None:
                # No results container on this page: nothing left to scrape.
                break

            for tag in companies:

                # Direct children also include text nodes; only tags can be listings.
                if not isinstance(tag, Tag):
                    continue

                _class = tag.get('class')

                if _class is None or "result-xl" not in _class[0]:
                    continue

                # Look up every field first so that a listing missing one of
                # them is skipped as a whole and the column lists stay aligned.
                title_node = tag.find("div", {"class":"title-bar-left"})
                # Explicit "class" key: a bare set is only treated as a class
                # filter by BeautifulSoup's non-dict fallback.
                address_node = tag.find("span", {"class": "result-adress"})
                price_node = tag.find("div", {"class": ["xl-price rangePrice", "l-price rangePrice", "m-price rangePrice", "xl-price-promotion rangePrice"]})
                surface_node = tag.find("div", {"class": ["xl-surface-ch", "l-surface-ch", "m-surface-ch"]})
                desc_node = tag.find("div", {"class": ["xl-desc", "l-desc", "m-desc"]})
                # First <a> inside the listing that carries an href attribute.
                links = tag.find("a",href=True)

                found = (title_node, address_node, price_node, surface_node, desc_node, links)
                if any(node is None for node in found):
                    continue

                title.append(title_node.get_text().strip())
                address.append(address_node.get_text().strip())
                price.append(price_node.get_text().strip())
                surface.append(surface_node.get_text().strip())
                desc.append(desc_node.get_text().strip())
                link.append(links['href'])

            # Pagination is decided once per page, after all of its listings
            # have been read. Stop when there is no "next" link.
            # NOTE(review): find_elements_by_css_selector is the Selenium 3 API;
            # confirm the installed Selenium version still provides it.
            if not driver.find_elements_by_css_selector("a.next"):
                break

            url = "https://www.immoweb.be/fr/recherche/appartement/a-vendre/?page={}".format(page)
            driver.get(url)
            page += 1

    except Exception:
        # Top-level boundary: report the failure but still save what was
        # collected so far.
        traceback.print_exc()
    finally:
        # Always close the browser, even when scraping fails part-way through.
        driver.quit()

    df = pd.DataFrame({"Title": title, "Address": address, "Price:": price, "Surface": surface, "Description": desc,"Link":link})
    df.to_csv("immo_a.csv")
    

    The lines that extract each property's link are:

    # First <a> element inside `tag` that has an href attribute.
    links = tag.find("a",href=True)
    # Store that element's href value in the `link` list.
    link.append(links['href'])
    

    They scrape the link of each property.

    CSV file output:

    ,Title,Address,Price:,Surface,Description,Link
    0,Appartement,1090 Jette,"260.000 €  
                                          269.000 €",140 m²     3 ch.,JETTE appartement 2 étages,https://www.immoweb.be/fr/annonce/appartement/a-vendre/jette/1090/id8004072
    1,Appartement,6032 Mont-sur-Marchienne,280.000 €,140 m²     4 ch.,Appartement 4 chambres très bien situé,https://www.immoweb.be/fr/annonce/appartement/a-vendre/mont-sur-marchienne/6032/id8137289
    2,Appartement,6700 Arlon,210.000 €,110 m²     3 ch.,Appartement spacieux 3 chambres avec garage,https://www.immoweb.be/fr/annonce/appartement/a-vendre/arlon/6700/id8135774
    3,Appartement,2000 Anvers,289.000 €,80 m²     1 ch.,Appartement renové avec terrace,https://www.immoweb.be/fr/annonce/appartement/a-vendre/anvers/2000/id8135064
    4,Appartement,1200 Woluwe-St-Lambert,"749.000 €  
                                          794.999 €",215 m²     3 ch.,INFOS & VISITE 7/7- SUPERBE DUPLEX PENTHOUSE HOTEL DE MAITRE,https://www.immoweb.be/fr/annonce/appartement/a-vendre/woluwe-st-lambert/1200/id8020453
    5,Appartement,9230 Wetteren,199.000 €,95 m²     2 ch.,TOF LICHTRIJK 2-SLKAPPARTEMENT met BALKON in het CENTRUM!,https://www.immoweb.be/fr/annonce/appartement/a-vendre/wetteren/9230/id8134908
    6,Duplex,8430 Middelkerke,225.000 €,81 m²     2 ch.,Duplex deux chambres moderne et terrasse,https://www.immoweb.be/fr/annonce/duplex/a-vendre/middelkerke/8430/id8132431
    7,Appartement,8400 Ostende,"299.000 €  
                                          320.000 €",80 m²     2 ch.,RESIDENCE OOSTDIJK,https://www.immoweb.be/fr/annonce/appartement/a-vendre/ostende/8400/id6976820
    8,Appartement,1000 Bruxelles,450.000 €,121 m²     2 ch.,Appartement ± 135 m² avec terrasse au 4ème étage d'un immeub,https://www.immoweb.be/fr/annonce/appartement/a-vendre/bruxelles/1000/id8132721
    9,Penthouse,1180 Uccle,580.000 €,160 m²     2 ch.,Duplex Penthouse avec grande terrasse orientée sud,https://www.immoweb.be/fr/annonce/penthouse/a-vendre/uccle/1180/id8134873
    10,Appartement,1050 Ixelles,595.000 €,143 m²     2 ch.,Splendide appartement haut de gamme près du Jardin du Roi,https://www.immoweb.be/fr/annonce/appartement/a-vendre/ixelles/1050/id8134869
    11,Appartement,8400 Ostende,"108.000 €  
                                          112.000 €",55 m²     1 ch.,APPARTEMENT DANS UN QUARTIER CALME,https://www.immoweb.be/fr/annonce/appartement/a-vendre/ostende/8400/id8042337
    12,Duplex,1180 Uccle,545.000 €,160 m²     4 ch.,Quartier de l'Observatoire DUPLEX-PENTHOUSE 3-4èmes,https://www.immoweb.be/fr/annonce/duplex/a-vendre/uccle/1180/id8131722
    13,Appartement,8400 Ostende,185.000 €,75 m²     2 ch.,Appartement,https://www.immoweb.be/fr/annonce/appartement/a-vendre/ostende/8400/id8130087