I'm trying to scrape a real estate website, but can't figure out how to get the URL linked to each property.
Here's the code I have right now:
I'm still new to coding; I searched other similar topics but couldn't find an answer that fits my question.
# Scrape apartment listings from immoweb.be search results, page by page,
# and save title, address, price, surface and description to a CSV file.
import time

import pandas as pd
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

SEARCH_URL = "https://www.immoweb.be/fr/recherche/appartement/a-vendre"
PAGE_LOAD_WAIT = 10  # seconds to let each results page render before reading it


def text_of(parent, name, attrs):
    """Return the stripped text of the first matching descendant, or "" if none.

    Without this guard, a single listing that lacks one field would raise
    AttributeError (``None.get_text``) and abort the whole scrape.
    """
    node = parent.find(name, attrs)
    return node.get_text().strip() if node is not None else ""


options = Options()
options.add_argument("window-size=1400,600")
user_agent = UserAgent().random
print(user_agent)
options.add_argument(f'user-agent={user_agent}')

# NOTE(review): passing the chromedriver path positionally is the Selenium 3
# style; recent Selenium 4 releases expect a Service object instead - confirm
# against the installed version.
driver = webdriver.Chrome('/Users/raduulea/Documents/chromedriver', options=options)

Title = []
address = []
price = []
surface = []
desc = []

try:
    driver.get(SEARCH_URL)
    page = 2  # number of the next results page to request
    while True:
        time.sleep(PAGE_LOAD_WAIT)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        results = soup.find_all(True, {"class": ["result-xl", "result-l", "result-m"]})
        for result in results:
            Title.append(text_of(result, "div", {"class": "title-bar-left"}))
            address.append(text_of(result, "span", {"class": "result-adress"}))
            price.append(text_of(result, "div", {"class": ["xl-price rangePrice", "l-price rangePrice", "m-price rangePrice", "xl-price-promotion rangePrice"]}))
            surface.append(text_of(result, "div", {"class": ["xl-surface-ch", "l-surface-ch", "m-surface-ch"]}))
            desc.append(text_of(result, "div", {"class": ["xl-desc", "l-desc", "m-desc"]}))

        # Stop once the current page no longer offers a "next" link.
        if not driver.find_elements(By.CSS_SELECTOR, "a.next"):
            break
        driver.get(f"{SEARCH_URL}/?page={page}")
        page += 1
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()

df = pd.DataFrame({"Title": Title, "Address": address, "Price:": price, "Surface": surface, "Description": desc})
df.to_csv("immo_a.csv")
Here's the HTML from which I can get the link:
<a href="https://www.immoweb.be/fr/annonce/immeuble-a-appartements/a-vendre/hoboken/2660/id8135041" title="Immeuble à appartements de 2 façades à vendre à 2660 Hoboken au prix de 545.000 € - (8135041)" target="IWEB_MAIN" xpath="1"></a>
Thanks in advance for your help! :)
Try this:
# Scrape apartment listings from immoweb.be search results, page by page,
# including the URL of each property, and save everything to a CSV file.
import time
import traceback

import pandas as pd
from bs4 import BeautifulSoup
from bs4.element import Tag
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

SEARCH_URL = "https://www.immoweb.be/fr/recherche/appartement/a-vendre"
PAGE_LOAD_WAIT = 4  # seconds to let each results page render before reading it


def text_of(parent, name, attrs):
    """Return the stripped text of the first matching descendant, or "" if none.

    Returning "" instead of raising keeps the parallel column lists the same
    length when a listing lacks one of the fields.
    """
    node = parent.find(name, attrs)
    return node.get_text().strip() if node is not None else ""


options = Options()
options.add_argument("window-size=1400,600")
user_agent = UserAgent().random
options.add_argument(f'user-agent={user_agent}')

# NOTE(review): passing the chromedriver path positionally is the Selenium 3
# style; recent Selenium 4 releases expect a Service object instead - confirm
# against the installed version.
driver = webdriver.Chrome('/Users/raduulea/Documents/chromedriver', options=options)

title = []
address = []
price = []
surface = []
desc = []
link = []

try:
    driver.get(SEARCH_URL)
    page = 2  # number of the next results page to request
    # The loop is required: it drives pagination and gives `break` a target.
    while True:
        time.sleep(PAGE_LOAD_WAIT)
        soup = BeautifulSoup(driver.page_source, 'lxml')
        companies = soup.find("div", {"id": "result"})
        if companies is None:
            # No results container on this page: nothing more to read.
            break

        for tag in companies:
            # Direct children also include text nodes; only elements matter.
            if not isinstance(tag, Tag):
                continue
            _class = tag.get('class')
            # Only elements whose first class contains "result-xl" are kept.
            if not _class or "result-xl" not in _class[0]:
                continue

            title.append(text_of(tag, "div", {"class": "title-bar-left"}))
            address.append(text_of(tag, "span", {"class": "result-adress"}))
            price.append(text_of(tag, "div", {"class": ["xl-price rangePrice", "l-price rangePrice", "m-price rangePrice", "xl-price-promotion rangePrice"]}))
            surface.append(text_of(tag, "div", {"class": ["xl-surface-ch", "l-surface-ch", "m-surface-ch"]}))
            desc.append(text_of(tag, "div", {"class": ["xl-desc", "l-desc", "m-desc"]}))

            # First anchor with an href inside the result is the property URL.
            links = tag.find("a", href=True)
            link.append(links['href'] if links is not None else "")

        # Stop once the current page no longer offers a "next" link.
        if not driver.find_elements(By.CSS_SELECTOR, "a.next"):
            break
        driver.get(f"{SEARCH_URL}/?page={page}")
        page += 1
except Exception:
    # Top-level boundary: report the failure but still save what was scraped.
    traceback.print_exc()
finally:
    # Always close the browser, even on an unexpected error or Ctrl-C.
    driver.quit()

df = pd.DataFrame({"Title": title, "Address": address, "Price:": price, "Surface": surface, "Description": desc,"Link":link})
df.to_csv("immo_a.csv")
Where
links = tag.find("a",href=True)
link.append(links['href'])
scrape each property's link.
CSV file output:
,Title,Address,Price:,Surface,Description,Link
0,Appartement,1090 Jette,"260.000 €
269.000 €",140 m² 3 ch.,JETTE appartement 2 étages,https://www.immoweb.be/fr/annonce/appartement/a-vendre/jette/1090/id8004072
1,Appartement,6032 Mont-sur-Marchienne,280.000 €,140 m² 4 ch.,Appartement 4 chambres très bien situé,https://www.immoweb.be/fr/annonce/appartement/a-vendre/mont-sur-marchienne/6032/id8137289
2,Appartement,6700 Arlon,210.000 €,110 m² 3 ch.,Appartement spacieux 3 chambres avec garage,https://www.immoweb.be/fr/annonce/appartement/a-vendre/arlon/6700/id8135774
3,Appartement,2000 Anvers,289.000 €,80 m² 1 ch.,Appartement renové avec terrace,https://www.immoweb.be/fr/annonce/appartement/a-vendre/anvers/2000/id8135064
4,Appartement,1200 Woluwe-St-Lambert,"749.000 €
794.999 €",215 m² 3 ch.,INFOS & VISITE 7/7- SUPERBE DUPLEX PENTHOUSE HOTEL DE MAITRE,https://www.immoweb.be/fr/annonce/appartement/a-vendre/woluwe-st-lambert/1200/id8020453
5,Appartement,9230 Wetteren,199.000 €,95 m² 2 ch.,TOF LICHTRIJK 2-SLKAPPARTEMENT met BALKON in het CENTRUM!,https://www.immoweb.be/fr/annonce/appartement/a-vendre/wetteren/9230/id8134908
6,Duplex,8430 Middelkerke,225.000 €,81 m² 2 ch.,Duplex deux chambres moderne et terrasse,https://www.immoweb.be/fr/annonce/duplex/a-vendre/middelkerke/8430/id8132431
7,Appartement,8400 Ostende,"299.000 €
320.000 €",80 m² 2 ch.,RESIDENCE OOSTDIJK,https://www.immoweb.be/fr/annonce/appartement/a-vendre/ostende/8400/id6976820
8,Appartement,1000 Bruxelles,450.000 €,121 m² 2 ch.,Appartement ± 135 m² avec terrasse au 4ème étage d'un immeub,https://www.immoweb.be/fr/annonce/appartement/a-vendre/bruxelles/1000/id8132721
9,Penthouse,1180 Uccle,580.000 €,160 m² 2 ch.,Duplex Penthouse avec grande terrasse orientée sud,https://www.immoweb.be/fr/annonce/penthouse/a-vendre/uccle/1180/id8134873
10,Appartement,1050 Ixelles,595.000 €,143 m² 2 ch.,Splendide appartement haut de gamme près du Jardin du Roi,https://www.immoweb.be/fr/annonce/appartement/a-vendre/ixelles/1050/id8134869
11,Appartement,8400 Ostende,"108.000 €
112.000 €",55 m² 1 ch.,APPARTEMENT DANS UN QUARTIER CALME,https://www.immoweb.be/fr/annonce/appartement/a-vendre/ostende/8400/id8042337
12,Duplex,1180 Uccle,545.000 €,160 m² 4 ch.,Quartier de l'Observatoire DUPLEX-PENTHOUSE 3-4èmes,https://www.immoweb.be/fr/annonce/duplex/a-vendre/uccle/1180/id8131722
13,Appartement,8400 Ostende,185.000 €,75 m² 2 ch.,Appartement,https://www.immoweb.be/fr/annonce/appartement/a-vendre/ostende/8400/id8130087