Search code examples
pythonseleniumselenium-webdriverweb-scrapingdatatables

Python web scraping: automatically click the "load more" button until it disappears, then save all the tables to a CSV file


I want to download all the tables from this website (https://www.doctolib.fr/medecin-generaliste/paris?availabilities=3), which lists all the doctors in Paris. However, to get all the names, you have to click the "afficher plus de résultats" button repeatedly until it is no longer available, and then scrape all the tables (names, addresses, etc.).

I tried using Selenium, but I did not succeed. Does anyone know how to do this? Does anyone have code that does it?

  • Mine does not work

from selenium import webdriver
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

# Shared WebDriver handle read by executeTest() and assigned by
# startWebDriver(). No browser is launched at import time any more: the former
# module-level webdriver.Chrome("/Users/XXXX/Desktop/chromedriver") session was
# overwritten by startWebDriver() without ever being quit (leaking a Chrome
# process), and passing the chromedriver path positionally is no longer
# accepted by current Selenium 4 releases.
driver = None

def executeTest():
    """Open the Doctolib Paris GP listing and click one button on the page.

    Operates on the module-level ``driver``, which must already hold a
    started WebDriver session. The button is located by an absolute XPath
    (presumably the "afficher plus de résultats" button -- the path is tied to
    the page layout and should be re-checked if the site changes).
    """
    global driver
    # Imported here so the function does not depend on edits to the file's
    # import block.
    from selenium.webdriver.common.by import By

    driver.get('https://www.doctolib.fr/medecin-generaliste/paris?availabilities=3')
    # Fixed pause to let the page finish rendering before looking up the button.
    time.sleep(7)
    # find_element_by_xpath() was removed in Selenium 4; find_element(By.XPATH, ...)
    # is the supported spelling and also works on Selenium 3.
    element = driver.find_element(
        By.XPATH,
        '/html/body/div[3]/div/div[5]/div/div[1]/div[1]/div[2]/div[4]/div/div/button/span',
    )
    element.click()
    # Give the page time to react to the click.
    time.sleep(3)

def startWebDriver():
    """Start a Chrome session and store it in the module-level ``driver``."""
    global driver
    options = Options()
    options.add_argument("--disable-infobars")
    # The `chrome_options=` keyword was deprecated in favour of `options=` and
    # later removed from Selenium; `options=` works on both old and new releases.
    driver = webdriver.Chrome(options=options)

if __name__ == "__main__":
    # Script entry point: start the browser, run the scenario, then shut down.
    startWebDriver()
    try:
        executeTest()
    finally:
        # Always close the browser, even when the scenario raises, so no
        # Chrome/chromedriver process is left running.
        driver.quit()

'''


Solution

  • You need to use an infinite loop and check whether the button still exists; if it does not, break out of the loop. Then collect all the information.

    code:

    from selenium import webdriver
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.common.by import By
    import pandas as pd
    import time

    # NOTE: `driver` is not created in this snippet; it is assumed to be an
    # already-started WebDriver session (e.g. `driver = webdriver.Chrome()`).

    # Locator of the "load more results" button label.
    LOAD_MORE = (By.CSS_SELECTOR, "div.dl-card-content >button>span.dl-button-label")

    driver.get('https://www.doctolib.fr/medecin-generaliste/paris?availabilities=3')
    try:
        # Accept the cookie banner.
        WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,"button#didomi-notice-agree-button>span"))).click()
    except TimeoutException:
        # No banner appeared within 10 s (e.g. consent already stored); carry on.
        pass

    # Keep clicking "load more" until the button no longer shows up.
    while True:
        try:
            # The wait returns the located element, so no second lookup is needed.
            button = WebDriverWait(driver, 10).until(EC.presence_of_element_located(LOAD_MORE))
        except TimeoutException:
            # Button did not appear within 10 s: every result has been loaded.
            break
        try:
            # Click through JavaScript so an overlay cannot intercept the click.
            driver.execute_script("arguments[0].click();", button)
        except StaleElementReferenceException:
            # The page re-rendered between lookup and click; locate the button again.
            continue
        # Short pause to let the next batch of results render.
        time.sleep(1)

    # Collect one list per column from the fully expanded result page.
    names=[name.text for name in driver.find_elements(By.CSS_SELECTOR, "div.dl-search-result-presentation h3[data-design-system='oxygen']")]
    addresses=[address.text for address in driver.find_elements(By.CSS_SELECTOR, "div.dl-search-result-presentation div.dl-margin-l-96 >span")]
    cityPostcode=[city.text for city in driver.find_elements(By.CSS_SELECTOR, "div.dl-search-result-presentation div.dl-margin-l-96 >div[class='dl-text dl-text-body dl-text-regular dl-text-s']")]
    # pandas raises ValueError here if the three lists differ in length.
    df=pd.DataFrame({"Name":names, "Address" : addresses, "City" : cityPostcode})
    print(df)
    df.to_csv("doctos.csv")
    

    Output:

                                                     Name                        Address         City
    0                 Centre de santé Kersanté Rosa Parks           72 Rue Cesária Évora  75019 Paris
    1                          Dr Niloufar ASSEF-ZAMANIAN   12 Rue Notre Dame des Champs  75006 Paris
    2                                   Dr Emilie COUPAUD      299/301 Rue de Belleville  75019 Paris
    3   Centre de Santé Convention - Ministère des Aff...        27 Rue de la Convention  75015 Paris
    4                                       Dr Marc WYDRA          4 Rue du Docteur Roux  75015 Paris
    ..                                                ...                            ...          ...
    64                               Dr Audrey CORNILLEAU              7b Rue de Lesseps  75020 Paris
    65                                   Dr André AZUELOS  43 Rue de la Chaussée d'Antin  75009 Paris
    66                                  Dr Déborah SMADJA         113 Avenue Victor Hugo  75116 Paris
    67                                   Dr Philippe Levy                   35 Rue Vital  75116 Paris
    68                                  Institut Pasquier                44 Rue Pasquier  75008 Paris
    
    [69 rows x 3 columns]