Search code examples
python · selenium-webdriver · web-scraping

webscraping data doesn't work - selenium and python


I am trying to scrape games and odds from the Merkurbets website with Selenium and Python.

The element for "team-name" looks like the snippet below. Is the `_ngcontent-ng-c1043474636` attribute on the div tag a problem for Selenium when it tries to find the text() of that div?

<div _ngcontent-ng-c1043474636="" class="team-name">FAC Wien</div>

I've tried a lot of combinations to get data out of this URL; here is just one example:

# Collect every game row inside the accordion panel.
# The earlier query ended in "games-list-game[1]", which matched only the
# first game, so the loop below could never process more than one row.
games_elements = driver.find_elements(By.XPATH, '//*[@id="cdk-accordion-child-3"]/div/div/games-list-partial/div/games-list-game')
print('games_elements: ', games_elements)
for game in games_elements:

    try:
        # Extract the team names.
        # The lookup must be relative to `game`. The previous expression
        # started with './/*[@id="cdk-accordion-child-3"]', i.e. it searched
        # for the accordion container *inside* the game row. That container
        # is an ancestor of the row, so the result was always an empty list.
        # NOTE(review): assumes each game row holds exactly two
        # <div class="team-name"> elements - confirm against the live DOM.
        teams = game.find_elements(By.XPATH, './/div[contains(@class, "team-name")]')
        # Extract the odds, again relative to the current game row.
        # "tip" is intentionally not indexed: "tip[1]" could only ever yield a
        # single odds cell, while three (home / draw / away) are needed below.
        # NOTE(review): the step sequence is copied from the original absolute
        # path; verify it still matches the page structure.
        odds = game.find_elements(By.XPATH, './div/div[2]/div[1]/market/div/div[2]/div/tip/div/button/span[2]/div/tip-odds/div/div')

        if len(teams) == 2 and len(odds) >= 3:
            home_team = teams[0].text
            away_team = teams[1].text
            home_odds = odds[0].text
            draw_odds = odds[1].text
            away_odds = odds[2].text

            games_data.append({
                "Home Team": home_team,
                "Away Team": away_team,
                "Home Odds": home_odds,
                "Draw Odds": draw_odds,
                "Away Odds": away_odds,
                "Zeitstempel": timestamp  # timestamp of this scrape run
            })
        else:
            logging.warning("Unerwartete Anzahl von Teams oder Quoten für ein Spiel gefunden.")
    except NoSuchElementException as e:
        # find_elements() returns an empty list instead of raising, so this
        # handler is only a safety net for other lookups on the element.
        logging.error(f"Fehler beim Extrahieren der Daten für ein Spiel: {e}")

or here

# Alternative: use a longer explicit wait that also requires visibility.
wait = WebDriverWait(driver, 30)
team_name_locator = (By.CLASS_NAME, "team-name")
try:
    # Block (up to 30 s) until one 'team-name' element is visible, then
    # print its text.
    team_name_element = wait.until(EC.visibility_of_element_located(team_name_locator))
    print(team_name_element.text)
except Exception as e:
    print(f"Fehler: {e}")
finally:
    # Whether or not the wait succeeded, dump the current DOM to disk so the
    # markup Selenium actually received can be inspected afterwards.
    page_source = driver.page_source
    with open('page_source.html', 'w', encoding='utf-8') as f:
        f.write(page_source)
    print("HTML-Inhalt gespeichert.")

In every attempt, ChromeDriver first accepts the cookies and scrolls down:

def scroll(driver, pause=1.0, max_scrolls=None):
    """Scroll to the bottom of the page until its height stops growing.

    After each scroll the function waits ``pause`` seconds so lazily loaded
    content can be appended, then re-reads ``document.body.scrollHeight``.
    It stops as soon as two consecutive readings are equal.

    Args:
        driver: Object exposing ``execute_script(script)`` (e.g. a Selenium
            WebDriver).
        pause: Seconds to sleep after each scroll. Defaults to 1.0, the
            previously hard-coded value.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (default) keeps the original unbounded behaviour; set it for
            pages that keep growing forever, which would otherwise make this
            loop run endlessly.

    Returns:
        None. The measured heights are printed for debugging.
    """
    height = driver.execute_script("return document.body.scrollHeight")
    print('height: ', height)
    scrolls = 0
    while max_scrolls is None or scrolls < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        scrolls += 1
        time.sleep(pause)  # give the page time to load more content
        hnow = driver.execute_script("return document.body.scrollHeight")
        print('hnow: ', hnow)
        if hnow == height:
            # Height unchanged since the last scroll: bottom reached.
            break
        height = hnow

# Start the WebDriver (Chrome, using the chromedriver binary at the given path).
chrome_options = Options()
# chrome_options.add_argument("--headless")  # Optional: run without a visible window
service = Service(executable_path="/usr/bin/chromedriver")  # path to chromedriver
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open the web page.
driver.get("https://www.merkurbets.de/de/sports/0/s1/1")
driver.maximize_window()
# Fixed 5 s pause before looking for the cookie banner button.
time.sleep(5)
# Accept cookies. find_element raises if the button is not present yet.
accept_button = driver.find_element(By.XPATH, '//*[@id="cookiescript_accept"]')
accept_button.click()
time.sleep(2)
# Scroll to the bottom until the page height stops changing.
scroll(driver)
# Wait another 2 seconds before querying (fixed pause, for debugging only).
time.sleep(2)
# Query all <div> elements whose class attribute contains "team-name".
# NOTE(review): the question reports this returns an empty list - possibly the
# content lives in an iframe or shadow root; confirm in the saved page source.
team = driver.find_elements(By.XPATH,  './/div[contains(@class,"team-name")]')

I've tried a lot of variations with XPATH, CSS_SELECTOR, CLASS_NAME, etc., but every time I get an empty list with no elements. Please help.


Solution

  • enter image description here

    Or you could send the API request directly.