Search code examples
python-3.x · selenium · selenium-webdriver · screenshot

Python selenium just screenshots the first element multiple times throughout the loop


I'm trying to take a screenshot of each comment in a reddit post using selenium python. All comments have the same id/class and that's what I have used to select them.

Here's my code;

import requests
from bs4 import BeautifulSoup
import pyttsx3, pyautogui

from PIL import Image
from io import BytesIO

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys

# Start Chrome through a chromedriver binary at a fixed local path.
# NOTE(review): the path is a regular (non-raw) string containing backslashes.
# `\S` and `\c` are not recognized escape sequences, so the backslashes are
# kept, but recent Python versions warn about this; a raw string (r'...')
# would be the safe spelling.
driver = webdriver.Chrome(executable_path='C:\Selenium_Drivers\chromedriver.exe')

# The Reddit post whose comments should be captured.
url = 'https://www.reddit.com/user/UoPeople09/comments/wlt4qj/what_made_you_apply_at_uopeople/'

driver.get(url)
# Implicit wait: later element lookups poll for up to 5 seconds before failing.
driver.implicitly_wait(5)

# Height of the document body as measured right after the page was opened.
total_height = int(driver.execute_script("return document.body.scrollHeight"))

# `u` numbers the output files (shot1.png, shot2.png, ...).
u = 1
# Scroll down in 50-pixel steps, up to twice the height measured above, and
# save one screenshot per step.
for i in range(1, total_height*2, 50):
    driver.execute_script(f"window.scrollTo(0, {i})")
 
    # The selector contains the id `t1_ikllxsq`, which names one specific
    # comment, and find_element returns only the first match. Every iteration
    # therefore resolves to the same element regardless of the scroll
    # position, which is why all saved screenshots show the same comment.
    comment = driver.find_element(By.CSS_SELECTOR, 'div#t1_ikllxsq._3sf33-9rVAO_v4y0pIW_CH')
    # NOTE(review): same non-raw backslash path issue as the chromedriver path
    # above (`\W`, `\P`, `\R`, `\s` are not valid escape sequences).
    comment.screenshot(f'E:\WEB SCRAPING PROJECTS\PROJECTS\Reddit Scraping\shot{u}.png')
    u += 1

My code scrolls down the page and saves screenshots to my desired path, but the problem is that all the screenshots are of the first element (comment) in the Reddit post.

I want my code to save a screenshot of each comment separately. Need help


Solution

  • Here is an example that also scrolls to the end of the page:

    # Needed libs
    from selenium.webdriver import ActionChains, Keys
    import time
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    from selenium import webdriver
    
    # Raised by WebDriverWait.until() when the condition is not met in time
    from selenium.common.exceptions import TimeoutException

    # Initialize the driver and navigate to the post
    driver = webdriver.Chrome()
    try:
        driver.maximize_window()
        url = 'https://www.reddit.com/user/UoPeople09/comments/wlt4qj/what_made_you_apply_at_uopeople/'
        wait = WebDriverWait(driver, 5)
        driver.get(url)

        # Reject the cookie banner if one is shown. Wait until the button is
        # clickable (not merely present in the DOM) so the click cannot land
        # on an element that is still hidden or disabled.
        try:
            reject_cookies_button = wait.until(EC.element_to_be_clickable((By.XPATH, "(//section[@class='_2BNSty-Ld4uppTeWGfEe8r']//button)[2]")))
        except TimeoutException:
            # No banner appeared within the timeout; nothing to dismiss.
            pass
        else:
            reject_cookies_button.click()

        # Scroll to the bottom repeatedly until the page height stops growing,
        # so that every comment the page adds while scrolling is in the DOM.
        while True:
            height_before_scroll = driver.execute_script('return document.body.scrollHeight')
            driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
            # Give newly added content time to extend the page before re-measuring
            time.sleep(2)
            if driver.execute_script('return document.body.scrollHeight') == height_before_scroll:
                break

        # Collect every comment element currently on the page
        comments = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[contains(@class, 'Comment')]")))

        # Take a screenshot of each comment and save it as shot1.png, shot2.png, ...
        for u, comment in enumerate(comments, start=1):
            driver.execute_script("arguments[0].scrollIntoView();", comment)
            comment.screenshot(f'./shot{u}.png')
    finally:
        # Always close the browser and stop the driver process, even on errors
        driver.quit()
    

    I hope the comments in the code help you to understand what is happening

    My code was written on Linux; on Windows, just initialize the driver with the path to your own chromedriver