Search code examples
playwright, playwright-python

How can I click all of the nodes of the category tree with Playwright?


I want to use playwright to automatically click and expand all the child nodes. But my code only expands part of the nodes. How should I fix the code? Thank you.

Current: nodes

What I want: nodes

import json
import time
from playwright.sync_api import sync_playwright

# Start Playwright without a context manager, so it has to be stopped
# explicitly once the script is done (see the ``finally`` clause below).
p = sync_playwright().start()

# Headed Chromium; slow_mo delays every Playwright operation by 2000 ms.
browser = p.chromium.launch(headless=False, slow_mo=2000)
context = browser.new_context()
page = context.new_page()

try:
    page.goto("https://keepa.com/#!categorytree", timeout=10000)

    # Open the login form and sign in.
    # NOTE(review): the credentials are hard-coded in the script; consider
    # reading them from environment variables instead.
    page.click("text=Log in / register now to subscribe")
    page.click("input[name=\"username\"]")
    page.fill("input[name=\"username\"]", "tylrr123@outlook.com")
    page.click("input[name=\"password\"]")
    page.fill("input[name=\"password\"]", "adnCgL@f$krY9Q9")
    page.click("input:has-text(\"Log in\")")
    page.wait_for_timeout(2000)

    # Reload the category tree now that the session is logged in.
    page.goto("https://keepa.com/#!categorytree", timeout=10000)

    # Click the first element matching the selector, over and over. The loop
    # has no exit condition of its own: it ends only when a call raises
    # (for example when the 5-second click timeout expires), which is then
    # reported by the ``except`` clause below.
    while True:
        loc = page.locator(".ag-icon.ag-icon-expanded")
        print(loc.count())
        loc.first.click(timeout=5000)
        page.wait_for_timeout(2000)

except Exception as err:
    print(err)
finally:
    print("finished")
    # Release the browser and the Playwright driver started above.
    try:
        browser.close()
    finally:
        p.stop()


My code only expands part of the nodes. How should I fix the code? Thank you.


Solution

  • I write scripts like this from time to time, but honestly, this was one of the hardest ones. It has been a real challenge.

    I think it is finished.

    # Import needed libs
    import time
    from playwright.sync_api import sync_playwright
    import datetime
    
    
    # Record the start time so the total run time can be reported at the end
    init = datetime.datetime.now()
    print(f"{init} - Script starts")

    # Start Playwright and open a page in a visible (non-headless) Chromium
    p = sync_playwright().start()
    browser = p.chromium.launch(headless=False)
    context = browser.new_context()
    page = context.new_page()

    # Navigate to Keepa and log in
    # NOTE(review): the credentials are hard-coded in the script; consider
    # reading them from environment variables instead.
    page.goto("https://keepa.com/#!categorytree")
    page.click("text=Log in / register now to subscribe")
    page.fill("#username", "tylrr123@outlook.com")
    page.fill("#password", "adnCgL@f$krY9Q9")
    page.click("#submitLogin", delay=200)
    # The user-profile panel only shows up once the login has gone through,
    # so waiting for it confirms that we are logged in
    page.wait_for_selector("#panelUsername")

    # Navigate to the category tree again, now as a logged-in user
    page.goto("https://keepa.com/#!categorytree")
    # Short pause after the navigation. page.wait_for_timeout is used instead
    # of time.sleep because the Playwright docs advise against time.sleep:
    # it blocks the asynchronous operations Playwright relies on internally
    page.wait_for_timeout(1000)
    
    
    def try_click():
        """Try to expand one collapsed subtree that is currently visible.

        The collapsed nodes are checked from the last match back to the first
        (in practice the last match is usually the one shown first in the
        list), and only the first visible one found is clicked.

        Returns:
            True if a visible collapsed node was found and a click was
            attempted, even if Playwright reported the click as failed
            (the click frequently goes through anyway). False if none of
            the collapsed nodes is visible, so the caller should scroll.
        """
        contracted = "//span[@class='ag-group-contracted']"
        # Number of collapsed nodes currently present in the page
        count = page.locator(contracted).count()
        # XPath positions are 1-based: walk from `count` down to 1
        for position in range(count, 0, -1):
            target = f"({contracted})[{position}]"
            if not page.locator(target).is_visible():
                continue
            try:
                page.click(target, timeout=200)
                print(f"Clicking Correct {position}. Wheel up")
                # After a successful click, scroll back up a little
                page.mouse.wheel(0, -500)
                return True
            except Exception:
                # Catch Exception rather than using a bare except so that
                # Ctrl+C (KeyboardInterrupt) can still stop the script.
                # Playwright sometimes reports a failure although the element
                # was actually clicked, so this still counts as an attempt.
                print(f"Error Clicking {position} but probably was clicked")
                return True
        # No collapsed node is visible in the current viewport
        return False
    
    def there_is_still_closed_trees():
        """Report whether at least one collapsed tree node is still in the DOM.

        Waits (up to Playwright's default timeout) for a collapsed node to be
        attached to the page; it does not have to be visible.

        Returns:
            True if a collapsed node was found, False if the wait failed.
        """
        try:
            page.wait_for_selector("//span[@class='ag-group-contracted']", state="attached")
        except Exception:
            # Catch Exception rather than using a bare except so that Ctrl+C
            # (KeyboardInterrupt) is not mistaken for "nothing left to expand".
            print("No more trees closed")
            return False
        return True
    
    # Opening the category tree page shows a pop-up that only disappears once
    # the mouse moves. Move the mouse and leave it over the list, because the
    # wheel scrolling below has to happen over the list
    page.mouse.move(400, 1000)
    page.mouse.move(400, 400)
    # Number of scroll-downs performed since the last click attempt
    wheel_down_times = 0

    try:
        # Keep going until no collapsed tree is left in the page
        while there_is_still_closed_trees():
            if try_click():
                # A collapsed node was visible and got clicked: reset the counter
                print(f"Wheel down times from {wheel_down_times} to 0")
                wheel_down_times = 0
                continue

            # Nothing clickable in view: scroll down to look for collapsed nodes
            print("Wheel down")
            page.mouse.wheel(0, 400)
            wheel_down_times += 1
            print(f"Wheel down times =  {wheel_down_times}")
            # Too many scrolls in a row can crash the page, so pause for
            # 10 seconds every 100 scrolls. page.wait_for_timeout is used
            # instead of time.sleep, as the Playwright docs recommend
            if wheel_down_times % 100 == 0:
                print("Sleeping 10 secs in order to avoid page crashes")
                page.wait_for_timeout(10_000)
            # If the last entry of the whole tree is visible after more than 5
            # scroll-downs, we reached the bottom and skipped some collapsed
            # nodes: jump back to the top so the next passes can find them.
            # The cheap counter check runs first to avoid a needless query
            if wheel_down_times > 5 and page.locator("//span[text()='Walkthroughs & Tutorials']").is_visible():
                page.mouse.wheel(0, -5000000)

        # Print a summary of the run time
        end = datetime.datetime.now()
        print(f"{end} - Script finished")
        print(f"Script started at: {init}")
        print(f"Script ended at: {end}")
        print("There should not be any more closed trees")

        # Keep the browser window open so the result can be inspected; remove
        # this line to close the page right away. A plain sleep is fine here
        # because only the cleanup below follows
        time.sleep(10000)
    finally:
        # Release the browser and the Playwright driver even if the run was
        # interrupted or failed
        try:
            browser.close()
        finally:
            p.stop()
    

    The script takes almost 3 hours to run. I don't know how Keepa has so many categories. Awesome...