Search code examples
python · selenium-webdriver · web-scraping

Trouble identifying the "Connect" artdeco button element on LinkedIn for scraping via Selenium


def _collect_result_items(driver):
    """Return the individual people-search result elements on the current page.

    Item-level selectors are tried first so that every returned element
    represents one profile. Container-level selectors are only a fallback;
    when one matches, the container is expanded into its ``li`` children
    (or returned as-is when it has none) so callers never mistake the whole
    list for a single result.

    Returns an empty list when no selector matches.
    """
    item_selectors = [
        ".search-results-container li.reusable-search__result-container",
        ".entity-result__item",
    ]
    container_selectors = [
        "div.search-results-container ul.reusable-search__entity-result-list",
        "div.search-results-container div.mb3",
    ]

    for selector in item_selectors:
        try:
            items = driver.find_elements(By.CSS_SELECTOR, selector)
        except Exception:
            # Best effort: a failing selector just means we try the next one.
            continue
        if items:
            print(f"Found {len(items)} results using selector: {selector}")
            return items

    for selector in container_selectors:
        try:
            items = []
            for container in driver.find_elements(By.CSS_SELECTOR, selector):
                children = container.find_elements(By.CSS_SELECTOR, "li")
                items.extend(children or [container])
        except Exception:
            continue
        if items:
            print(f"Found {len(items)} results using selector: {selector}")
            return items

    return []


def _read_profile_name(result):
    """Return the normalized profile name shown in *result*, or None.

    Tries several name selectors in order and returns the first one that
    yields a non-empty value after ``normalize_name``.
    """
    name_selectors = [
        ".entity-result__title-text span[aria-hidden='true']",
        ".entity-result__title-text",
        "span.actor-name",
        ".app-aware-link span",
    ]
    for selector in name_selectors:
        try:
            name_element = result.find_element(By.CSS_SELECTOR, selector)
            profile_name = normalize_name(name_element.text.strip())
        except Exception:
            continue
        if profile_name:
            return profile_name
    return None


def _find_connect_button(result):
    """Return the first button inside *result* that looks like "Connect", or None.

    A button qualifies when either its visible text or its ``aria-label``
    contains "connect" (case-insensitive). The search stops at the first hit.
    """
    button_selectors = [
        "button.artdeco-button--secondary",
        "button.artdeco-button[aria-label*='Connect']",
        "button.artdeco-button[aria-label*='Invite']",
    ]
    for selector in button_selectors:
        try:
            for button in result.find_elements(By.CSS_SELECTOR, selector):
                label = button.get_attribute("aria-label") or ""
                if "connect" in button.text.lower() or "connect" in label.lower():
                    return button
        except Exception:
            continue
    return None


def _offer_connection(driver, wait, connect_button, first_name):
    """Interactively click *connect_button* and send the invitation note.

    Prompts on stdin twice: once before clicking Connect and once before
    typing and sending the note. Errors while driving the note dialog are
    printed, not raised.
    """
    if input("Send connection request? (yes/no): ").strip().lower() != "yes":
        return

    # JavaScript click is used instead of WebElement.click(), as in the
    # original code.
    driver.execute_script("arguments[0].click();", connect_button)
    time.sleep(2)

    note = (
        f"{first_name.title()}, hope our paths cross soon! At Kintsugi, we're developing novel voice biomarker AI to screen "
        "clinical depression and anxiety from 20 seconds of free-form speech. We were recently featured in Forbes AI 50 and Fierce 15.\n\nWarmly,\nGrace"
    )
    print(f"\nDraft note:\n{note}")

    if input("Confirm sending note? (yes/no): ").strip().lower() != "yes":
        # NOTE(review): Connect was already clicked above; whatever dialog
        # that opened is left untouched here - confirm this is intended.
        return

    try:
        # contains(., ...) matches the button's full string value, so labels
        # wrapped in a child element are found; contains(text(), ...) would
        # only inspect the button's first direct text node.
        add_note_button = wait.until(EC.element_to_be_clickable((
            By.XPATH, "//button[contains(., 'Add a note')]"
        )))
        driver.execute_script("arguments[0].click();", add_note_button)
        time.sleep(1)

        textarea = wait.until(EC.presence_of_element_located((
            By.XPATH, "//textarea"
        )))
        textarea.send_keys(note)

        send_button = wait.until(EC.element_to_be_clickable((
            By.XPATH, "//button[contains(., 'Send')]"
        )))
        driver.execute_script("arguments[0].click();", send_button)
        print("Connection request sent!")
        time.sleep(3)
    except Exception as e:
        print(f"Error sending connection request: {e}")


def send_linkedin_requests(speakers):
    """Send LinkedIn connection requests to scraped speakers.

    Logs in with the module-level ``LINKEDIN_EMAIL`` / ``LINKEDIN_PASSWORD``,
    runs a people search for the first two tokens of each speaker's
    normalized name, and inspects up to five results. For every result whose
    normalized name contains both tokens, the user is asked on stdin whether
    to connect and whether to send the note. At most three matching profiles
    are handled per speaker.

    Args:
        speakers: Iterable of mappings that each provide a ``"name"`` key.
            An empty or ``None`` value returns immediately without opening
            a browser.

    Returns:
        None. Progress and errors are reported with ``print``; exceptions
        are caught and logged rather than propagated, and the driver is
        always quit.
    """
    # Local import so the block does not depend on the file's import header.
    from urllib.parse import quote

    if not speakers:
        print("No speakers supplied - nothing to do.")
        return

    driver = None
    try:
        print("\nStarting LinkedIn connection process...")
        driver = create_chrome_driver()
        driver.get("https://www.linkedin.com/login")

        wait = WebDriverWait(driver, 20)
        wait.until(EC.presence_of_element_located((By.ID, "username"))).send_keys(LINKEDIN_EMAIL)
        driver.find_element(By.ID, "password").send_keys(LINKEDIN_PASSWORD)
        driver.find_element(By.XPATH, "//button[@type='submit']").click()
        time.sleep(5)

        for speaker in speakers:
            try:
                speaker_name = normalize_name(speaker["name"])
                print(f"\nSearching for {speaker['name']}...")

                names = speaker_name.split()
                if len(names) < 2:
                    print(f"Skipping {speaker['name']} - insufficient name information")
                    continue

                first_name, last_name = names[:2]
                # Percent-encode the keywords so names with '&', '#', accents
                # etc. cannot corrupt the query string.
                keywords = quote(f"{first_name} {last_name}")
                driver.get(f"https://www.linkedin.com/search/results/people/?keywords={keywords}")
                time.sleep(5)

                # Print the number of results if the page shows it.
                try:
                    results_count = driver.find_element(By.CSS_SELECTOR, ".search-results-container h2").text
                    print(f"LinkedIn shows: {results_count}")
                except Exception:
                    pass  # The header is informational only.

                try:
                    search_results = _collect_result_items(driver)
                    if not search_results:
                        print("No search results found using any selector")
                        continue

                    print(f"Processing {len(search_results)} results...")
                    matches_found = 0

                    for result in search_results[:5]:
                        try:
                            profile_name = _read_profile_name(result)
                            if not profile_name:
                                print("Could not find name in result, skipping...")
                                continue

                            print(f"\nFound profile: {profile_name}")
                            print(f"Looking for: {speaker_name}")

                            # Substring match on the normalized names, as in
                            # the original code.
                            if first_name not in profile_name or last_name not in profile_name:
                                print("Name does not match, skipping...")
                                continue

                            print("Name match found!")
                            connect_button = _find_connect_button(result)
                            if connect_button:
                                print("Found Connect button")
                                _offer_connection(driver, wait, connect_button, first_name)
                            else:
                                print("No Connect button found - may already be connected or have pending request")

                            matches_found += 1
                            if matches_found >= 3:
                                break

                        except Exception as e:
                            print(f"Error processing result: {e}")
                            continue

                except Exception as e:
                    print(f"Error processing search results: {e}")

            except Exception as e:
                print(f"Error processing {speaker['name']}: {e}")
                continue

    except Exception as e:
        print(f"Error in LinkedIn connection process: {e}")

    finally:
        if driver:
            driver.quit()

The above function is used to connect with a list of speakers on LinkedIn using the People search and the "Connect" button selector. However, although the individuals show up in the search results, they are not identified, and the script returns "No initial results."

I've added multiple selectors for finding names and the "Connect" button, better debugging, and better error handling; however, I'm still not able to match the search results against the speakers in my list for follow-up.

Any thoughts on how to improve the capture of the match and Connect sequence? Thanks!


Solution

  • def send_linkedin_requests(speakers):
        """Send LinkedIn connection requests or follow speakers if no connect option exists.

        Logs in with the module-level LINKEDIN_EMAIL / LINKEDIN_PASSWORD, then
        polls for up to 180 seconds until a search input appears (leaving time
        for manual CAPTCHA / security checks). On login failure or timeout the
        process is terminated with ``sys.exit(1)``.

        For each speaker the people search is opened for the capitalized name,
        and the first button on the page whose text or aria-label contains
        "connect" (invite sent with a fixed note) or "follow" is clicked.
        Afterwards the first result title is compared with the searched name
        and a second, interactive connection sequence is attempted.

        Args:
            speakers: iterable of mappings; each is indexed with ``'name'``.

        Returns:
            None. Progress and errors are printed; the driver is quit in the
            final ``finally`` clause.
        """
        # NOTE(review): the driver is created outside the try block, so an
        # exception raised here propagates to the caller.
        driver = create_chrome_driver()
        try:
            print("\nStarting LinkedIn connection process...")
            driver.get("https://www.linkedin.com/login")
    
            # 1) Log into LinkedIn
            wait = WebDriverWait(driver, 20)
            try:
                # Enter credentials and log in
                wait.until(EC.presence_of_element_located((By.ID, "username"))).send_keys(LINKEDIN_EMAIL)
                driver.find_element(By.ID, "password").send_keys(LINKEDIN_PASSWORD)
                driver.find_element(By.XPATH, "//button[@type='submit']").click()
                print("Login submitted. Waiting for verification prompts if any.")
    
                # Wait up to 3 minutes (180 seconds) for manual CAPTCHA or security checks
                MAX_WAIT_TIME = 180  # 3 minutes
                POLL_INTERVAL = 5    # Check every 5 seconds
    
                # NOTE(review): elapsed_time only counts the sleep below, not the
                # 5-second WebDriverWait in each iteration, so the real wall-clock
                # limit is longer than MAX_WAIT_TIME.
                elapsed_time = 0
                while elapsed_time < MAX_WAIT_TIME:
                    # Check if login is successful by verifying the presence of the search bar or LinkedIn homepage
                    try:
                        WebDriverWait(driver, 5).until(
                            EC.presence_of_element_located((By.XPATH, "//input[contains(@placeholder, 'Search')]"))
                        )
                        print("Verification completed! Proceeding with automation...")
                        break  # Exit the loop
                    except TimeoutException:
                        print(f"Waiting for verification... Elapsed time: {elapsed_time} seconds.")
                        time.sleep(POLL_INTERVAL)
                        elapsed_time += POLL_INTERVAL
                else:
                    # while/else: runs only when the loop ended without `break`,
                    # i.e. the search input never appeared.
                    print("Timeout: Verification not completed within 3 minutes.")
                    driver.quit()
                    # NOTE(review): SystemExit is not an Exception subclass, so it
                    # bypasses both `except Exception` handlers; the outer `finally`
                    # then calls driver.quit() a second time.
                    sys.exit(1)
    
            except Exception as e:
                print(f"Failed to log in: {str(e)}")
                driver.quit()
                sys.exit(1)
    
            # 2) Iterate through scraped speakers
            for speaker in speakers:
                try:
                    # Split name to handle sentence-casing
                    name_parts = speaker['name'].split()
                    # If the scraped name has more than 2 parts, handle gracefully
                    if len(name_parts) >= 2:
                        first_name = name_parts[0].capitalize()
                        last_name = " ".join(part.capitalize() for part in name_parts[1:])
                        full_name = f"{first_name} {last_name}"
                    else:
                        # Fallback if there's only a single name or an unexpected format
                        # NOTE(review): first_name is not assigned on this path; later
                        # uses read whatever a previous iteration left (or raise
                        # NameError on the first speaker).
                        full_name = speaker['name'].title()
    
                    # Only spaces are percent-encoded here; any other character in
                    # the name is placed in the URL unchanged.
                    search_url = f"https://www.linkedin.com/search/results/people/?keywords={full_name.replace(' ', '%20')}"
                    driver.get(search_url)
                    
                    # Wait for initial page load
                    time.sleep(8)  # Increased initial wait time
                    
                    # Scroll to trigger content loading
                    driver.execute_script("window.scrollTo(0, 300);")
                    time.sleep(2)
                    
                    # Try multiple approaches to find search results
                    found_results = False
                    
                    # First try: Wait for search results container
                    try:
                        print(f"\nLooking for search results for {full_name}...")
                        
                        # Wait for any of these elements to appear
                        for selector in [
                            "div.search-results-container",
                            "div.search-results__cluster-content",
                            "ul.reusable-search__entity-result-list",
                            "div.entity-result__item",
                        ]:
                            try:
                                print(f"Trying selector: {selector}")
                                # The located element is not used afterwards; only
                                # its presence matters.
                                element = WebDriverWait(driver, 5).until(
                                    EC.presence_of_element_located((By.CSS_SELECTOR, selector))
                                )
                                print(f"Found results container with selector: {selector}")
                                found_results = True
                                break
                            except TimeoutException:
                                continue
                        
                        if not found_results:
                            print(f"Could not find results container for {full_name}")
                            # Take screenshot for debugging
                            screenshot_name = f"no_results_{speaker['name'].replace(' ', '_')}_{int(time.time())}.png"
                            driver.save_screenshot(screenshot_name)
                            continue
                        
                        # Try to find connect buttons
                        # NOTE(review): the search is page-wide (driver.find_elements),
                        # not scoped to one result, so the first matching button on
                        # the page is the one clicked below.
                        buttons = driver.find_elements(By.CSS_SELECTOR, 
                            "button.artdeco-button--2.artdeco-button--secondary"
                        )
                        
                        if not buttons:
                            print("No connection buttons found, trying alternative selectors...")
                            buttons = driver.find_elements(By.CSS_SELECTOR, 
                                "button.artdeco-button"
                            )
                        
                        if buttons:
                            print(f"Found {len(buttons)} potential connection buttons")
                            for button in buttons:
                                try:
                                    button_text = button.text.strip().lower()
                                    aria_label = button.get_attribute('aria-label') or ''
                                    
                                    if 'connect' in button_text or 'connect' in aria_label.lower():
                                        print(f"Found Connect button for {first_name}")
                                        driver.execute_script("arguments[0].scrollIntoView(true);", button)
                                        time.sleep(1)
                                        driver.execute_script("arguments[0].click();", button)
                                        
                                        # Handle the "Add a note" modal
                                        try:
                                            modal = WebDriverWait(driver, 10).until(
                                                EC.presence_of_element_located((
                                                    By.XPATH,
                                                    "//div[contains(@class, 'artdeco-modal') and contains(@class, 'send-invite')]"
                                                ))
                                            )
                                            print("Modal appeared (Add a note).")
    
                                            # Click "Add a note"
                                            # The waits below are rooted at `modal`, so the
                                            # relative XPaths (".//") search inside it only.
                                            add_note_button = WebDriverWait(modal, 5).until(
                                                EC.element_to_be_clickable((
                                                    By.XPATH, ".//button[@aria-label='Add a note']"
                                                ))
                                            )
                                            add_note_button.click()
                                            time.sleep(2)
                                            print("Clicked 'Add a note' button.")
    
                                            # Type note in textarea
                                            note = (
                                                f"{first_name}, hope our paths cross soon! At Kintsugi, we're developing novel voice biomarker AI to screen "
                                "clinical depression and anxiety from 20 seconds of free-form speech. We were recently featured in Forbes AI 50 and Fierce 15.\n\nWarmly,\nGrace"
                                            )
                                            textarea = WebDriverWait(modal, 5).until(
                                                EC.presence_of_element_located((By.ID, "custom-message"))
                                            )
                                            textarea.send_keys(note)
                                            print("Typed note.")
    
                                            # Send the invite
                                            time.sleep(1)
                                            send_button = WebDriverWait(modal, 5).until(
                                                EC.element_to_be_clickable((
                                                    By.XPATH, ".//button[@aria-label='Send invitation']"
                                                ))
                                            )
                                            driver.execute_script("arguments[0].click();", send_button)
                                            print(f"Sent connection request with a note to {first_name}")
                                            time.sleep(2)
    
                                        except TimeoutException:
                                            print(f"No 'Add a note' modal for {first_name}")
                                        # Stop after the first Connect button, whether or not
                                        # the note dialog could be completed.
                                        break
                                    
                                    elif 'follow' in button_text or 'follow' in aria_label.lower():
                                        print(f"Found Follow button for {first_name}")
                                        driver.execute_script("arguments[0].scrollIntoView(true);", button)
                                        time.sleep(1)
                                        driver.execute_script("arguments[0].click();", button)
                                        break
                                except Exception as e:
                                    print(f"Error processing button: {str(e)}")
                                    continue
                        
                        time.sleep(3)  # Wait between processing each person
                    
                    except Exception as e:
                        print(f"Error finding results for {speaker['name']}: {str(e)}")
                        traceback.print_exc()
                        continue
    
                    # After finding a result, verify the name
                    # NOTE(review): this verification runs after the loop above has
                    # already clicked Connect/Follow, so it cannot prevent a request
                    # to a mismatched profile - confirm the intended ordering.
                    try:
                        result_name = WebDriverWait(driver, 5).until(
                            EC.presence_of_element_located((
                                By.CSS_SELECTOR, 
                                "span.entity-result__title-text"
                            ))
                        ).text.strip()
                        
                        # Normalize both names for comparison
                        normalized_search_name = normalize_name(full_name)
                        normalized_result_name = normalize_name(result_name)
                        
                        print(f"\nComparing names:")
                        print(f"Original speaker name: {full_name}")
                        print(f"Found profile name: {result_name}")
                        
                        if normalized_search_name != normalized_result_name:
                            proceed = input(f"\nNames don't match exactly. Proceed anyway? (yes/no): ").strip().lower()
                            if proceed != 'yes':
                                print("Skipping this profile...")
                                continue
                        
                        # ... existing button finding code ...
                        
                        # NOTE(review): the placeholder above elides code. As written,
                        # button_text, aria_label and button are leftovers from the
                        # button loop earlier in this iteration; if that loop never
                        # ran they are unbound (NameError, caught by the handler
                        # below) or stale from a previous speaker.
                        if 'connect' in button_text or 'connect' in aria_label.lower():
                            print(f"Found Connect button for {result_name}")
                            
                            # Prepare connection note
                            note = (
                                f"{result_name.split()[0]}, hope our paths cross soon! At Kintsugi, we're developing novel voice biomarker AI to screen "
                                "clinical depression and anxiety from 20 seconds of free-form speech. We were recently featured in Forbes AI 50 and Fierce 15.\n\nWarmly,\nGrace"
                            )
                            
                            # Ask for review and confirmation
                            print("\nProposed connection note:")
                            print("-" * 50)
                            print(note)
                            print("-" * 50)
                            
                            # Any answer other than 'no' or 'edit' falls through and
                            # proceeds with the request.
                            proceed = input("\nSend this connection request? (yes/no/edit): ").strip().lower()
                            
                            if proceed == 'no':
                                print("Skipping this connection request...")
                                continue
                            elif proceed == 'edit':
                                note = input("\nPlease enter the revised note:\n")
                            
                            # Proceed with connection request
                            driver.execute_script("arguments[0].scrollIntoView(true);", button)
                            time.sleep(1)
                            driver.execute_script("arguments[0].click();", button)
                            
                            # Handle the "Add a note" modal
                            try:
                                modal = WebDriverWait(driver, 10).until(
                                    EC.presence_of_element_located((
                                        By.XPATH,
                                        "//div[contains(@class, 'artdeco-modal') and contains(@class, 'send-invite')]"
                                    ))
                                )
                                
                                # Click "Add a note"
                                add_note_button = WebDriverWait(modal, 5).until(
                                    EC.element_to_be_clickable((
                                        By.XPATH, ".//button[@aria-label='Add a note']"
                                    ))
                                )
                                add_note_button.click()
                                time.sleep(2)
                                
                                # Type note in textarea
                                textarea = WebDriverWait(modal, 5).until(
                                    EC.presence_of_element_located((By.ID, "custom-message"))
                                )
                                textarea.send_keys(note)
                                
                                # Final confirmation before sending
                                if input("\nReady to send? (yes/no): ").strip().lower() == 'yes':
                                    send_button = WebDriverWait(modal, 5).until(
                                        EC.element_to_be_clickable((
                                            By.XPATH, ".//button[@aria-label='Send invitation']"
                                        ))
                                    )
                                    driver.execute_script("arguments[0].click();", send_button)
                                    print(f"Sent connection request to {result_name}")
                                else:
                                    print("Cancelled sending connection request")
                                    # Close the modal
                                    close_button = modal.find_element(By.XPATH, ".//button[@aria-label='Dismiss']")
                                    close_button.click()
                                
                            except TimeoutException:
                                print(f"No 'Add a note' modal for {result_name}")
                            
                    except Exception as e:
                        print(f"Error verifying name or sending connection: {str(e)}")
                        continue
                    
                    time.sleep(3)  # Wait between processing each person
    
                except Exception as e:
                    print(f"Error processing {speaker['name']}: {str(e)}")
                    traceback.print_exc()
                    continue
    
        except Exception as e:
            print(f"Fatal error in send_linkedin_requests: {str(e)}")
            traceback.print_exc()
        finally:
            # Always release the browser, including on SystemExit from the login step.
            driver.quit()