python selenium-webdriver computer-vision ocr captcha

Using Selenium and AI to bypass a canvas CAPTCHA

I'm trying to write a web scraping script to create an email address automatically without having to do it manually. Since I have some knowledge of web scraping, I thought it would be simple, but in reality, it’s not.

Once on the website, after completing the email and password creation steps, I encounter a CAPTCHA. I decided to create an AI to solve it, but to my surprise, I’m stuck and need help.

Theoretically, I expected two separate HTML elements to differentiate the puzzle piece from the background, but that’s not the case: `//*[@id="root"]/div/div/div/div/div/div[1]` | `//*[@id="root"]/div/div/div/div/div/div[1]/canvas`.

I’ve run out of ideas since I’ve never built an AI before, and my knowledge has reached its limit. That’s why I’m asking for your help to explain/show me how to do it.

Thank you for everything! (Here is my code & link to the website) https://account.proton.me/mail/signup?plan=free&ref=mail_plus_intro-mailpricing-2

import os
import time
import random
import string
import cv2
import numpy as np
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

def get_coordinates(image_path):
    """Locate two puzzle-sized contours in the image at *image_path*.

    The image is converted to grayscale, blurred, and run through Canny edge
    detection. The bounding boxes of the external contours are then filtered
    by contour area and by box width/height.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A ``(piece_coords, hole_coords)`` tuple. Each element is an
        ``(x, y, w, h)`` bounding box, or ``None`` when no matching contour
        was found. ``piece_coords`` is the first matching contour;
        ``hole_coords`` is the last of any further matches. Both are ``None``
        when the image file cannot be read.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; surface that the same way as "nothing detected"
    # so callers hit their existing None check rather than an OpenCV error.
    if image is None:
        return None, None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    piece_coords = None
    hole_coords = None
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        area = cv2.contourArea(cnt)

        # Keep only contours whose area and bounding box fall in the
        # hard-coded size window.
        if 500 < area < 5000 and 40 < w < 100 and 40 < h < 100:
            if piece_coords is None:
                piece_coords = (x, y, w, h)
            else:
                # Every later match overwrites the previous one.
                hole_coords = (x, y, w, h)

    return piece_coords, hole_coords
def move_piece_on_canvas(driver, canvas_element, piece_coords, hole_coords):
    """Dispatch a synthetic drag-event sequence on the canvas element.

    The centres of the piece and hole boxes are translated into viewport
    coordinates using the canvas's bounding client rect, logged, and handed
    to an in-page script that fires ``dragstart``, ``drag``, ``drop`` and
    ``dragend`` on the canvas.

    Args:
        driver: Object exposing ``execute_script(script, *args)``.
        canvas_element: Element passed to both scripts as ``arguments[0]``.
        piece_coords: ``(x, y, w, h)`` box of the piece, relative to the canvas.
        hole_coords: ``(x, y, w, h)`` box of the hole, relative to the canvas.
    """
    px, py, pw, ph = piece_coords
    hx, hy, hw, hh = hole_coords

    rect_script = """
        var rect = arguments[0].getBoundingClientRect();
        return {left: rect.left, top: rect.top};
    """
    origin = driver.execute_script(rect_script, canvas_element)
    left, top = origin["left"], origin["top"]

    # Box centres, shifted by the canvas's top-left corner in the viewport.
    start_x, start_y = left + px + pw // 2, top + py + ph // 2
    end_x, end_y = left + hx + hw // 2, top + hy + hh // 2

    print(f"Déplacement de ({start_x}, {start_y}) à ({end_x}, {end_y})")

    # New approach using DragEvent.
    # NOTE: simulateDrag receives the four coordinates, but each DragEvent is
    # constructed from `bubbles` and `cancelable` only.
    drag_script = """
        function simulateDrag(element, startX, startY, endX, endY) {
            var eventStart = new DragEvent('dragstart', {
                bubbles: true,
                cancelable: true
            });
            var eventDrag = new DragEvent('drag', {
                bubbles: true,
                cancelable: true
            });
            var eventDrop = new DragEvent('drop', {
                bubbles: true,
                cancelable: true
            });
            var eventEnd = new DragEvent('dragend', {
                bubbles: true,
                cancelable: true
            });

            element.dispatchEvent(eventStart);
            element.dispatchEvent(eventDrag);
            element.dispatchEvent(eventDrop);
            element.dispatchEvent(eventEnd);
        }

        simulateDrag(arguments[0], arguments[1], arguments[2], arguments[3], arguments[4]);
    """
    driver.execute_script(drag_script, canvas_element, start_x, start_y, end_x, end_y)
    print("DragEvent simulé avec succès !")

def move_piece_using_coordinates(actions, piece_element, piece_coords, hole_coords):
    """Try to move the piece toward the hole with a chain of pointer actions.

    Args:
        actions: Action-chain object; presumably a Selenium ``ActionChains``
            instance (the file imports it) - TODO confirm against the caller.
        piece_element: Element the actions are started on; must expose
            ``is_displayed()``.
        piece_coords: ``(x, y, w, h)`` box of the piece.
        hole_coords: ``(x, y, w, h)`` box of the hole.

    Returns:
        None. Returns early, after logging, when ``piece_element`` is not
        displayed.
    """
    piece_x, piece_y, piece_w, piece_h = piece_coords
    hole_x, hole_y, hole_w, hole_h = hole_coords
    if not piece_element.is_displayed():
        print("L'élément de la pièce n'est pas visible.")
        return
    print(f"Coordonnées de la pièce : {piece_coords}")
    print(f"Coordonnées du trou : {hole_coords}")
    # NOTE(review): Selenium documents ActionChains.drag_and_drop as taking
    # (source, target); it is called with a single argument here and again
    # below - verify against the Selenium API before relying on this path.
    actions.drag_and_drop(piece_element).click().perform()
    print("Clic effectué sur la pièce.")
    time.sleep(0.5)
    # Offset between the top-left corners of the two boxes.
    delta_x = hole_x - piece_x
    delta_y = hole_y - piece_y
    actions.drag_and_drop(piece_element).perform()
    time.sleep(0.1)
    # The move is split into two equal steps; because of floor division an
    # odd delta ends one pixel short of the full offset.
    actions.move_by_offset(delta_x // 2, delta_y // 2).perform()
    time.sleep(0.1)
    actions.move_by_offset(delta_x // 2, delta_y // 2).perform()
    time.sleep(0.1)
    actions.release().perform()
    print("Pièce relâchée.")
    time.sleep(1)
    print(f"Pièce déplacée vers le trou à ({hole_x}, {hole_y}) !")


async def gmail():
    """Drive a Chrome session through the Proton signup form and CAPTCHA step.

    Generates a random mailbox name and password, fills in the signup form,
    screenshots the CAPTCHA canvas, runs ``get_coordinates`` on the
    screenshot, writes an annotated copy, and calls ``move_piece_on_canvas``.
    When detection fails the function calls itself again.

    NOTE(review): despite the name, the URL and log message target
    proton.me. The function is declared ``async`` but every wait is a
    blocking ``time.sleep``; the only ``await`` is the recursive retry.
    """
    os.environ['WDM_LOG_LEVEL'] = '0'
    option = webdriver.ChromeOptions()
    #option.add_argument('--headless')
    option.add_argument('--log-level=3')
    option.add_argument("--disable-blink-features=AutomationControlled")
    option.add_experimental_option("excludeSwitches", ["enable-automation"])
    driver = webdriver.Chrome(options=option)
    # Shared explicit wait with a 160-second timeout.
    wait = WebDriverWait(driver, 160)
    time.sleep(0.5)
    driver.get("https://account.proton.me/mail/signup?plan=free&ref=mail_plus_intro-mailpricing-2")

    # Random mailbox name (10-20 alphanumerics) and password (13-15
    # characters including punctuation).
    mail_length = random.randint(10, 20)
    mdp_length = random.randint(13, 15)
    alphabet = string.ascii_letters + string.digits
    alphabet1 = string.ascii_letters + string.digits + string.punctuation
    mail_base = ''.join(random.choice(alphabet) for _ in range(mail_length))
    mdp = ''.join(random.choice(alphabet1) for _ in range(mdp_length))
    print(f"Mail base: {mail_base}@proton.me | Password: {mdp}")

    # ------------------------------ Proton account creation form ------------------------------
    # The email field is reached through an iframe: switch in, type, then
    # switch back to the top-level document for the password fields.
    iframe = wait.until(EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[4]/div[1]/main/div[1]/div[2]/form/iframe")))
    driver.switch_to.frame(iframe)
    # The results of send_keys()/click() are assigned below but never read.
    email_input = wait.until(EC.element_to_be_clickable((By.ID, "email"))).send_keys(mail_base)
    driver.switch_to.default_content()
    time.sleep(0.5)
    email_input = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="password"]'))).send_keys(mdp)
    time.sleep(0.5)
    email_input = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="repeat-password"]'))).send_keys(mdp)
    create_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//button[@type="submit"]'))).click()

    # ------------------------------ CAPTCHA handling ------------------------------
    # Two frame switches in a row: the second XPath is resolved inside the
    # first frame.
    iframe = wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, '//*[@id="key_0"]/iframe')))
    iframe = wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, '/html/body/iframe')))
    print("Iframe trouvé !")

    # Save a screenshot of the canvas element to disk for analysis.
    captcha_element = wait.until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="root"]/div/div/div/div/div/div[1]/canvas')))
    with open(f'image_temps.png', 'wb') as file:
        file.write(captcha_element.screenshot_as_png)
    print(f"CAPTCHA screenshot fait, résolution du captcha !")

    piece_coords, hole_coords = get_coordinates("image_temps.png")
    if piece_coords and hole_coords:
        piece_x, piece_y, piece_w, piece_h = piece_coords
        hole_x, hole_y, hole_w, hole_h = hole_coords
        # delta_x / delta_y are computed here but not used afterwards.
        delta_x = hole_x - piece_x
        delta_y = hole_y - piece_y
        # Draw both boxes on a copy of the screenshot for visual debugging.
        image = cv2.imread("image_temps.png")
        cv2.rectangle(image, (piece_x, piece_y), (piece_x + piece_w, piece_y + piece_h), (255, 0, 0),2)
        cv2.rectangle(image, (hole_x, hole_y), (hole_x + hole_w, hole_y + hole_h), (0, 255, 0), 2)

        # NOTE(review): the two names are swapped - hole_center is computed
        # from the piece box and piece_center from the hole box, so the arrow
        # below runs from the hole box's centre to the piece box's centre.
        hole_center = (piece_x + piece_w // 2, piece_y + piece_h // 2)
        piece_center = (hole_x + hole_w // 2, hole_y + hole_h // 2)
        cv2.arrowedLine(image, piece_center, hole_center, (0, 0, 255), 3)

        output_path = "image_annotated.png"
        cv2.imwrite(output_path, image)
        print(f"Image annotée sauvegardée sous {output_path}")
        # Detection is run a second time on the same, unchanged screenshot.
        piece_coords, hole_coords = get_coordinates("image_temps.png")

        if piece_coords and hole_coords:
            canvas_element = wait.until(
                EC.presence_of_element_located((By.XPATH, '//*[@id="root"]/div/div/div/div/div/div[1]/canvas')))
            # NOTE(review): presumably wait.until raises on timeout rather
            # than returning a falsy value, which would make the else branch
            # unreachable - confirm.
            if canvas_element:
                print("Canvas détecté !")
            else:
                print("Erreur : Canvas non trouvé !")
            # Same XPath as canvas_element: the "piece" handle is the canvas itself.
            piece_element = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="root"]/div/div/div/div/div/div[1]/canvas')))

            # `actions` is created but not used in this function.
            actions = ActionChains(driver)
            move_piece_on_canvas(driver, piece_element, piece_coords, hole_coords)
            piece_position_after = driver.execute_script("return arguments[0].getBoundingClientRect();", piece_element)
            print(f"Position après déplacement : {piece_position_after}")
            # Remove the temporary screenshot and its annotated copy.
            file_path1 = 'image_temps.png'
            file_path2 = 'image_annotated.png'
            os.remove(file_path1)
            os.remove(file_path2)

        else:
            print("Erreur : Impossible de détecter la pièce ou le trou.")
            # NOTE(review): this retry starts a new browser without calling
            # driver.quit() on the current one, unlike the retry further down.
            await gmail()
    else:
        if not os.path.exists('image_annotated.png'):
            print("Erreur lors de la creation de l'image_annotated, recommencement...")
            os.remove('image_temps.png')
            time.sleep(2.5)
            driver.quit()
            await gmail()
    # Long blocking sleep at the end of the function; the driver is not quit
    # on this path.
    time.sleep(999)

Solution

I see that you can also verify using an email address. Would it not be more viable to set up a separate Gmail account and flow that could be used for such verification?
The CAPTCHA can be solved by trial and error, because releasing the piece is not treated as submitting your solution. Just move the piece by 1-2 pixels at a time until you either solve it or have tested all of the locations (depending on how lenient the check for the correct result is).
I tried to navigate to the alternative CAPTCHA for visually impaired people, and it skipped the CAPTCHA altogether. I think it was a fluke, but you should try it out. I have added the updated code from the gmail function below.

            # Excerpt from inside gmail(): it relies on `wait`, `driver`,
            # `piece_coords` and `hole_coords` defined earlier in that function.
            canvas_element = wait.until(
                EC.presence_of_element_located((By.XPATH, '//*[@id="root"]/div/div/div/div/div/div[1]/canvas')))
            if canvas_element:
                print("Canvas détecté !")
            else:
                print("Erreur : Canvas non trouvé !")
            # Same XPath as canvas_element: the "piece" handle is the canvas itself.
            piece_element = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="root"]/div/div/div/div/div/div[1]/canvas')))

            # `actions` is created but not used in this excerpt.
            actions = ActionChains(driver)
            move_piece_on_canvas(driver, piece_element, piece_coords, hole_coords)
            piece_position_after = driver.execute_script("return arguments[0].getBoundingClientRect();", piece_element)
            print(f"Position après déplacement : {piece_position_after}")
            # Remove the temporary screenshot and its annotated copy.
            file_path1 = 'image_temps.png'
            file_path2 = 'image_annotated.png'
            os.remove(file_path1)
            os.remove(file_path2)
            time.sleep(2)
            # Click the full-width purple button in the second child of the
            # widget. NOTE(review): the name `next` shadows the Python builtin.
            next = driver.find_element(By.CSS_SELECTOR,'#root > div > div > div > div > div > div:nth-child(2) > button.btn.rounded-full.btn-small.btn-solid-purple.btn-full-width')
            next.click()
            time.sleep(5)
            # Click the outlined grey button that is looked up after the pause.
            tr = driver.find_element(By.CSS_SELECTOR,'#root > div > div > div > button.btn.btn-small.rounded-full.btn-outline-grey.btn-full-width')
            tr.click()
            # Wait for the link inside the `visuallyImpaired` container and click it.
            visually_impaired_link = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//div[@class='visuallyImpaired']//a"))
            )
            visually_impaired_link.click()