Search code examples
python google-chrome selenium-webdriver web-scraping

Access Denied for selenium chrome webdriver


I am trying to scrape a website using Selenium WebDriver, but I keep getting an "Access Denied" response. I have tried the following:

  1. Adding Chrome options and a custom user agent.
  2. Using undetected-chromedriver.
  3. Adding a sleep timer between requests to avoid bot detection.

Here is my code:

import time
from selenium import webdriver
from time import sleep
from csv import writer
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import uuid
import requests
import html5lib
import undetected_chromedriver as uc


# options = Options()

# user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
# options.add_argument("user-agent={0}".format(user_agent))
# # options.add_argument("--headless")
# options.add_argument("--disable-gpu")
# options.add_argument("--no-sandbox")
# options.add_experimental_option("excludeSwitches", ["enable-automation"])
# options.add_experimental_option("useAutomationExtension", False)

# Start Chrome through undetected-chromedriver (the `uc` import above).
driver = uc.Chrome()

# Earlier attempt with stock Selenium, kept for reference:
# driver = webdriver.Chrome(options=options)
# wait = WebDriverWait(driver, 20)
# action = ActionChains(driver)

# Search URL prefix; the page number and the query string are appended per request.
base_url = "https://www.arrow.com/en/products/search?page="
# Absolute XPath to a table body; not used in the visible part of this script.
base_xpath = "/html/body/div[1]/div[12]/div[2]/div/div[3]/div/div[1]/table/tbody"

try:
    # Visit result pages 1 through 89 (range() excludes the upper bound).
    for page_no in range(1, 90):
        print(page_no)
        url = f"{base_url}{page_no}&q=Computer%20on%20Module&r=true"
        print(url)
        driver.get(url)
        # Fixed 5-second pause after each page load before requesting the next.
        sleep(5)
finally:
    # Always shut the browser down, even if a page load raises or the run is
    # interrupted, so no Chrome/chromedriver process is left behind.
    driver.quit()

This is the output I am getting

Thanks!


Solution

  • Try with this link:

    base_url = "https://www.arrow.com/en/products"