I am trying to scrape a website using the Selenium WebDriver, but I am getting an "Access Denied" response. I tried the following:
Here is my code:
import time
from selenium import webdriver
from time import sleep
from csv import writer
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import uuid
import requests
import html5lib
import undetected_chromedriver as uc
# options = Options()
# user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
# options.add_argument("user-agent={0}".format(user_agent))
# # options.add_argument("--headless")
# options.add_argument("--disable-gpu")
# options.add_argument("--no-sandbox")
# options.add_experimental_option("excludeSwitches", ["enable-automation"])
# options.add_experimental_option("useAutomationExtension", False)
# Walk the paginated Arrow product search for "Computer on Module" in a
# browser session started through undetected_chromedriver.
driver = uc.Chrome()
# driver = webdriver.Chrome(options=options)
# wait = WebDriverWait(driver, 20)
# action = ActionChains(driver)
base_url = "https://www.arrow.com/en/products/search?page="
# Absolute XPath ending at a <tbody>; not referenced in the visible part of
# the script -- presumably the search-results table, verify before relying on it.
base_xpath = "/html/body/div[1]/div[12]/div[2]/div/div[3]/div/div[1]/table/tbody"
try:
    for page_no in range(1, 90):  # visits pages 1 through 89
        print(page_no)
        url = f"{base_url}{page_no}&q=Computer%20on%20Module&r=true"
        print(url)
        driver.get(url)
        # Fixed pause after each navigation before moving to the next page.
        sleep(5)
finally:
    # Always shut the browser down, even when a page load raises, so no
    # Chrome/chromedriver process is left running after the script ends.
    driver.quit()
This is the output I am getting:
Thanks!
Try this URL instead:
# Suggested replacement: the products URL without the "/search?page=" suffix
# that the question's base_url ends with.
base_url = "https://www.arrow.com/en/products"