I am using BeautifulSoup to scrape the information for all products, but my code only returns the first product's information. Also, when I add .text at the end of a find call, I get a NoneType error.
# Question script: open the site in Chrome, walk the 12 search-result pages,
# parse each page with BeautifulSoup and collect the product fields into a
# DataFrame.
# NOTE: `options` is configured for headless mode but is not passed to the
# driver constructor below.
options = ChromeOptions()
options.add_argument("headless")
driver = chrome(
    executable_path="/AppData/Local/Programs/Python/Driver/chromedriver_win32/chromedriver.exe"
)
driver.get("https://www.adiglobaldistribution.us/MyAccount/signin")  # here change your link
driver.maximize_window()
time.sleep(5)
wait = WebDriverWait(driver, 10)

# Locator used to detect that the result cards have been rendered.
price_block = (By.CSS_SELECTOR, '[class="rd-thumb-details-price"]')

data_adi = []
for n in range(12):
    search_url = f"https://www.adiglobaldistribution.us/search?page={n+1}&criteria=Tp-link%20Usa%20Corporation"
    driver.get(search_url)
    WebDriverWait(driver, 10).until(EC.presence_of_element_located(price_block))
    time.sleep(5)
    parsed = Soup(driver.page_source)
    # NOTE(review): `select` takes a CSS selector as its first argument; the
    # `class_` keyword presumably does not filter by class here, so this
    # likely matches every <div> on the page - confirm against the
    # BeautifulSoup documentation.
    # Each `find` yields the matching tag or None, which is why appending
    # `.text` fails whenever nothing matches inside the current node.
    data_adi.extend(
        {
            'title': node.find("span", class_="rd-item-name"),
            'name': node.find("span", class_='item-num-mfg'),
            'link': node.find("div", class_="rd-item-name", href=True),
            'price': node.find("div", class_="pdp-price-wrapper"),
            'stock': node.find("span", class_="availabilityMessage-rd"),
        }
        for node in parsed.select("div", class_='[class="rd-thumb-details-price"]')
    )

df_adi = pd.DataFrame(data_adi)
# NOTE(review): the return value is not assigned, so `df_adi` itself is
# presumably left unchanged by this call.
df_adi.drop_duplicates()
df_adi  # bare expression; presumably for notebook/REPL display
Result
I tried it entirely with Selenium, and it worked.
I did not include the last two items (price and stock) as they may need a login. I am not sure, though, but you can mimic the locators as written here for the rest of them (name, title, link), then take their text into the dictionary (table) and append it to the list.
# Selenium-only scraper: visit each of the 12 search-result pages and record
# the title, manufacturer item number and product link of every result card.
# Relies on `driver`, `By`, `time` and `pd` already being set up earlier.
data_adi = []
for i in range(12):
    driver.get(f"https://www.adiglobaldistribution.us/search?page={i+1}&criteria=tp-link%20usa%20corporation")# here change your link
    # Fixed pause to give the page time to render its result cards.
    time.sleep(5)
    search_items = driver.find_elements(By.CSS_SELECTOR, "[class='rd-thumb-details-price']")
    print(len(search_items))
    for each_item in search_items:
        # Look up each field relative to the current card, not the whole page.
        item_title = each_item.find_element(By.CSS_SELECTOR, "div[class='rd-item-name'] span").text
        item_name = each_item.find_element(By.CSS_SELECTOR, "span[class='item-num-mfg']").text
        item_link = each_item.find_element(By.CSS_SELECTOR, "div[class='rd-item-name'] a").get_attribute('href')
        table = {"title": item_title, "name": item_name, "link": item_link}
        data_adi.append(table)

df_adi = pd.DataFrame(data_adi)
# drop_duplicates() returns a new frame by default, so keep the result.
df_adi = df_adi.drop_duplicates()
print(df_adi)
Output:
title ... link
0 TP-Link TL-SG1005P 5-Port Gigabit Desktop Swit... ... https://www.adiglobaldistribution.us/Product/F...
1 TP-Link TL-POE10R Gigabit POE Splitter ... https://www.adiglobaldistribution.us/Product/F...
2 TP-Link TL-POE160S PoE+ Injector ... https://www.adiglobaldistribution.us/Product/F...
3 TP-Link TL-SG101616-Port Gigabit Desktop/Rackm... ... https://www.adiglobaldistribution.us/Product/F...
4 TP-Link RE220 AC750 WiFi Range Extender ... https://www.adiglobaldistribution.us/Product/F...
.. ... ... ...
137 TP-Link TL-WA850RE 300Mbps Universal Wi-Fi Ran... ... https://www.adiglobaldistribution.us/Product/F...
138 TP-Link KL430E Kasa Smart Light Strip Extensio... ... https://www.adiglobaldistribution.us/Product/F...
139 TP-Link EAP245 V3 AC1750 Wireless Dual Band Gi... ... https://www.adiglobaldistribution.us/Product/F...
140 T-Link Archer T2E AC600 Wireless Dual Band PCI... ... https://www.adiglobaldistribution.us/Product/F...
141 TP-Link TL-WR940N 450Mbps Wireless N Router wi... ... https://www.adiglobaldistribution.us/Product/F...
[142 rows x 3 columns]