I am trying to extract the product data (name and price) for all the products in a Lazada shop. There are 102 pages in total, but I am only able to extract the first page of data. Can anyone spot the problem in my code?
url: https://www.lazada.com.my/guardian/?from=wangpu&langFlag=en&page=1&pageTypeId=2&q=All-Products
Below is my code:
import time
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
class ScrapeLazada():
    def scrape(self):
        url = 'https://www.lazada.com.my/guardian/?from=wangpu&langFlag=en&page=1&pageTypeId=2&q=All-Products'
        driver = webdriver.Chrome()
        driver.get(url)
        products = []

        for i in range(102):
            WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#root")))
            time.sleep(2)
            soup = BeautifulSoup(driver.page_source, "html.parser")

            for item in soup.findAll('div', class_='Bm3ON'):
                product_name = item.find('div', class_='RfADt').text
                price = item.find('span', class_='ooOxS').text.replace('RM', '')
                products.append((product_name, price))

            time.sleep(2)
            driver.find_element(By.CSS_SELECTOR, ".ant-pagination-next > button").click()
            time.sleep(3)

        df = pd.DataFrame(products, columns=['Product Name', 'Price'])
        print(df)
        df.to_excel('Lazada_Guardian_Scrape.xlsx', index=False)
        print('Data saved in local disk')
        driver.close()

sl = ScrapeLazada()
sl.scrape()
Below is my output. It only lists the products from the first page and then fails when it tries to move on to the next page:
Product Name Price
0 UPHAMOL 250 Children Suspension Delicious Oran... 7.80
1 Darlie Double Action Fresh + Clean Toothpaste ... 20.92
2 Dermal Therapy Lip Balm 10g 12.78
3 Nurish No Teen Anti Acne Toner 100Ml 12.82
4 Live-Well OCCUsharp 30s Pack-of-3 100.90
5 Oxy Anti- Blackhead Wash 100g 11.95
6 Guardian Clear Assorted Plasters 20s 1.55
7 Selsun Blue 2 in 1 Treatment Shampoo 120ml 24.66
8 Hansaplast Disney Frozen II 20's 8.90
9 Guardian Wet Wipes 10's Fragrance Free 3.46
10 Fruiser Shower Cream Pump Rosemilk 1000ml 8.10
11 Enchanteur Wonder Woman Handbag Edt Fighter Of... 9.90
12 Guardian Plastic Plasters 100s + 20s 9.10
13 Sensodyne Fresh Mint 100g 12.50
14 Pantene Hair Fall Control Conditioner 165ML 10.89
15 Koolfever Cooling Gel For Babies 4s 8.60
16 Hada Labo Premium Whitening Essence 30g 85.90
17 Kinohimitsu J'pan Health Pad 10's + 10's 67.03
18 Ceradan Moisturising Hand Sanitiser 50ml 26.74
19 Sunsweet Pitted Prune 340g (USA) 23.20
20 **21st Century Probiotics 30s 17.00
21 Kundal Honey and Macadamia Hair Treatment Pear... 27.22
22 Sunsilk Super Conditioner Damage Rescue 180ml 11.28
23 LACTOGG probiotic capsules 30's 125.10
24 Rosken Bio Serum 50ml 28.82
25 Simple Kind To Skin Soothing Toner 200ml 21.34
26 L’Oreal White Perfect Toner 200ml 30.25
27 Total Image S Tummy 60s 63.00
28 Durex Invisible Extra Lubricant Condom 10's 51.13
29 3 Legs Tolnaftate Cream Pack Of 2 (2X10g) 14.66
30 Hansaplast Universal Water Resistant 20's 4.20
31 Perfume Generics Perfume Oil Paris Hilton 10Ml 8.90
32 Aiken Shampoo - Intense Repair 350G 12.68
33 GoodMorning VGrains 1kg 62.52
34 Woodwards Gripe Water 148ml 14.00
35 Difflam Hextra Sore Throat Lozenges 2.4mg 8.00
36 Okamoto 003 Cool 3's 14.50
37 Dettol Hand Sanitizer Refresh 50ml 6.25
38 Avene Pre-Serum Hydrating Essence-In-Lotion 200Ml 87.30
39 Guardian Essential Lavender Refreshing Body Wa... 10.10
Traceback (most recent call last):
File "Lazada_Guardian.py", line 43, in <module>
sl.scrape()
File "Lazada_Guardian.py", line 30, in scrape
driver.find_element(By.CSS_SELECTOR, ".ant-pagination-next > button").click()
File "/Users/chingkarlok/opt/anaconda3/lib/python3.8/site-packages/selenium/webdriver/remote/webelement.py", line 94, in click
self._execute(Command.CLICK_ELEMENT)
File "/Users/chingkarlok/opt/anaconda3/lib/python3.8/site-packages/selenium/webdriver/remote/webelement.py", line 403, in _execute
return self._parent.execute(command, params)
File "/Users/chingkarlok/opt/anaconda3/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 440, in execute
self.error_handler.check_response(response)
File "/Users/chingkarlok/opt/anaconda3/lib/python3.8/site-packages/selenium/webdriver/remote/errorhandler.py", line 245, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.ElementClickInterceptedException: Message: element click intercepted: Element <button class="ant-pagination-item-link" type="button" tabindex="-1">...</button> is not clickable at point (1186, 693). Other element would receive the click: <html lang="en" class=" ">...</html>
(Session info: chrome=112.0.5615.137)
Stacktrace:
0 chromedriver 0x000000010295d670 chromedriver + 4298352
1 chromedriver 0x0000000102955bbc chromedriver + 4266940
2 chromedriver 0x0000000102588758 chromedriver + 280408
3 chromedriver 0x00000001025cb444 chromedriver + 554052
4 chromedriver 0x00000001025c8e84 chromedriver + 544388
5 chromedriver 0x00000001025c663c chromedriver + 534076
6 chromedriver 0x00000001025c5530 chromedriver + 529712
7 chromedriver 0x00000001025b8428 chromedriver + 476200
8 chromedriver 0x00000001025b7b90 chromedriver + 474000
9 chromedriver 0x00000001025fc080 chromedriver + 753792
10 chromedriver 0x00000001025b62d0 chromedriver + 467664
11 chromedriver 0x00000001025b7354 chromedriver + 471892
12 chromedriver 0x000000010291d6c4 chromedriver + 4036292
13 chromedriver 0x0000000102921c64 chromedriver + 4054116
14 chromedriver 0x00000001029282d8 chromedriver + 4080344
15 chromedriver 0x0000000102922970 chromedriver + 4057456
16 chromedriver 0x00000001028f98dc chromedriver + 3889372
17 chromedriver 0x000000010294125c chromedriver + 4182620
18 chromedriver 0x00000001029413b4 chromedriver + 4182964
19 chromedriver 0x00000001029500f4 chromedriver + 4243700
20 libsystem_pthread.dylib 0x00000001a0e2e06c _pthread_start + 148
21 libsystem_pthread.dylib 0x00000001a0e28e2c thread_start + 8
You can solve this issue in two ways.

First, the click on the "next" button is being intercepted, most likely because the button is not scrolled into view or is covered by another element. Wait until it is clickable, move to it with ActionChains, and then click:
# Imports required
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

actions = ActionChains(driver)
wait = WebDriverWait(driver, 30)
products_list = []

for i in range(102):
    # Wait for the "next" button to become clickable, scroll to it, then click
    nextbutton = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".ant-pagination-next > button")))
    actions.move_to_element(nextbutton).click().perform()
    time.sleep(2)
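A sketch of how this slots into the loop from the question: keep the BeautifulSoup parsing exactly where it is and replace the plain driver.find_element(...).click() with the waited, scrolled click. The if guard is my own addition, on the assumption that the "next" button can no longer be clicked on the last page:

for i in range(102):
    # ... parse the current page with BeautifulSoup as in the original code ...

    if i < 101:  # don't try to click "next" on the last page
        nextbutton = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".ant-pagination-next > button")))
        actions.move_to_element(nextbutton).click().perform()
        time.sleep(2)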
Second, you can increment the page value in the URL and iterate over it, so there is no need to perform a click action on the next button at all:

for i in range(1, 103):  # pages 1..102
    driver.get(f"https://www.lazada.com.my/guardian/?from=wangpu&langFlag=en&page={i}&pageTypeId=2&q=All-Products")
    ...
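Putting this together with the parsing and export logic from the question, a minimal sketch could look like the following (the class names Bm3ON, RfADt and ooOxS are taken from the original code and may change on Lazada's side):

products = []
for i in range(1, 103):  # pages 1..102
    driver.get(f"https://www.lazada.com.my/guardian/?from=wangpu&langFlag=en&page={i}&pageTypeId=2&q=All-Products")
    WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#root")))
    time.sleep(2)

    soup = BeautifulSoup(driver.page_source, "html.parser")
    for item in soup.find_all('div', class_='Bm3ON'):
        name = item.find('div', class_='RfADt').text
        price = item.find('span', class_='ooOxS').text.replace('RM', '')
        products.append((name, price))

df = pd.DataFrame(products, columns=['Product Name', 'Price'])
df.to_excel('Lazada_Guardian_Scrape.xlsx', index=False)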
For scraping, though, you can use the Requests library in Python. Refer to Web scraping with API to find the underlying API and extract the data directly:
import requests
import json

for i in range(1, 5):
    url = f"https://www.lazada.com.my/guardian/?ajax=true&from=wangpu&isFirstRequest=true&langFlag=en&page={i}&pageTypeId=2&q=All-Products"
    response = requests.get(url)
    data = response.json()["mods"]["listItems"]
    for j in range(len(data)):
        name = data[j]["name"]
        price = data[j]["priceShow"]
        print(f"{name} : {price}")
Output:
PHAMOL 250 Children Suspension Delicious Orange Flavour 60ml : RM7.80
Darlie Double Action Fresh + Clean Toothpaste Original Strong Mint 225g x 2 (Value Pack) : RM20.92
Dermal Therapy Lip Balm 10g : RM12.78
Nurish No Teen Anti Acne Toner 100Ml : RM12.82
Live-Well OCCUsharp 30s Pack-of-3 : RM100.90
...
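If you still want the Excel file from the question, the same JSON response can be collected into a pandas DataFrame. This is a sketch under the assumption that the ajax endpoint keeps returning mods / listItems for all 102 pages and that no extra request headers are needed:

import requests
import pandas as pd

products = []
for i in range(1, 103):  # pages 1..102
    url = f"https://www.lazada.com.my/guardian/?ajax=true&from=wangpu&isFirstRequest=true&langFlag=en&page={i}&pageTypeId=2&q=All-Products"
    items = requests.get(url).json()["mods"]["listItems"]
    for item in items:
        # "priceShow" comes back as e.g. "RM7.80", so strip the currency prefix
        products.append((item["name"], item["priceShow"].replace("RM", "")))

df = pd.DataFrame(products, columns=["Product Name", "Price"])
df.to_excel("Lazada_Guardian_Scrape.xlsx", index=False)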