Tags: python, beautifulsoup

How to scrape all customer reviews?


I am trying to scrape all the reviews on this page: https://www.backmarket.com/en-us/r/l/airpods/345c3c05-8a7b-4d4d-ac21-518b12a0ec17. The site says there are 753 reviews, but when I scrape the page I only get 10 of them, so I am not sure how to get all 753. Here is my code:

# importing modules 
import pandas as pd
from requests import get
from bs4 import BeautifulSoup

# Fetch the web page
url = 'https://www.backmarket.com/en-us/r/l/airpods/345c3c05-8a7b-4d4d-ac21-518b12a0ec17'
response = get(url)  # download the page HTML
page = response.text

# Parse the HTML content
soup = BeautifulSoup(page, 'html.parser')

# To see different information
## reviewer's name 

reviewers_name = soup.find_all('p', class_='body-1-bold')
name = [x.text for x in reviewers_name]

## Purchase Data 

purchase_date = soup.find_all('p', class_='text-static-default-low body-2')
date = [x.text for x in purchase_date]


## Country 

country_text = soup.find_all('p', class_='text-static-default-low body-2 mt-32')
country = [x.text for x in country_text]


## Reviewed Products 

products_text = soup.find_all('span', class_='rounded-xs inline-block max-w-full truncate body-2-bold px-4 py-0 bg-static-default-mid text-static-default-hi')
products = [x.text for x in products_text]

## Actual Reviews 

review_text = soup.find_all('p', class_='body-1 block whitespace-pre-line')
review = [x.text for x in review_text]


## Review Ratings 

review_ratings_value = soup.find_all('span', class_='ml-4 mt-1 md:mt-2 body-2-bold')
review_ratings = [x.text for x in review_ratings_value]



# Create the Data Frame 
pd.DataFrame({
    'reviewers_name': name,
    'purchase_date': date,
    'country': country,
    'products': products,
    'review': review,
    'review_ratings': review_ratings
})

My question is how I can scrape all reviews.


Solution

  • Based on what you're after, I think the requests library and a little bit of code can fetch the result you want. Here is my approach:

    The page only renders the first batch of reviews in its initial HTML; the rest are loaded on demand, which is why your scraper only sees 10. Instead, we can call the https://www.backmarket.com/reviews/product-landings/345c3c05-8a7b-4d4d-ac21-518b12a0ec17/products/reviews API endpoint directly to fetch all of the review information. (I'm guessing the UUID in the URL is your product ID; correct me if I'm wrong.)

    Note: the site has rate-limit protection, so I added a time.sleep(3) between requests to stay under it (if you still hit the limit, there is a retry sketch at the end of this answer).

    Here is my code:

    import time
    import urllib3
    import requests

    # verify=False is used below, so silence the resulting InsecureRequestWarning
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def get_data(content):
        """Print the review fields from one page of API results."""
        for i in content['results']:
            name = f"{i['customer']['firstName']} {i['customer']['lastName']}"
            rating = i['averageRate']
            review = i['comment']
            date = i['createdAt']
            prod = i['product']['title']
            prod_img = i['product']['imageUrl']
            country = i['countryCode']

            print(
                f"reviewers_name:  {name}\n"
                f"purchase_date:  {date}\n"
                f"country:  {country}\n"
                "products:-----------\n"
                f"product_name:  {prod}\n"
                f"product_img:  {prod_img}\n"
                "---------------------\n"
                f"review:  {review}\n"
                f"review_ratings:  {rating}\n"
                "============================"
            )

    def gather_cursor(url):
        """Follow the cursor-based pagination until nextCursor comes back empty."""
        cursor_and_url = url
        while True:
            time.sleep(3)  # stay under the rate limit
            response = requests.get(cursor_and_url, verify=False)
            data = response.json()
            get_data(data)
            cursor = data['nextCursor']
            if not cursor:
                break
            cursor_and_url = f"{url}?cursor={cursor}"


    gather_cursor("https://www.backmarket.com/reviews/product-landings/345c3c05-8a7b-4d4d-ac21-518b12a0ec17/products/reviews")
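
    If you would rather end up with the same pandas DataFrame as in your original script instead of printed output, you can collect each review into a list of dicts and build the frame at the end. Here is a minimal sketch of that idea; it reuses the endpoint and JSON field names from the code above, the column names mirror the ones in your question, and collect_reviews and API_URL are just names used for this sketch:

    import time
    import urllib3
    import requests
    import pandas as pd

    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    API_URL = "https://www.backmarket.com/reviews/product-landings/345c3c05-8a7b-4d4d-ac21-518b12a0ec17/products/reviews"

    def collect_reviews(url):
        """Walk the paginated review API and return all reviews as a DataFrame."""
        rows, cursor_and_url = [], url
        while True:
            time.sleep(3)  # stay under the rate limit
            data = requests.get(cursor_and_url, verify=False).json()
            for i in data['results']:
                rows.append({
                    'reviewers_name': f"{i['customer']['firstName']} {i['customer']['lastName']}",
                    'purchase_date': i['createdAt'],
                    'country': i['countryCode'],
                    'products': i['product']['title'],
                    'review': i['comment'],
                    'review_ratings': i['averageRate'],
                })
            cursor = data['nextCursor']
            if not cursor:
                break
            cursor_and_url = f"{url}?cursor={cursor}"
        return pd.DataFrame(rows)

    df = collect_reviews(API_URL)
    print(df.shape)  # roughly 753 rows if every review is fetched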
    

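    One more thing about the rate limiting: if you still get throttled despite the time.sleep(3), a simple option is to retry with a progressively longer pause. The sketch below assumes the server signals throttling with an HTTP 429 status code (I have not verified that this endpoint actually does), and get_with_retry is just a name used for the sketch; it could be used as a drop-in replacement for the requests.get call inside gather_cursor:

    import time
    import requests

    def get_with_retry(url, max_retries=5, base_pause=10):
        """GET a URL, backing off and retrying while the server keeps returning 429."""
        for attempt in range(max_retries):
            response = requests.get(url, verify=False)
            # 429 "Too Many Requests" is assumed to be the throttling signal here
            if response.status_code != 429:
                return response
            time.sleep(base_pause * (attempt + 1))  # wait a little longer on each retry
        # still throttled after every retry: raise so the caller notices
        response.raise_for_status()
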
    Hope this will help.