I sometimes collect Steam reviews using the Steam Web API for personal use, and the following code, while it has some redundant parts, can collect all reviews for a given game. Lately I have tried collecting reviews for Apex Legends; however, no matter what I do, I get only 1427 reviews. I have tried many things, such as removing the language filter, changing the off-topic activity filter, and making the end date dynamic, but I cannot seem to collect all the reviews.
After the cursor that yields 27 reviews (the first time I get anything other than 100), the next cursor is AoJwq+Kw1PUCf/CWrQI=; it yields 0 reviews and is itself returned as the next cursor.
My code is as follows. Are there any suggestions so that I can collect all the reviews? Thank you in advance.
import requests
from datetime import datetime
import urllib.parse
import time
def fetch_game_reviews(appid, cursor='*', filter='recent', language='english'):
    """
    Fetch one page of game reviews for a specific Steam game.

    Parameters:
    - appid: Steam Application ID for the game.
    - cursor: Cursor for pagination. Use '*' for the first page, then pass
      the 'cursor' value of the previous response unchanged (raw, not
      URL-encoded).
    - filter: Type of reviews to fetch ('recent', 'updated', 'all').
    - language: Language of the reviews ('english', 'spanish', etc.).

    Returns:
    - JSON response (decoded) containing game reviews.

    Raises:
    - requests.HTTPError: if the server answers with an error status.
    - requests.RequestException: on connection problems or timeout.
    """
    # Hand the query to requests as a dict so that every value (the cursor
    # in particular, which may contain '+', '/' and '=') is percent-encoded
    # exactly once, instead of being spliced into the URL by hand.
    params = {
        'json': 1,
        'cursor': cursor,
        'filter': filter,
        'language': language,
        'num_per_page': 100,
        'filter_offtopic_activity': 0,
    }
    url = f"https://store.steampowered.com/appreviews/{appid}"
    # A timeout keeps a long paging run from hanging forever on one request.
    response = requests.get(url, params=params, timeout=30)
    # Fail loudly on HTTP errors rather than trying to parse an error page.
    response.raise_for_status()
    return response.json()
def fetch_reviews_until_date(appid, end_date, filter='recent', language='all',
                             *, fetch_page=None):
    """
    Fetch game reviews for a specific Steam game until a specific date.

    Pages through the reviews starting at cursor '*' and stops when a page
    is empty, when a review older than end_date is seen, or when the
    returned cursor is missing or identical to the one just requested.

    Parameters:
    - appid: Steam Application ID for the game.
    - end_date: The end date for fetching reviews (YYYY-MM-DD format).
    - filter: Type of reviews to fetch ('recent', 'updated', 'all').
    - language: Language of the reviews ('english', 'spanish', etc.).
    - fetch_page: Optional callable used to fetch one page; it is called as
      fetch_page(appid, cursor=..., filter=..., language=...) and must
      return a dict with 'reviews' and 'cursor'. Defaults to
      fetch_game_reviews.

    Returns:
    - Tuple (all_reviews, cursors): the collected reviews (each one gets
      the extra keys 'collected' and 'appid') and the list of cursors
      returned by every request. The tuple is returned on every exit path.
    """
    if fetch_page is None:
        fetch_page = fetch_game_reviews
    end_date = datetime.strptime(end_date, '%Y-%m-%d')
    all_reviews = []
    cursors = []
    cursor = '*'
    while True:
        response = fetch_page(appid, cursor=cursor, filter=filter, language=language)
        collected_timestamp = int(time.time())
        reviews = response['reviews']
        new_cursor = response.get('cursor')
        cursors.append(new_cursor)
        if not reviews:
            return all_reviews, cursors
        for review in reviews:
            # Naive local-time datetime, compared with the naive end_date.
            review_date = datetime.fromtimestamp(review['timestamp_created'])
            review['collected'] = collected_timestamp
            review['appid'] = appid
            if review_date < end_date:
                # Same return shape as the other exits, so callers can
                # always unpack two values.
                return all_reviews, cursors
            all_reviews.append(review)
        print(len(all_reviews))
        print(new_cursor)
        # A missing cursor, or one equal to the cursor just requested, means
        # the listing cannot advance; stop instead of re-requesting the page.
        if not new_cursor or new_cursor == cursor:
            return all_reviews, cursors
        cursor = new_cursor
# Example run for app id 1172470 (the Apex Legends case described in the
# question), going back to 2020-10-01.
# NOTE: the two-name unpacking assumes the call returns a (reviews, cursors) pair.
reviews, cursors = fetch_reviews_until_date('1172470', '2020-10-01')
The documentation for getreviews shows some other parameters - review_type, purchase_type, day_range - and they are marked as Required.
When I add these parameters (except day_range), the code gives me many more reviews.
I stopped the code at 25 000 reviews and didn't check the final number.
# Excerpt of the request-building code: cursor, filter, language and appid
# are expected to be defined by the surrounding function.
# The query is passed as a dict so that requests encodes every value itself.
payload = {
'json': 1,
'cursor': cursor,
'filter': filter,
'language': language,
'num_per_page': 100,
'filter_offtopic_activity': 0,
# day_range is deliberately left out (kept here only as a disabled option).
#'day_range': 365,
# The two parameters below are the ones added to get more reviews back.
'review_type': 'all',
'purchase_type': 'all',
}
#print(payload)
url = f"https://store.steampowered.com/appreviews/{appid}"
response = requests.get(url, params=payload)
Full working code:
"""
# date: 2024.07.03
# [User Reviews - Get List (Steamworks Documentation)](https://partner.steamgames.com/doc/store/getreviews)
"""
import time
from datetime import datetime
import requests
def fetch_game_reviews(appid, cursor='*', filter='recent', language='english'):
    """
    Fetch one page of game reviews for a specific Steam game.

    Parameters:
    - appid: Steam Application ID for the game.
    - cursor: Cursor for pagination. Use '*' for the first page, then pass
      the 'cursor' value of the previous response unchanged (raw, not
      URL-encoded; requests encodes it).
    - filter: Type of reviews to fetch ('recent', 'updated', 'all').
    - language: Language of the reviews ('english', 'spanish', etc.).

    Returns:
    - JSON response (decoded) containing game reviews.

    Raises:
    - requests.HTTPError: if the server answers with an error status.
    - requests.RequestException: on connection problems or timeout.
    """
    payload = {
        'json': 1,
        'cursor': cursor,
        'filter': filter,
        'language': language,
        'num_per_page': 100,
        'filter_offtopic_activity': 0,
        # review_type and purchase_type are sent explicitly: without them
        # the listing for app 1172470 stopped after 1427 reviews.
        # day_range is intentionally not sent.
        'review_type': 'all',
        'purchase_type': 'all',
    }
    url = f"https://store.steampowered.com/appreviews/{appid}"
    # A timeout keeps a long paging run from hanging forever on one request.
    response = requests.get(url, params=payload, timeout=30)
    # Fail loudly on HTTP errors rather than trying to parse an error page.
    response.raise_for_status()
    return response.json()
def fetch_reviews_until_date(appid, end_date, filter='recent', language='all',
                             *, fetch_page=None):
    """
    Fetch game reviews for a specific Steam game until a specific date.

    Pages through the reviews starting at cursor '*' and stops when a page
    is empty, when a review older than end_date is seen, or when the
    returned cursor is missing or identical to the one just requested.

    Parameters:
    - appid: Steam Application ID for the game.
    - end_date: The end date for fetching reviews (YYYY-MM-DD format).
    - filter: Type of reviews to fetch ('recent', 'updated', 'all').
    - language: Language of the reviews ('english', 'spanish', etc.).
    - fetch_page: Optional callable used to fetch one page; it is called as
      fetch_page(appid, cursor=..., filter=..., language=...) and must
      return a dict with 'reviews' and 'cursor'. Defaults to
      fetch_game_reviews.

    Returns:
    - Tuple (all_reviews, cursors): the collected reviews (each one gets
      the extra keys 'collected' and 'appid') and the list of cursors
      returned by every request. The tuple is returned on every exit path.
    """
    if fetch_page is None:
        fetch_page = fetch_game_reviews
    end_date = datetime.strptime(end_date, '%Y-%m-%d')
    all_reviews = []
    cursors = []
    cursor = '*'
    while True:
        response = fetch_page(appid, cursor=cursor, filter=filter, language=language)
        collected_timestamp = int(time.time())
        reviews = response['reviews']
        new_cursor = response.get('cursor')
        cursors.append(new_cursor)
        if not reviews:
            return all_reviews, cursors
        for review in reviews:
            # Naive local-time datetime, compared with the naive end_date.
            review_date = datetime.fromtimestamp(review['timestamp_created'])
            review['collected'] = collected_timestamp
            review['appid'] = appid
            if review_date < end_date:
                # Same return shape as the other exits, so callers can
                # always unpack two values.
                return all_reviews, cursors
            all_reviews.append(review)
        # - after loop -
        print('len:', len(all_reviews))
        print('cursor:', new_cursor)
        # A missing cursor, or one equal to the cursor just requested, means
        # the listing cannot advance; stop instead of re-requesting the page.
        if not new_cursor or new_cursor == cursor:
            return all_reviews, cursors
        cursor = new_cursor
# Example run for app id 1172470 (the Apex Legends case described in the
# question), going back to 2020-10-01.
# NOTE: the two-name unpacking assumes the call returns a (reviews, cursors) pair.
reviews, cursors = fetch_reviews_until_date('1172470', '2020-10-01')