Search code examples
python parsing beautifulsoup steam

Steam market parsing


I have a link

It has "_price_asc" at the end, which sorts the results by price in ascending order. When I follow this link in the browser, the sorting works fine.

screenshot1

But if I try to parse the item links using bs4, I get items with random prices, i.e. the ascending sort is not applied.

screenshot2

What am I doing wrong?

from urllib.request import urlopen
from bs4 import BeautifulSoup

# Search URL exactly as opened in the browser.
link = 'https://steamcommunity.com/market/search?q=&category_730_ItemSet%5B%5D=any&category_730_ProPlayer%5B%5D=any&category_730_StickerCapsule%5B%5D=any&category_730_TournamentTeam%5B%5D=any&category_730_Weapon%5B%5D=any&category_730_Type%5B%5D=tag_CSGO_Type_Knife&appid=730#p1_price_asc'

# Download the page; the `with` block closes the HTTP response after reading.
with urlopen(link) as page:
    bs_page = BeautifulSoup(page.read(), features="html.parser")

# Elements with this class carry the listing URL in their href attribute.
objects = bs_page.find_all(class_="market_listing_row_link")

# Take at most the first ten rows. Slicing (rather than indexing 0..9) avoids
# an IndexError when the page contains fewer than ten results.
total_links = ''.join(str(row["href"]) + '\n' for row in objects[:10])
print(total_links)

Solution

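  • The sort option is in the URL fragment (the part after "#", here "#p1_price_asc"), which the client never sends to the server. The page uses JavaScript to read it and fetch the sorted data, but BeautifulSoup/urllib can't run JavaScript.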

    However, using DevTools in Firefox/Chrome (tab: Network, filter: XHR), I found that the JavaScript reads JSON data from another URL, and that JSON contains the HTML with the sorted data - so you can request this URL directly and parse the HTML with BeautifulSoup to get the sorted results.

    from urllib.request import urlopen
    from bs4 import BeautifulSoup
    import json
    
    # new url 
    
    # JSON endpoint; the query string carries sort_column=price and
    # sort_dir=asc, plus start=0 and count=10 for the first ten results.
    link = 'https://steamcommunity.com/market/search/render/?query=&start=0&count=10&search_descriptions=0&sort_column=price&sort_dir=asc&appid=730&category_730_ItemSet%5B%5D=any&category_730_ProPlayer%5B%5D=any&category_730_StickerCapsule%5B%5D=any&category_730_TournamentTeam%5B%5D=any&category_730_Weapon%5B%5D=any&category_730_Type%5B%5D=tag_CSGO_Type_Knife'

    # The `with` block closes the HTTP response once the body is decoded.
    with urlopen(link) as page:
        payload = json.loads(page.read().decode())

    # The listing rows arrive as an HTML string under the 'results_html' key.
    html = payload['results_html']

    bs_page = BeautifulSoup(html, features="html.parser")
    objects = bs_page.find_all(class_="market_listing_row_link")

    # (price text, listing URL) pairs in the order the rows appear.
    # Separate names are used so the request URL in `link` and the JSON
    # payload are not overwritten while collecting results.
    data = [
        (row.find('span', {'data-price': True}).text, row["href"])
        for row in objects
    ]

    print("\n".join(f"{price} | {href}" for price, href in data))
    

    Result:

    $67.43 USD | https://steamcommunity.com/market/listings/730/%E2%98%85%20Navaja%20Knife%20%7C%20Urban%20Masked%20%28Field-Tested%29
    $67.70 USD | https://steamcommunity.com/market/listings/730/%E2%98%85%20Navaja%20Knife%20%7C%20Night%20Stripe%20%28Field-Tested%29
    $69.00 USD | https://steamcommunity.com/market/listings/730/%E2%98%85%20Navaja%20Knife%20%7C%20Night%20Stripe%20%28Minimal%20Wear%29
    $69.52 USD | https://steamcommunity.com/market/listings/730/%E2%98%85%20Navaja%20Knife%20%7C%20Scorched%20%28Battle-Scarred%29
    $69.48 USD | https://steamcommunity.com/market/listings/730/%E2%98%85%20Navaja%20Knife%20%7C%20Safari%20Mesh%20%28Field-Tested%29
    $70.32 USD | https://steamcommunity.com/market/listings/730/%E2%98%85%20Navaja%20Knife%20%7C%20Forest%20DDPAT%20%28Battle-Scarred%29
    $70.90 USD | https://steamcommunity.com/market/listings/730/%E2%98%85%20Navaja%20Knife%20%7C%20Night%20Stripe%20%28Well-Worn%29
    $70.52 USD | https://steamcommunity.com/market/listings/730/%E2%98%85%20Navaja%20Knife%20%7C%20Forest%20DDPAT%20%28Field-Tested%29
    $71.99 USD | https://steamcommunity.com/market/listings/730/%E2%98%85%20Navaja%20Knife%20%7C%20Boreal%20Forest%20%28Field-Tested%29
    $72.08 USD | https://steamcommunity.com/market/listings/730/%E2%98%85%20Navaja%20Knife%20%7C%20Scorched%20%28Field-Tested%29
    

    BTW: Here is my first version, which reads from the old URL and sorts in Python. But it can only sort the data on the first page. To get a better result it would have to read all pages - and that would take a lot of time.

    from urllib.request import urlopen
    from bs4 import BeautifulSoup
    
    link = 'https://steamcommunity.com/market/search?q=&category_730_ItemSet%5B%5D=any&category_730_ProPlayer%5B%5D=any&category_730_StickerCapsule%5B%5D=any&category_730_TournamentTeam%5B%5D=any&category_730_Weapon%5B%5D=any&category_730_Type%5B%5D=tag_CSGO_Type_Knife&appid=730#p1_price_asc'

    # The `with` block closes the HTTP response once the page has been read.
    with urlopen(link) as page:
        bs_page = BeautifulSoup(page.read(), features="html.parser")

    objects = bs_page.find_all(class_="market_listing_row_link")

    # Build (price, listing URL) tuples and sort them. The data-price
    # attribute is converted to int so the ordering is numeric, not textual.
    data = sorted(
        (int(row.find('span', {'data-price': True})['data-price']), row["href"])
        for row in objects
    )

    # data-price is divided by 100 for display (presumably it is in cents).
    # ':.2f' keeps trailing zeros, so 6770 prints as $67.70 rather than $67.7.
    print("\n".join(f"${price / 100:.2f} USD | {href}" for price, href in data))