Search code examples
python web-scraping beautifulsoup html-parsing

How to Get Script Tag Variables From a Website using Python


I am trying to pull a variable called meta out of a script tag using Python. I have used Selenium to do this before, but Selenium is too slow for what I am trying to accomplish. Is there any other way of doing this?

I have tried using BeautifulSoup, but I'm stuck... code is below

Here is the script tag I'm trying to get the meta variable from:

<script>window.ShopifyAnalytics = window.ShopifyAnalytics || {};
window.ShopifyAnalytics.meta = window.ShopifyAnalytics.meta || {};
window.ShopifyAnalytics.meta.currency = 'USD';
var meta = {"product":{"id":2006141861957,"vendor":"Nike","type":"Sneakers","variants": [{"id":19039563677765,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 4","public_title":"4","sku":"191888228157"}, {"id":19039563710533,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 4.5","public_title":"4.5","sku":"191888228164"},{"id":19039563743301,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 5","public_title":"5","sku":"191888228171"},{"id":19039563776069,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 5.5","public_title":"5.5","sku":"191888228188"},{"id":19039563808837,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 6","public_title":"6","sku":"886059750741"},{"id":19039563841605,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 6.5","public_title":"6.5","sku":"886059750758"},{"id":19039563874373,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 7","public_title":"7","sku":"886059750765"},{"id":19039563907141,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 7.5","public_title":"7.5","sku":"886059750772"},{"id":19039563939909,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 8","public_title":"8","sku":"886059750789"},{"id":19039563972677,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 8.5","public_title":"8.5","sku":"886059750796"},{"id":19039564005445,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 9","public_title":"9","sku":"886059750802"},{"id":19039564038213,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser 
Orange \/ Volt Glow - 9.5","public_title":"9.5","sku":"886059750819"},{"id":19039564070981,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 10","public_title":"10","sku":"886059750826"},{"id":19039564103749,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 10.5","public_title":"10.5","sku":"886059751038"},{"id":19039564136517,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 11","public_title":"11","sku":"886059751045"},{"id":19039564169285,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 11.5","public_title":"11.5","sku":"886059751052"},{"id":19039564202053,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 12","public_title":"12","sku":"886059751069"},{"id":19039564234821,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 12.5","public_title":"12.5","sku":"886059751076"},{"id":19039564267589,"price":16000,"name":"Nike React Element '87 - Light Orewood Brown \/ Laser Orange \/ Volt Glow - 13","public_title":"13","sku":"886059752448"}]},"page":{"pageType":"product","resourceType":"product","resourceId":2006141861957}};
for (var attr in meta) {
  window.ShopifyAnalytics.meta[attr] = meta[attr];
}</script>

Here is what I've tried:

# Parse the fetched page, then print every <script> element whose text
# mentions the `var meta` assignment.
bs = soup(r.text, "html.parser")
scripts = bs.findAll('script')
for tag in scripts:
    # Skip scripts that do not declare the variable we are after.
    if 'var meta' not in tag.text:
        continue
    print(tag)

This prints the whole script tag as a string, like this:

<script>window.ShopifyAnalytics = window.ShopifyAnalytics || {};
window.ShopifyAnalytics.meta = window.ShopifyAnalytics.meta || {};
window.ShopifyAnalytics.meta.currency = 'USD';
var meta = {"product":{"id":2008798101573,"vendor":"Adidas","type":"Sneakers","variants":[{"id":19054898249797,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 3","public_title":"3","sku":"98197426"},{"id":19054898282565,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 3.5","public_title":"3.5","sku":"98197427"},{"id":19054898315333,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 4","public_title":"4","sku":"98197428"},{"id":19054898348101,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 4.5","public_title":"4.5","sku":"98197429"},{"id":19054898380869,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 5","public_title":"5","sku":"191525030983"},{"id":19054898413637,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 5.5","public_title":"5.5","sku":"191525030952"},{"id":19054898446405,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 6","public_title":"6","sku":"191525030938"},{"id":19054898479173,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 6.5","public_title":"6.5","sku":"191525030914"},{"id":19054898511941,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 7","public_title":"7","sku":"191525030907"},{"id":19054898544709,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 7.5","public_title":"7.5","sku":"191525030976"},{"id":19054898577477,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 8","public_title":"8","sku":"191525031010"},{"id":19054898610245,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 8.5","public_title":"8.5","sku":"191525030990"},{"id":19054898643013,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 
9","public_title":"9","sku":"191525031027"},{"id":19054898675781,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 9.5","public_title":"9.5","sku":"191525030921"},{"id":19054898708549,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 10","public_title":"10","sku":"191525030969"},{"id":19054898741317,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 10.5","public_title":"10.5","sku":"191525030945"},{"id":19054898774085,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 11","public_title":"11","sku":"191525031003"},{"id":19054898806853,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 11.5","public_title":"11.5","sku":"98197443"},{"id":19054898839621,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 12","public_title":"12","sku":"98197444"},{"id":19054898872389,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 12.5","public_title":"12.5","sku":"98197445"},{"id":19054898905157,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 13","public_title":"13","sku":"98197446"},{"id":19054898937925,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 13.5","public_title":"13.5","sku":"98197447"},{"id":19054898970693,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 14","public_title":"14","sku":"98197448"},{"id":19054899003461,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 14.5","public_title":"14.5","sku":"98197449"},{"id":19054899036229,"price":13000,"name":"adidas Originals WMNS Falcon - Ash Pearl \/ Off White - 15","public_title":"15","sku":"98197450"}]},"page":{"pageType":"product","resourceType":"product","resourceId":2008798101573}};
for (var attr in meta) {
 window.ShopifyAnalytics.meta[attr] = meta[attr];
}</script>

What I want to do is return the meta variable itself so that I can pull data out of it, for example, all the "id" values under "product".


Solution

  • Without having the full code that produced that output, I'm guessing a bit here. But if you can grab the script text, you can parse it with the json module and get at that data.

    So I'll use an example from one of your previous questions, which has essentially the same format:

    There's really nothing different from your attempt, except that we extract the part of the script text that is valid JSON and pass it to json.loads(). That gives you ordinary Python dictionaries and lists, from which you can read the ids of the product's variants:

    import requests
    import bs4
    import json
    
    url = 'https://packershoes.com/products/copy-of-adidas-predator-accelerator-trainer'
    r = requests.get(url)
    
    bs = bs4.BeautifulSoup(r.text, "html.parser")
    scripts = bs.find_all('script')
    jsonObj = None
    
    for s in scripts:
        if 'var meta' in s.text:
            script = s.text
            script = script.split('var meta = ')[1]
            script = script.split(';\nfor (var attr in meta)')[0]
    
            jsonStr = script
            jsonObj = json.loads(jsonStr)
    
    for value in jsonObj['product']['variants']:
        print ('ID: '+ str(value['id']))
    

    Output:

    ID: 14189113049177
    ID: 14189122912345
    ID: 14139452129369
    ID: 14139452194905
    ID: 14139452227673
    ID: 14139452293209
    ID: 14139452325977
    ID: 14139452391513
    ID: 14139452424281
    ID: 14189321715801
    ID: 14139452457049
    ID: 14139909505113