Search code examples
python python-3.x web-scraping beautifulsoup python-requests

Unable to parse the results from a webpage using the requests module


I've created a script to scrape the names of the products from this webpage using the requests module. When I run the script, I can see the status code is 200, but the script doesn't bring any results. How can I grab the results from the webpage using the requests module?

from bs4 import BeautifulSoup
import requests

# Search-results URL copied from the browser. The query string carries the
# sort order, page size (rows=30), offset (start=0) and the URL-encoded JSON
# filter (asStructure).
link = "https://branddb.wipo.int/en/advancedsearch/results?sort=score%20desc&strategy=concept&rows=30&asStructure=%7B%22_id%22:%2262a3%22,%22boolean%22:%22AND%22,%22bricks%22:%5B%7B%22_id%22:%2262a4%22,%22key%22:%22type%22,%22value%22:%5B%22AO%22,%22EMBLEM%22,%22GI%22,%22INN%22,%22TRADEMARK%22%5D%7D%5D%7D&_=1722527941041&fg=_void_&start=0"

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
    # No explicit Accept-Encoding: requests advertises only the encodings it
    # can decode itself. Claiming "br"/"zstd" without the matching decoder
    # packages installed can leave res.text as undecoded compressed bytes.
    'Accept-Language': 'en-US,en;q=0.9',
    'referer': 'https://branddb.wipo.int/',
    'origin': 'https://branddb.wipo.int',
}
# A timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get(link, headers=headers, timeout=30)
print(res.status_code)
soup = BeautifulSoup(res.text, "lxml")
# Print the text of every <span class="brandName"> found in the returned HTML.
for item in soup.select("span.brandName"):
    print(item.get_text())

Solution

  • The page is loaded dynamically, so the HTML returned by requests does not contain the results. You can use a browser automation tool like Selenium or Playwright to render the page, or better:

    You can scrape the API, which returns Base64-encoded, AES-encrypted responses. To decrypt them you need to install a cryptographic library such as PyCryptodome:

    pip install pycryptodome
    

    With the API you can get up to 360 entries per request:

    from Crypto.Cipher import AES
    from Crypto.Util.Padding import unpad
    import base64
    import requests
    import json
    
    
    def decrypt(base64_input, key="8?)i_~Nk6qv0IX;2"):
        """Decrypt a Base64-encoded, AES-ECB-encrypted payload to text.

        Args:
            base64_input: Base64 text holding the ciphertext.
            key: AES key, either ``str`` (UTF-8 encoded before use) or
                ``bytes``. Defaults to the 16-character key this snippet
                has always used, so existing callers are unaffected.

        Returns:
            The decrypted plaintext decoded as UTF-8.

        Raises:
            ValueError: If the input is not valid Base64, is not a whole
                number of AES blocks, or has invalid padding.
        """
        # Accept a ready-made bytes key as well as the textual form.
        key_bytes = key.encode('utf-8') if isinstance(key, str) else key
        cipher = AES.new(key_bytes, AES.MODE_ECB)

        ciphertext = base64.b64decode(base64_input)
        padded_plaintext = cipher.decrypt(ciphertext)

        # Strip the block padding before decoding the bytes to text.
        return unpad(padded_plaintext, AES.block_size).decode('utf-8')
    
    
    # Search parameters sent as the JSON body of the POST request.
    payload = {
        "sort": "score desc",
        "strategy": "concept",
        "rows": 360,  # entries requested per call
        "start": 0,   # offset of the first entry
        "fg": "_void_",
        # The filter structure is itself serialized to a JSON string.
        "asStructure": json.dumps({
            "boolean": "AND",
            "bricks": [
                {"key": "type", "value": ["AO", "EMBLEM", "GI", "INN","TRADEMARK"]}
            ]
        })
    }

    url = 'https://api.branddb.wipo.int/search'
    # Bound the wait, and stop on HTTP errors so that an error page is never
    # fed into the Base64/AES decoding below, where it would only surface as
    # a misleading padding or decoding error.
    response = requests.post(url, json=payload, timeout=30)
    response.raise_for_status()

    decrypted_response = decrypt(response.text)
    docs = json.loads(decrypted_response)['response']['docs']

    print(docs)