I've created a script to scrape the names of the products from this webpage using the requests module. When I run the script, I can see the status code is 200, but the script doesn't bring any results. How can I grab the results from the webpage using the requests module?
from bs4 import BeautifulSoup
import requests
# Address of the rendered search-results page; the search filter is carried
# URL-encoded in the query string.
SEARCH_PAGE_URL = "https://branddb.wipo.int/en/advancedsearch/results?sort=score%20desc&strategy=concept&rows=30&asStructure=%7B%22_id%22:%2262a3%22,%22boolean%22:%22AND%22,%22bricks%22:%5B%7B%22_id%22:%2262a4%22,%22key%22:%22type%22,%22value%22:%5B%22AO%22,%22EMBLEM%22,%22GI%22,%22INN%22,%22TRADEMARK%22%5D%7D%5D%7D&_=1722527941041&fg=_void_&start=0"

# Browser-like headers sent along with the page request.
request_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept-Language": "en-US,en;q=0.9",
    "referer": "https://branddb.wipo.int/",
    "origin": "https://branddb.wipo.int",
}

page = requests.get(SEARCH_PAGE_URL, headers=request_headers)
print(page.status_code)

# Parse the returned markup and print the text of every <span class="brandName">.
parsed_page = BeautifulSoup(page.text, "lxml")
brand_names = [tag.get_text() for tag in parsed_page.select("span.brandName")]
for brand_name in brand_names:
    print(brand_name)
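The page is dynamically loaded: the results are filled in by JavaScript after the initial HTML arrives, so the markup that requests receives contains no product names. You can use a browser automation tool like Selenium or Playwright to render the page, or, better: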
You can scrape the API directly. It returns Base64-encoded, AES-encrypted responses, so to decrypt them you need to install a cryptographic library such as PyCryptodome:
pip install pycryptodome
With the API you can get up to 360 entries per request:
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
import base64
import requests
import json
def decrypt(base64_input: str, key: str = "8?)i_~Nk6qv0IX;2") -> str:
    """Decrypt a Base64-encoded, AES-ECB-encrypted payload and return its text.

    Args:
        base64_input: Base64 text wrapping the AES ciphertext.
        key: AES key as text; it is encoded as UTF-8 before use. The default
            is the key this function previously hard-coded, so calls that
            pass only ``base64_input`` behave exactly as before.

    Returns:
        The decrypted payload with block padding stripped, decoded as UTF-8.

    Raises:
        ValueError: Propagated from the underlying calls when the input is
            not valid Base64, the ciphertext length is not a multiple of the
            AES block size, the padding is invalid (typically a wrong key),
            or the plaintext is not valid UTF-8.
    """
    cipher = AES.new(key.encode('utf-8'), AES.MODE_ECB)
    ciphertext = base64.b64decode(base64_input)
    padded_plaintext = cipher.decrypt(ciphertext)
    # ECB works on whole blocks, so the plaintext carries padding that must be
    # removed before decoding.
    return unpad(padded_plaintext, AES.block_size).decode('utf-8')
# Body of the search request sent to the API. "rows" is the page size and
# "start" the offset of the first entry.
payload = {
    "sort": "score desc",
    "strategy": "concept",
    "rows": 360,
    "start": 0,
    "fg": "_void_",
    # The structured query is serialized to a JSON string nested inside the
    # JSON request body.
    "asStructure": json.dumps({
        "boolean": "AND",
        "bricks": [
            {"key": "type", "value": ["AO", "EMBLEM", "GI", "INN", "TRADEMARK"]},
        ],
    }),
}

url = 'https://api.branddb.wipo.int/search'
# A timeout keeps the script from hanging forever if the server never answers.
response = requests.post(url, json=payload, timeout=30)
# Fail here with a clear HTTP error rather than letting an error page reach
# decrypt(), where it would surface as an obscure Base64/padding failure.
response.raise_for_status()
decrypted_response = decrypt(response.text)
docs = json.loads(decrypted_response)['response']['docs']
print(docs)