Open the link https://www.hsx.vn/Modules/Listed/Web/Symbols?fid=18b12d5d2d554559bf10eeb90304ff2e
in a browser with its inspect tool open.
Click the ENGLISH button at the right corner, then `listing--listing list`; the response JSON is in English.
The first element, shown below, has its content in English.
{
"cell": [
624,
"AAA",
"VN000000AAA4",
"BBG000BB42R4",
"An Phat Bioplastics Joint Stock Company ",
"382,274,496.00",
"382,274,496.00",
"10/6/2016"
]
}
I built a request with urllib.request, based on what the inspector's network tab shows:
import gzip
import json
import urllib.parse
import urllib.request

# Fetch the first page of the HSX symbol list as JSON with urllib only.
# The endpoint, query parameters and headers mirror the request seen in the
# browser's network inspector.
url_root = "https://www.hsx.vn/Modules/Listed/Web/SymbolList"
params = {
    "pageFieldName1": "Code",
    "pageFieldValue1": "",
    "pageFieldValue2": "",
    "pageFieldOperator2": "",
    "pageFieldOperator3": "",
    "pageFieldValue4": "",
    "pageFieldOperator4": "",
    "pageFieldOperator1": "eq",
    "pageFieldName2": "Sectors",
    "pageFieldName3": "Sector",
    "pageFieldValue3": "00000000-0000-0000-0000-000000000000",
    "pageFieldName4": "StartWith",
    "pageCriteriaLength": "4",
    "_search": "false",
    "rows": 10,
    "page": "1",
    "sidx": "id",
    "sord": "desc",
}
query_string = urllib.parse.urlencode(params)
url = url_root + "?" + query_string
headers = {
    # Only advertise gzip: it is the only encoding this script can decode.
    # Offering "deflate" or "br" as well would let the server answer with a
    # body that gzip.decompress() cannot read.
    "Accept-Encoding": "gzip",
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    "X-Requested-With": "XMLHttpRequest",
    "Host": "www.hsx.vn",
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "Referer": "https://www.hsx.vn/Modules/Listed/Web/Symbols?fid=18b12d5d2d554559bf10eeb90304ff2e",
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/106.0.0.0 Safari/537.36"
    ),
}
req = urllib.request.Request(url=url, method='GET', headers=headers)
with urllib.request.urlopen(req) as response:
    response_text = response.read()
    # Remember how the body was encoded before the response is closed.
    content_encoding = response.headers.get("Content-Encoding", "")

# Decompress only when the server actually gzip-encoded the body; a plain
# (identity) response is used as-is instead of crashing in gzip.decompress().
if content_encoding.strip().lower() == "gzip":
    content = gzip.decompress(response_text)
else:
    content = response_text
data = content.decode('utf-8')
data = json.loads(data)
data = data['rows']
The first element I got is in Vietnamese:
data[0]
{'id': 624, 'cell': [624, 'AAA', 'VN000000AAA4', 'BBG000BB42R4', 'Công ty Cổ phần Nhựa An Phát Xanh', '382.274.496,00', '382.274.496,00', '06/10/2016']}
How can I get the JSON in English?
You must change the language first and then request the JSON.
Here's how to do it:
from urllib.parse import urlencode
import requests
# Headers sent with both requests below.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.34",
    "Accept-Language": "en-US,en;q=0.9",
}

# Query parameters for the symbol-list endpoint (first page, 30 rows).
payload = {
    "pageFieldName1": "Code",
    "pageFieldValue1": "",
    "pageFieldValue2": "",
    "pageFieldOperator2": "",
    "pageFieldOperator3": "",
    "pageFieldValue4": "",
    "pageFieldOperator4": "",
    "pageFieldOperator1": "eq",
    "pageFieldName2": "Sectors",
    "pageFieldName3": "Sector",
    "pageFieldValue3": "00000000-0000-0000-0000-000000000000",
    "pageFieldName4": "StartWith",
    "pageCriteriaLength": "4",
    "_search": "false",
    "rows": 30,
    "page": "1",
    "sidx": "id",
    "sord": "desc",
}

endpoint = "https://www.hsx.vn/Modules/Listed/Web/SymbolList?"
en_url = "https://www.hsx.vn/Common/ChangeLanguage/9e054dac-a75b-423f-95f6-54d3f73d4e53"

with requests.Session() as s:
    # Switch the site language first, on the same session that will fetch the
    # JSON. NOTE(review): presumably the site stores the language choice in a
    # cookie that the session then replays - confirm against the response.
    change_language = s.get(en_url, headers=headers, timeout=30)
    # Fail loudly here instead of silently getting the default language later.
    change_language.raise_for_status()

    headers.update({"X-Requested-With": "XMLHttpRequest"})
    response = s.get(
        f"{endpoint}{urlencode(payload)}",
        headers=headers,
        timeout=30,  # requests never times out unless told to
    )
    # Surface HTTP errors before trying to parse the body as JSON.
    response.raise_for_status()
    data = response.json()["rows"]

# Index 4 of "cell" holds the company name in the sample rows shown above.
print(data[0]["cell"][4])
This should output:
An Phat Bioplastics Joint Stock Company