Search code examples
python-requests, http-post, form-data

How to Post Country Codes in Form Data to Url to Get Expected WebData?


I am trying to download the calendar for specific countries from https://tradingeconomics.com/calendar without logging in.

Firstly, to get the required country data, I POST the country code in a Form to "https://sso.tradingeconomics.com/api/UserOptions".

Secondly, I refresh the webpage 'https://tradingeconomics.com/calendar', but nothing is updated.

Here is my code to post the country code. In this sample script, I try to get only the Australia ('list[0][Value]': 'aus') calendar, but it returns a calendar of all the default countries.

import requests
import json

# Use one session so that cookies set by the calendar page are kept and
# re-sent automatically on the requests that follow.
session = requests.session()
url = 'https://tradingeconomics.com/calendar'
page = session.get(url)
Logincookies = page.cookies

# The endpoint path is part of the URL; it cannot be supplied through a
# 'path' header.
user_opt_url = "https://sso.tradingeconomics.com/api/UserOptions"

# Only real request headers are listed here. The ':authority', ':method',
# ':path' and ':scheme' entries that browser dev tools display are HTTP/2
# pseudo-headers describing the request line, not headers to send, and
# 'content-length' is computed by requests from the actual body.
heads = {
    'accept': 'application/json, text/javascript, */*; q=0.01',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'origin': 'https://tradingeconomics.com',
    'referer': 'https://tradingeconomics.com/calendar',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}

# Form fields: three option entries (countries, range, importance), each
# scoped to the /calendar page of tradingeconomics.com.
data = {
    'list[0][Host]': 'tradingeconomics.com',
    'list[0][Env]': '/calendar',
    'list[0][Name]': 'te-cal-countries',
    'list[0][Value]': 'aus',
    'list[1][Host]': 'tradingeconomics.com',
    'list[1][Env]': "/calendar",
    'list[1][Name]': 'te-cal-range',
    'list[1][Value]': '1',
    'list[2][Host]': 'tradingeconomics.com',
    'list[2][Env]': '/calendar',
    'list[2][Name]': 'te-cal-importance',
    'list[2][Value]': '1',
}

# Pass the dict itself so requests form-encodes it, matching the declared
# content type. json.dumps() would send a JSON string as the body instead.
page = session.post(user_opt_url, headers=heads, data=data, cookies=Logincookies)

# Re-fetch the calendar within the same session.
# NOTE(review): whether the site applies the posted options to this GET for
# an anonymous session is not established by this script - confirm.
page = session.get(url)

Then I parse the webpage into a table, but it returns a calendar of all the default countries.

    from bs4 import BeautifulSoup

    def _first(node, selector):
        """Return the first descendant of node matching selector, or None."""
        found = node.select(selector)
        return found[0] if found else None

    def _text(node, selector):
        """Return the stripped text of the first match for selector, or None."""
        hit = _first(node, selector)
        return hit.get_text().strip() if hit is not None else None

    # Parse the calendar page and collect the rows of its second table.
    doc = BeautifulSoup(page.text, 'lxml')
    ntr = doc.find_all('table')[1].select('tr[data-url^=""]')

    # lst is a flat list holding ten consecutive entries per table row:
    # date, time, lvl, country, event, actual, previous, revised,
    # consensus, forecast.
    lst = []
    for n in ntr:
        if n.select('th'):
            # Header row: only the date is known, the other nine are empty.
            lst.extend([n.select('th')[0].get_text().strip()] + [None] * 9)
        elif n.select('td'):
            date_span = _first(n, 'span[class^="calendar-date"]')
            flag = _first(n, 'div[class^="flag"]')

            # Prefer the event link; otherwise fall back to the second span.
            # This fallback is a weak filter, so it is only used when a
            # second span really exists (a lone span used to raise
            # IndexError here).
            event = _text(n, 'a[class="calendar-event"]')
            if event is None:
                spans = n.select('span')
                event = spans[1].get_text().strip() if len(spans) > 1 else None

            lst.extend([
                None,  # date
                date_span.get_text().strip() if date_span is not None else None,  # time
                # lvl: first CSS class of the calendar-date span
                date_span['class'][0].strip() if date_span is not None else None,
                flag['title'].strip() if flag is not None else None,  # country
                event,  # event
                _text(n, 'span[id="actual"]'),  # actual
                _text(n, 'span[id="previous"]'),  # previous
                _text(n, 'span[id="revised"]'),  # revised
                _text(n, 'span[id="consensus"]'),  # consensus
                _text(n, 'span[id="forecast"]'),  # forecast
            ])
        else:
            print("error!!!")

I guess that after posting the country code I need to stay in the same session and wait for the webpage to refresh, or there is something else I missed. I would appreciate your help.


Solution

  • I figured it out a few days later. All I needed was to add a few entries to the cookies sent with the request.

        req_cookies = { "te-cal-range": "1", #,0:recent,1:today,2:tmr,3:this wk, 4:next wk, 5:next Mth, -1:ytdy,-2:last wk, -3:last mth
                       'te-cal-importance': '1',  # 
                       'te-cal-countries': 'aus,bra,can,chn,emu,eun,fra,deu,ind,idn,ita,jpn,mex,rus,sau,zaf,kor,\
                                          esp,tur,gbr,usa,sgp,twn,hkg,nzl,nor,mys,tha,vnm', 
                       'TECalendarOffset': '480',  # GMT+480mins