I am trying to download the calendar for specific countries from https://tradingeconomics.com/calendar without logging in.
Firstly, to get the required country data, I POST the country code in a Form to "https://sso.tradingeconomics.com/api/UserOptions".
Secondly, I reload the webpage 'https://tradingeconomics.com/calendar', but nothing has changed.
Here is my code to post the country code. In this sample script, I try to get only the Australia ('list[0][Value]': 'aus') calendar, but it returns a calendar of all the default countries.
import requests
import json
# Use one session for all requests so cookies set by the calendar page are
# carried over to the later POST and GET.
session = requests.session()
url = 'https://tradingeconomics.com/calendar'
page = session.get(url)
Logincookies = page.cookies

# The options endpoint is /api/UserOptions on the SSO host; posting to the
# bare host root does not reach it.
user_opt_url = "https://sso.tradingeconomics.com/api/UserOptions"

# Only real request headers are listed here. The browser's HTTP/2
# pseudo-headers (authority / method / path / scheme) are derived from the URL
# and must not be sent as ordinary headers, and Content-Length is computed by
# requests from the actual body rather than hard-coded.
heads = {
    'accept': 'application/json, text/javascript, */*; q=0.01',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'origin': 'https://tradingeconomics.com',
    'referer': 'https://tradingeconomics.com/calendar',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
}

# Form fields describing the three calendar options being set: the country
# filter, the date range and the importance level.
data = {
    'list[0][Host]': 'tradingeconomics.com',
    'list[0][Env]': '/calendar',
    'list[0][Name]': 'te-cal-countries',
    'list[0][Value]': 'aus',
    'list[1][Host]': 'tradingeconomics.com',
    'list[1][Env]': "/calendar",
    'list[1][Name]': 'te-cal-range',
    'list[1][Value]': '1',
    'list[2][Host]': 'tradingeconomics.com',
    'list[2][Env]': '/calendar',
    'list[2][Name]': 'te-cal-importance',
    'list[2][Value]': '1',
}

# Pass the dict itself so requests form-encodes it, matching the declared
# content-type; json.dumps() would send a JSON string under a form header.
page = session.post(user_opt_url, headers=heads, data=data, cookies=Logincookies)

# Fetch the calendar page again within the same session.
page = session.get(url)
Then I parse the webpage into a table, but it returns a calendar of all the default countries.
from bs4 import BeautifulSoup
def _first_match(node, selector):
    """Return the first element under *node* matching *selector*, or None."""
    hits = node.select(selector)
    return hits[0] if hits else None


def _text_of(node, selector):
    """Return the stripped text of the first match for *selector*, or None."""
    hit = _first_match(node, selector)
    return hit.get_text().strip() if hit is not None else None


# Parse the fetched page and take the rows of the second <table> that carry a
# data-url attribute.
doc = BeautifulSoup(page.text, 'lxml')
ntr = doc.find_all('table')[1].select('tr[data-url^=""]')

# Flat list: every processed row appends 10 consecutive entries in the order
# date, time, lvl, country, event, actual, previous, revised, consensus,
# forecast.
lst = []
for n in ntr:
    if n.select('th'):
        # Header row: only the date column is filled.
        lst.extend([n.select('th')[0].get_text().strip()] + [None] * 9)
    elif n.select('td'):
        date_span = _first_match(n, 'span[class^="calendar-date"]')
        flag = _first_match(n, 'div[class^="flag"]')

        # Event name: prefer the calendar-event link; otherwise fall back to
        # the second <span> in the row. This fallback is a weak filter, and it
        # is only used when a second <span> actually exists (indexing [1] on a
        # single-span row would raise IndexError).
        event = _text_of(n, 'a[class="calendar-event"]')
        if event is None:
            spans = n.select('span')
            event = spans[1].get_text().strip() if len(spans) > 1 else None

        lst.extend([
            None,  # date
            date_span.get_text().strip() if date_span is not None else None,  # time
            date_span['class'][0].strip() if date_span is not None else None,  # lvl
            flag['title'].strip() if flag is not None else None,  # country
            event,  # event
            _text_of(n, 'span[id="actual"]'),  # actual
            _text_of(n, 'span[id="previous"]'),  # previous
            _text_of(n, 'span[id="revised"]'),  # revised
            _text_of(n, 'span[id="consensus"]'),  # consensus
            _text_of(n, 'span[id="forecast"]'),  # forecast
        ])
    else:
        # Row has neither <th> nor <td> cells.
        print("error!!!")
I guess that after I post the country code, I need to stay in the same session and wait for the webpage to refresh, or there is something else I have missed. I would appreciate your help.
I figured it out a few days later. All I needed to do was add a few entries to the cookies sent with the request.
req_cookies = { "te-cal-range": "1", #,0:recent,1:today,2:tmr,3:this wk, 4:next wk, 5:next Mth, -1:ytdy,-2:last wk, -3:last mth
'te-cal-importance': '1', #
'te-cal-countries': 'aus,bra,can,chn,emu,eun,fra,deu,ind,idn,ita,jpn,mex,rus,sau,zaf,kor,\
esp,tur,gbr,usa,sgp,twn,hkg,nzl,nor,mys,tha,vnm',
'TECalendarOffset': '480', # GMT+480mins