I have a file defining a list of RSS feeds:
# RSS feed URLs to be checked.
RSS_FEEDS = [
    "https://www.fanpage.it/feed/",
    "https://www.ilfattoquotidiano.it/feed/",
    "https://forbes.it/feed/",
    "https://formiche.net/feed/",
]
I wrote the following test:
import requests
from feeds import RSS_FEEDS
# Probe every configured feed and fail on the first one that does not answer 200.
for rssfeed in RSS_FEEDS:
    # stream=True defers the body download, so only the status line and headers
    # are fetched; timeout makes a stalled server raise requests.Timeout instead
    # of blocking forever, while a dead link raises requests.ConnectionError.
    with requests.get(rssfeed, stream=True, timeout=10) as response:
        assert response.status_code == 200, (
            f"{rssfeed} answered with status {response.status_code}"
        )
Are there more efficient ways to do this, i.e. ways that download less data?
How would you handle a slow response vs a dead link?
The above would just tell me if the URL is fetchable, but how could I assess if it's a valid RSS stream?
You could solve it using the aiohttp library together with asyncio, like this:
from aiohttp import ClientSession
from asyncio import gather, create_task, run, set_event_loop, set_event_loop_policy
from traceback import format_exc
import sys
# Windows-only workaround (Python 3.8+) for a problem the author observed on
# Windows but not on Linux: make asyncio use the selector event loop policy.
# Only the policy is set; run() creates its own loop from the active policy,
# so a ProactorEventLoop created here would never be used or closed.
if sys.version_info >= (3, 8) and sys.platform.startswith('win'):  # Check for operating system
    from asyncio import WindowsSelectorEventLoopPolicy
    set_event_loop_policy(WindowsSelectorEventLoopPolicy())  # Bug is not present in Linux
# Feed URLs that are probed concurrently by GetUrls().
RSS_FEEDS = [
    "https://www.fanpage.it/feed/",
    "https://www.ilfattoquotidiano.it/feed/",
    "https://forbes.it/feed/",
    "https://formiche.net/feed/",
]
async def GetRessource(
    url: str,
    session: "ClientSession",
    timeout: "float | None" = None,
) -> "int | dict":
    """Request *url* and report whether it answered with HTTP 200.

    Args:
        url: Address of the feed to probe.
        session: Open aiohttp session used to issue the GET request.
        timeout: Optional overall limit in seconds for this request. With
            ``None`` (the default) the session's own timeout setting applies.

    Returns:
        The integer ``200`` when the server answered with that status,
        otherwise an empty dict (non-200 status, timeout, or any other error).
        Errors are printed, never raised.
    """
    # Function-scope imports keep this block self-contained.
    from asyncio import TimeoutError as RequestTimeout

    request_options = {}
    if timeout is not None:
        # aiohttp expects a ClientTimeout object for per-request limits.
        from aiohttp import ClientTimeout
        request_options["timeout"] = ClientTimeout(total=timeout)

    try:
        async with session.get(url, **request_options) as response:
            if response.status == 200:
                # The body is not read on success; the status is all we need.
                return response.status
            r: str = await response.text()
            print(f"Error, got response code: {response.status} message: {r}")
    except RequestTimeout:
        # A slow or stalled server is reported separately from hard failures
        # (DNS errors, refused connections, ...) handled below.
        print(f"Timeout: {url} did not answer in time")
    except Exception:
        print(f"General Exception:\n{format_exc()}")
    return {}
async def GetUrls() -> None:
    """Probe every URL in RSS_FEEDS concurrently and assert each returned 200.

    Raises:
        AssertionError: If at least one feed did not yield status 200; the
            message lists every offending URL instead of stopping at the first.
    """
    async with ClientSession() as session:
        # One task per feed so the requests overlap instead of running in sequence.
        Tasks: list = [create_task(GetRessource(url, session)) for url in RSS_FEEDS]
        Results: list = await gather(*Tasks, return_exceptions=False)
    # gather() returns results in the order the tasks were passed in, so they
    # line up one-to-one with RSS_FEEDS.
    failed: list = [url for url, result in zip(RSS_FEEDS, Results) if result != 200]
    assert not failed, f"Feeds not returning 200: {failed}"
async def main() -> None:
    """Coroutine entry point: run the feed check once."""
    await GetUrls()
# Only start the check when the file is executed as a script, not on import.
if __name__ == "__main__":
    run(main())
Contents of Results:
200
200
200
200
It's checking the URLs in parallel.