I have a list of URLs whose table data I would like to scrape and save as CSV files on a local drive, using a substring of each URL as the filename. This is the code I currently have, but it only writes the first URL's data to two separate files.
import csv
import requests
from bs4 import BeautifulSoup

link = ['https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_19.html',
        'https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_20.html']

def get_data(link):
    for url in link:
        res = requests.get(url)
        soup = BeautifulSoup(res.text,"lxml")
        for items in soup.select("table.table tr"):
            td = [item.get_text(strip=True) for item in items.select("th,td")]
            writer.writerow(td)

if __name__ == '__main__':
    for f in link:
        f2 = f.split('audit/')[-1].split('.html')[0]
        with open(f2 + '.csv',"w",newline="") as infile:
            writer = csv.writer(infile)
            get_data(link)
You don't need to loop over link again in get_data(); as written, every call to get_data(link) re-scrapes the entire list into whichever file is currently open. Just pass each url to get_data() from your main loop:
import csv
import requests
from bs4 import BeautifulSoup

link = ['https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_19.html',
        'https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_20.html']

def get_data(url):
    # Scrape a single page and write its table rows to the currently open CSV
    res = requests.get(url)
    soup = BeautifulSoup(res.text,"lxml")
    for items in soup.select("table.table tr"):
        td = [item.get_text(strip=True) for item in items.select("th,td")]
        writer.writerow(td)

if __name__ == '__main__':
    for f in link:
        # Take the part of the URL after 'audit/' (without '.html') as the filename
        f2 = f.split('audit/')[-1].split('.html')[0]
        with open(f2 + '.csv',"w",newline="") as infile:
            writer = csv.writer(infile)
            get_data(f)
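
For a slightly more defensive variant, you could pass the writer into get_data() explicitly rather than relying on the module-level writer, and give the request a timeout plus an HTTP-status check. This is just a sketch of that idea; the timeout value and the raise_for_status() guard are assumptions added here, not part of the answer above:

import csv
import requests
from bs4 import BeautifulSoup

link = ['https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_19.html',
        'https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_20.html']

def get_data(url, writer):
    # Explicit writer parameter: no reliance on a module-level variable
    res = requests.get(url, timeout=30)  # assumed timeout, tune as needed
    res.raise_for_status()               # fail loudly instead of writing an empty CSV
    soup = BeautifulSoup(res.text, "lxml")
    for row in soup.select("table.table tr"):
        writer.writerow([cell.get_text(strip=True) for cell in row.select("th,td")])

if __name__ == '__main__':
    for f in link:
        f2 = f.split('audit/')[-1].split('.html')[0]
        with open(f2 + '.csv', "w", newline="") as outfile:
            get_data(f, csv.writer(outfile))

Passing the writer in keeps get_data() self-contained, which makes it easier to test and reuse outside this script.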