Search code examples
pythoncsvweb-scrapingbeautifulsoupexport-to-csv

Web Scraping with Python - loop through list of URLs and convert to CSV


I have a list of URLs that I would like to scrape and save as CSVs on a local drive. I would also like to take a substring of each URL for the filename. This is the code I currently have, but it writes only the first URL's data, duplicated into 2 separate files.

import csv
import requests
from bs4 import BeautifulSoup

# NOTE(review): broken as posted -- `link =` on its own line followed by the
# list on the next line is a SyntaxError; the list must begin on the same line
# as the `=` (or be wrapped in parentheses).
link =
['https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_19.html',
        'https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_20.html']

def get_data(link):
    # BUG: iterates over the *entire* list of URLs on every call, so each
    # open output file receives rows scraped from every URL, not just its own.
    for url in link:
        res = requests.get(url)
        soup = BeautifulSoup(res.text,"lxml")

        # One row per <tr>; header (<th>) and data (<td>) cells handled alike.
        for items in soup.select("table.table tr"):
            td = [item.get_text(strip=True) for item in items.select("th,td")]
            # `writer` is not defined in this function -- it relies on the
            # global bound inside the `with` block of the main guard below.
            writer.writerow(td)

if __name__ == '__main__':
    for f in link:        
        f2 = f.split('audit/')[-1].split('.html')[0]   
        with open(f2 + '.csv',"w",newline="") as infile: 
            writer = csv.writer(infile)
            # BUG: passes the full `link` list instead of `f`, so get_data()
            # re-scrapes every URL into whichever file is currently open.
            get_data(link)

Solution

  • You don't need to loop over `link` again inside `get_data()`. You can just pass each URL to `get_data` from your main loop:

    import csv
    import requests
    from bs4 import BeautifulSoup
    
    link = ['https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_19.html',
            'https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_20.html']
    
    def get_data(url):
        """Fetch *url* and return its table rows as lists of cell strings.
    
        Returning the rows (instead of writing via a global ``writer``)
        keeps this function self-contained: the caller decides where the
        rows go.
        """
        res = requests.get(url)
        # Fail loudly on HTTP errors instead of silently scraping an error page.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "lxml")
        # One row per <tr>; header (<th>) and data (<td>) cells handled alike.
        return [[cell.get_text(strip=True) for cell in row.select("th,td")]
                for row in soup.select("table.table tr")]
    
    if __name__ == '__main__':
        for url in link:
            # e.g. ".../audit/Emergency_Department_19.html" -> "Emergency_Department_19"
            name = url.split('audit/')[-1].split('.html')[0]
            with open(name + '.csv', "w", newline="") as outfile:
                csv.writer(outfile).writerows(get_data(url))