Search code examples
Tags: python · list · web-scraping · export-to-csv

Scraped the &lt;li&gt; elements of a product detail page with Python's BeautifulSoup, but can't export them to CSV


I'm having trouble with a crawler script that I created on Python. I'm grabbing the features and specifications (which are bullet points) for a list of product urls:

    import csv
    import requests
    from bs4 import BeautifulSoup
    
    url = 'https://www.academy.com/shop/browse/footwear/womens-footwear/womens-work-boots?&page_{}'
    
    def trade_spider(max_pages):
        """Crawl listing pages 1..max_pages and collect one
        [name, features, specs] row per product link found on each page.

        NOTE(review): each detail page is fetched three times, once per
        helper — works, but is 3x the necessary traffic.
        """
        rows = []
        for page_no in range(1, max_pages + 1):
            listing_url = url.format(page_no)
            listing_soup = BeautifulSoup(requests.get(listing_url).text, 'html.parser')

            # Every product card anchors its own detail page.
            for anchor in listing_soup.findAll('a', {'class': 'product-detail-container'}):
                detail_url = "https://www.academy.com/" + anchor.get('href')
                rows.append([
                    get_single_item_data1(detail_url),
                    get_single_item_data5(detail_url),
                    get_single_item_data6(detail_url),
                ])
        return rows
    
    
    def get_single_item_data1(item_url):
        """Fetch a product detail page and return the product name string.

        Fix: uses ``soup.find`` (first match) instead of iterating
        ``findAll`` and returning from inside the loop — same result,
        idiomatic form — and returns None explicitly when the name div is
        absent rather than relying on falling off the loop.
        """
        source_code = requests.get(item_url)
        soup = BeautifulSoup(source_code.text, 'html.parser')

        item_name = soup.find('div', {'class': 'flex-wrap flex-fill'})
        if item_name is None:  # layout changed or request was blocked
            return None
        print('name:', item_name.string)
        return item_name.string
    
   def get_single_item_data5(item_url):
            source_code = requests.get(item_url)
            plain_text = source_code.text
            soup = BeautifulSoup(plain_text,'html.parser')
            
            for features in soup.findAll('li', {'data-auid':'feature-benefits-listing'}):
                print('features: ', features.string)
                return features.string
    
    def get_single_item_data6(item_url):
        """Fetch a product detail page and return ALL specification entries
        joined into one space-separated string.

        Bug fix: identical to the features helper — the original returned
        from inside the loop, so only the first specification <li> ever made
        it into the CSV; now every match is collected and joined.
        """
        source_code = requests.get(item_url)
        soup = BeautifulSoup(source_code.text, 'html.parser')

        specs = [li.get_text(strip=True)
                 for li in soup.findAll('li', {'data-auid': 'specifications_listing'})]
        print('specifications: ', ' '.join(specs))
        return ' '.join(specs)
    
    row_data = trade_spider(1)
    row_headers = ['name', 'features', 'specs']

    # newline='' stops the csv module from emitting blank rows on Windows,
    # and an explicit encoding keeps non-ASCII product names from crashing
    # the write.  (The stray trailing ``print()`` served no purpose.)
    with open('data.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(row_headers)
        writer.writerows(row_data)

The scraping works whenever I print out the results, but when it comes to saving them into a CSV file, only the first bullet point gets saved.

Desired sample output:

Name: Lace Up Work Boots

Features: Steel toes safeguard your feet Suede uppers provide durability 5.5" boot height Meets ASTM F2413-11 I/75 C/75 standards Goodyear welt construction EVA insoles offer cushioning

Specifications: Activity: Work Steel toe: Yes Safety Toe: Steel Gender: Women's Material: Suede Boot height (in.): 6" and Under Slip Resistant: Yes Waterproof: No Electrical hazard (EH) rated: No

But instead I'm only getting as of now:

Name: Lace Up Work Boots

Features: Steel toes safeguard your feet

Specifications: Activity: Work

Any help?


Solution

  • You can use this example how to grab data from the items and save them to CSV:

    import csv
    import requests
    from bs4 import BeautifulSoup
    
    
    url = "https://www.academy.com/shop/browse/footwear/womens-footwear/womens-work-boots?&page_{}"
    
    
    def grab_data(url):
        """Scrape one product page; return (title, features, specs).

        ``title`` is the <h1> text; ``features`` and ``specs`` are lists of
        stripped texts from the feature/spec listing elements.
        """
        page = requests.get(url)
        soup = BeautifulSoup(page.content, "html.parser")

        title = soup.h1.get_text(strip=True)

        features = []
        for node in soup.select('[data-auid="feature-benefits-listing"]'):
            features.append(node.get_text(strip=True))

        specs = []
        for node in soup.select('[data-auid="specifications_listing"]'):
            specs.append(node.get_text(strip=True))

        return title, features, specs
    
    
    # Fix: open the CSV with newline="" (required by the csv module to avoid
    # blank rows on Windows) and an explicit encoding so non-ASCII product
    # names survive.  Renamed the misleading ``f_in`` — it is an OUTPUT file.
    with open("data.csv", "w", newline="", encoding="utf-8") as f_out:

        csv_writer = csv.writer(f_out)
        csv_writer.writerow(["Name", "Features", "Specifications"])

        for page in range(1, 2):  # <-- adjust to your page count
            soup = BeautifulSoup(
                requests.get(url.format(page)).content, "html.parser"
            )
            for a in soup.select("a.detail-card"):
                u = "https://www.academy.com" + a["href"]
                title, feats, specs = grab_data(u)
                print(title)
                print("*** Features:")
                print(*feats, sep="\n")
                print("*** Specifications:")
                print(*specs, sep="\n")
                print("-" * 80)

                csv_writer.writerow([title, " ".join(feats), " ".join(specs)])
    

    Prints:

    Brazos Women's Tradesman Steel Toe Lace Up Work Boots
    *** Features:
    Steel toes safeguard your feet
    Suede uppers provide durability
    5.5" boot height
    Meets ASTM F2413-11 I/75 C/75 standards
    Goodyear welt construction
    EVA insoles offer cushioning
    *** Specifications:
    Activity:Work
    Steel toe:Yes
    Safety Toe:Steel
    Gender:Women's
    Material:Suede
    Boot height (in.):6" and Under
    Slip Resistant:Yes
    Waterproof:No
    Electrical hazard (EH) rated:No
    --------------------------------------------------------------------------------
    Fila Women's Memory Workshift Service Shoes
    *** Features:
    Slip-resistant rubber outsoles
    Synthetic uppers
    Low-top style
    EVA midsoles
    *** Specifications:
    Activity:Work
    Steel toe:No
    Safety Toe:Soft
    Gender:Women's
    Material:Man-made Materials
    Slip Resistant:Yes
    Waterproof:No
    Electrical hazard (EH) rated:No
    --------------------------------------------------------------------------------
    
    ...and so on.
    

    and saves data.csv (Screenshot from LibreOffice):

(Screenshot of the resulting data.csv not reproduced here.)