python python-3.x csv export-to-csv python-requests-html

Python CSV Writer only writing last scraped item processed

So my scraper is only sending the last two items to csv from last page it processed.I can not figure out where i am doing wrong it prints output perfectly fine.May be experience set of eyes able to help.

Code Below:

from requests_html import HTMLSession
import csv
import time


 def get_links(url):
    _request = _session.get(url)
    items = _request.html.find('li.product-grid-view.product.sale')
    links = []
    for item in items:
         links.append(item.find('a', first=True).attrs['href'])

   # print(len(links))

    return links


 def get_product(link):
     _request = _session.get(link)

      title = _request.html.find('h2', first=True).full_text
      price = _request.html.find('span.woocommerce-Price-amount.amount bdi')[1].full_text
      sku = _request.html.find('span.sku', first=True).full_text
      categories = _request.html.find('span.posted_in', first=True).full_text.replace('Categories:', "").strip()
      brand = _request.html.find('span.posted_in')[1].full_text.replace('Brand:', "").strip()
      #print(brand)

       product = {
         'Title': title,
         'Price': price,
         'SKU': sku,
         'Categories': categories,
         'Brand': brand
       }

    #print(product)
     return product


if __name__ == '__main__':
    for page in range(1, 4):

        url = 'https://www.thebassplace.com/product-category/basses/4-string/'
    
        if page == 1:
           parse_url = url
        else:
            parse_url = f'https://www.thebassplace.com/product-category/basses/4-string/page/{page}/'

       _session = HTMLSession()

        links = get_links(parse_url)
        results = []

        for link in links:
            results.append(get_product(link))
            time.sleep(1)
            #print(len(results))


with open('on_sale_bass.csv', 'w', newline='', encoding='utf-8') as csv_file:
    
    writer = csv.DictWriter(csv_file, fieldnames=results[0].keys())
    writer.writeheader()

    for row in results:
        writer.writerow(row)

when i try to append records are written in csv but headers are repeating for each page iteration.

Solution

The problem was in the statement results = [], inside the range loop. You emptied the results on each iteration of the range(1, 4) loop. Thus, you were getting only what the last iteration brought in.

Note, I made the _session as global, but in this case it would be reasonable, in my opinion (feel free to correct), to just pass it between functions. Now, try this out.

from requests_html import HTMLSession
import csv
import time


def get_links(url):
    global _session
    _request = _session.get(url)
    items = _request.html.find('li.product-grid-view.product.sale')
    links = []
    for item in items:
        links.append(item.find('a', first=True).attrs['href'])
    return links


def get_product(link):
    global _session
    _request = _session.get(link)
    title = _request.html.find('h2', first=True).full_text
    price = _request.html.find('span.woocommerce-Price-amount.amount bdi')[1].full_text
    sku = _request.html.find('span.sku', first=True).full_text
    categories = _request.html.find('span.posted_in', first=True).full_text.replace('Categories:', "").strip()
    brand = _request.html.find('span.posted_in')[1].full_text.replace('Brand:', "").strip()
    product = {
        'Title': title,
        'Price': price,
        'SKU': sku,
        'Categories': categories,
        'Brand': brand
    }
    return product


if __name__ == '__main__':
    results = []
    for page in range(1, 4):
        url = 'https://www.thebassplace.com/product-category/basses/4-string/'
        if page == 1:
            parse_url = url
        else:
            parse_url = f'https://www.thebassplace.com/product-category/basses/4-string/page/{page}/'
    
        _session = HTMLSession()
        links = get_links(parse_url)

        for link in links:
            product = get_product(link)
            results.append(product)
            #time.sleep(1)
            
    with open('on_sale_bass.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=results[0].keys())
        writer.writeheader()
        for row in results:
            writer.writerow(row)

What I get as an example: