Search code examples
pythonfunctionweb-scrapingscrapyweb-crawler

Scrapy NameError while running multiple functions


I'm trying to run the following code, but I'm getting this error: "NameError: name 'scrapedate' is not defined".

import scrapy
from datetime import datetime, timedelta
from dogscraper.items import DogItem

# Most recent race date to scrape, and how many days (counting backwards
# from that date, inclusive) should be covered.
racedate = '2024-01-25'
days = 2
realdate = datetime.strptime(racedate, '%Y-%m-%d').date()

# Dates to scrape as 'YYYY-MM-DD' strings, newest first.
scrape_list = [
    (realdate - timedelta(days=offset)).isoformat()
    for offset in range(days)
]

class DogspiderSpider(scrapy.Spider):
    """Spider that walks thedogs.com.au from date pages down to race pages.

    The callbacks chain as parse -> parse_date -> parse_meeting -> parse_race,
    each one following links found on the page it receives.
    """

    name = "dogspider"
    allowed_domains = ["www.thedogs.com.au"]
    # The crawl starts at the racing page for the module-level `racedate`.
    start_urls = ["https://www.thedogs.com.au/racing/"+racedate]

    def parse(self, response):
        """Yield one request per date in the module-level `scrape_list`."""
        # `scrapedate` is a local loop variable: it exists only inside this
        # method and is not attached to the requests yielded below.
        for scrapedate in scrape_list:
            next_dateurl = 'https://www.thedogs.com.au/racing/' + scrapedate
            yield response.follow(next_dateurl, callback=self.parse_date)


    def parse_date(self, response):
        """Yield one request per meeting link found on a date page."""
        # Only the first `table.meeting-grid` on the page is used
        # (presumably the NSW meetings, going by the variable name -- confirm).
        nswmeetings = response.css('table.meeting-grid')[0]
        nswmeetings = nswmeetings.css('td.meetings-venues__name')

        for meeting in nswmeetings:
            meeting_url = meeting.css('a::attr(href)').get()
            nextmeeting = 'https://www.thedogs.com.au' + meeting_url
            yield response.follow(nextmeeting, callback=self.parse_meeting)


    def parse_meeting(self, response):
        """Yield one request per race-result link found on a meeting page."""
        races = response.css('a.race-box.race-box--result')
        for race in races:
            race_url = race.css('a.race-box.race-box--result::attr(href)').get()
            nextrace = 'https://www.thedogs.com.au' + race_url
            yield response.follow(nextrace, callback=self.parse_race) 
      

    def parse_race(self, response):
        """Populate a DogItem for each runner row on a race page."""

        dogs = response.css('tr.accordion__anchor.race-runner')
        # A single DogItem is created here and reused for every row below.
        dog_item = DogItem()

        for dog in dogs:               

            # NOTE(review): `scrapedate` is not defined in this scope -- it is
            # the loop variable local to parse() -- so this line raises
            # NameError. The date has to travel with the requests (for example
            # through the request's meta dict) to be available here.
            dog_item['date'] = scrapedate

NameError: name 'scrapedate' is not defined

Essentially, I want to take each scrapedate from scrape_list in parse() and use it later in parse_race(), i.e. dog_item['date'] = scrapedate.


Solution

  • Thanks to @SIM.

    I was able to pass scrapedate from one callback to the next by attaching it to each request's meta dict:

        #...
    yield response.follow(next_dateurl, callback=self.parse_date, meta={'scrapedate' : scrapedate})
    

    then

    #...
    yield response.follow(nextmeeting, callback=self.parse_meeting, meta={'scrapedate' : response.meta['scrapedate']})
    #...
    
    yield response.follow(nextrace, callback=self.parse_race, meta={'scrapedate' : response.meta['scrapedate']})
    

    and I'm able to read it in parse_race with

    dog_item['date'] = response.meta['scrapedate']