I'm trying to run the following code but i'm getting this error 'NameError: name 'scrapedate' is not defined'
import scrapy
from datetime import datetime, timedelta
from dogscraper.items import DogItem
racedate = '2024-01-25'
days = 2
realdate = datetime.strptime(racedate, '%Y-%m-%d').date()
scrape_list = [(realdate - timedelta(days=x)).strftime('%Y-%m-%d') for x in range(days)]
class DogspiderSpider(scrapy.Spider):
name = "dogspider"
allowed_domains = ["www.thedogs.com.au"]
start_urls = ["https://www.thedogs.com.au/racing/"+racedate]
def parse(self, response):
for scrapedate in scrape_list:
next_dateurl = 'https://www.thedogs.com.au/racing/' + scrapedate
yield response.follow(next_dateurl, callback=self.parse_date)
def parse_date(self, response):
nswmeetings = response.css('table.meeting-grid')[0]
nswmeetings = nswmeetings.css('td.meetings-venues__name')
for meeting in nswmeetings:
meeting_url = meeting.css('a::attr(href)').get()
nextmeeting = 'https://www.thedogs.com.au' + meeting_url
yield response.follow(nextmeeting, callback=self.parse_meeting)
def parse_meeting(self, response):
races = response.css('a.race-box.race-box--result')
for race in races:
race_url = race.css('a.race-box.race-box--result::attr(href)').get()
nextrace = 'https://www.thedogs.com.au' + race_url
yield response.follow(nextrace, callback=self.parse_race)
def parse_race(self, response):
dogs = response.css('tr.accordion__anchor.race-runner')
dog_item = DogItem()
for dog in dogs:
dog_item['date'] = scrapedate
NameError: name 'scrapedate' is not defined
Essentially, i want to take the scrapedate in the scrape_list under def parse, and use it later on when running def parse_race, dog_item['date'] = scrapedate
Thanks to @SIM.
I was able to pass the scrapedate using meta
#...
yield response.follow(next_dateurl, callback=self.parse_date, meta={'scrapedate' : scrapedate})
then
#...
yield response.follow(nextmeeting, callback=self.parse_meeting, meta={'scrapedate' : response.meta['scrapedate']})
#...
yield response.follow(nextrace, callback=self.parse_race, meta={'scrapedate' : response.meta['scrapedate']})
and i'm able to call it with
dog_item['date'] = response.meta['scrapedate']