This spider crawls links formed by appending a unique product ID from a CSV file to a base URL, and scrapes the data from each resulting page.
When using the -O option to write the results to a CSV file, only the first result is written.
All of the items are selected and looped correctly and the correct results are shown in the output in the terminal, but only the first item is written to CSV.
Is it not possible to use if/elif/else in a callback like this? Do I need another `for x in response.xpath(...)` loop around the yield?
There is no mistake with the write path and output file
I tried several variations, including wrapping each yielded element in its own try/except, but the results are the same.
Please let me know if you have any advice to resolve this, thank you
import scrapy
import pandas as pd
def readcsv(path='tsuji2.csv'):
    """Return the values of the ``URL`` column of a CSV file as a list.

    Args:
        path: Location of the CSV file to read. Defaults to ``tsuji2.csv``
            resolved against the current working directory, which is what
            the function read before the parameter existed.

    Returns:
        A plain Python list with one entry per row of the ``URL`` column.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        KeyError: If the file has no ``URL`` column.
    """
    df = pd.read_csv(path)
    return df['URL'].values.tolist()
class TsujijsonSpider(scrapy.Spider):
    """Spider that requests one product page per entry returned by ``readcsv``.

    ``parse`` builds a product URL for every value from the CSV and schedules
    it; ``data`` extracts one text value from each product page.
    """

    name = 'tsujijson22'
    start_urls = ['https://www.example.co.jp/store/online/']

    def parse(self, response):
        """Schedule a request for every product ID listed in the CSV file."""
        # The URL template does not change between iterations, so build it once.
        base_url = 'https://www.example.co.jp/store/online/p/{}'
        for product_id in readcsv():
            yield scrapy.Request(base_url.format(product_id), callback=self.data)

    def data(self, response):
        """Yield a single-key dict holding the text of the matching span.

        The key and CSS selector depend on which marker character is found in
        a ``<span>`` on the page: '〇', then '△', otherwise a fallback.
        """
        # NOTE: each branch emits a different key. When FEED_EXPORT_FIELDS is
        # unset, Scrapy's CSV export takes its header from the first exported
        # item, so values stored under the other keys are not written. Use one
        # shared key in every branch to get a single, fully populated column.
        try:
            if response.xpath("//span[contains(text(), '〇')]").get():
                key, selector = 'zaiko green', 'span.green::text'
            elif response.xpath("//span[contains(text(), '△')]").get():
                key, selector = 'zaiko red', 'span.red:nth-of-type(1)::text'
            else:
                key, selector = 'zaiko red2', 'span.upperRight::text'
            value = response.css(selector).get()
        except Exception:
            # Keep the crawl going for other pages, but leave a trace instead
            # of silently discarding the failure.
            self.logger.exception('Failed to extract data from %s', response.url)
            return
        yield {key: value}
It seems that all yielded items must use the same key (column header name) for the CSV to be written correctly:
def data(self, response):
    """Yield one ``{'zaiko': text}`` item for the given product page.

    The CSS selector used to read the text depends on which marker character
    is found in a ``<span>`` on the page: '〇' is checked first, then '△',
    and a fallback selector is used when neither is present. Every branch
    stores its value under the same ``zaiko`` key so that all items share one
    CSV column.

    Args:
        response: The downloaded page; must provide ``xpath`` and ``css``
            selector methods.

    Yields:
        A dict with the single key ``zaiko``; the value is ``None`` when the
        chosen selector matches nothing.
    """
    try:
        if response.xpath("//span[contains(text(), '〇')]").get():
            selector = 'span.green::text'
        elif response.xpath("//span[contains(text(), '△')]").get():
            selector = 'span.red:nth-of-type(1)::text'
        else:
            selector = 'span.upperRight::text'
        stock = response.css(selector).get()
    except Exception:
        # Keep the crawl going for other pages, but leave a trace instead of
        # silently discarding the failure.
        self.logger.exception('Failed to extract zaiko from %s', response.url)
        return
    yield {'zaiko': stock}
This yields the correct results in a single column.