Search code examples
pythonscrapy

Scrapy passing a python list into item loader


I want to extract a list of image URLs and store them as a list using Scrapy item loaders.

I am able to get the image URLs as a list using the extract method, but only one image is being stored.

code:

def parseVictimData(self, response):
    """Populate an ItemLoader for a ``CaimItem`` from a listing detail page.

    Args:
        response: The page response. ``response.meta['listing_url']`` is
            read directly, so it must have been set on the request that
            scheduled this callback (a missing key raises ``KeyError``).
    """
    victim_loader = ItemLoader(item=CaimItem(), selector=response)
    victim_loader.add_value('listing_url', response.meta['listing_url'])
    # Every gallery href on the page, extracted as a list of strings.
    image_urls = response.xpath('//figure[contains(@data-fancybox,"gallery")]/@href').extract()
    victim_loader.add_value('victim_image_url', image_urls)
    # XPath requires a node test after `//`; `//[@id=...]` is rejected as an
    # invalid expression, so match any element with `*`.
    victim_loader.add_xpath('listing_title', '//*[@id="weldioo"]/section[1]/div[2]/div/div/div/h1')
    # NOTE(review): the excerpt ends here; the loader is not turned into an
    # item (no load_item() call) in the lines shown.

Desired output:

'image_urls': ['image1.jpg', 'image2.jpg', 'image3.jpg']


Solution

  • Try this one using MapCompose:

    from scrapy.loader import ItemLoader
    from scrapy.loader.processors import MapCompose
    
    class MySpider(scrapy.Spider):
        """Spider whose detail-page callback loads a ``CaimItem`` via an ItemLoader."""

        name = "my_spider"

        def parseVictimData(self, response):
            """Build and yield one ``CaimItem`` from a listing detail page.

            Args:
                response: The page response. ``response.meta['listing_url']``
                    is read directly, so it must have been set on the request
                    that scheduled this callback (a missing key raises
                    ``KeyError``).

            Yields:
                The item returned by the loader's ``load_item()``.
            """
            loader = ItemLoader(item=CaimItem(), selector=response)
            loader.add_value('listing_url', response.meta['listing_url'])
            # XPath requires a node test after `//`; `//[@id=...]` is rejected
            # as an invalid expression, so match any element with `*`.
            loader.add_xpath(
                'listing_title',
                '//*[@id="weldioo"]/section[1]/div[2]/div/div/div/h1',
            )
            # add_xpath runs the query itself, so no separate response.xpath()
            # call is needed; MapCompose applies str.strip to each matched href.
            # NOTE(review): whether the field ends up holding the full list
            # depends on the output processor declared for it on CaimItem / the
            # loader (e.g. TakeFirst would keep only the first value) -- confirm.
            loader.add_xpath(
                'victim_image_url',
                '//figure[contains(@data-fancybox,"gallery")]/@href',
                MapCompose(str.strip),
            )
            yield loader.load_item()