I am trying to scrape data from a website. Everything seems to be correct, and I tested the XPath expressions in the Scrapy shell.
# -*- coding: utf-8 -*-
from scrapy.contrib.spiders import CrawlSpider
# NOTE(review): the indentation of this snippet was lost when it was pasted, so
# it is not valid Python as shown and the intended nesting cannot be read off
# the text; the comments below describe each statement on its own.
# Spider class for the kabum.com.br start page.
class KabumspiderSpider(CrawlSpider):
# Spider name (presumably the identifier used with `scrapy crawl` -- confirm).
name = "kabumspider"
# NOTE(review): Scrapy's documented attribute is `allowed_domains` (plural);
# with this spelling the domain filter is presumably not applied -- confirm.
allowed_domain = ["www.kabum.com.br"]
# URL(s) the crawl starts from.
start_urls = ["https://www.kabum.com.br"]
# Generator that yields one dict per (category name, link) pair in `response`.
def parse(self, response):
# Text nodes of every <a> directly under a <p class="bot-categoria">.
categorias = response.xpath('//p[@class = "bot-categoria"]/a/text()').extract()
# href attributes selected with the same <p>/<a> path.
links = response.xpath('//p[@class = "bot-categoria"]/a/@href').extract()
# Names and links are paired purely by position; zip() stops at the shorter
# list, so an anchor without text would shift the pairing.
for categoria in zip(categorias, links):
info = {
'categoria': categoria[0],
'link': categoria[1],
}
yield info
However, the output is just:
[
What is wrong with my code?
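I ran the scraper and it runs fine for me. The only issue I found is that your `parse` method is defined outside the class; it must be indented so that it is part of the class body, otherwise Scrapy never calls it.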
# -*- coding: utf-8 -*-
from scrapy.contrib.spiders import CrawlSpider
class KabumspiderSpider(CrawlSpider):
    """Spider that collects the category names and links from kabum.com.br.

    Each scraped item is a dict with two keys: ``'categoria'`` (the anchor
    text) and ``'link'`` (the anchor's ``href``).
    """

    # NOTE(review): the file imports CrawlSpider from ``scrapy.contrib.spiders``,
    # a deprecated path; newer Scrapy releases expose it as ``scrapy.spiders``
    # -- confirm against the installed version.
    # NOTE(review): the Scrapy docs advise against overriding ``parse`` on a
    # CrawlSpider because CrawlSpider uses it to apply its rules. No rules are
    # defined here, so a plain ``Spider`` base class is presumably what is
    # wanted -- confirm.
    name = "kabumspider"
    # Scrapy reads ``allowed_domains`` (plural); the original singular
    # ``allowed_domain`` is not an attribute Scrapy looks at, so the off-site
    # filter was never applied.
    allowed_domains = ["www.kabum.com.br"]
    start_urls = ["https://www.kabum.com.br"]

    def parse(self, response):
        """Yield one item per category anchor found in ``response``.

        Args:
            response: the downloaded page; only its ``xpath()`` method is used.

        Yields:
            dict: ``{'categoria': <anchor text>, 'link': <anchor href>}``.
        """
        categorias = response.xpath(
            '//p[@class = "bot-categoria"]/a/text()'
        ).extract()
        links = response.xpath(
            '//p[@class = "bot-categoria"]/a/@href'
        ).extract()
        # Names and links are paired by position; zip() stops at the shorter
        # list if the two queries return different numbers of results.
        for categoria, link in zip(categorias, links):
            yield {
                'categoria': categoria,
                'link': link,
            }