Search code examples
pythonweb-crawlerattributeerrorscrapy

Myspider object has no atribute parse_item


I am new to scrapy, I have a base spider, similar to the example below:

class MySpider(scrapy.Spider):
    name = 'myspider'
    allowed_domains = ['example.com'] #the domain where the spider is allowed to crawl
    start_urls = ['http://www.example.com/content/'] #url from which the spider will start crawling
    page_incr = 1
    flag = 0

    def parse(self, response):
            sel=Selector(response)
            stuffs = sel.xpath('//a/@href')
            for stuff in stuffs:
                link = stuff.extract()
                req1 = Request(url=link, callback=self.parse_item)
                yield req1

            url = 'http://www.example.com/content/?q=ajax//date/%d&page=%d' % (self.page_incr, self.page_incr)
            req2 = Request(url=url,
                          headers={"Referer": "http://www.example.com/content", "X-Requested-With": "XMLHttpRequest"},
                          callback=self.parse_xhr)
            yield req2

    def parse_xhr(self, response):
            sel=Selector(response)
            stuffs = sel.xpath('//a/@href')
            for stuff in stuffs:
                link = stuff.extract()
                yield Request(url=link, callback=self.parse_item)

            content = sel.xpath('//a/@href').extract()
            if content == []:
                self.flag +=1
                if self.flag == 5:
                    raise CloseSpider('WARNING: <Spider forced to stop>')
            else:
                self.flag = 0

            self.page_incr +=1
            url = 'http://www.example.com/content/?q=ajax//date/%d&page=%d' % (self.page_incr, self.page_incr)
            req3 = Request(url=url,
                      headers={"Referer": "http://www.example.com/content", "X-Requested-With": "XMLHttpRequest"},
                      callback=self.parse_xhr)
            yield req3

     def parse_item(self, response):
            pass

When I try to set it to crawl there is an error, this:

line 24, in parse
        req1 = Request(url=link, callback=self.parse_item)
    exceptions.AttributeError: 'MySpider' object has no attribute 'parse_item'

I am not getting it... Please help me seeing what is wrong! Thanks for your time and help.


Solution

  • Your parse_item() method is incorrectly indented (with 5 spaces instead of 4).