Scrapy-Splash: dynamic scraping with CrawlSpider

I am trying to scrape data from a React-based website, but when I use CrawlSpider the pages it follows are not rendered. For example, my first URL is fetched through Splash and parsed correctly, but the other URLs are fetched as regular requests, without the dynamic content.

This is my code:

# CrawlSpider subclass that sends its first request through Splash and
# overrides link following so links are extracted against the original page
# URL kept in meta['real_url'].
# NOTE(review): this snippet is truncated in several places (flagged inline)
# and is not valid Python as posted.
class PageSpider(CrawlSpider):
  # NOTE(review): host is an empty string here, so allowed_domains is ['']
  # and the Rule's `allow` pattern below is empty -- presumably redacted for
  # the post; confirm the real value.
  host = ''
  protocol = 'https'
  root_domain = ''
  name = 'page'
  allowed_domains = [host]
  #start_urls = [f'{protocol}://{host}',]

  # Yields the single initial request as a SplashRequest and stores the
  # target URL in meta['real_url'] as well.
  def start_requests(self):
        # NOTE(review): the second placeholder `{}` is empty, which is a
        # syntax error in an f-string -- presumably `{self.host}`; confirm.
        url = f'{self.protocol}://{}'
        yield SplashRequest(url, dont_process_response=True, args={'wait':  1}, meta={'real_url': url})

  # NOTE(review): this dict is never closed in the snippet.
  custom_settings = {
    #'DEPTH_LIMIT': 9,

  # NOTE(review): the tuple and the Rule(...) call are never closed in the
  # snippet. As shown, the Rule is given no `process_request` argument, so
  # nothing visible here wires `splash_request` into link following.
  rules = (
        # Rule(LinkExtractor(allow=('node_\d+\.htm',)), follow=True),
        Rule(LinkExtractor(allow=(host),deny=('\.webp', '\.js', '\.css', '\.jpg', '\.png'),unique=True),

  # Records the request's URL in meta['real_url'] and returns the very same
  # request object (it does not build a SplashRequest).
  def splash_request(self, request):
      request.meta['real_url'] = request.url
      return request

  # Extracts links for every rule from a copy of the response whose URL is
  # replaced by meta['real_url'], then yields one processed request per link.
  def _requests_to_follow(self, response):
      # NOTE(review): this `if` has no body in the snippet -- presumably an
      # early `return` for non-HTML responses; confirm.
      if not isinstance(response, HtmlResponse):
      # NOTE(review): `seen` is never added to in the visible code, so the
      # `lnk not in seen` filter below never excludes anything.
      seen = set()
      # Swap in the original page URL; meta.get() yields None when the
      # 'real_url' key is absent.
      newresponse = response.replace(url=response.meta.get('real_url'))
      for n, rule in enumerate(self._rules):
          links = [lnk for lnk in rule.link_extractor.extract_links(newresponse)
                  if lnk not in seen]
          if links and rule.process_links:
              links = rule.process_links(links)
          for link in links:
              r = self._build_request(n, link)
              # Called with the request only (single-argument form).
              yield rule.process_request(r)

  # Yields one PageLevelItem carrying the response URL as 'page_source_url'
  # when the condition below holds.
  def parse(self,response):
    # NOTE(review): the condition is garbled/truncated in the snippet (the
    # LinkExtractor arguments and the expression being measured are missing).
    if len(LinkExtractor(deny = > 0:
      loader = ItemLoader(item=PageLevelItem(), response=response)
      loader.add_value('page_source_url', response.url)
      yield loader.load_item()


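  • The code below worked for me: have `splash_request` return a `SplashRequest` instead of the plain request, so that followed links are rendered by Splash as well: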

    def splash_request(self, request, response=None):
        """Re-issue a followed link as a ``SplashRequest``.

        Presumably wired in as a ``Rule`` ``process_request`` hook (confirm
        against the spider's ``rules``).

        Args:
            request: The request built for an extracted link.
            response: The response the link was extracted from. Unused; it
                is optional so the hook works both when called with the
                request alone and when called with ``(request, response)``.

        Returns:
            A new ``SplashRequest`` for ``request.url`` whose meta carries
            the target URL under ``'real_url'``.
        """
        # Keep the target URL on the original request as well, as before.
        request.meta['real_url'] = request.url
        real_url = request.meta['real_url']
        return SplashRequest(
            real_url,
            dont_process_response=True,
            args={'wait': 0},
            meta={'real_url': real_url},
        )