import scrapy
from scrapy.selector import HtmlXPathSelector
from scrapy.http.request import Request
class SunBizSpider(scrapy.Spider):
    """Crawl the Florida Division of Corporations search (search.sunbiz.org).

    Starts from an alphabetical entity-name search, follows the "Next List"
    pagination link, and visits each corporation detail page to yield an item
    with the entity name, a date found on the page, and the page URL.
    """

    name = 'sunbiz'
    start_urls = ['http://search.sunbiz.org/Inquiry/CorporationSearch/SearchResults?inquiryType=EntityName&searchNameOrder=A&searchTerm=a']

    def parse(self, response):
        """Parse a results page: queue the next page and every detail link."""
        # Follow the "Next List" pagination link, if one exists.  The original
        # code joined *all* matched hrefs with ', ' and yielded a request even
        # when no link was found (requesting the bare site root) -- both bugs.
        next_hrefs = response.xpath(
            "//div[@class='navigationBar'][1]//a[@title='Next List']/@href"
        ).extract()
        if next_hrefs:
            yield scrapy.Request(response.urljoin(next_hrefs[0]),
                                 callback=self.parse)

        # Queue every corporation detail page listed in the results table.
        for href in response.css('.large-width a::attr(href)'):
            yield scrapy.Request(response.urljoin(href.extract()),
                                 callback=self.parse_biz)

    def parse_biz(self, response):
        """Parse a corporation detail page into a Name/Date/Link item."""
        # Matches MM-DD-YYYY style dates (separators - : / .), years 1xxx-2xxx.
        re1 = '((?:[0]?[1-9]|[1][012])[-:\\/.](?:(?:[0-2]?\\d{1})|(?:[3][01]{1}))[-:\\/.](?:(?:[1]{1}\\d{1}\\d{1}\\d{1})|(?:[2]{1}\\d{3})))(?![\\d])'  # MMDDYYYY 1
        date = response.xpath('//span').re_first(re1)

        # The original indexed extract()[1] unconditionally, which raises
        # IndexError on a page with fewer than two text nodes; guard so a
        # malformed page yields None for 'Name' instead of crashing the spider.
        names = response.css('.corporationName span::text').extract()
        yield {
            'Name': names[1] if len(names) > 1 else None,
            'Date': date,
            'Link': response.url,
        }
The regular expression would most likely also match records containing the words "INACT" and "CROSS REF". As highlighted above, words such as "INACT", "NAME HS", and "CROSS RF" are the items I want the crawler to check for, and it should do nothing with a record that contains those words.
You could use an XPath selector to check the inner text. For example, to get all the `td` elements whose inner text is exactly "Active", use something like:

response.xpath('//td[text()="Active"]')

The same applies to the other strings. If you only want to match part of the string, you could instead use:

response.xpath('//td[contains(text(), "Activ")]')