ruby-on-rails ruby elasticsearch chewy-gem

Elasticsearch query not returning correct data

I am using the Chewy gem to integrate Elasticsearch into my Rails project. I set up an index for a model called Listing, along with a search interface that uses the Chewy ES DSL.

listings_index.rb

# Chewy index for listings. Documents are built from the `Listing.available`
# scope, with `listing_images` and `user` eager-loaded for indexing.
class ListingsIndex < Chewy::Index
  # Custom `exact` analyzer: keyword tokenizer followed by a lowercase filter.
  settings analysis: {
    analyzer: {
      exact: {
        tokenizer: 'keyword',
        filter: ['lowercase']
      }
    }
  }

  define_type Listing.available.includes(:listing_images, :user) do
    field :id, type: 'integer'
    field :listing_type, analyzer: 'exact'
    field :status, analyzer: 'exact'
    field :bedrooms, type: 'integer'
    field :price, type: 'integer'
    field :tenant_fee, type: 'integer'
    field :neighborhood_id, type: 'integer'
    field :bathrooms, type: 'float'
    field :lat, type: 'float'
    field :lng, type: 'float'
    field :available_date, type: 'date'
    field :full_address, type: 'text'
    field :title, type: 'text'
    # last_active_at on the User model is of type date. The explicit 'date'
    # type matters: without it the field is mapped as text, and Elasticsearch
    # refuses to sort on text fields ("Fielddata is disabled on text fields").
    # Existing indices must be rebuilt for the new mapping to take effect.
    field :user_last_active_at, type: 'date', value: ->(listing) { listing.user.last_active_at }
    field :street, value: ->(listing) { listing.street }
    field :listing_images do
      field :image, type: 'object'
    end
    # Combines the listing's lat/lng into a single geo_point value.
    field :coordinates, type: 'geo_point', value: -> { { lat: lat, lon: lng } }
  end
end

listing_search.rb

# Form-style search object that turns user-supplied criteria into a Chewy
# query against ListingsIndex. Each `*_filter` method returns either a query
# scope or nil (criterion not supplied); `search` merges the non-nil ones.
class ListingSearch
  include ActiveData::Model

  # Placeholder strings the price inputs carry when no bound was chosen.
  PRICE_MIN_PLACEHOLDER = 'Min $'.freeze
  PRICE_MAX_PLACEHOLDER = 'Max $'.freeze

  attribute :bedrooms, type: Integer
  attribute :listing_type, type: String
  attribute :price_min, type: String
  attribute :price_max, type: String
  attribute :date, type: String
  attribute :neighborhoods, type: Array

  # The Chewy index every filter is built on.
  def index
    ListingsIndex
  end

  # Builds the final query by merging every applicable scope.
  # Filters that return nil are dropped before merging.
  def search
    [base_filter, neighborhood_ids_filter,
     price_filter, date_filter, bed_filter, apt_type_filter, sorting].compact.reduce(:merge)
  end

  # Orders results by the owner's last activity, most recent first.
  def sorting
    index.order(user_last_active_at: :desc)
  end

  # Always applied: only listings with status 'available', capped at 4000 hits.
  def base_filter
    index.filter(term: { status: 'available' }).limit(4000)
  end

  # Maps the listing_type code to an indexed value: '1' => "full",
  # '0' => "share". Any other value (including nil/blank) yields no filter.
  #
  # A `case` is used so that the matching branch is the method's return value.
  # With two consecutive `if` statements the method returns the result of the
  # last one, which is nil whenever listing_type is '1', silently dropping
  # the "full" filter.
  def apt_type_filter
    case listing_type
    when '1' then index.filter(term: { listing_type: 'full' })
    when '0' then index.filter(term: { listing_type: 'share' })
    end
  end

  # Exact match on the number of bedrooms, when supplied.
  def bed_filter
    return unless bedrooms.present?

    index.filter(term: { bedrooms: bedrooms.to_i })
  end

  # Listings available on or after the start of the parsed date span.
  # Returns nil when no date was given or when Chronic cannot parse it
  # (Chronic.parse returns nil for unparseable input).
  def date_filter
    return unless date.present?

    span = Chronic.parse(date, guess: false)
    return unless span

    index.filter(range: { available_date: { gte: span.first } })
  end

  # Range filter on price. A bound is applied only when its attribute is
  # present and is not the form placeholder; bounds are always sent as
  # integers. Returns nil when neither bound applies.
  def price_filter
    bounds = {}
    bounds[:gte] = price_min.to_i if price_min? && price_min != PRICE_MIN_PLACEHOLDER
    bounds[:lte] = price_max.to_i if price_max? && price_max != PRICE_MAX_PLACEHOLDER

    index.filter(range: { price: bounds }) if bounds.present?
  end

  # Restricts results to the given neighborhood ids, when any were supplied.
  def neighborhood_ids_filter
    index.filter(terms: { neighborhood_id: neighborhoods }) if neighborhoods?
  end
end

The first problem is the filter apt_type_filter. It doesn't return the correct data.

The second problem is when I sort the data using the sorting method I get a ES BadRequest error:

Elasticsearch::Transport::Transport::Errors::BadRequest: [400] {"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"Fielddata is disabled on text fields by default. Set fielddata=true on [user_last_active_at] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead."}],"type":"search_phase_execution_exception","reason":"all shards failed","phase":"query","grouped":true,"failed_shards":[{"shard":0,"index":"listings","node":"IYxQCcHESTWOaitD9XtDFA","reason":{"type":"illegal_argument_exception","reason":"Fielddata is disabled on text fields by default. Set fielddata=true on [user_last_active_at] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead."}}]},"status":400}

Here's the output of Chewy's index Query object:

<ListingsIndex::Query {:index=>["listings"], :type=>["listing"], :body=>{:size=>4000, :query=>{:bool=>{:filter=>[{:bool=>{:must=>[{:bool=>{:must=>[{:bool=>{:must=>[{:term=>{:status=>"available"}}, {:terms=>{:neighborhood_id=>["45"]}}]}}, {:range=>{:price=>{:gte=>800, :lte=>3000}}}]}}, {:range=>{:available_date=>{:gte=>2018-02-01 00:00:00 +0100}}}]}}, {:term=>{:bedrooms=>1}}]}}}}>

Any help would be amazing. Thanks.

Solution

Mohammad gave me good insight into the problem. To resolve it I changed two things:

First, I had not told ES how user_last_active_at should be indexed, so I specified the field's type explicitly:

field :user_last_active_at, type: 'date', value: ->(listing) { listing.user.last_active_at }

As for the listing_type field, I believe the problem was that ES was tokenizing the field value (splitting it into its individual characters), whereas my goal was to search on the full field value. I used the keyword analyzer so that the whole value is searchable as a single term:

field :listing_type, analyzer: 'keyword'