I am using the Chewy gem to integrate Elasticsearch into my Rails project.
I set up the index for a model called (Listing
) and my search interface where I use the Chewy es dsl.
listings_index.rb
class ListingsIndex < Chewy::Index
settings analysis: {
analyzer: {
exact: {
tokenizer: 'keyword',
filter: ['lowercase']
}
}
}
define_type Listing.available.includes(:listing_images, :user) do
field :id, type: 'integer'
field :listing_type, analyzer: 'exact'
field :status, analyzer: 'exact'
field :bedrooms, type: 'integer'
field :price, type: 'integer'
field :tenant_fee, type: 'integer'
field :neighborhood_id, type: 'integer'
field :bathrooms, type: 'float'
field :lat, type: 'float'
field :lng, type: 'float'
field :available_date, type: 'date'
field :full_address, type: 'text'
field :title, type: 'text'
field :user_last_active_at, value: ->(listing) { listing.user.last_active_at } # last_active_at on the User model is of type date
field :street, value: ->(listing) { listing.street }
field :listing_images do
field :image, type: 'object'
end
field :coordinates, type: 'geo_point', value: ->{ { lat: lat, lon: lng } }
end
end
listing_search.rb
class ListingSearch
include ActiveData::Model
attribute :bedrooms, type: Integer
attribute :listing_type, type: String
attribute :price_min, type: String
attribute :price_max, type: String
attribute :date, type: String
attribute :neighborhoods, type: Array
def index
ListingsIndex
end
def search
[base_filter, neighborhood_ids_filter,
price_filter, date_filter, bed_filter, apt_type_filter, sorting].compact.reduce(:merge)
end
def sorting
index.order({ user_last_active_at: :desc})
end
def base_filter
index.filter(term: {status: 'available'}).limit(4000)
end
def apt_type_filter
if !listing_type.blank? && listing_type =~ /\d/
if listing_type == '1'
index.filter(term: { listing_type: "full" })
end
if listing_type == '0'
index.filter(term: { listing_type: "share" })
end
end
end
def bed_filter
return unless bedrooms.present?
index.filter(term: { bedrooms: bedrooms.to_i })
end
def date_filter
return unless date.present?
parse_date = Chronic.parse(date, {:guess => false}).first
body = {}.tap do |body|
body.merge!(gte: parse_date) if date?
end
index.filter(range: {available_date: body}) if body.present?
end
def price_filter
return if price_min == 'Min $' && price_max == 'Max $'
if price_min != 'Min $' && price_max != 'Max $'
body = {}.tap do |body|
body.merge!(gte: price_min.to_i) if price_min?
body.merge!(lte: price_max.to_i) if price_max?
end
elsif price_min == 'Min $' && price_max != 'Max $'
body = {}.tap do |body|
body.merge!(lte: price_max) if price_max?
end
elsif price_min != 'Min $' && price_max == 'Max $'
body = {}.tap do |body|
body.merge!(gte: price_min) if price_min?
end
end
index.filter(range: {price: body}) if body.present?
end
def neighborhood_ids_filter
index.filter(terms: {neighborhood_id: neighborhoods}) if neighborhoods?
end
end
The first problem is the filter apt_type_filter
. It doesn't return the correct data.
The second problem is when I sort the data using the sorting
method I get a ES BadRequest error:
Elasticsearch::Transport::Transport::Errors::BadRequest: [400] {"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"Fielddata is disabled on text fields by default. Set fielddata=true on [user_last_active_at] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead."}],"type":"search_phase_execution_exception","reason":"all shards failed","phase":"query","grouped":true,"failed_shards":[{"shard":0,"index":"listings","node":"IYxQCcHESTWOaitD9XtDFA","reason":{"type":"illegal_argument_exception","reason":"Fielddata is disabled on text fields by default. Set fielddata=true on [user_last_active_at] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead."}}]},"status":400}
Here's the output of Chewy's index Query
object:
<ListingsIndex::Query {:index=>["listings"], :type=>["listing"], :body=>{:size=>4000, :query=>{:bool=>{:filter=>[{:bool=>{:must=>[{:bool=>{:must=>[{:bool=>{:must=>[{:term=>{:status=>"available"}}, {:terms=>{:neighborhood_id=>["45"]}}]}}, {:range=>{:price=>{:gte=>800, :lte=>3000}}}]}}, {:range=>{:available_date=>{:gte=>2018-02-01 00:00:00 +0100}}}]}}, {:term=>{:bedrooms=>1}}]}}}}>
Any help would be amazing. Thanks.
Mohammad gave me good insight into the problem. To resolve it I changed two things:
First, I did not indicate to ES how user_last_active_at
should be indexed so I specified the type of field to be indexed like so:
field :user_last_active_at, type: 'date', value: ->(listing) { listing.user.last_active_at }
As for the listing_type
field believe the problem was that ES was tokenizing the field value (splitting it into it's individual character). Rather my goal was to search on the full field value. Using the keyword
to make it searchable
field :listing_type, analyzer: 'keyword'