Search code examples
elasticsearch

Restrict nested data in result of search


I have an index that stores tasks to do. Each document contains a user id, a task id, and nested data that stores the timers recording when the user started and finished working on the task.

In Elasticsearch 6 I used a script that searched for tasks and timers in a specific period, and it returned only the timers that fell within that period.

However, I recently migrated to Elasticsearch 8 and the script no longer works as before: it now returns all tasks that have a timer in the period, but it does not restrict the result to just the timers in that period.

The tasks are returned correctly, but so are all the timers within it.

How can I make the script return only timers that match the filter?

Script:

{
  "size": 1024,
  "track_total_hits": true,
  "query": {
    "bool": {
      "must": [
        { "term": { "user_id": 123 } },
        {
          "nested": {
            "path": "timer.content",
            "query": {
              "bool": {
                "must": {
                  "range": {
                    "timer.content.start_work": {
                      "gte": "2023-08-26T00:00:00-0300",
                      "lte": "2023-09-26T23:59:59-0300"
                    }
                  }
                }
              }
            }
          }
        },
        { "terms": { "user_status": ["E", "I"] } }
      ]
    }
  },
  "sort": { "task_id": { "order": "desc" } },
  "_source": []
}

Index Structure:

{
  "settings": {
    "analysis": {
      "analyzer": {
        "ignoreaccents": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "task_id": { "type": "long" },
      "user_id": { "type": "long", "index": true },
      "user_status": { "type": "keyword", "index": true },
      "timer": {
        "type": "object",
        "properties": {
          "content": {
            "type": "nested",
            "properties": {
              "timer_id": { "type": "long", "index": true },
              "task_id": { "type": "long", "index": true },
              "user_id": { "type": "long", "index": true },
              "start_work": {
                "type": "date",
                "index": true,
                "format": "strict_date_time_no_millis"
              },
              "end_work": {
                "type": "date",
                "index": true,
                "format": "strict_date_time_no_millis"
              }
            }
          }
        }
      }
    }
  }
}

Solution

  • You need to add inner_hits to the nested query; the matching timers are then returned in each hit's inner_hits section. You can also optionally exclude timer.content from the returned _source using excludes in the _source option. Here is an example:

    # Start from a clean slate: drop the "test" index, then recreate it with the
    # same settings and mapping as the index in the question.
    DELETE test
    PUT test
    {
      "settings": {
        "analysis": {
          "analyzer": {
            "ignoreaccents": {
              "type": "custom",
              "tokenizer": "standard",
              "filter": ["lowercase", "asciifolding"]
            }
          }
        }
      },
      "mappings": {
        "properties": {
          "task_id": { "type": "long" },
          "user_id": { "type": "long", "index": true },
          "user_status": { "type": "keyword", "index": true },
          "timer": {
            "type": "object",
            "properties": {
              "content": {
                "type": "nested",
                "properties": {
                  "timer_id": { "type": "long", "index": true },
                  "task_id": { "type": "long", "index": true },
                  "user_id": { "type": "long", "index": true },
                  "start_work": {
                    "type": "date",
                    "index": true,
                    "format": "strict_date_time_no_millis"
                  },
                  "end_work": {
                    "type": "date",
                    "index": true,
                    "format": "strict_date_time_no_millis"
                  }
                }
              }
            }
          }
        }
      }
    }

    # Index one sample task holding two timers: timer 1 starts on 2023-06-01 and
    # timer 2 on 2023-09-01. ?refresh makes the document searchable right away.
    PUT test/_bulk?refresh
    {"index": {}}
    {"user_id": 123, "user_status": "E", "timer": {"content": [{"timer_id": 1, "start_work": "2023-06-01T00:00:00-0300"}, {"timer_id": 2, "start_work": "2023-09-01T00:00:00-0300"}]}}
    
    # Find the tasks of user 123 (status E or I) that have at least one timer
    # started inside the period, and return ONLY the timers that matched:
    #   - inner_hits on the nested query lists the matching nested timers for each
    #     task under hits.hits[].inner_hits["timer.content"].
    #   - inner_hits returns just 3 entries per task unless "size" is set, so it is
    #     raised to 100 here, the default ceiling (index.max_inner_result_window).
    #     Raise that index setting as well if a task can hold more matching timers.
    #   - _source.excludes drops the complete, unfiltered timer list from each hit,
    #     so the timers appear only in their filtered form.
    GET test/_search
    {
      "size": 1024,
      "track_total_hits": true,
      "query": {
        "bool": {
          "must": [
            {
              "term": {
                "user_id": 123
              }
            },
            {
              "nested": {
                "path": "timer.content",
                "query": {
                  "bool": {
                    "must": {
                      "range": {
                        "timer.content.start_work": {
                          "gte": "2023-08-26T00:00:00-0300",
                          "lte": "2023-09-26T23:59:59-0300"
                        }
                      }
                    }
                  }
                },
                "inner_hits": {
                  "size": 100
                }
              }
            },
            {
              "terms": {
                "user_status": [
                  "E",
                  "I"
                ]
              }
            }
          ]
        }
      },
      "sort": {
        "task_id": {
          "order": "desc"
        }
      },
      "_source": {
        "excludes": [
          "timer.content"
        ]
      }
    }