Search code examples
elasticsearch elasticsearch-6

Elasticsearch copy_to: data needs to be copied into its own subdocument


Thanks in advance for helping.

I have created an ES mapping as follows:

{"mappings": {
            "policy": {
                "properties": {
                    "name": {
                        "type": "text",
                        "fields": {
                            "keyword": {
                                "type": "keyword",
                                "ignore_above": 256
                            }
                        }
                    },
                    "tags": {
                        "properties": {
                            "scope": {
                                "type": "text",
                                "store": "true",
                                "copy_to": [
                                    "tags.tag_scope"
                                ]
                            },
                            "tag": {
                                "type": "text",
                                "store": "true",
                                "copy_to": [
                                    "tags.tag_scope"
                                ]
                            },
                            "tag_scope": {
                                "type": "text",
                                "store": "true"
                            }
                        }
                    }
                }
            }
        }

    }

When I index a policy document, all the tag and scope values from the different tags sub-documents are copied into a single tag_scope property.

For example, I added the following document to Elasticsearch:

{
                    "name": "policy1",
                    "tags": [
                        {
                            "tag": "pepsi",
                            "scope": "prod"
                        },
                        {
                            "tag": "coke",
                            "scope": "dev"
                        }
                    ]
                }

It stores all 4 values together in tag_scope as:

"tags.tag_scope": [ "pepsi", "prod", "coke", "dev" ]

My expectation was that it should be stored like this:

 {
                        "name": "policy1",
                        "tags": [
                            {
                                "tag": "pepsi",
                                "scope": "prod",
                                 "tag_scope" : ["pepsi","prod"]
                            },
                            {
                                "tag": "coke",
                                "scope": "dev",
                                 "tag_scope" : ["coke","dev"]
                            }
                        ]
                    }

Could you please help me create the correct mapping for this?


Solution

  • What you are looking for is the nested datatype. Change your mapping to the following:

    PUT <your_index_name>
    {  
       "mappings":{  
          "policy":{ 
             "properties":{  
                "name":{  
                   "type":"text",
                   "fields":{  
                      "keyword":{  
                         "type":"keyword",
                         "ignore_above":256
                      }
                   }
                },
                "tags":{  
                   "type": "nested", 
                   "properties":{  
                      "scope":{  
                         "type":"text",
                         "store":"true",
                         "copy_to":[  
                            "tags.tag_scope"
                         ]
                      },
                      "tag":{  
                         "type":"text",
                         "store":"true",
                         "copy_to":[  
                            "tags.tag_scope"
                         ]
                      },
                      "tag_scope":{  
                         "type":"text",
                         "store":"true",
                         "fields": {                <---- Added this
                           "keyword": {
                              "type": "keyword"
                           }
                         }
                      }
                   }
                }
             }
          }
       }
    }
    

    Notice that I've made tags a nested type. This allows each object like the one below to be stored as its own individual document; in your case, tags contains two such nested documents.

    {  
       "tag":"coke",
       "scope":"dev"
    }
    

    Now your tags.tag_scope should be what you are expecting it to be.

    When it comes to querying for what you are looking for, the nested query below shows how it should be written.

    Nested Query:

    POST <your_index_name>/_search
    {
      "query": {
        "bool": {
          "must": [
            {
              "nested": {
                "path": "tags",
                "query": {
                  "bool": {
                    "must": [
                      {
                        "match": {
                          "tags.tag_scope": "pepsi"
                        }
                      },
                      {
                        "match": {
                          "tags.tag_scope": "prod"
                        }
                      }
                    ]
                  }
                }
              }
            }
          ]
        }
      }
    }
    

    To return the list of unique tags.tag_scope values, you need to use an aggregation query. Notice that I've set size: 0, which means I only want to see the aggregation result and not the normal query hits.

    Aggregation Query:

    POST <your_index_name>/_search
    {  
       "size":0,
       "query":{  
          "bool":{  
             "must":[  
                {  
                   "nested":{  
                      "path":"tags",
                      "query":{  
                         "bool":{  
                            "must":[  
                               {  
                                  "match":{  
                                     "tags.tag_scope":"pepsi"
                                  }
                               },
                               {  
                                  "match":{  
                                     "tags.tag_scope":"prod"
                                  }
                               }
                            ]
                         }
                      }
                   }
                }
             ]
          }
       },
       "aggs":{                        <----- Aggregation Query Starts Here
          "myscope":{  
             "nested":{  
                "path":"tags"
             },
             "aggs":{  
                "uniqui_scope":{  
                   "terms":{  
                      "field":"tags.tag_scope.keyword",
                      "size":10
                   }
                }
             }
          }
       }
    }
    

    Aggregation Response:

    {
      "took": 53,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "failed": 0
      },
      "hits": {
        "total": 1,
        "max_score": 0,
        "hits": []
      },
      "aggregations": {
        "myscope": {
          "doc_count": 2,
          "uniqui_scope": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
              {
                "key": "coke",
                "doc_count": 1
              },
              {
                "key": "dev",
                "doc_count": 1
              },
              {
                "key": "pepsi",
                "doc_count": 1
              },
              {
                "key": "prod",
                "doc_count": 1
              }
            ]
          }
        }
      }
    }
    

    Hope this helps.