Search code examples
elasticsearch logstash elasticsearch-7 google-cloud-storage

How to export nested fields in Elasticsearch Index as CSV file to Google Cloud Storage Using Logstash


I am using Elasticsearch. We create a day-wise index, and a huge amount of data is ingested every minute. I want to export a few fields from the index created each day to Google Cloud Storage. I am able to get direct (top-level) fields from the index. How do I get fields from nested objects in the Elasticsearch index and send them as a CSV file to a GCS bucket using Logstash?

I tried the conf below to fetch nested fields from the index; it didn't work and gives empty values in the output CSV file:

# Source stage: reads documents from Elasticsearch using the settings below.
input {

 elasticsearch {

    # Cluster endpoint and login; TLS is enabled and ca_file names the CA
    # certificate file used for the connection.
    hosts => "host:443"

    user => "user"

    ssl => true

    # Both timeouts are set to 600 (seconds, per the option names).
    connect_timeout_seconds => 600

    request_timeout_seconds => 600

    password => "pwd"

    ca_file => "ca.crt"

    # Index to read from.
    index => "test"

    # Query body: limits _source to three dotted paths under obj1 and
    # matches every document (match_all).
    query => '

    {
    "_source": ["obj1.Name","obj1.addr","obj1.obj2.location"],

    "query": {

    "match_all": {}

    }

    }

  '

  }

}

# Transform stage: tries to rename three nested obj1 fields to flat column
# names (col1..col3) that the CSV output expects.
# NOTE(review): the source names are written with dots; Logstash references
# nested fields with bracket syntax such as [obj1][Name], so these keys
# probably do not match the nested values, which would leave col1..col3 empty.
filter {
  mutate {
    rename => {
      "obj1.Name"          => "col1"
      "obj1.addr"          => "col2"
      "obj1.obj2.location" => "col3"
    }
  }
}


# Sink stage: writes events as CSV to a Google Cloud Storage bucket.
output {
  google_cloud_storage {
    # Destination bucket and service-account key file.
    bucket        => "bucket"
    json_key_file => "creds.json"

    # CSV encoding with a header row; only these three columns are emitted.
    codec => csv {
      columns         => ["col1", "col2", "col3"]
      include_headers => true
    }

    # Local staging and file naming (hour-granular date pattern,
    # UUID and hostname included in the name, no gzip compression).
    temp_directory   => "/tmp"
    log_file_prefix  => "log_gcs"
    date_pattern     => "%Y-%m-%dT%H:00"
    include_uuid     => true
    include_hostname => true
    gzip             => false

    # Size limit plus flush and upload intervals (600 s each).
    max_file_size_kbytes   => 1024
    flush_interval_secs    => 600
    uploader_interval_secs => 600
  }
}

How can I also populate the above CSV with a field from an array of objects? In the example below, I want to fetch categoryUrl:

"Hierarchy" : [
            {
              "level" : "1",
              "category" : "test",
              "categoryUrl" : "testurl1"
            },
            {
              "level" : "2",
              "category" : "test2",
              "categoryUrl" : "testurl2"
            }
          ]

Solution

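  • You need to use the Logstash field reference notation, in which each level of a nested field is wrapped in square brackets (for example [obj1][Name] rather than obj1.Name):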

    # Rename the nested obj1 fields to the flat column names used by the
    # CSV codec. Each nesting level gets its own [segment]; a dotted name
    # such as "obj1.Name" does not address the nested value.
    # NOTE(review): an element of an array of objects should be addressable
    # the same way with a numeric index, e.g. [Hierarchy][0][categoryUrl];
    # confirm against the Logstash field-reference documentation.
    filter {
      mutate {
        rename => {
          "[obj1][Name]"           => "col1"
          "[obj1][addr]"           => "col2"
          "[obj1][obj2][location]" => "col3"
        }
      }
    }