Search code examples
elasticsearch kibana

Elasticsearch How to count total docs by date


As the title says, I want to count, for each date, the docs from that day and all earlier days (a running total). The sample data below should make this easy to understand.

{"index":{"_index":"login-2015.12.23","_type":"logs"}}
{"uid":"1","register_time":"2015-12-23T12:00:00Z","login_time":"2015-12-23T12:00:00Z"}
{"index":{"_index":"login-2015.12.23","_type":"logs"}}
{"uid":"2","register_time":"2015-12-23T12:00:00Z","login_time":"2015-12-23T12:00:00Z"}
{"index":{"_index":"login-2015.12.24","_type":"logs"}}
{"uid":"1","register_time":"2015-12-23T12:00:00Z","login_time":"2015-12-24T12:00:00Z"}
{"index":{"_index":"login-2015.12.25","_type":"logs"}}
{"uid":"1","register_time":"2015-12-23T12:00:00Z","login_time":"2015-12-25T12:00:00Z"}

As you can see, index login-2015.12.23 has two docs, index login-2015.12.24 has one doc, and index login-2015.12.25 has one doc.

Now I want to get the following result:

{
  "hits" : {
    "total" : 6282,
    "max_score" : 1.0,
    "hits" : []
  },
  "aggregations" : {
    "group_by_date" : {
      "buckets" : [
        {
          "key_as_string" : "2015-12-23T12:00:00Z",
          "key" : 1450872000000,
          "doc_count" : 2
        },
        {
          "key_as_string" : "2015-12-24T12:00:00Z",
          "key" : 1450958400000,
          "doc_count" : 3
        },
        {
          "key_as_string" : "2015-12-25T12:00:00Z",
          "key" : 1451044800000,
          "doc_count" : 4
        }
      ]
    }
  }
}

If I count for the date 2015-12-24T12:00:00Z, that means I must count the docs for 2015-12-23T12:00:00Z and 2015-12-24T12:00:00Z together. In my project I have many indices like this, and I have tried many ways to achieve this goal without success. This is my demo:

{
  "query": {"match_all": {}},
  "size": 0,
  "aggs": {
    "group_by_date": {
      "date_histogram": {
        "field": "timestamp", 
        "interval": "day"
      },
      "aggs": {
        "intersect": {
          "scripted_metric": {
            "init_script": "state.inner=[]",
            "map_script": "state.inner.add(params.param1 == 3 ? params.param2 * params.param1 : params.param1 * params.param2)",
            "combine_script": "return state.inner",
            "reduce_script": "return states",
            "params": {
              "param1": 3,
              "param2": 5
            }
          }
        }
      }
    }
  }
}

I want to group by date and use scripted_metric to iterate over the date list, but the scripted metric only iterates over the documents in its own bucket, not over all the documents. Does anyone have a better idea for solving this problem?


Solution

  • You can simply use the cumulative sum pipeline aggregation

    {
      "query": {"match_all": {}},
      "size": 0,
      "aggs": {
        "group_by_date": {
          "date_histogram": {
            "field": "login_time", 
            "interval": "day"
          },
          "aggs": {
            "cumulative_docs": {
              "cumulative_sum": {
                "buckets_path": "_count" 
              }
            }
          }
        }
      }
    }
    

    And the results will look like this:

      "aggregations" : {
        "group_by_date" : {
          "buckets" : [
            {
              "key_as_string" : "2015-12-23T00:00:00.000Z",
              "key" : 1450828800000,
              "doc_count" : 2,
              "cumulative_docs" : {
                "value" : 2.0
              }
            },
            {
              "key_as_string" : "2015-12-24T00:00:00.000Z",
              "key" : 1450915200000,
              "doc_count" : 1,
              "cumulative_docs" : {
                "value" : 3.0
              }
            },
            {
              "key_as_string" : "2015-12-25T00:00:00.000Z",
              "key" : 1451001600000,
              "doc_count" : 1,
              "cumulative_docs" : {
                "value" : 4.0
              }
            }
          ]
        }
      }