Search code examples
elasticsearch

Calculate percentage based on status of max per group in Elasticsearch


Given the dataset below, I'd like to calculate percentage of status over unique count of workflow.

id,workflow,status
1,A,FAILURE
2,A,ABORTED
3,A,SUCCESS
4,A,SUCCESS
1,B,FAILURE
2,B,SUCCESS
3,B,FAILURE
1,C,FAILURE
2,C,FAILURE
1,D,FAILURE
2,D,SUCCESS
3,D,FAILURE
4,D,FAILURE
5,D,FAILURE

Always refer to the status of max id.

A - SUCCESS (refer to max id 4)
B - FAILURE (refer to max id 3)
C - FAILURE (refer to max id 2)
D - FAILURE (refer to max id 5)

Expected aggregated results as follow:

unique count of workflow: 4   // workflow A, B, C, and D
SUCCESS: (1/4) * 100 = 25%    // workflow A
FAILURE: (3/4) * 100 = 75%    // workflow B, C, and D

Appreciate it if you can advise the query to get the 25% (SUCCESS) and 75% (FAILURE).


Solution

  • DELETE test
    PUT test
    {
      "settings": {
        "number_of_replicas": 0,
        "number_of_shards": 1
      },
      "mappings": {
        "properties": {
          "id": {
            "type": "long"
          },
          "status": {
            "type": "keyword"
          },
          "status_success": {
            "type": "long",
            "script": {
              "source": "if (doc['status'].value == 'SUCCESS') {emit(1)} else {emit(0)}"
            }
          },
          "status_failure": {
            "type": "long",
            "script": {
              "source": "if (doc['status'].value == 'FAILURE') {emit(1)} else {emit(0)}"
            }
          },
          "workflow": {
            "type": "keyword"
          }
        }
      }
    }
    
    POST test/_bulk?refresh
    {"index":{}}
    {"id": 1, "workflow": "A", "status": "FAILURE"}
    {"index":{}}
    {"id": 2, "workflow": "A", "status": "ABORTED"}
    {"index":{}}
    {"id": 3, "workflow": "A", "status": "SUCCESS"}
    {"index":{}}
    {"id": 4, "workflow": "A", "status": "SUCCESS"}
    {"index":{}}
    {"id": 1, "workflow": "B", "status": "FAILURE"}
    {"index":{}}
    {"id": 2, "workflow": "B", "status": "SUCCESS"}
    {"index":{}}
    {"id": 3, "workflow": "B", "status": "FAILURE"}
    {"index":{}}
    {"id": 1, "workflow": "C", "status": "FAILURE"}
    {"index":{}}
    {"id": 2, "workflow": "C", "status": "FAILURE"}
    {"index":{}}
    {"id": 1, "workflow": "D", "status": "FAILURE"}
    {"index":{}}
    {"id": 2, "workflow": "D", "status": "SUCCESS"}
    {"index":{}}
    {"id": 3, "workflow": "D", "status": "FAILURE"}
    {"index":{}}
    {"id": 4, "workflow": "D", "status": "FAILURE"}
    {"index":{}}
    {"id": 5, "workflow": "D", "status": "FAILURE"}
    
    
    GET test/_search
    {
      "size": 0,
      "aggs": {
        "by_workflow": {
          "terms": {
            "field": "workflow"
          },
          "aggs": {
            "top_status": {
              "top_metrics": {
                "metrics": [
                  {"field": "status_success"},
                  {"field": "status_failure"}
                  ],
                  "sort": {
                    "id": "desc"
                  }
              }
            }
          }
        },
        "success_rate": {
          "avg_bucket": {
            "buckets_path": "by_workflow>top_status[status_success]",
            "gap_policy": "skip",
            "format": "#%"
          }
        },
        "failure_rate": {
          "avg_bucket": {
            "buckets_path": "by_workflow>top_status[status_failure]",
            "gap_policy": "skip",
            "format": "#%"
          }
        }
      }
    }