Search code examples
elasticsearchkuberneteslogstash

Logstash Indexing error “Index -1 out of bounds for length 0”


I want to use ELK stack to analyze some Kubernetes audit logs. They're sent to the Logstash webhook as JSON. Here's my config file :

input{
 http {
   port => 8888
   codec => "json"
   type => "json"
 }
}

filter{
 json {
   source => "message"
 }
}

output{
 file {
   path => "/home/ubuntu/logstash-kubernetes/audit.log"
 }
 elasticsearch{
   hosts => "localhost:9200"
   index => "kubernetes"
 }
}

The output file is populated with good looking entries, I'm happy with it. But when it comes to exporting to Elasticsearch, I'm getting the following error :

elasticsearch - Could not index event to Elasticsearch. {:status=>400, :action=>["index", {:_id=>nil, :_index=>"kubernetes", :routing=>nil, :_type=>"_doc"}, #<LogStash::Event:0x71740f4b>], :response=>{"index"=>{"_index"=>"kubernetes", "_type"=>"_doc", "_id"=>"x6QKJHYBkIKyNOo2Q-z8", "status"=>400, "error"=>{"type"=>"mapper_parsing_exception", "reason"=>"failed to parse", "caused_by"=>{"type"=>"array_index_out_of_bounds_exception", "reason"=>"Index -1 out of bounds for length 0"}}}}}

It only happens for some entries. Let me show you an generated entry that was processed without error :

{
  "_index": "kubernetes",
  "_type": "_doc",
  "_id": "xKQKJHYBkIKyNOo2POwJ",
  "_version": 1,
  "_score": 0,
  "_source": {
    "headers": {
      "content_type": "application/json",
      "request_method": "POST",
      "accept_encoding": "gzip",
      "request_path": "/",
      "http_host": "****:8888",
      "http_accept": "application/json, */*",
      "content_length": "2813",
      "http_version": "HTTP/1.1",
      "http_user_agent": "Go-http-client/1.1"
    },
    "@version": "1",
    "host": "****",
    "apiVersion": "audit.k8s.io/v1",
    "items": [
      {
        "requestObject": {
          "type": "Normal",
          "firstTimestamp": "2020-12-02T15:20:19Z",
          "involvedObject": {
            "namespace": "kube-system",
            "name": "kube-scheduler",
            "apiVersion": "v1",
            "resourceVersion": "2573",
            "uid": "0ebf0ea5-f8c0-475a-9c9e-57674edefe8d",
            "kind": "Endpoints"
          },
          "message": "****1_938d6a87-bccb-4ea0-bd6b-31fc27e48b7a became leader",
          "eventTime": null,
          "source": {
            "component": "default-scheduler"
          },
          "reason": "LeaderElection",
          "count": 1,
          "reportingInstance": "",
          "apiVersion": "v1",
          "reportingComponent": "",
          "lastTimestamp": "2020-12-02T15:20:19Z",
          "metadata": {
            "name": "kube-scheduler.164cef6d1f3ed55e",
            "namespace": "kube-system",
            "creationTimestamp": null
          },
          "kind": "Event"
        },
        "requestURI": "/api/v1/namespaces/kube-system/events",
        "responseStatus": {
          "code": 201,
          "metadata": {}
        },
        "user": {
          "groups": [
            "system:authenticated"
          ],
          "username": "system:kube-scheduler"
        },
        "annotations": {
          "authorization.k8s.io/reason": "RBAC: allowed by ClusterRoleBinding \"system:kube-scheduler\" of ClusterRole \"system:kube-scheduler\" to User \"system:kube-scheduler\"",
          "authorization.k8s.io/decision": "allow"
        },
        "stageTimestamp": "2020-12-02T15:20:19.090835Z",
        "userAgent": "kube-scheduler/v1.19.4 (linux/amd64) kubernetes/d360454/scheduler",
        "objectRef": {
          "namespace": "kube-system",
          "name": "kube-scheduler.164cef6d1f3ed55e",
          "apiVersion": "v1",
          "resource": "events"
        },
        "responseObject": {
          "type": "Normal",
          "firstTimestamp": "2020-12-02T15:20:19Z",
          "involvedObject": {
            "namespace": "kube-system",
            "name": "kube-scheduler",
            "apiVersion": "v1",
            "resourceVersion": "2573",
            "uid": "0ebf0ea5-f8c0-475a-9c9e-57674edefe8d",
            "kind": "Endpoints"
          },
          "message": "****1_938d6a87-bccb-4ea0-bd6b-31fc27e48b7a became leader",
          "eventTime": null,
          "source": {
            "component": "default-scheduler"
          },
          "reason": "LeaderElection",
          "count": 1,
          "reportingInstance": "",
          "apiVersion": "v1",
          "reportingComponent": "",
          "lastTimestamp": "2020-12-02T15:20:19Z",
          "metadata": {
            "name": "kube-scheduler.164cef6d1f3ed55e",
            "namespace": "kube-system",
            "creationTimestamp": "2020-12-02T15:20:19Z",
            "managedFields": [
              {
                "operation": "Update",
                "fieldsType": "FieldsV1",
                "time": "2020-12-02T15:20:19Z",
                "manager": "kube-scheduler",
                "apiVersion": "v1",
                "fieldsV1": {
                  "f:source": {
                    "f:component": {}
                  },
                  "f:lastTimestamp": {},
                  "f:reason": {},
                  "f:firstTimestamp": {},
                  "f:type": {},
                  "f:involvedObject": {
                    "f:kind": {},
                    "f:namespace": {},
                    "f:name": {},
                    "f:apiVersion": {},
                    "f:uid": {},
                    "f:resourceVersion": {}
                  },
                  "f:message": {},
                  "f:count": {}
                }
              }
            ],
            "resourceVersion": "2576",
            "selfLink": "/api/v1/namespaces/kube-system/events/kube-scheduler.164cef6d1f3ed55e",
            "uid": "573e4082-7c38-473c-9116-a114be97a47d"
          },
          "kind": "Event"
        },
        "level": "RequestResponse",
        "requestReceivedTimestamp": "2020-12-02T15:20:19.021481Z",
        "auditID": "cf291b3f-f5fa-4e43-a177-adf9cbfc0197",
        "sourceIPs": [
          "****"
        ],
        "stage": "ResponseComplete",
        "verb": "create"
      }
    ],
    "@timestamp": "2020-12-02T15:20:19.094Z",
    "type": "json",
    "metadata": {},
    "kind": "EventList"
  }
}

And here's the JSON for that error happens :

{
  "headers": {
    "content_type": "application/json",
    "request_method": "POST",
    "accept_encoding": "gzip",
    "request_path": "/",
    "http_host": "****:8888",
    "http_accept": "application/json, */*",
    "content_length": "2668",
    "http_version": "HTTP/1.1",
    "http_user_agent": "Go-http-client/1.1"
  },
  "@version": "1",
  "host": "****",
  "apiVersion": "audit.k8s.io/v1",
  "items": [
    {
      "requestObject": {
        "metadata": {
          "name": "kube-scheduler",
          "namespace": "kube-system",
          "creationTimestamp": "2020-12-02T14:36:33Z",
          "annotations": {
            "control-plane.alpha.kubernetes.io/leader": "{\"holderIdentity\":\"****38d6a87-bccb-4ea0-bd6b-31fc27e48b7a\",\"leaseDurationSeconds\":15,\"acquireTime\":\"2020-12-02T15:18:52Z\",\"renewTime\":\"2020-12-02T15:18:52Z\",\"leaderTransitions\":67}"
          },
          "managedFields": [
            {
              "operation": "Update",
              "fieldsType": "FieldsV1",
              "time": "2020-12-02T15:18:23Z",
              "manager": "kube-scheduler",
              "apiVersion": "v1",
              "fieldsV1": {
                "f:metadata": {
                  "f:annotations": {
                    ".": {},
                    "f:control-plane.alpha.kubernetes.io/leader": {}
                  }
                }
              }
            }
          ],
          "resourceVersion": "2531",
          "selfLink": "/api/v1/namespaces/kube-system/endpoints/kube-scheduler",
          "uid": "0ebf0ea5-f8c0-475a-9c9e-57674edefe8d"
        },
        "apiVersion": "v1",
        "kind": "Endpoints"
      },
      "requestURI": "/api/v1/namespaces/kube-system/endpoints/kube-scheduler?timeout=10s",
      "responseStatus": {
        "code": 200,
        "metadata": {}
      },
      "user": {
        "groups": [
          "system:authenticated"
        ],
        "username": "system:kube-scheduler"
      },
      "annotations": {
        "authorization.k8s.io/reason": "RBAC: allowed by ClusterRoleBinding \"system:kube-scheduler\" of ClusterRole \"system:kube-scheduler\" to User \"system:kube-scheduler\"",
        "authorization.k8s.io/decision": "allow"
      },
      "stageTimestamp": "2020-12-02T15:18:52.067347Z",
      "userAgent": "kube-scheduler/v1.19.4 (linux/amd64) kubernetes/d360454/leader-election",
      "objectRef": {
        "namespace": "kube-system",
        "name": "kube-scheduler",
        "apiVersion": "v1",
        "resourceVersion": "2531",
        "uid": "0ebf0ea5-f8c0-475a-9c9e-57674edefe8d",
        "resource": "endpoints"
      },
      "responseObject": {
        "metadata": {
          "name": "kube-scheduler",
          "namespace": "kube-system",
          "creationTimestamp": "2020-12-02T14:36:33Z",
          "annotations": {
            "control-plane.alpha.kubernetes.io/leader": "{\"holderIdentity\":\"****1_938d6a87-bccb-4ea0-bd6b-31fc27e48b7a\",\"leaseDurationSeconds\":15,\"acquireTime\":\"2020-12-02T15:18:52Z\",\"renewTime\":\"2020-12-02T15:18:52Z\",\"leaderTransitions\":67}"
          },
          "managedFields": [
            {
              "operation": "Update",
              "fieldsType": "FieldsV1",
              "time": "2020-12-02T15:18:52Z",
              "manager": "kube-scheduler",
              "apiVersion": "v1",
              "fieldsV1": {
                "f:metadata": {
                  "f:annotations": {
                    ".": {},
                    "f:control-plane.alpha.kubernetes.io/leader": {}
                  }
                }
              }
            }
          ],
          "resourceVersion": "2542",
          "selfLink": "/api/v1/namespaces/kube-system/endpoints/kube-scheduler",
          "uid": "0ebf0ea5-f8c0-475a-9c9e-57674edefe8d"
        },
        "apiVersion": "v1",
        "kind": "Endpoints"
      },
      "level": "RequestResponse",
      "requestReceivedTimestamp": "2020-12-02T15:18:52.062449Z",
      "auditID": "a45860c7-e0c9-4724-be2e-705a1b2955f4",
      "sourceIPs": [
        "****"
      ],
      "stage": "ResponseComplete",
      "verb": "update"
    }
  ],
  "@timestamp": "2020-12-02T15:20:22.001Z",
  "type": "json",
  "metadata": {},
  "kind": "EventList"
}

(I removed some personnal data from the JSONs) Both JSONs appears as valid. I noticed that the JSONs with responseStatus.code = 201 were correctly processed, and those with code = 200 were throwing this error.

I'm getting a bit mad so if anyone can help with that, I'd be very pleased !


Solution

  • The error is thrown on this line because subfields has a length of 0 and hence it is trying to access index -1, which is not allowed.

    The method that splits the field names into pathsis called splitAndValidatePath and what it does is to split field names when it encounters . (i.e. a dot).

    In your second document, you have two fields called "." (i.e. it is just a dot) and they are not really valid.

    ".": {},
    

    If you remove them both, your document will be indexed correctly.

    Another way of solving this is to disable indexing for the parent f:annotations object field and that will do the trick... provided, of course, you don't need to search for content within f:annotations.