Tags: elasticsearch, logstash, elastic-stack, logstash-configuration, kibana-6

Kibana not showing the correct data when choosing @timestamp & received_at


I have the logstash.conf file below. The data was being processed correctly, but today I see a very weird issue where the indices for noi-syslog are not showing the correct syslog_timestamp.

input {
  file {
    path => [ "/scratch/rsyslog/*/messages.log" ]
    start_position => beginning
    sincedb_path => "/dev/null"
    max_open_files => 64000
    type => "noi-syslog"
  }
  file {
    path => [ "/scratch/rsyslog_CISCO/*/network.log" ]
    start_position => beginning
    sincedb_path => "/dev/null"
    max_open_files => 64000
    type => "apic_logs"
  }
}

filter {
  if [type] == "noi-syslog" {
    grok {
      match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp } %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" }
      add_field => [ "received_at", "%{@timestamp}" ]
      remove_field => [ "host", "path" ]
    }
    syslog_pri { }
    date {
      match => [ "syslog_timestamp", "MMM  d HH:mm:ss", "MMM dd HH:mm:ss" ]
    }
  }
  if [type] == "apic_logs" {
    grok {
      match => { "message" => "%{CISCOTIMESTAMP:syslog_timestamp} %{CISCOTIMESTAMP} %{SYSLOGHOST:syslog_hostname} (?<prog>[\w._/%-]+) %{SYSLOG5424SD:fault_code}%{SYSLOG5424SD:fault_state}%{SYSLOG5424SD:crit_info}%{SYSLOG5424SD:log_severity}%{SYSLOG5424SD:log_info} %{GREEDYDATA:syslog_message}" }
      add_field => [ "received_at", "%{@timestamp}" ]
      remove_field => [ "host", "path" ]
    }
    syslog_pri { }
    date {
      match => [ "syslog_timestamp", "MMM  d HH:mm:ss", "MMM dd HH:mm:ss" ]
    }
  }
}
output {
  if [type] == "noi-syslog" {
    elasticsearch {
      hosts => "noida-elk:9200"
      manage_template => false
      index => "noi-syslog-%{+YYYY.MM.dd}"
      document_type => "messages"
    }
  }
}

output {
  if [type] == "apic_logs" {
    elasticsearch {
      hosts => "noida-elk:9200"
      manage_template => false
      index => "apic_logs-%{+YYYY.MM.dd}"
      document_type => "messages"
    }
  }
}
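
Side note: Logstash concatenates all output sections into a single pipeline, so the two blocks above can also be written as one (an equivalent sketch built from the same settings):

output {
  if [type] == "noi-syslog" {
    elasticsearch {
      hosts => "noida-elk:9200"
      manage_template => false
      index => "noi-syslog-%{+YYYY.MM.dd}"
      document_type => "messages"
    }
  } else if [type] == "apic_logs" {
    elasticsearch {
      hosts => "noida-elk:9200"
      manage_template => false
      index => "apic_logs-%{+YYYY.MM.dd}"
      document_type => "messages"
    }
  }
}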

Indices for apic_logs & noi-syslog:

$ curl -s -XGET http://127.0.0.1:9200/_cat/indices?v | grep -E 'apic_logs|noi'
green  open   noi-syslog-2019.03.13           Fz1Rht65QDCYCshmSjWO4Q   5   1    6845696            0      2.2gb            1gb
green  open   noi-rmlog-2019.03.13            W_VW8Y1eTWq-TKHAma3DLg   5   1     148613            0     92.6mb           45mb
green  open   apic_logs-2019.03.13            pKz61TS5Q-W2yCsCtrVvcQ   5   1    1606765            0    788.6mb        389.7mb
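
A single document can be pulled to compare @timestamp and syslog_timestamp side by side (a quick spot-check; any of the daily indices works):

$ curl -s 'http://127.0.0.1:9200/noi-syslog-2019.03.13/_search?size=1&pretty'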

The Kibana page shows all the fields correctly when choosing @timestamp for the apic_logs index, but it does not work correctly for the Linux system logs index noi-syslog.

[screenshot: apic_logs index in Kibana with @timestamp]

With @timestamp, noi-syslog does not show all the fields and shows a _grokparsefailure tag; another fact is that when choosing received_at for the same noi-syslog, it shows all the fields but not the timely data.

Below is the image when received_at is chosen:

[screenshot: noi-syslog index with received_at]

Below is the image when @timestamp is chosen:

[screenshot: noi-syslog index with @timestamp]

In the Elasticsearch deprecation logs:

# tail -5 log-cohort_deprecation.log
[2019-03-13T20:16:29,112][WARN ][o.e.d.a.a.i.t.p.PutIndexTemplateRequest] [noida-elk.cadence.com] Deprecated field [template] used, replaced by [index_patterns]
[2019-03-13T20:16:30,548][WARN ][o.e.d.a.a.i.t.p.PutIndexTemplateRequest] [noida-elk.cadence.com] Deprecated field [template] used, replaced by [index_patterns]
[2019-03-13T20:19:45,935][WARN ][o.e.d.a.a.i.t.p.PutIndexTemplateRequest] [noida-elk.cadence.com] Deprecated field [template] used, replaced by [index_patterns]
[2019-03-13T20:19:48,644][WARN ][o.e.d.a.a.i.t.p.PutIndexTemplateRequest] [noida-elk.cadence.com] Deprecated field [template] used, replaced by [index_patterns]
[2019-03-13T20:20:13,069][WARN ][o.e.d.a.a.i.t.p.PutIndexTemplateRequest] [noida-elk.cadence.com] Deprecated field [template] used, replaced by [index_patterns]
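
These warnings only mean that some index templates still use the deprecated template field; they should be unrelated to the timestamp problem. A minimal sketch of the fix for Elasticsearch 6.x (the template name and settings here are placeholders, not taken from this setup):

$ curl -s -XPUT 'http://127.0.0.1:9200/_template/noi-syslog' \
    -H 'Content-Type: application/json' -d '
{
  "index_patterns": ["noi-syslog-*"],
  "settings": { "number_of_shards": 5 }
}'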

The memory usage on the system (free -m):

             total       used       free     shared    buffers     cached
Mem:         32057      31794        263          0        210      18206
-/+ buffers/cache:      13378      18679
Swap:       102399        115     102284

Total memory is 32GB; I have assigned 8GB each to Elasticsearch and Logstash. I doubt this is causing the issue.
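
For reference, the 8GB heaps are set via -Xms/-Xmx in each service's jvm.options (paths assume a package install):

# /etc/elasticsearch/jvm.options and /etc/logstash/jvm.options
-Xms8g
-Xmx8g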

Workaround for dropping events with the _grokparsefailure tag:

filter {
  if [type] == "noi-syslog" {
    grok {
      match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" }
      add_field => [ "received_at", "%{@timestamp}" ]
      remove_field => [ "host", "path" ]
    }
    syslog_pri { }
    date {
      match => [ "syslog_timestamp", "MMM  d HH:mm:ss", "MMM dd HH:mm:ss" ]
    }
    if "_grokparsefailure" in [tags] {
      drop { }
    }
  }
}

1. Or an alternative, just an idea:

filter {
  if [type] == "noi-syslog" {
    grok {
      match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" }
      add_field => [ "received_at", "%{@timestamp}" ]
      remove_field => [ "host", "path" ]
    }
    syslog_pri { }
    date {
      match => [ "syslog_timestamp", "MMM  d HH:mm:ss", "MMM dd HH:mm:ss" ]
    }
    if "_grokparsefailure" in [tags] {
      grok {
        match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{GREEDYDATA:syslog_message}" }
        add_field => [ "received_at", "%{@timestamp}" ]
        remove_field => [ "host", "path" ]
      }
    }
  }
}

2. Or another alternative, just an idea (note Logstash has no elif and else takes no condition, so this uses two sequential conditionals):

filter {
  if [type] == "noi-syslog" {
    grok {
      match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" }
      add_field => [ "received_at", "%{@timestamp}" ]
      remove_field => [ "host", "path" ]
    }
    syslog_pri { }
    date {
      match => [ "syslog_timestamp", "MMM  d HH:mm:ss", "MMM dd HH:mm:ss" ]
    }
    if "_grokparsefailure" in [tags] {
      grok {
        match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{GREEDYDATA:syslog_message}" }
        add_field => [ "received_at", "%{@timestamp}" ]
        remove_field => [ "host", "path" ]
        # on success, clear the failure tag so only still-unparsed events are dropped below
        remove_tag => [ "_grokparsefailure" ]
      }
    }
    if "_grokparsefailure" in [tags] {
      drop { }
    }
  }
}

Solution

  • The problem here is that the messages in your examples for the type noi-syslog differ from each other, and your grok filter only works for the first one. When grok fails to parse a message, it adds a tag named _grokparsefailure.

    Your first example, which grok parses successfully, is:

    Mar 13 15:55:02 hostname /usr/bin/crontab[32708]: (root) LIST (root)
    

    The second example that fails grok is:

    Mar 12 11:01:02 hostname run-parts(/etc/cron.hourly)[3970 starting mcelog.cron
    

    This second message is malformed: it is missing the closing bracket (]) and the colon (:) after the PID 3970, so your grok pattern does not match.

    Since your grok failed, the field syslog_timestamp does not exist, so your date filter has nothing to do and @timestamp will be set to the time when the event entered the logstash pipeline.
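
    You can verify this with a throwaway pipeline (a minimal sketch; the file name test.conf is just an example). Run it with bin/logstash -f test.conf, paste both sample lines on stdin, and compare @timestamp in the output:

    input { stdin { } }
    filter {
      grok {
        match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" }
      }
      date {
        match => [ "syslog_timestamp", "MMM  d HH:mm:ss", "MMM dd HH:mm:ss" ]
      }
    }
    output { stdout { codec => rubydebug } }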

    You need a grok pattern for each message format you have. One quick way to correct the syslog_timestamp is to catch the messages that failed grok and apply another grok filter that extracts the syslog_timestamp field and puts the rest of the message in another field.

    Try to add the following conditional to your pipeline.

    if "_grokparsefailure" in [tags] {
      grok {
          match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp } %{SYSLOGHOST:syslog_hostname} %{GREEDYDATA:rest_of_syslog_message}" }
          add_field => [ "received_at", "%{@timestamp}" ]
          remove_field => [ "host", "path" ]
       } 
    }
    

    The result of this grok will be like this:

    {
      "syslog_hostname": "hostname",
      "syslog_timestamp": "Mar 12 11:01:02",
      "rest_of_syslog_message": "run-parts(/etc/cron.hourly)[3970 starting mcelog.cron"
    }
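
    One caveat: if this conditional is placed after your original date filter, the recovered syslog_timestamp is parsed too late to update @timestamp, so you would also want a date filter inside the conditional (same match patterns as before):

    if "_grokparsefailure" in [tags] {
      grok {
        match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{GREEDYDATA:rest_of_syslog_message}" }
        add_field => [ "received_at", "%{@timestamp}" ]
        remove_field => [ "host", "path" ]
      }
      date {
        match => [ "syslog_timestamp", "MMM  d HH:mm:ss", "MMM dd HH:mm:ss" ]
      }
    }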