Search code examples
Tags: elasticsearch, logstash, kibana, kibana-4

How to extract a portion of a field and store it in another field in a Logstash filter?


I have syslog files, and I am using a Logstash syslog filter configuration to process those logs. I am getting messages in the syslog_message field like this one:

syslog_message:[cdp/interface.ERR] - {- -} Error writing CDP frame on system. Link is down

But I want a new field that contains only the portion before .ERR, i.e. "cdp/interface". This portion varies, and I don't know how many different values will occur. So I want to store that portion in a new field, so that I can plot the count of those terms in Kibana.

current logstash configuration:

# Asker's current pipeline: split each raw syslog line into timestamp, host,
# program, optional pid, and the free-text remainder.
filter {
  if [type] == "syslog" {
    grok {
      # Everything after "program[pid]: " is captured as syslog_message.
      match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" }
      # Record the event's @timestamp and host values under separate field names.
      add_field => [ "received_at", "%{@timestamp}" ]
      add_field => [ "received_from", "%{host}" ]

    }
    date {
      # The parsed date is stored back into syslog_timestamp itself.
      target => "syslog_timestamp"
      # Two formats are tried: "MMM  d" (two spaces, single-digit day) and "MMM dd".
      match => [ "syslog_timestamp", "MMM  d HH:mm:ss", "MMM dd HH:mm:ss" ]
    }
# NOTE: the closing braces for the `if` and `filter` blocks are not shown in this excerpt.

After adding a new grok filter to the configuration file, as suggested by @Val:

# Second grok pass over syslog_message: extracts "[<type>.<level>] <rest>".
# This pattern requires a space immediately after the closing bracket, so a
# message of the form "[mgmtd.INFO]: ..." (colon after the bracket) does not match.
grok {
  match => { "syslog_message" => "\[%{PROG:syslog_type}\.%{LOGLEVEL:syslog_level}\] %{GREEDYDATA:syslog_message}" }
}

For some inputs, however, a _grokparsefailure occurs.
Some of the lines for which it fails are:

syslog_message: [hald.INFO]: Support query handler called
syslog_message:[mgmtd.INFO]: Finished database commit

Any help will be appreciated. Thanks


Solution

  • You can either add one more pattern in your main grok filter or add another grok filter just for the syslog_message field

    filter {
      if [type] == "syslog" {
        # First pass: split the raw syslog line into timestamp, host, program,
        # optional pid, and the free-text remainder (syslog_message).
        grok {
          match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" }
          add_field => [ "received_at", "%{@timestamp}" ]
          add_field => [ "received_from", "%{host}" ]
        }
        # ====> add this <====
        # Second pass: pull "[<type>.<level>]" off the front of syslog_message.
        # The ":?" makes the colon after the bracket optional, so both
        # "[cdp/interface.ERR] - ..." and "[mgmtd.INFO]: ..." match.
        grok {
          match => { "syslog_message" => "\[%{PROG:syslog_type}\.%{LOGLEVEL:syslog_level}\]:? %{GREEDYDATA:syslog_message}" }
          # syslog_message already exists at this point; without overwrite, grok
          # appends the new capture and the field becomes an array holding both
          # the original text and the trimmed remainder.
          overwrite => [ "syslog_message" ]
        }
        # Parse the syslog timestamp; the result is stored back into syslog_timestamp.
        date {
          target => "syslog_timestamp"
          match => [ "syslog_timestamp", "MMM  d HH:mm:ss", "MMM dd HH:mm:ss" ]
        }
      }
    }
    

    As a result, you'd get two additional fields, namely:

    • syslog_type: cdp/interface
    • syslog_level: ERR

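    and, provided the grok filter is allowed to overwrite the existing field (overwrite => [ "syslog_message" ]), syslog_message would contain only the remainder of the line: "- {- -} Error writing CDP frame on system. Link is down"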