Tags: elasticsearch, kibana, elastic-stack, haproxy, filebeat

Filebeat + Haproxy + TCP + Provided Grok expressions do not match field value


I am using Filebeat to stream HAProxy logs to Elasticsearch.

Filebeat Version: 7.17
Elasticsearch Version: 7.17
Haproxy Version: 1.8

My HAProxy configuration is below:

global
    log 127.0.0.1 local0
    daemon
    maxconn 2048

defaults
    log global
    timeout connect 500000ms
    timeout client 86400s
    timeout server 86400s

frontend front-https-servers
    mode tcp
    option tcplog
    bind *:443
    capture request header Host len 64
    default_backend back-https-servers

listen stats
    bind :1936
    mode http
    stats enable
    stats realm Haproxy\ Statistics
    stats uri /

backend back-https-servers
    mode tcp
    capture response header Content-Length len 64
    balance leastconn
    stick-table type ip size 1m expire 8h
    stick on src
    option ssl-hello-chk
    server server1 X.X.X.X:443 check
    server server2 X.X.X.X:443 backup

frontend front-ssh-servers
    mode tcp
    option tcplog
    bind *:22
    default_backend back-ssh-servers
    timeout client 8h

backend back-ssh-servers
    mode tcp
    balance leastconn
    stick-table type ip size 1m expire 8h
    stick on src
    server server1 X.X.X.X:22 check
    server server2 X.X.X.X:22 check
    server server3 X.X.X.X:22 backup

The Filebeat haproxy module configuration is below:

- module: haproxy
  log:
    enabled: true
    var.input: "file"
    var.paths: ["/var/log/haproxy.log"]

Sample log entries look like the ones below.

2023-01-24T23:02:27-08:00 127.0.0.1 haproxy[2284158]: X.X.X.X:2122 [24/Jan/2023:23:02:22.111] front-https-servers back-https-servers/server1 1/0/5836 8054 -- 3/1/0/0/0 0/0
2023-01-24T23:02:28-08:00 127.0.0.1 haproxy[2284158]: X.X.X.X:43940 [24/Jan/2023:23:02:26.223] front-ssh-servers back-ssh-servers/server1 1/0/2724 5617 -- 2/1/0/0/0 0/0

The resulting document in Kibana looks like the one below.

{
  "_index": "scm-logs-2023.01.25",
  "_type": "_doc",
  "_id": "ITaF6YUBNucnOQxxoZvg",
  "_version": 1,
  "_score": 1,
  "_source": {
    "agent": {
      "hostname": "XXXXX",
      "name": "XXXXXX",
      "id": "ee7f39d8-ff25-4f87-b8d4-9c6c376ed780",
      "ephemeral_id": "ae88b18c-8d01-4c9b-a220-1b694aa7bcfa",
      "type": "filebeat",
      "version": "7.17.8"
    },
    "log": {
      "file": {
        "path": "/var/log/haproxy.log"
      },
      "offset": 199009
    },
    "message": "2023-01-25T07:23:36-08:00 127.0.0.1 haproxy[2284158]: X.X.X.X:5540 [25/Jan/2023:07:23:36.430] front-https-servers back-https-servers/server1 1/0/10 9261 -- 2/1/0/0/0 0/0",
    "fileset": {
      "name": "log"
    },
    "error": {
      "message": "Provided Grok expressions do not match field value: [2023-01-25T07:23:36-08:00 127.0.0.1 haproxy[2284158]: X.X.X.X:5540 [25/Jan/2023:07:23:36.430] front-https-servers back-https-servers/server1 1/0/10 9261 -- 2/1/0/0/0 0/0]"
    },
    "input": {
      "type": "log"
    },
    "@timestamp": "2023-01-25T15:23:45.177Z",
    "ecs": {
      "version": "1.12.0"
    },
    "service": {
      "type": "haproxy"
    },
    "host": {
      "hostname": "XXXXXXX",
      "os": {
        "kernel": "4.18.0-372.32.1.el8_6.x86_64",
        "codename": "Ootpa",
        "name": "Red Hat Enterprise Linux",
        "type": "linux",
        "family": "redhat",
        "version": "8.6 (Ootpa)",
        "platform": "rhel"
      },
      "ip": ["X.X.X.X", "fe80::250:56ff:fea3:1b47"],
      "containerized": false,
      "name": "XXXXXXX",
      "id": "ca808918bd214e6e8e9b71323ca2337c",
      "mac": ["00:50:56:a3:1b:47"],
      "architecture": "x86_64"
    },
    "event": {
      "ingested": "2023-01-25T15:22:07.197619647Z",
      "timezone": "-08:00",
      "module": "haproxy",
      "dataset": "haproxy.log"
    }
  },
  "fields": {
    "host.os.name.text": ["Red Hat Enterprise Linux"],
    "host.hostname": ["XXXXXX"],
    "host.mac": ["00:50:56:a3:1b:47"],
    "service.type": ["haproxy"],
    "host.ip": ["X.X.X.X", "fe80::250:56ff:fea3:1b47"],
    "agent.type": ["filebeat"],
    "event.module": ["haproxy"],
    "host.os.version": ["8.6 (Ootpa)"],
    "host.os.kernel": ["4.18.0-372.32.1.el8_6.x86_64"],
    "host.os.name": ["Red Hat Enterprise Linux"],
    "agent.name": ["XXXXXX"],
    "host.name": ["XXXXXXX"],
    "host.id": ["ca808918bd214e6e8e9b71323ca2337c"],
    "event.timezone": ["-08:00"],
    "host.os.type": ["linux"],
    "fileset.name": ["log"],
    "host.os.codename": ["Ootpa"],
    "input.type": ["log"],
    "log.offset": [199009],
    "agent.hostname": ["XXXXX"],
    "message": [
      "2023-01-25T07:23:36-08:00 127.0.0.1 haproxy[2284158]: X.X.X.X:5540 [25/Jan/2023:07:23:36.430] front-https-servers back-https-servers/server1 1/0/10 9261 -- 2/1/0/0/0 0/0"
    ],
    "host.architecture": ["x86_64"],
    "event.ingested": ["2023-01-25T15:22:07.197Z"],
    "@timestamp": ["2023-01-25T15:23:45.177Z"],
    "agent.id": ["ee7f39d8-ff25-4f87-b8d4-9c6c376ed780"],
    "ecs.version": ["1.12.0"],
    "host.containerized": [false],
    "host.os.platform": ["rhel"],
    "error.message": [
      "Provided Grok expressions do not match field value: [2023-01-25T07:23:36-08:00 127.0.0.1 haproxy[2284158]: X.X.X.X:5540 [25/Jan/2023:07:23:36.430] front-https-servers back-https-servers/server1 1/0/10 9261 -- 2/1/0/0/0 0/0]"
    ],
    "log.file.path": ["/var/log/haproxy.log"],
    "agent.ephemeral_id": ["ae88b18c-8d01-4c9b-a220-1b694aa7bcfa"],
    "agent.version": ["7.17.8"],
    "host.os.family": ["redhat"],
    "event.dataset": ["haproxy.log"]
  }
}

I have been trying different combinations of headers and log formats without much luck. I would appreciate any pointers.

Below is the grok processor from the module's ingest pipeline.

{
  "grok": {
    "field": "message",
    "patterns": [
      "%{HAPROXY_DATE:haproxy.request_date} %{IPORHOST:haproxy.source} %{PROG:process.name}(?:\\[%{POSINT:process.pid:long}\\])?: %{GREEDYDATA} (%{IPORHOST:source.address}|-):%{POSINT:source.port:long} %{WORD} %{IPORHOST:destination.ip}:%{POSINT:destination.port:long} \\(%{WORD:haproxy.frontend_name}/%{WORD:haproxy.mode}\\)",
      "(%{NOTSPACE:process.name}\\[%{NUMBER:process.pid:long}\\]: )?(%{IP:source.address}|-):%{NUMBER:source.port:long} \\[%{NOTSPACE:haproxy.request_date}\\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/%{NOTSPACE:haproxy.server_name} (%{IPORHOST:destination.address} )?%{NUMBER:haproxy.http.request.time_wait_ms:long}/%{NUMBER:haproxy.total_waiting_time_ms:long}/%{NUMBER:haproxy.connection_wait_time_ms:long}/%{NUMBER:haproxy.http.request.time_wait_without_data_ms:long}/%{NUMBER:temp.duration:long} %{NUMBER:http.response.status_code:long} %{NUMBER:haproxy.bytes_read:long} %{NOTSPACE:haproxy.http.request.captured_cookie} %{NOTSPACE:haproxy.http.response.captured_cookie} %{NOTSPACE:haproxy.termination_state} %{NUMBER:haproxy.connections.active:long}/%{NUMBER:haproxy.connections.frontend:long}/%{NUMBER:haproxy.connections.backend:long}/%{NUMBER:haproxy.connections.server:long}/%{NUMBER:haproxy.connections.retries:long} %{NUMBER:haproxy.server_queue:long}/%{NUMBER:haproxy.backend_queue:long} (\\{%{DATA:haproxy.http.request.captured_headers}\\} \\{%{DATA:haproxy.http.response.captured_headers}\\} |\\{%{DATA}\\} )?\"%{GREEDYDATA:haproxy.http.request.raw_request_line}\"",
      "(%{NOTSPACE:process.name}\\[%{NUMBER:process.pid:long}\\]: )?(%{IP:source.address}|-):%{NUMBER:source.port:long} \\[%{NOTSPACE:haproxy.request_date}\\] %{NOTSPACE:haproxy.frontend_name}/%{NOTSPACE:haproxy.bind_name} %{GREEDYDATA:haproxy.error_message}",
      "%{HAPROXY_DATE} %{IPORHOST:haproxy.source} (%{NOTSPACE:process.name}\\[%{NUMBER:process.pid:long}\\]: )?(%{IP:source.address}|-):%{NUMBER:source.port:long} \\[%{NOTSPACE:haproxy.request_date}\\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/%{NOTSPACE:haproxy.server_name} %{NUMBER:haproxy.total_waiting_time_ms:long}/%{NUMBER:haproxy.connection_wait_time_ms:long}/%{NUMBER:temp.duration:long} %{NUMBER:haproxy.bytes_read:long} %{NOTSPACE:haproxy.termination_state} %{NUMBER:haproxy.connections.active:long}/%{NUMBER:haproxy.connections.frontend:long}/%{NUMBER:haproxy.connections.backend:long}/%{NUMBER:haproxy.connections.server:long}/%{NUMBER:haproxy.connections.retries:long} %{NUMBER:haproxy.server_queue:long}/%{NUMBER:haproxy.backend_queue:long}"
    ],
    "ignore_missing": false,
    "pattern_definitions": {
      "HAPROXY_DATE": "(%{MONTHDAY}[/-]%{MONTH}[/-]%{YEAR}:%{HOUR}:%{MINUTE}:%{SECOND})|%{SYSLOGTIMESTAMP}"
    }
  }
}
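
The failure can be reproduced outside Filebeat by running the raw message through the module's ingest pipeline with the Simulate Pipeline API in Kibana Dev Tools. A minimal sketch, assuming the pipeline id follows Filebeat's usual naming (filebeat-7.17.8-haproxy-log-pipeline is a guess; check GET _ingest/pipeline for the exact id in your cluster). The simulated document is missing some fields Filebeat normally adds, but it is enough to surface the grok error:

POST _ingest/pipeline/filebeat-7.17.8-haproxy-log-pipeline/_simulate
{
  "docs": [
    {
      "_source": {
        "message": "2023-01-25T07:23:36-08:00 127.0.0.1 haproxy[2284158]: X.X.X.X:5540 [25/Jan/2023:07:23:36.430] front-https-servers back-https-servers/server1 1/0/10 9261 -- 2/1/0/0/0 0/0"
      }
    }
  ]
}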

Solution

  • TL;DR

    I think your logs do not use a standard HAProxy log format. To be able to parse them, you will need to tweak the grok pattern.

    The following grok pattern should work:

    %{TIMESTAMP_ISO8601:haproxy.request_date} %{IPORHOST:haproxy.source} (%{NOTSPACE:process.name}\[%{NUMBER:process.pid:long}\]: )?(%{IP:source.address}|-):%{NUMBER:source.port:long} \[%{NOTSPACE:haproxy.request_date}\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/%{NOTSPACE:haproxy.server_name} %{NUMBER:haproxy.total_waiting_time_ms:long}/%{NUMBER:haproxy.connection_wait_time_ms:long}/%{NUMBER:temp.duration:long} %{NUMBER:haproxy.bytes_read:long} %{NOTSPACE:haproxy.termination_state} %{NUMBER:haproxy.connections.active:long}/%{NUMBER:haproxy.connections.frontend:long}/%{NUMBER:haproxy.connections.backend:long}/%{NUMBER:haproxy.connections.server:long}/%{NUMBER:haproxy.connections.retries:long} %{NUMBER:haproxy.server_queue:long}/%{NUMBER:haproxy.backend_queue:long}
    

    I tested it on the following logs:

    2023-01-24T23:02:27-08:00 127.0.0.1 haproxy[2284158]: 1.1.1.1:2122 [24/Jan/2023:23:02:22.111] front-https-servers back-https-servers/server1 1/0/5836 8054 -- 3/1/0/0/0 0/0
    2023-01-24T23:02:28-08:00 127.0.0.1 haproxy[2284158]: 1.1.1.1:43940 [24/Jan/2023:23:02:26.223] front-ssh-servers back-ssh-servers/server1 1/0/2724 5617 -- 2/1/0/0/0 0/0
    

    The IPs that were X.X.X.X in your sample have been replaced with placeholder values here; I assume your actual logs contain proper IP addresses and not the X's.
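
    If you want to check the pattern yourself before touching the module's ingest pipeline, the same test can be reproduced with the Simulate Pipeline API and an inline grok processor. A minimal sketch: the pattern and the two sample lines are copied from above (with backslashes doubled for JSON); everything else is just test scaffolding:

    POST _ingest/pipeline/_simulate
    {
      "pipeline": {
        "processors": [
          {
            "grok": {
              "field": "message",
              "patterns": [
                "%{TIMESTAMP_ISO8601:haproxy.request_date} %{IPORHOST:haproxy.source} (%{NOTSPACE:process.name}\\[%{NUMBER:process.pid:long}\\]: )?(%{IP:source.address}|-):%{NUMBER:source.port:long} \\[%{NOTSPACE:haproxy.request_date}\\] %{NOTSPACE:haproxy.frontend_name} %{NOTSPACE:haproxy.backend_name}/%{NOTSPACE:haproxy.server_name} %{NUMBER:haproxy.total_waiting_time_ms:long}/%{NUMBER:haproxy.connection_wait_time_ms:long}/%{NUMBER:temp.duration:long} %{NUMBER:haproxy.bytes_read:long} %{NOTSPACE:haproxy.termination_state} %{NUMBER:haproxy.connections.active:long}/%{NUMBER:haproxy.connections.frontend:long}/%{NUMBER:haproxy.connections.backend:long}/%{NUMBER:haproxy.connections.server:long}/%{NUMBER:haproxy.connections.retries:long} %{NUMBER:haproxy.server_queue:long}/%{NUMBER:haproxy.backend_queue:long}"
              ]
            }
          }
        ]
      },
      "docs": [
        { "_source": { "message": "2023-01-24T23:02:27-08:00 127.0.0.1 haproxy[2284158]: 1.1.1.1:2122 [24/Jan/2023:23:02:22.111] front-https-servers back-https-servers/server1 1/0/5836 8054 -- 3/1/0/0/0 0/0" } },
        { "_source": { "message": "2023-01-24T23:02:28-08:00 127.0.0.1 haproxy[2284158]: 1.1.1.1:43940 [24/Jan/2023:23:02:26.223] front-ssh-servers back-ssh-servers/server1 1/0/2724 5617 -- 2/1/0/0/0 0/0" } }
      ]
    }

    If both documents come back with the haproxy.* fields populated instead of an error.message, the pattern can then be added to the patterns list of the grok processor in the Filebeat haproxy ingest pipeline.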