Search code examples
logging, devops, fluentd

How to define hash (JSON or object) in Fluentd configuration?


I have a problem with formatting labels for the Google Cloud plugin for Fluentd. I want to set the logging.googleapis.com/labels field with my own labels. This is what the Google Cloud plugin documentation says about this field:

The value of this field should be a structured record.

Fluentd documentation mentions that there is a hash data type:

hash: the field is parsed as a JSON object. It also supports the shorthand syntax. These are the same values:

  • normal: {"key1": "value1", "key2": "value2"}
  • shorthand: key1:value1,key2:value2

So, I tried this (values wrapped in double quotes, with ${...} placeholders):

# Attempt 1: each label value is a double-quoted JSON string that itself
# contains double quotes (record.dig("kubernetes", ...)). The inner quotes end
# the JSON string early, so the value of logging.googleapis.com/labels is not
# a valid JSON object.
<filter app.**>
  @type record_transformer

  renew_record true

  <record>
    severity ${record["severity"]}
    message ${record["message"]}
    logging.googleapis.com/trace ${record["trace"]}
    logging.googleapis.com/spanId ${record["spanId"]}
    logging.googleapis.com/labels {
      "kubernetes-host": "${record.dig("kubernetes", "host")}",
      "kubernetes-pod-name": "${record.dig("kubernetes", "pod_name")}",
      "kubernetes-pod-id": "${record.dig("kubernetes", "pod_id")}",
      "kubernetes-pod-ip": "${record.dig("kubernetes", "pod_ip")}",
      "kubernetes-container-name": "${record.dig("kubernetes", "container_name")}",
      "kubernetes-namespace-name": "${record.dig("kubernetes", "namespace_name")}",
      "kubernetes-namespace-id": "${record.dig("kubernetes", "namespace_id")}"
    }
  </record>
</filter>

I also tried this (no quotes around the values, but still with ${...} placeholders):

# Attempt 2: the label values are bare ${...} placeholders with no surrounding
# quotes. An unquoted ${...} token is not a JSON value, so the value of
# logging.googleapis.com/labels is not a valid JSON object.
<filter app.**>
  @type record_transformer

  renew_record true

  <record>
    severity ${record["severity"]}
    message ${record["message"]}
    logging.googleapis.com/trace ${record["trace"]}
    logging.googleapis.com/spanId ${record["spanId"]}
    logging.googleapis.com/labels {
      "kubernetes-host": ${record.dig("kubernetes", "host")},
      "kubernetes-pod-name": ${record.dig("kubernetes", "pod_name")},
      "kubernetes-pod-id": ${record.dig("kubernetes", "pod_id")},
      "kubernetes-pod-ip": ${record.dig("kubernetes", "pod_ip")},
      "kubernetes-container-name": ${record.dig("kubernetes", "container_name")},
      "kubernetes-namespace-name": ${record.dig("kubernetes", "namespace_name")},
      "kubernetes-namespace-id": ${record.dig("kubernetes", "namespace_id")}
    }
  </record>
</filter>

I also tried this (no quotes around the values and no ${...} placeholders):

# Attempt 3: the label values are bare record.dig(...) calls with neither
# quotes nor ${...}. A bare call is not a JSON value, so the value of
# logging.googleapis.com/labels is not a valid JSON object.
# NOTE(review): without ${...} the call would presumably not be evaluated as
# an expression even if it parsed - confirm.
<filter app.**>
  @type record_transformer

  renew_record true

  <record>
    severity ${record["severity"]}
    message ${record["message"]}
    logging.googleapis.com/trace ${record["trace"]}
    logging.googleapis.com/spanId ${record["spanId"]}
    logging.googleapis.com/labels {
      "kubernetes-host": record.dig("kubernetes", "host"),
      "kubernetes-pod-name": record.dig("kubernetes", "pod_name"),
      "kubernetes-pod-id": record.dig("kubernetes", "pod_id"),
      "kubernetes-pod-ip": record.dig("kubernetes", "pod_ip"),
      "kubernetes-container-name": record.dig("kubernetes", "container_name"),
      "kubernetes-namespace-name": record.dig("kubernetes", "namespace_name"),
      "kubernetes-namespace-id": record.dig("kubernetes", "namespace_id")
    }
  </record>
</filter>

But each time Fluentd failed with the following error:

/opt/bitnami/fluentd/gems/fluentd-1.12.0/lib/fluent/config/basic_parser.rb:92:in `parse_error!': got incomplete JSON hash configuration at fluentd.conf line 92,9 (Fluent::ConfigParseError)

My entire config (the 92nd line from the error message is the last line of the config, the one with </match>):

# Ignore fluentd own events
<match fluent.**>
  @type null
</match>

# TCP input to receive forwarded logs
<source>
  @type forward

  bind 0.0.0.0
  port 24224
</source>

# HTTP input for the liveness and readiness probes
<source>
  @type http

  bind 0.0.0.0
  port 9880
</source>

# Throw the healthcheck to the standard output instead of forwarding it
<match fluentd.healthcheck>
  @type stdout
</match>

<filter kubernetes.var.log.containers.**.log>
  @type grep

  <regexp>
    key log
    pattern /^\[(?<logtime>[0-9T:.+-]+)\] \[(?<trace>.*?)\/(?<spanId>.*?)\] \[(?<channel>.+?)\] (?<severity>[A-Z]+): (?<message>.+)$/
  </regexp>
</filter>

<filter kubernetes.var.log.containers.**.log>
  @type parser

  key_name log

  <parse>
    @type regexp

    expression /^\[(?<logtime>[0-9T:.+-]+)\] \[(?<trace>.*?)\/(?<spanId>.*?)\] \[(?<channel>.+?)\] (?<severity>[A-Z]+): (?<message>.+)$/
    time_key logtime
    time_format %Y-%m-%dT%H:%M:%S.%L%z
  </parse>
</filter>

<match kubernetes.var.log.containers.**.log>
  @type rewrite_tag_filter

  <rule>
    key channel
    pattern /^(\w+)$/
    tag app.$1
  </rule>
</match>

<filter app.**>
  @type record_transformer

  renew_record true

  <record>
    severity ${record["severity"]}
    message ${record["message"]}
    logging.googleapis.com/trace ${record["trace"]}
    logging.googleapis.com/spanId ${record["spanId"]}
    logging.googleapis.com/labels {
      "kubernetes-host": record.dig("kubernetes", "host"),
      "kubernetes-pod-name": record.dig("kubernetes", "pod_name"),
      "kubernetes-pod-id": record.dig("kubernetes", "pod_id"),
      "kubernetes-pod-ip": record.dig("kubernetes", "pod_ip"),
      "kubernetes-container-name": record.dig("kubernetes", "container_name"),
      "kubernetes-namespace-name": record.dig("kubernetes", "namespace_name"),
      "kubernetes-namespace-id": record.dig("kubernetes", "namespace_id")
    }
  </record>
</filter>

<match app.**>
  @type google_cloud

  autoformat_stackdriver_trace true

  <inject>
    time_key time
    time_type string
    time_format %Y-%m-%dT%H:%M:%S.%NZ
  </inject>
</match>

How do I define this value properly? I haven't found any examples of either using the hash data type (with Ruby expressions as values) or using the logging.googleapis.com/labels property in the Google Cloud plugin.


Solution

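  • I just tested this (single quotes for the Ruby string arguments inside the double-quoted JSON values, so the inner quotes no longer end the JSON string early):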

    # Working variant: the labels value is a JSON object whose values are
    # double-quoted JSON strings, each holding a ${...} placeholder. The Ruby
    # string arguments inside the placeholder use single quotes, so they do not
    # terminate the surrounding double-quoted JSON string.
    # NOTE(review): record.dig(...) is a Ruby method call; record_transformer
    # presumably needs "enable_ruby true" to evaluate it at runtime, which a
    # syntax-only --dry-run would not reveal - confirm.
    <filter app.**>
      @type record_transformer
    
      renew_record true
    
      <record>
        severity ${record["severity"]}
        message ${record["message"]}
        logging.googleapis.com/trace ${record["trace"]}
        logging.googleapis.com/spanId ${record["spanId"]}
    
        logging.googleapis.com/labels {
          "kubernetes-host": "${record.dig('kubernetes', 'host')}",
          "kubernetes-pod-name": "${record.dig('kubernetes', 'pod_name')}",
          "kubernetes-pod-id": "${record.dig('kubernetes', 'pod_id')}",
          "kubernetes-pod-ip": "${record.dig('kubernetes', 'pod_ip')}",
          "kubernetes-container-name": "${record.dig('kubernetes', 'container_name')}",
          "kubernetes-namespace-name": "${record.dig('kubernetes', 'namespace_name')}",
          "kubernetes-namespace-id": "${record.dig('kubernetes', 'namespace_id')}"
        }
      </record>
    </filter>
    

    And here is the output with --dry-run (with the config for unavailable plugins commented out):

    $ fluentd -c ./fluent-hash-test.conf --dry-run
    2021-01-14 17:57:13 +0500 [info]: parsing config file is succeeded path="./fluent-hash-test.conf"
    ...
    2021-01-14 17:57:13 +0500 [info]: using configuration file: <ROOT>
      <source>
        @type http
        bind "0.0.0.0"
        port 9880
      </source>
      <filter app.**>
        @type record_transformer
        renew_record true
        <record>
          severity ${record["severity"]}
          message ${record["message"]}
          logging.googleapis.com/trace ${record["trace"]}
          logging.googleapis.com/spanId ${record["spanId"]}
          logging.googleapis.com/labels {"kubernetes-host":"${record.dig('kubernetes', 'host')}","kubernetes-pod-name":"${record.dig('kubernetes', 'pod_name')}","kubernetes-pod-id":"${record.dig('kubernetes', 'pod_id')}","kubernetes-pod-ip":"${record.dig('kubernetes', 'pod_ip')}","kubernetes-container-name":"${record.dig('kubernetes', 'container_name')}","kubernetes-namespace-name":"${record.dig('kubernetes', 'namespace_name')}","kubernetes-namespace-id":"${record.dig('kubernetes', 'namespace_id')}"}
        </record>
      </filter>
    </ROOT>
    2021-01-14 17:57:13 +0500 [info]: finished dry run mode