Search code examples
kubernetesprometheus

there are no state metrics namespace_workload_pod and workload_type


there are no state metrics namespace_workload_pod and workload_type setting up load monitoring on a kubernetes cluster, using prometheus(vmagent) how do I get such metrics? who gives them away?


Solution

  • with Victoria metrics, I haven't finished the scheme yet, prometheus is also suitable for a stage cluster.

    I took the prometheus chart https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus

    and add to values and everything worked out

    # Recording-rules file content supplied through the chart values: one rule
    # group named k8s.rules whose entries follow under `rules`.
    # NOTE(review): presumably this key sits under the chart's `serverFiles`
    # section — confirm against the chart's values.yaml.
    recording_rules.yml:
     groups:
          - name: k8s.rules
            rules:
            # Per-container CPU usage rate over a 5m window (cAdvisor series), with the
            # `node` label joined in from a single kube_pod_info series per pod.
            - record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
              expr: |
                sum by (cluster, namespace, pod, container) (
                  rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m])
                ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
                  1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
                )
            # Container working-set memory (cAdvisor series), with the `node` label
            # joined in from a single kube_pod_info series per pod.
            - record: node_namespace_pod_container:container_memory_working_set_bytes
              expr: |
                container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
                * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
                  max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
                )
            # Container RSS memory (cAdvisor series), with the `node` label joined in
            # from a single kube_pod_info series per pod.
            - record: node_namespace_pod_container:container_memory_rss
              expr: |
                container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
                * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
                  max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
                )
            # Container page-cache memory (cAdvisor series), with the `node` label
            # joined in from a single kube_pod_info series per pod.
            - record: node_namespace_pod_container:container_memory_cache
              expr: |
                container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
                * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
                  max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
                )
            # Container swap usage (cAdvisor series), with the `node` label joined in
            # from a single kube_pod_info series per pod.
            - record: node_namespace_pod_container:container_memory_swap
              expr: |
                container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
                * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
                  max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
                )
            # Container memory requests, kept only for pods whose phase is Pending or
            # Running (the join multiplies by a 1-valued kube_pod_status_phase series).
            - record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests
              expr: |
                kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"}  * on (namespace, pod, cluster)
                group_left() max by (namespace, pod, cluster) (
                  (kube_pod_status_phase{phase=~"Pending|Running"} == 1)
                )
            # Per-namespace total of container memory requests, counting only pods in
            # phase Pending or Running; duplicates per container are collapsed with max.
            - record: namespace_memory:kube_pod_container_resource_requests:sum
              expr: |
                sum by (namespace, cluster) (
                    sum by (namespace, pod, cluster) (
                        max by (namespace, pod, container, cluster) (
                          kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"}
                        ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (
                          kube_pod_status_phase{phase=~"Pending|Running"} == 1
                        )
                    )
                )
            # Container CPU requests, kept only for pods whose phase is Pending or
            # Running (the join multiplies by a 1-valued kube_pod_status_phase series).
            - record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests
              expr: |
                kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"}  * on (namespace, pod, cluster)
                group_left() max by (namespace, pod, cluster) (
                  (kube_pod_status_phase{phase=~"Pending|Running"} == 1)
                )
            # Per-namespace total of container CPU requests, counting only pods in
            # phase Pending or Running; duplicates per container are collapsed with max.
            - record: namespace_cpu:kube_pod_container_resource_requests:sum
              expr: |
                sum by (namespace, cluster) (
                    sum by (namespace, pod, cluster) (
                        max by (namespace, pod, container, cluster) (
                          kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"}
                        ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (
                          kube_pod_status_phase{phase=~"Pending|Running"} == 1
                        )
                    )
                )
            # Container memory limits, kept only for pods whose phase is Pending or
            # Running (the join multiplies by a 1-valued kube_pod_status_phase series).
            - record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits
              expr: |
                kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"}  * on (namespace, pod, cluster)
                group_left() max by (namespace, pod, cluster) (
                  (kube_pod_status_phase{phase=~"Pending|Running"} == 1)
                )
            # Per-namespace total of container memory limits, counting only pods in
            # phase Pending or Running; duplicates per container are collapsed with max.
            - record: namespace_memory:kube_pod_container_resource_limits:sum
              expr: |
                sum by (namespace, cluster) (
                    sum by (namespace, pod, cluster) (
                        max by (namespace, pod, container, cluster) (
                          kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"}
                        ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (
                          kube_pod_status_phase{phase=~"Pending|Running"} == 1
                        )
                    )
                )
            # Container CPU limits, kept only for pods whose phase is Pending or
            # Running (the join multiplies by a 1-valued kube_pod_status_phase series).
            - record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits
              expr: |
                kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"}  * on (namespace, pod, cluster)
                group_left() max by (namespace, pod, cluster) (
                 (kube_pod_status_phase{phase=~"Pending|Running"} == 1)
                 )
            # Per-namespace total of container CPU limits, counting only pods in
            # phase Pending or Running; duplicates per container are collapsed with max.
            # The phase matcher is a fully anchored regex, so it must list exactly the
            # phase names (Pending|Running) for Running pods to be included.
            - expr: |
                sum by (namespace, cluster) (
                    sum by (namespace, pod, cluster) (
                        max by (namespace, pod, container, cluster) (
                          kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"}
                        ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (
                          kube_pod_status_phase{phase=~"Pending|Running"} == 1
                        )
                    )
                )
              record: namespace_cpu:kube_pod_container_resource_limits:sum
            # Pod -> workload mapping for ReplicaSet-owned pods. The pod's owner_name is
            # copied into `replicaset`, joined with kube_replicaset_owner to pick up the
            # ReplicaSet's own owner_name, and that value is copied into `workload`.
            # Every resulting series is tagged workload_type=deployment.
            - record: namespace_workload_pod:kube_pod_owner:relabel
              labels:
                workload_type: deployment
              expr: |
                max by (cluster, namespace, workload, pod) (
                  label_replace(
                    label_replace(
                      kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
                      "replicaset", "$1", "owner_name", "(.*)"
                    ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (
                      1, max by (replicaset, namespace, owner_name) (
                        kube_replicaset_owner{job="kube-state-metrics"}
                      )
                    ),
                    "workload", "$1", "owner_name", "(.*)"
                  )
                )
            # Pod -> workload mapping for DaemonSet-owned pods: owner_name is copied
            # into `workload`, and the series is tagged workload_type=daemonset.
            - record: namespace_workload_pod:kube_pod_owner:relabel
              labels:
                workload_type: daemonset
              expr: |
                max by (cluster, namespace, workload, pod) (
                  label_replace(
                    kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
                    "workload", "$1", "owner_name", "(.*)"
                  )
                )
            # Pod -> workload mapping for StatefulSet-owned pods: owner_name is copied
            # into `workload`, and the series is tagged workload_type=statefulset.
            - record: namespace_workload_pod:kube_pod_owner:relabel
              labels:
                workload_type: statefulset
              expr: |
                max by (cluster, namespace, workload, pod) (
                  label_replace(
                    kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
                    "workload", "$1", "owner_name", "(.*)"
                  )
                )
            # Pod -> workload mapping for Job-owned pods: owner_name is copied into
            # `workload`, and the series is tagged workload_type=job.
            - record: namespace_workload_pod:kube_pod_owner:relabel
              labels:
                workload_type: job
              expr: |
                max by (cluster, namespace, workload, pod) (
                  label_replace(
                    kube_pod_owner{job="kube-state-metrics", owner_kind="Job"},
                    "workload", "$1", "owner_name", "(.*)"
                  )
                )