I'm trying to get metrics from a Spring Boot application into my Prometheus Operator. Environment: EKS v1.18, kube-prometheus-stack chart version 12.12.1 (appVersion 0.44.0).
I checked, and the application is indeed exposing the metrics via the endpoint:
http://myloadbalancer/internal-gateway/actuator/prometheus
# HELP system_cpu_usage The "recent cpu usage" for the whole system
# TYPE system_cpu_usage gauge
system_cpu_usage 0.013852972596312008
# HELP process_cpu_usage The "recent cpu usage" for the Java Virtual Machine process
# TYPE process_cpu_usage gauge
process_cpu_usage 0.0
# HELP jvm_gc_pause_seconds Time spent in GC pause
# TYPE jvm_gc_pause_seconds summary
jvm_gc_pause_seconds_count{action="end of major GC",cause="Allocation Failure",} 4.0
jvm_gc_pause_seconds_sum{action="end of major GC",cause="Allocation Failure",} 0.922
jvm_gc_pause_seconds_count{action="end of minor GC",cause="Allocation Failure",} 235.0
jvm_gc_pause_seconds_sum{action="end of minor GC",cause="Allocation Failure",} 2.584
# HELP jvm_gc_pause_seconds_max Time spent in GC pause
# TYPE jvm_gc_pause_seconds_max gauge
jvm_gc_pause_seconds_max{action="end of major GC",cause="Allocation Failure",} 0.0
jvm_gc_pause_seconds_max{action="end of minor GC",cause="Allocation Failure",} 0.0
# HELP jvm_gc_memory_allocated_bytes_total Incremented for an increase in the size of the young generation memory pool after one GC to before the next
# TYPE jvm_gc_memory_allocated_bytes_total counter
jvm_gc_memory_allocated_bytes_total 8.888016704E9
# HELP tomcat_sessions_active_current_sessions
# TYPE tomcat_sessions_active_current_sessions gauge
tomcat_sessions_active_current_sessions 0.0
# HELP tomcat_sessions_alive_max_seconds
# TYPE tomcat_sessions_alive_max_seconds gauge
tomcat_sessions_alive_max_seconds 0.0
# HELP jvm_gc_memory_promoted_bytes_total Count of positive increases in the size of the old generation memory pool before GC to after GC
# TYPE jvm_gc_memory_promoted_bytes_total counter
jvm_gc_memory_promoted_bytes_total 1.13497864E8
# HELP jvm_buffer_memory_used_bytes An estimate of the memory that the Java virtual machine is using for this buffer pool
# TYPE jvm_buffer_memory_used_bytes gauge
jvm_buffer_memory_used_bytes{id="mapped",} 0.0
jvm_buffer_memory_used_bytes{id="direct",} 509649.0
# HELP system_cpu_count The number of processors available to the Java virtual machine
# TYPE system_cpu_count gauge
system_cpu_count 1.0
# HELP tomcat_sessions_created_sessions_total
# TYPE tomcat_sessions_created_sessions_total counter
tomcat_sessions_created_sessions_total 0.0
# HELP jvm_gc_live_data_size_bytes Size of old generation memory pool after a full GC
# TYPE jvm_gc_live_data_size_bytes gauge
jvm_gc_live_data_size_bytes 8.5375192E7
# HELP jvm_classes_unloaded_classes_total The total number of classes unloaded since the Java virtual machine has started execution
# TYPE jvm_classes_unloaded_classes_total counter
jvm_classes_unloaded_classes_total 199.0
# HELP tomcat_sessions_active_max_sessions
# TYPE tomcat_sessions_active_max_sessions gauge
tomcat_sessions_active_max_sessions 0.0
# HELP process_files_open_files The open file descriptor count
# TYPE process_files_open_files gauge
process_files_open_files 66.0
# HELP logback_events_total Number of error level events that made it to the logs
# TYPE logback_events_total counter
logback_events_total{level="warn",} 2.0
logback_events_total{level="debug",} 0.0
logback_events_total{level="error",} 0.0
logback_events_total{level="trace",} 0.0
logback_events_total{level="info",} 443.0
# HELP jvm_gc_max_data_size_bytes Max size of old generation memory pool
# TYPE jvm_gc_max_data_size_bytes gauge
jvm_gc_max_data_size_bytes 5.36870912E8
# HELP jvm_buffer_count_buffers An estimate of the number of buffers in the pool
# TYPE jvm_buffer_count_buffers gauge
jvm_buffer_count_buffers{id="mapped",} 0.0
jvm_buffer_count_buffers{id="direct",} 18.0
# HELP jvm_buffer_total_capacity_bytes An estimate of the total capacity of the buffers in this pool
# TYPE jvm_buffer_total_capacity_bytes gauge
jvm_buffer_total_capacity_bytes{id="mapped",} 0.0
jvm_buffer_total_capacity_bytes{id="direct",} 509649.0
# HELP jvm_memory_committed_bytes The amount of memory in bytes that is committed for the Java virtual machine to use
# TYPE jvm_memory_committed_bytes gauge
jvm_memory_committed_bytes{area="heap",id="Tenured Gen",} 1.4229504E8
jvm_memory_committed_bytes{area="nonheap",id="CodeHeap 'profiled nmethods'",} 2.9229056E7
jvm_memory_committed_bytes{area="heap",id="Eden Space",} 5.7081856E7
jvm_memory_committed_bytes{area="nonheap",id="Metaspace",} 1.01359616E8
jvm_memory_committed_bytes{area="nonheap",id="CodeHeap 'non-nmethods'",} 2555904.0
jvm_memory_committed_bytes{area="heap",id="Survivor Space",} 7077888.0
jvm_memory_committed_bytes{area="nonheap",id="Compressed Class Space",} 1.31072E7
jvm_memory_committed_bytes{area="nonheap",id="CodeHeap 'non-profiled nmethods'",} 1.1599872E7
# HELP spring_kafka_listener_seconds_max Kafka Listener Timer
# TYPE spring_kafka_listener_seconds_max gauge
spring_kafka_listener_seconds_max{exception="ListenerExecutionFailedException",name="fgMessageConsumer-0",result="failure",} 0.0
spring_kafka_listener_seconds_max{exception="none",name="fgMessageConsumer-0",result="success",} 0.0
# HELP spring_kafka_listener_seconds Kafka Listener Timer
# TYPE spring_kafka_listener_seconds summary
spring_kafka_listener_seconds_count{exception="ListenerExecutionFailedException",name="fgMessageConsumer-0",result="failure",} 0.0
spring_kafka_listener_seconds_sum{exception="ListenerExecutionFailedException",name="fgMessageConsumer-0",result="failure",} 0.0
spring_kafka_listener_seconds_count{exception="none",name="fgMessageConsumer-0",result="success",} 9.0
spring_kafka_listener_seconds_sum{exception="none",name="fgMessageConsumer-0",result="success",} 16.017111464
# HELP jvm_memory_max_bytes The maximum amount of memory in bytes that can be used for memory management
# TYPE jvm_memory_max_bytes gauge
jvm_memory_max_bytes{area="heap",id="Tenured Gen",} 5.36870912E8
jvm_memory_max_bytes{area="nonheap",id="CodeHeap 'profiled nmethods'",} 1.22912768E8
jvm_memory_max_bytes{area="heap",id="Eden Space",} 2.14827008E8
jvm_memory_max_bytes{area="nonheap",id="Metaspace",} -1.0
jvm_memory_max_bytes{area="nonheap",id="CodeHeap 'non-nmethods'",} 5828608.0
jvm_memory_max_bytes{area="heap",id="Survivor Space",} 2.6804224E7
jvm_memory_max_bytes{area="nonheap",id="Compressed Class Space",} 1.073741824E9
jvm_memory_max_bytes{area="nonheap",id="CodeHeap 'non-profiled nmethods'",} 1.22916864E8
# HELP jvm_memory_used_bytes The amount of used memory
# TYPE jvm_memory_used_bytes gauge
jvm_memory_used_bytes{area="heap",id="Tenured Gen",} 8.6654784E7
jvm_memory_used_bytes{area="nonheap",id="CodeHeap 'profiled nmethods'",} 2.382144E7
jvm_memory_used_bytes{area="heap",id="Eden Space",} 7444976.0
jvm_memory_used_bytes{area="nonheap",id="Metaspace",} 9.7431448E7
jvm_memory_used_bytes{area="nonheap",id="CodeHeap 'non-nmethods'",} 1346432.0
jvm_memory_used_bytes{area="heap",id="Survivor Space",} 571600.0
jvm_memory_used_bytes{area="nonheap",id="Compressed Class Space",} 1.1687056E7
jvm_memory_used_bytes{area="nonheap",id="CodeHeap 'non-profiled nmethods'",} 1.1500544E7
# HELP jvm_classes_loaded_classes The number of classes that are currently loaded in the Java virtual machine
# TYPE jvm_classes_loaded_classes gauge
jvm_classes_loaded_classes 16917.0
# HELP tomcat_sessions_rejected_sessions_total
# TYPE tomcat_sessions_rejected_sessions_total counter
tomcat_sessions_rejected_sessions_total 0.0
# HELP process_start_time_seconds Start time of the process since unix epoch.
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1.616689221264E9
# HELP jvm_threads_peak_threads The peak live thread count since the Java virtual machine started or peak was reset
# TYPE jvm_threads_peak_threads gauge
jvm_threads_peak_threads 37.0
# HELP jvm_threads_live_threads The current number of live threads including both daemon and non-daemon threads
# TYPE jvm_threads_live_threads gauge
jvm_threads_live_threads 36.0
# HELP system_load_average_1m The sum of the number of runnable entities queued to available processors and the number of runnable entities running on the available processors averaged over a period of time
# TYPE system_load_average_1m gauge
system_load_average_1m 0.0
# HELP jvm_threads_daemon_threads The current number of live daemon threads
# TYPE jvm_threads_daemon_threads gauge
jvm_threads_daemon_threads 30.0
# HELP tomcat_sessions_expired_sessions_total
# TYPE tomcat_sessions_expired_sessions_total counter
tomcat_sessions_expired_sessions_total 0.0
# HELP jvm_threads_states_threads The current number of threads having NEW state
# TYPE jvm_threads_states_threads gauge
jvm_threads_states_threads{state="runnable",} 10.0
jvm_threads_states_threads{state="blocked",} 0.0
jvm_threads_states_threads{state="waiting",} 17.0
jvm_threads_states_threads{state="timed-waiting",} 9.0
jvm_threads_states_threads{state="new",} 0.0
jvm_threads_states_threads{state="terminated",} 0.0
# HELP process_uptime_seconds The uptime of the Java virtual machine
# TYPE process_uptime_seconds gauge
process_uptime_seconds 45380.981
# HELP http_server_requests_seconds
# TYPE http_server_requests_seconds summary
http_server_requests_seconds_count{exception="None",method="GET",outcome="SUCCESS",status="200",uri="/actuator/health",} 6032.0
http_server_requests_seconds_sum{exception="None",method="GET",outcome="SUCCESS",status="200",uri="/actuator/health",} 5.492759869
# HELP http_server_requests_seconds_max
# TYPE http_server_requests_seconds_max gauge
http_server_requests_seconds_max{exception="None",method="GET",outcome="SUCCESS",status="200",uri="/actuator/health",} 7.97605E-4
# HELP process_files_max_files The maximum file descriptor count
# TYPE process_files_max_files gauge
process_files_max_files 1048576.0
So it's all good on the application's end.
This is my ServiceMonitor:
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: internal-gateway-service-monitor
  labels:
    release: kube-prometheus-stack
spec:
  selector:
    matchLabels:
      app: internal-gateway
  endpoints:
    - port: http
      path: '/actuator/prometheus'
      interval: 10s
      honorLabels: true
This is my Service:
apiVersion: v1
kind: Service
metadata:
  annotations:
    meta.helm.sh/release-name: perf4-backend
    meta.helm.sh/release-namespace: perf4
  creationTimestamp: "2021-03-23T13:00:47Z"
  labels:
    app.kubernetes.io/managed-by: Helm
  managedFields:
    - apiVersion: v1
      fieldsType: FieldsV1
      fieldsV1:
        f:metadata:
          f:annotations:
            .: {}
            f:meta.helm.sh/release-name: {}
            f:meta.helm.sh/release-namespace: {}
          f:labels:
            .: {}
            f:app.kubernetes.io/managed-by: {}
        f:spec:
          f:externalTrafficPolicy: {}
          f:ports:
            .: {}
            k:{"port":80,"protocol":"TCP"}:
              .: {}
              f:name: {}
              f:port: {}
              f:protocol: {}
              f:targetPort: {}
          f:selector:
            .: {}
            f:app: {}
          f:sessionAffinity: {}
          f:type: {}
      manager: Go-http-client
      operation: Update
      time: "2021-03-23T13:00:47Z"
  name: internal-gateway
  namespace: perf4
  resourceVersion: "18659"
  selfLink: /api/v1/namespaces/perf4/services/internal-gateway
  uid: 75f89f23-d76e-4701-80f9-a029ce0f1153
spec:
  clusterIP: 172.20.105.66
  externalTrafficPolicy: Cluster
  ports:
    - name: http
      nodePort: 31500
      port: 80
      protocol: TCP
      targetPort: 8070
  selector:
    app: internal-gateway
  sessionAffinity: None
  type: NodePort
status:
  loadBalancer: {}
This is my Pod YAML (unnecessary fields removed):
apiVersion: v1
kind: Pod
metadata:
  annotations:
    cluster-autoscaler.kubernetes.io/safe-to-evict: "false"
    kubernetes.io/psp: eks.privileged
  generateName: fg-internal-gateway-deployment-76cd98ccd8-
  labels:
    app: internal-gateway
    pod-template-hash: 76cd98ccd8
    version: "92095"
  name: fg-internal-gateway-deployment-76cd98ccd8-ksmgt
  namespace: perf4
  ownerReferences:
    - apiVersion: apps/v1
      blockOwnerDeletion: true
      controller: true
      kind: ReplicaSet
      name: fg-internal-gateway-deployment-76cd98ccd8
      uid: 69301225-d013-47e4-a126-b525f39ce608
  resourceVersion: "801092"
  selfLink: /api/v1/namespaces/perf4/pods/fg-internal-gateway-deployment-76cd98ccd8-ksmgt
  uid: 5fedee50-b572-4949-8055-9e58a7053b6a
spec:
  containers:
    - image:
      imagePullPolicy: Always
      livenessProbe:
        failureThreshold: 3
        httpGet:
          path: /actuator/health
          port: 8070
          scheme: HTTP
        initialDelaySeconds: 140
        periodSeconds: 15
        successThreshold: 1
        timeoutSeconds: 1
      name: internal-gateway
      ports:
        - containerPort: 8070
          protocol: TCP
      readinessProbe:
        failureThreshold: 3
        httpGet:
          path: /actuator/health
          port: 8070
          scheme: HTTP
        initialDelaySeconds: 140
        periodSeconds: 15
        successThreshold: 1
        timeoutSeconds: 1
      resources:
        limits:
          cpu: "1"
          memory: 3Gi
        requests:
          cpu: "1"
          memory: 3Gi
      terminationMessagePath: /dev/termination-log
      terminationMessagePolicy: File
      volumeMounts:
        - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
          name: default-token-vcnjm
          readOnly: true
  dnsPolicy: ClusterFirst
  enableServiceLinks: true
  nodeName:
  nodeSelector:
    role: fgworkers
  priority: 0
  restartPolicy: Always
  schedulerName: default-scheduler
  securityContext: {}
  serviceAccount: default
  serviceAccountName: default
  terminationGracePeriodSeconds: 30
  tolerations:
    - key: gated
      operator: Equal
      value: "true"
    - key: preprod
      operator: Equal
      value: "true"
    - key: staging
      operator: Equal
      value: "true"
    - key: fgworkers
      operator: Equal
      value: "true"
    - effect: NoExecute
      key: node.kubernetes.io/not-ready
      operator: Exists
      tolerationSeconds: 300
    - effect: NoExecute
      key: node.kubernetes.io/unreachable
      operator: Exists
      tolerationSeconds: 300
  volumes:
    - name: default-token-vcnjm
      secret:
        defaultMode: 420
        secretName: default-token-vcnjm
status:
  conditions:
    - lastProbeTime: null
      lastTransitionTime: "2021-03-25T14:42:35Z"
      status: "True"
      type: Initialized
    - lastProbeTime: null
      lastTransitionTime: "2021-03-25T14:45:14Z"
      status: "True"
      type: Ready
    - lastProbeTime: null
      lastTransitionTime: "2021-03-25T14:45:14Z"
      status: "True"
      type: ContainersReady
    - lastProbeTime: null
      lastTransitionTime: "2021-03-25T14:42:35Z"
      status: "True"
      type: PodScheduled
  containerStatuses:
    - containerID:
      image:
      imageID:
      lastState: {}
      name: internal-gateway
      ready: true
      restartCount: 0
      started: true
      state:
        running:
          startedAt: "2021-03-25T14:42:41Z"
  hostIP:
  phase: Running
  podIP:
  podIPs:
    - ip:
  qosClass: Guaranteed
  startTime: "2021-03-25T14:42:35Z"
I used the label `app: internal-gateway`, the same as in my Pod spec.
This is what I'm getting in Prometheus.
What could the issue be?
The problem is that the ServiceMonitor can't find your Service: the `selector` in the ServiceMonitor definition does not match any label on the Service. Note that a ServiceMonitor selects **Services** by their labels, not Pods — your Service only carries `app.kubernetes.io/managed-by: Helm`, while the ServiceMonitor is looking for `app: internal-gateway`.
Solution: add a label to the Service's metadata that matches the `matchLabels` of your ServiceMonitor, like this:
apiVersion: v1
kind: Service
metadata:
  annotations:
    meta.helm.sh/release-name: perf4-backend
    meta.helm.sh/release-namespace: perf4
  creationTimestamp: "2021-03-23T13:00:47Z"
  labels:
    app: internal-gateway