--- kubernetes/apps/observability/kube-prometheus-stack/app Kustomization: observability/kube-prometheus-stack HelmRelease: observability/kube-prometheus-stack
+++ kubernetes/apps/observability/kube-prometheus-stack/app Kustomization: observability/kube-prometheus-stack HelmRelease: observability/kube-prometheus-stack
@@ -75,31 +75,54 @@
- pods=[*]
- deployments=[*]
- persistentvolumeclaims=[*]
prometheus:
monitor:
enabled: true
+ interval: 60s
+ metricRelabelings:
+ - action: drop
+ regex: kube_.*_owner
+ sourceLabels:
+ - __name__
+ - action: drop
+ regex: kube_.*(annotations|labels)
+ sourceLabels:
+ - __name__
+ - action: drop
+ regex: kube_.*_resource_version
+ sourceLabels:
+ - __name__
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node
+ scrapeTimeout: 55s
kubeApiServer:
enabled: true
serviceMonitor:
+ interval: 60s
metricRelabelings:
- action: drop
regex: (apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket
sourceLabels:
- __name__
- action: drop
regex: (apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket)
sourceLabels:
- __name__
+ - action: drop
+ regex: apiserver_request_duration_seconds_bucket;(0.001|0.002|0.004|0.008|0.016|0.032|0.064|0.128|0.256)
+ separator: ;
+ sourceLabels:
+ - __name__
+ - le
+ scrapeTimeout: 55s
kubeControllerManager:
enabled: true
endpoints:
- 192.168.69.110
kubeEtcd:
enabled: false
@@ -113,42 +136,83 @@
- 192.168.69.110
kubeStateMetrics:
enabled: true
kubelet:
enabled: true
serviceMonitor:
+ cAdvisor: true
+ cAdvisorMetricRelabelings:
+ - action: drop
+ regex: (container_tasks_state|container_memory_failures_total|container_memory_mapped_file)
+ sourceLabels:
+ - __name__
+ - action: drop
+ regex: (exited|dead)
+ sourceLabels:
+ - container_state
+ - action: drop
+ regex: (container_.*_duration_seconds_bucket);(0.005|0.01|0.025|0.05|0.075|2.5|7.5|15|30|45)
+ separator: ;
+ sourceLabels:
+ - __name__
+ - le
+ interval: 60s
metricRelabelings:
- action: labeldrop
regex: (uid)
- action: labeldrop
regex: (id|name)
+ - action: labeldrop
+ regex: (container_id|image_id)
- action: drop
regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count)
sourceLabels:
- __name__
+ - action: drop
+ regex: (container_memory_failures_total|container_memory_usage_bytes|container_network_tcp_usage_total)
+ sourceLabels:
+ - __name__
+ - action: drop
+ regex: (container_cpu_cfs_throttled_periods_total|container_cpu_cfs_throttled_seconds_total|container_cpu_cfs_periods_total|container_cpu_usage_seconds_total|container_fs_io_time_seconds_total|container_fs_io_time_weighted_seconds_total|container_fs_reads_bytes_total|container_fs_reads_total|container_fs_writes_bytes_total|container_fs_writes_total|container_last_seen|container_memory_cache|container_memory_rss|container_memory_swap|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_receive_packets_dropped_total|container_network_receive_packets_total|container_network_transmit_bytes_total|container_network_transmit_packets_dropped_total|container_network_transmit_packets_total)
+ sourceLabels:
+ - __name__
+ - action: drop
+ regex: (kubelet_runtime_operations_duration_seconds_bucket|kubelet_runtime_operations_errors_total)
+ sourceLabels:
+ - __name__
+ scrapeTimeout: 55s
nodeExporter:
enabled: true
prometheus:
prometheusSpec:
+ enableAdminAPI: true
+ enforcedLabelLimit: 50
+ enforcedLabelNameLengthLimit: 256
+ enforcedLabelValueLengthLimit: 2048
+ enforcedSampleLimit: 500000
+ enforcedTargetLimit: 500
+ evaluationInterval: 60s
externalUrl: https://prometheus.juno.moe
image:
registry: docker.io
repository: prompp/prompp
tag: 0.6.4
podMonitorSelectorNilUsesHelmValues: false
probeSelectorNilUsesHelmValues: false
+ queryLogFile: /prometheus/query.log
replicas: 1
resources:
limits:
memory: 4Gi
requests:
- cpu: 100m
+ cpu: 500m
retention: 8d
retentionSize: 23GB
ruleSelectorNilUsesHelmValues: false
scrapeConfigSelectorNilUsesHelmValues: false
+ scrapeInterval: 60s
securityContext:
fsGroup: 64535
runAsGroup: 64535
runAsNonRoot: true
runAsUser: 64535
serviceMonitorSelectorNilUsesHelmValues: false
@@ -157,12 +221,13 @@
spec:
resources:
requests:
storage: 25Gi
storageClassName: ceph-block
version: v2.55.1
+ walCompression: true
route:
main:
enabled: true
hostnames:
- prometheus.juno.moe
parentRefs:
@@ -175,14 +240,29 @@
enabled: false
prometheus-node-exporter:
fullnameOverride: node-exporter
prometheus:
monitor:
enabled: true
+ interval: 60s
+ metricRelabelings:
+ - action: drop
+ regex: node_filesystem_.*;/var/lib/kubelet/pods/.*
+ separator: ;
+ sourceLabels:
+ - __name__
+ - mountpoint
+ - action: drop
+ regex: node_network_.*;(veth.*|br.*|cilium.*|lxc.*)
+ separator: ;
+ sourceLabels:
+ - __name__
+ - device
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node
+ scrapeTimeout: 50s
--- HelmRelease: observability/kube-prometheus-stack Prometheus: observability/kube-prometheus-stack
+++ HelmRelease: observability/kube-prometheus-stack Prometheus: observability/kube-prometheus-stack
@@ -28,18 +28,20 @@
replicas: 1
shards: 1
logLevel: info
logFormat: logfmt
listenLocal: false
enableOTLPReceiver: false
- enableAdminAPI: false
+ enableAdminAPI: true
+ scrapeInterval: 60s
+ evaluationInterval: 60s
resources:
limits:
memory: 4Gi
requests:
- cpu: 100m
+ cpu: 500m
retention: 8d
retentionSize: 23GB
tsdb:
outOfOrderTimeWindow: 0s
walCompression: true
routePrefix: /
@@ -82,8 +84,14 @@
- prometheus
- key: app.kubernetes.io/instance
operator: In
values:
- kube-prometheus-stack
portName: http-web
+ queryLogFile: /prometheus/query.log
+ enforcedSampleLimit: 500000
+ enforcedTargetLimit: 500
+ enforcedLabelLimit: 50
+ enforcedLabelNameLengthLimit: 256
+ enforcedLabelValueLengthLimit: 2048
hostNetwork: false
--- HelmRelease: observability/kube-prometheus-stack ServiceMonitor: observability/kube-state-metrics
+++ HelmRelease: observability/kube-prometheus-stack ServiceMonitor: observability/kube-state-metrics
@@ -16,13 +16,28 @@
selector:
matchLabels:
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/instance: kube-prometheus-stack
endpoints:
- port: http
+ interval: 60s
+ scrapeTimeout: 55s
honorLabels: true
+ metricRelabelings:
+ - action: drop
+ regex: kube_.*_owner
+ sourceLabels:
+ - __name__
+ - action: drop
+ regex: kube_.*(annotations|labels)
+ sourceLabels:
+ - __name__
+ - action: drop
+ regex: kube_.*_resource_version
+ sourceLabels:
+ - __name__
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
--- HelmRelease: observability/kube-prometheus-stack ServiceMonitor: observability/node-exporter
+++ HelmRelease: observability/kube-prometheus-stack ServiceMonitor: observability/node-exporter
@@ -19,14 +19,29 @@
app.kubernetes.io/instance: kube-prometheus-stack
attachMetadata:
node: false
endpoints:
- port: http-metrics
scheme: http
+ interval: 60s
+ scrapeTimeout: 50s
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: kubernetes_node
+ metricRelabelings:
+ - action: drop
+ regex: node_filesystem_.*;/var/lib/kubelet/pods/.*
+ separator: ;
+ sourceLabels:
+ - __name__
+ - mountpoint
+ - action: drop
+ regex: node_network_.*;(veth.*|br.*|cilium.*|lxc.*)
+ separator: ;
+ sourceLabels:
+ - __name__
+ - device
--- HelmRelease: observability/kube-prometheus-stack ServiceMonitor: observability/kube-prometheus-stack-apiserver
+++ HelmRelease: observability/kube-prometheus-stack ServiceMonitor: observability/kube-prometheus-stack-apiserver
@@ -11,23 +11,30 @@
app.kubernetes.io/part-of: kube-prometheus-stack
release: kube-prometheus-stack
heritage: Helm
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+ interval: 60s
port: https
scheme: https
metricRelabelings:
- action: drop
regex: (apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket
sourceLabels:
- __name__
- action: drop
regex: (apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket)
sourceLabels:
- __name__
+ - action: drop
+ regex: apiserver_request_duration_seconds_bucket;(0.001|0.002|0.004|0.008|0.016|0.032|0.064|0.128|0.256)
+ separator: ;
+ sourceLabels:
+ - __name__
+ - le
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
serverName: kubernetes
insecureSkipVerify: false
jobLabel: component
namespaceSelector:
--- HelmRelease: observability/kube-prometheus-stack ServiceMonitor: observability/kube-prometheus-stack-kubelet
+++ HelmRelease: observability/kube-prometheus-stack ServiceMonitor: observability/kube-prometheus-stack-kubelet
@@ -22,87 +22,85 @@
matchLabels:
app.kubernetes.io/name: kubelet
k8s-app: kubelet
endpoints:
- port: https-metrics
scheme: https
+ interval: 60s
+ scrapeTimeout: 55s
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecureSkipVerify: true
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
honorLabels: true
honorTimestamps: true
metricRelabelings:
- action: labeldrop
regex: (uid)
- action: labeldrop
regex: (id|name)
+ - action: labeldrop
+ regex: (container_id|image_id)
- action: drop
regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count)
+ sourceLabels:
+ - __name__
+ - action: drop
+ regex: (container_memory_failures_total|container_memory_usage_bytes|container_network_tcp_usage_total)
+ sourceLabels:
+ - __name__
+ - action: drop
+ regex: (container_cpu_cfs_throttled_periods_total|container_cpu_cfs_throttled_seconds_total|container_cpu_cfs_periods_total|container_cpu_usage_seconds_total|container_fs_io_time_seconds_total|container_fs_io_time_weighted_seconds_total|container_fs_reads_bytes_total|container_fs_reads_total|container_fs_writes_bytes_total|container_fs_writes_total|container_last_seen|container_memory_cache|container_memory_rss|container_memory_swap|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_receive_packets_dropped_total|container_network_receive_packets_total|container_network_transmit_bytes_total|container_network_transmit_packets_dropped_total|container_network_transmit_packets_total)
+ sourceLabels:
+ - __name__
+ - action: drop
+ regex: (kubelet_runtime_operations_duration_seconds_bucket|kubelet_runtime_operations_errors_total)
sourceLabels:
- __name__
relabelings:
- action: replace
sourceLabels:
- __metrics_path__
targetLabel: metrics_path
- port: https-metrics
scheme: https
path: /metrics/cadvisor
- interval: 10s
+ interval: 60s
+ scrapeTimeout: 55s
honorLabels: true
honorTimestamps: true
trackTimestampsStaleness: true
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecureSkipVerify: true
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
metricRelabelings:
- action: drop
- regex: container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total)
+ regex: (container_tasks_state|container_memory_failures_total|container_memory_mapped_file)
sourceLabels:
- __name__
- action: drop
- regex: container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total)
+ regex: (exited|dead)
+ sourceLabels:
+ - container_state
+ - action: drop
+ regex: (container_.*_duration_seconds_bucket);(0.005|0.01|0.025|0.05|0.075|2.5|7.5|15|30|45)
+ separator: ;
sourceLabels:
- __name__
- - action: drop
- regex: container_memory_(mapped_file|swap)
- sourceLabels:
- - __name__
- - action: drop
- regex: container_(file_descriptors|tasks_state|threads_max)
- sourceLabels:
- - __name__
- - action: drop
- regex: container_memory_failures_total;hierarchy
- sourceLabels:
- - __name__
- - scope
- - action: drop
- regex: container_network_.*;(cali|cilium|cni|lxc|nodelocaldns|tunl).*
- sourceLabels:
- - __name__
- - interface
- - action: drop
- regex: container_spec.*
- sourceLabels:
- - __name__
- - action: drop
- regex: .+;
- sourceLabels:
- - id
- - pod
+ - le
relabelings:
- action: replace
sourceLabels:
- __metrics_path__
targetLabel: metrics_path
- port: https-metrics
scheme: https
path: /metrics/probes
+ interval: 60s
+ scrapeTimeout: 55s
honorLabels: true
honorTimestamps: true
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecureSkipVerify: true
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token