Promtail: "Not ready: Unable to find any logs to tail. Please verify permissions, volumes, scrape_config, etc"
Hello!
I'm trying to install Promtail with manifests and export logs from a Kubernetes cluster to Loki. Since I couldn't get the manifest installation described in the documentation working, I gave a Helm install a try.
The Helm install works fine: it labels and tags the log files and sends them to Loki.
helm upgrade --install promtail grafana/promtail -f /tmp/promtail_values.yml
I then tried to extract the manifests from Helm using:
helm upgrade --install promtail grafana/promtail -f /tmp/promtail_values.yml --dry-run
and applied them.
Promtail errors out with
msg="GET /ready (500) 35.791µs Response: \"Not ready: Unable to find any logs to tail. Please verify permissions, volumes, scrape_config, etc.\\n\" ws: false; Accept: */*; Connection: close; User-Agent: kube-probe/1.23
Kubernetes: v1.23.4 Helm: v3.8.0+gd141386
Manifest:
# Source: promtail/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: promtail
  namespace: loki
  labels:
    helm.sh/chart: promtail-4.2.0
    app.kubernetes.io/name: promtail
    app.kubernetes.io/instance: promtail
    app.kubernetes.io/version: "2.5.0"
    app.kubernetes.io/managed-by: Helm
---
# Source: promtail/templates/secret.yaml
apiVersion: v1
kind: Secret
metadata:
  name: promtail
  namespace: loki
  labels:
    helm.sh/chart: promtail-4.2.0
    app.kubernetes.io/name: promtail
    app.kubernetes.io/instance: promtail
    app.kubernetes.io/version: "2.5.0"
    app.kubernetes.io/managed-by: Helm
stringData:
  promtail.yaml: |
    server:
      log_level: info
      http_listen_port: 3101
    clients:
      - url: http://loki-loki-simple-scalable-gateway/loki/api/v1/push
    positions:
      filename: /run/promtail/positions.yaml
    scrape_configs:
      # See also https://github.com/grafana/loki/blob/master/production/ksonnet/promtail/scrape_config.libsonnet for reference
      - job_name: kubernetes-pods
        pipeline_stages:
          - cri: {}
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels:
              - __meta_kubernetes_pod_controller_name
            regex: ([0-9a-z-.]+?)(-[0-9a-f]{8,10})?
            action: replace
            target_label: __tmp_controller_name
          - source_labels:
              - __meta_kubernetes_pod_label_app_kubernetes_io_name
              - __meta_kubernetes_pod_label_app
              - __tmp_controller_name
              - __meta_kubernetes_pod_name
            regex: ^;*([^;]+)(;.*)?$
            action: replace
            target_label: app
          - source_labels:
              - __meta_kubernetes_pod_label_app_kubernetes_io_component
              - __meta_kubernetes_pod_label_component
            regex: ^;*([^;]+)(;.*)?$
            action: replace
            target_label: component
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: node_name
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: namespace
          - action: replace
            replacement: $1
            separator: /
            source_labels:
              - namespace
              - app
            target_label: job
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: pod
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container
          - action: replace
            replacement: /var/log/pods/*$1/*.log
            separator: /
            source_labels:
              - __meta_kubernetes_pod_uid
              - __meta_kubernetes_pod_container_name
            target_label: __path__
          - action: replace
            regex: true/(.*)
            replacement: /var/log/pods/*$1/*.log
            separator: /
            source_labels:
              - __meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash
              - __meta_kubernetes_pod_annotation_kubernetes_io_config_hash
              - __meta_kubernetes_pod_container_name
            target_label: __path__
---
# Source: promtail/templates/clusterrole.yaml
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: promtail
  labels:
    helm.sh/chart: promtail-4.2.0
    app.kubernetes.io/name: promtail
    app.kubernetes.io/instance: promtail
    app.kubernetes.io/version: "2.5.0"
    app.kubernetes.io/managed-by: Helm
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs:
      - get
      - watch
      - list
---
# Source: promtail/templates/clusterrolebinding.yaml
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: promtail
  labels:
    helm.sh/chart: promtail-4.2.0
    app.kubernetes.io/name: promtail
    app.kubernetes.io/instance: promtail
    app.kubernetes.io/version: "2.5.0"
    app.kubernetes.io/managed-by: Helm
subjects:
  - kind: ServiceAccount
    name: promtail
    namespace: loki
roleRef:
  kind: ClusterRole
  name: promtail
  apiGroup: rbac.authorization.k8s.io
---
# Source: promtail/templates/daemonset.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: promtail
  namespace: loki
  labels:
    helm.sh/chart: promtail-4.2.0
    app.kubernetes.io/name: promtail
    app.kubernetes.io/instance: promtail
    app.kubernetes.io/version: "2.5.0"
    app.kubernetes.io/managed-by: Helm
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: promtail
      app.kubernetes.io/instance: promtail
  updateStrategy:
    {}
  template:
    metadata:
      labels:
        app.kubernetes.io/name: promtail
        app.kubernetes.io/instance: promtail
      annotations:
        checksum/config: e94680e64beaef2d5b2debc874a8580d23b627b7e373d2a2f207d872170d15c2
    spec:
      serviceAccountName: promtail
      securityContext:
        runAsGroup: 0
        runAsUser: 0
      containers:
        - name: promtail
          image: "docker.io/grafana/promtail:2.5.0"
          imagePullPolicy: IfNotPresent
          args:
            - "-config.file=/etc/promtail/promtail.yaml"
          volumeMounts:
            - name: config
              mountPath: /etc/promtail
            - name: run
              mountPath: /run/promtail
            - mountPath: /var/lib/docker/containers
              name: containers
              readOnly: true
            - mountPath: /var/log/pods
              name: pods
              readOnly: true
          env:
            - name: HOSTNAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          ports:
            - name: http-metrics
              containerPort: 3101
              protocol: TCP
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: true
          readinessProbe:
            failureThreshold: 5
            httpGet:
              path: /ready
              port: http-metrics
            initialDelaySeconds: 10
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
      tolerations:
        - effect: NoSchedule
          key: node-role.kubernetes.io/master
          operator: Exists
        - effect: NoSchedule
          key: node-role.kubernetes.io/control-plane
          operator: Exists
      volumes:
        - name: config
          secret:
            secretName: promtail
        - name: run
          hostPath:
            path: /run/promtail
        - hostPath:
            path: /var/lib/docker/containers
          name: containers
        - hostPath:
            path: /var/log/pods
          name: pods
Log:
level=warn ts=2022-04-19T08:42:35.660135972Z caller=logging.go:72 msg="GET /ready (500) 113.083µs Response: \"Not ready: Unable to find any logs to tail. Please verify permissions, volumes, scrape_config, etc.\\n\" ws: false; Accept: */*; Connection: close; User-Agent: kube-probe/1.23; "
level=warn ts=2022-04-19T08:42:45.660860296Z caller=logging.go:72 msg="GET /ready (500) 32.75µs Response: \"Not ready: Unable to find any logs to tail. Please verify permissions, volumes, scrape_config, etc.\\n\" ws: false; Accept: */*; Connection: close; User-Agent: kube-probe/1.23; "
If I read this correctly: installing with Helm works fine, but when you extract the manifests from Helm and apply them yourself, it doesn't work?
If that's the case, I'm not sure we can offer much support here. The Helm charts are largely community maintained and we are not really Helm experts, and given that the Helm chart itself seems to be working, it looks like the way you are extracting the manifests may be the issue.
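For what it's worth, helm template tends to be a cleaner way to render the manifests than capturing --dry-run output, since it prints only the Kubernetes objects. A sketch, reusing the release name and values file from above:
$ helm template promtail grafana/promtail -n loki -f /tmp/promtail_values.yml > promtail-manifests.yaml
$ kubectl apply -n loki -f promtail-manifests.yaml
Comparing that output with the manifests you applied might show what got lost in the extraction.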
Check whether the hostPath exists and the DaemonSet has access to those mounts. You can exec into the Promtail pods and try to cd into /var/log/pods and see if your pod logs exist there (or whichever path is appropriate for your k8s node setup); if you can't, then Promtail won't be able to find the logs to tail.
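For example, a minimal check along those lines (namespace and pod name are placeholders):
$ kubectl exec -it -n loki <promtail-pod> -- /bin/bash
# ls -la /var/log/pods | head
# ls /var/log/pods/*/*/*.log | head
If those listings are empty (or the exec fails), the problem is the hostPath mounts or permissions rather than the scrape_config.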
I'm hitting the same issue, and I can list the contents of the /var/log/pods directory from the container. Going to see if I can gather more information. Interestingly, I'm seeing this on only one node.
$ kubectl grep pods -n observability loki -o wide
NAMESPACE NAME READY STATUS RESTART AGE IP NODENAME
observability loki-stack-loki-0 1/1 Running 0 15m 10.223.28.101 p53.tatu.home
observability loki-stack-promtail-98465 1/1 Running 0 14m 10.223.40.58 p70.tatu.home
observability loki-stack-promtail-hqdl4 0/1 Running 0 15m 10.223.28.90 p53.tatu.home
observability loki-stack-promtail-m67nb 1/1 Running 0 15m 10.223.42.255 t470n2.tatu.home
observability loki-stack-promtail-m6hn4 1/1 Running 0 16m 10.223.59.3 t470n1.tatu.home
$ kubectl images -n observability loki-stack-promtail
[Summary]: 1 namespaces, 4 pods, 4 containers and 2 different images
+----------------------------+-----------+----------------------------------+
| Pod | Container | Image |
+----------------------------+-----------+----------------------------------+
| loki-stack-promtail-98465 | promtail | docker.io/grafana/promtail:2.6.1 |
+----------------------------+ + +
| loki-stack-promtail-hqdl4 | | |
+----------------------------+ + +
| loki-stack-promtail-m67nb | | |
+----------------------------+ +----------------------------------+
| loki-stack-promtail-m6hn4 | | docker.io/grafana/promtail:2.4.2 |
+----------------------------+-----------+----------------------------------+
From the logs, I see:
loki-stack-promtail-hqdl4 promtail W1011 03:03:11.593343 1 reflector.go:324] github.com/prometheus/prometheus/discovery/kubernetes/kubernetes.go:502: failed to list *v1.Pod: Get "https://10.96.0.1:443/api/v1/pods?fieldSelector=spec.nodeName%3Dp53.tatu.home&limit=500&resourceVersion=0": dial tcp 10.96.0.1:443: i/o timeout
loki-stack-promtail-hqdl4 promtail I1011 03:03:11.593490 1 trace.go:205] Trace[815994244]: "Reflector ListAndWatch" name:github.com/prometheus/prometheus/discovery/kubernetes/kubernetes.go:502 (11-Oct-2022 03:02:41.591) (total time: 30001ms):
loki-stack-promtail-hqdl4 promtail Trace[815994244]: ---"Objects listed" error:Get "https://10.96.0.1:443/api/v1/pods?fieldSelector=spec.nodeName%3Dp53.tatu.home&limit=500&resourceVersion=0": dial tcp 10.96.0.1:443: i/o timeout 30001ms (03:03:11.593)
loki-stack-promtail-hqdl4 promtail Trace[815994244]: [30.001581055s] [30.001581055s] END
loki-stack-promtail-hqdl4 promtail E1011 03:03:11.593516 1 reflector.go:138] github.com/prometheus/prometheus/discovery/kubernetes/kubernetes.go:502: Failed to watch *v1.Pod: failed to list *v1.Pod: Get "https://10.96.0.1:443/api/v1/pods?fieldSelector=spec.nodeName%3Dp53.tatu.home&limit=500&resourceVersion=0": dial tcp 10.96.0.1:443: i/o timeout
loki-stack-promtail-hqdl4 promtail level=warn ts=2022-10-11T03:03:19.802334186Z caller=logging.go:86 msg="GET /ready (500) 96.942µs Response: \"Not ready: Unable to find any logs to tail. Please verify permissions, volumes, scrape_config, etc.\\n\" ws: false; Accept: */*; Connection: close; User-Agent: kube-probe/1.25; "
However from the container, I see the hostPath volumes:
$ kubectl iexec -n observability promtail /bin/bash
Namespace: observability | Pod: ✔ loki-stack-promtail-hqdl4
root@loki-stack-promtail-hqdl4:/# ls -la /var/log/pods/ | head
total 12
drwxr-xr-x 31 root root 8192 Oct 11 02:46 .
drwxr-xr-x 1 root root 18 Oct 11 02:45 ..
drwxr-xr-x 3 root root 22 Oct 7 05:40 cadvisor_cadvisor-kx7lh_f189a7d6-bb3e-4011-9899-60924f7d28b7
drwxr-xr-x 3 root root 20 Oct 5 06:16 default_iperf3-kkpmf_f8184cd5-a98b-414a-a867-0301f99f6933
drwxr-xr-x 3 root root 29 Oct 10 11:37 default_ookla-speedtest-cronjob-27756697-tdp6x_035abf20-73a6-4ee3-aea4-a83b6fe82cea
drwxr-xr-x 3 root root 29 Oct 10 12:37 default_ookla-speedtest-cronjob-27756757-xj7dc_a561193b-d7da-4928-825e-e9e1a7790607
drwxr-xr-x 3 root root 29 Oct 10 13:37 default_ookla-speedtest-cronjob-27756817-clj2c_c371b08e-dacd-4b1e-b7ed-19b49ef55438
drwxr-xr-x 3 root root 29 Oct 10 14:37 default_ookla-speedtest-cronjob-27756877-28pzh_b7efbe8e-a0f2-46ba-a9c8-c8c9d3e8ffa2
drwxr-xr-x 3 root root 29 Oct 10 15:37 default_ookla-speedtest-cronjob-27756937-cf8bw_58d9b93f-e984-419c-aa66-c33b9bd2dedf
root@loki-stack-promtail-hqdl4:/# ls -la /var/log/pods/* | wc -l
176
Interestingly, I started seeing this after migrating to Kubernetes 1.25.2. Also interesting: the only node where I get the issue is a worker node, and I can see this:
$ kubectl get pods -A -o wide -w | grep promt
observability loki-stack-promtail-29l5j 1/1 Running 0 8m35s 10.223.59.56 t470n1.tatu.home <none> <none>
observability loki-stack-promtail-98465 1/1 Running 0 37m 10.223.40.58 p70.tatu.home <none> <none>
observability loki-stack-promtail-m67nb 1/1 Running 0 38m 10.223.42.255 t470n2.tatu.home <none> <none>
observability loki-stack-promtail-scw89 0/1 Running 0 8m35s 10.223.28.81 p53.tatu.home <none> <none>
From the logs, I can see the k8s API is returning a timeout; however, it works from the other nodes:
loki-stack-promtail-scw89 promtail W1011 03:21:10.994690 1 reflector.go:324] github.com/prometheus/prometheus/discovery/kubernetes/kubernetes.go:502: failed to list *v1.Pod: Get "https://10.96.0.1:443/api/v1/pods?fieldSelector=spec.nodeName%3Dp53.tatu.home&limit=500&resourceVersion=0": dial tcp 10.96.0.1:443: i/o timeout
However, if I run kubectl proxy and then try the same request, it works fine:
$ kubectl proxy
$ curl 2>/dev/null "http://127.0.0.1:8001/api/v1/pods?fieldSelector=spec.nodeName%3Dp53.tatu.home&limit=500" | jq | head
{
"kind": "PodList",
"apiVersion": "v1",
"metadata": {
"resourceVersion": "119135761"
},
"items": [
{
"metadata": {
"name": "cadvisor-kx7lh",
I can also get the same result if I query the k8s API directly using the token mounted in the pod (which also explains why the other pods are working). Interesting...
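That check can be reproduced roughly like this (a sketch; the node name comes from the errors above, the token path is the standard service-account mount, and <apiserver> is whichever API endpoint is reachable from where you run it):
$ TOKEN=$(kubectl exec -n observability loki-stack-promtail-hqdl4 -- cat /var/run/secrets/kubernetes.io/serviceaccount/token)
$ curl -sk -H "Authorization: Bearer $TOKEN" "https://<apiserver>/api/v1/pods?fieldSelector=spec.nodeName%3Dp53.tatu.home&limit=500" | jq '.items[].metadata.name'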
To investigate the issue I enabled the EphemeralContainers feature so I could attach a curl container and test the API timeout. It turns out there was an issue between Calico and k8s 1.25.2 which was fixed in the latest Calico version.
I deployed the new Calico version, and once I rolled out the Loki DS it worked like a charm. So it seems the error was being triggered by the API call from that given node.
$ kubectl get pods -A -o wide -w | grep promtai
observability loki-stack-promtail-95mmm 1/1 Running 0 28s 10.223.40.24 p70.tatu.home <none> <none>
observability loki-stack-promtail-c69vk 1/1 Running 0 28s 10.223.59.11 t470n1.tatu.home <none> <none>
observability loki-stack-promtail-cln9z 1/1 Running 0 28s 10.223.28.143 p53.tatu.home <none> <none>
observability loki-stack-promtail-qtkd6 1/1 Running 0 28s 10.223.42.193 t470n2.tatu.home <none> <none>
@jsteppe in your logs, check whether you are hitting the same timeout when querying the k8s API. It might point you in the same direction.
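With ephemeral containers enabled, a rough way to run that check from the failing node is to attach a debug container that has curl (image and pod name are just examples):
$ kubectl debug -it -n observability loki-stack-promtail-scw89 --image=curlimages/curl --target=promtail -- sh
~ $ curl -vk --max-time 10 https://10.96.0.1:443/version
Any HTTP response (even a 401/403) means connectivity to the API service IP is fine; a timeout like the one in the reflector logs points at the node's networking/CNI.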
I also encountered the same problem. After updating the Promtail ConfigMap and adding a keep action to capture only specific pod logs, Promtail on one node failed to start.
same "problem". I have specified specific app labels for the collection of logs only for particular pods and pods for this app not running on all nodes. So promtail is not running on node where no pods match labels. So it is probably correct but monitoring yell at me that loki "daemonset mismatch"
I'm experiencing the same issue. Specifically, only the Promtail instance running on a node without any pods being monitored by the associated configuration is generating a warning. This behavior appears to be expected, since there are no logs to capture and thus the liveness/readiness checks fail.
Yes, all Loki DaemonSet pods should be up regardless of whether there are logs to tail, the same as for Splunk collectors, Elasticsearch collectors, ...
Hi, any update on this issue? If Promtail is running on a node where there aren't any logs to collect, the readiness probe fails. Is this behavior expected?
I'm using this to find the logs in EKS; I hope it helps you:
- action: replace
  replacement: /var/log/pods/*$1/*.log
  separator: /
  source_labels:
    - __meta_kubernetes_pod_uid
    - __meta_kubernetes_pod_container_name
  target_label: __path__
- action: replace
  replacement: /var/log/pods/*$1/*.log
  regex: true/(.*)
  separator: /
  source_labels:
    - __meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash
    - __meta_kubernetes_pod_annotation_kubernetes_io_config_hash
    - __meta_kubernetes_pod_container_name
  target_label: __path__
I'm combining the labels before matching; maybe it helps you:
config:
  snippets:
    extraRelabelConfigs:
      # Combine labels in order to keep logs from NGINX Ingress and Promtail # https://github.com/grafana/loki/issues/808#issuecomment-592698307
      - source_labels: [ __meta_kubernetes_pod_label_app_kubernetes_io_instance, __meta_kubernetes_pod_label_app_kubernetes_io_name ]
        separator: ';'
        target_label: combined_labels
      - source_labels: [ combined_labels ]
        action: keep
        regex: alb-ingress-nginx;.*|.*;promtail
Hi everyone,
I'm facing the same issue when trying to select specific pods with a pod selector via kubernetes_sd_configs.
My config looks like the following:
scrape_configs:
  # See also https://github.com/grafana/loki/blob/master/production/ksonnet/promtail/scrape_config.libsonnet for reference
  - job_name: kubernetes-pods
    kubernetes_sd_configs:
      - role: pod
        selectors:
          - role: pod
            label: "app=myapp"
    relabel_configs:
      - source_labels: [ __meta_kubernetes_namespace ]
        regex: '[^(myapp.*)].+'
        action: drop
      - source_labels:
          - __meta_kubernetes_pod_label_app_kubernetes_io_name
          - __meta_kubernetes_pod_label_app
          - __tmp_controller_name
          - __meta_kubernetes_pod_name
        regex: ^;*([^;]+)(;.*)?$
        action: replace
        target_label: app
      - action: replace
        source_labels:
          - __meta_kubernetes_pod_name
        target_label: pod
      - action: replace
        source_labels:
          - __meta_kubernetes_pod_container_name
        target_label: container
      - action: replace
        replacement: /var/log/pods/*$1/*.log
        separator: /
        source_labels:
          - __meta_kubernetes_pod_uid
          - __meta_kubernetes_pod_container_name
        target_label: __path__
      - action: replace
        regex: true/(.*)
        replacement: /var/log/pods/*$1/*.log
        separator: /
        source_labels:
          - __meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash
          - __meta_kubernetes_pod_annotation_kubernetes_io_config_hash
          - __meta_kubernetes_pod_container_name
        target_label: __path__
    pipeline_stages:
      - cri: {}
      - match:
          selector: '{app="myapp"}'
          stages:
            - json:
                expressions:
                  level: level
                  method: method
                  status_code: status_code
                  path: path
                  msg: msg
            - labels:
                level:
                method:
                status_code:
                path:
                msg:
level=warn ts=2024-05-28T12:31:13.758554373Z caller=logging.go:126 traceID=63d4bda14f0ee10b msg="GET /ready (500) 41.249µs Response: \"Not ready: Unable to find any logs to tail. Please verify permissions, volumes, scrape_config, etc.\\n\" ws: false; Accept: */*; Connection: close; User-Agent: kube-probe/1.28;
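A likely explanation in this setup: with selectors restricting discovery to label app=myapp, a Promtail on a node that runs no such pods never gets a target, so /ready stays at 500. One way to confirm which nodes actually host matching pods (label value taken from the config above):
$ kubectl get pods -A -l app=myapp -o wide
If the node running the failing Promtail doesn't appear in the NODE column, the probe failure is just the "no logs to tail" case discussed above.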
Hi everyone, I'm facing the same issue.
apiVersion: v1
kind: ConfigMap
metadata:
  name: promtail-config
  namespace: grafana
data:
  promtail-config.yaml: |
    server:
      log_level: debug
      log_format: logfmt
      http_listen_port: 3101
      grpc_listen_port: 0
    clients:
      - url: https://xxxx/loki/api/v1/push
    positions:
      filename: /run/promtail/positions.yaml
    scrape_configs:
      - job_name: kubernetes-pods
        pipeline_stages:
          - cri: {}
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_namespace
              - __meta_kubernetes_pod_name
              - __meta_kubernetes_pod_uid
              - __meta_kubernetes_pod_container_name
            separator: "_"
            target_label: __path__
            replacement: "/var/log/pods/$1_$2_$3/$4/0.log"
    limits_config: {}
    tracing:
      enabled: false
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: promtail
  namespace: grafana
spec:
  selector:
    matchLabels:
      app: promtail
  template:
    metadata:
      labels:
        app: promtail
    spec:
      serviceAccountName: promtail
      containers:
        - name: promtail
          image: grafana/promtail:latest
          args:
            - "-config.file=/etc/promtail/promtail-config.yaml"
          securityContext:
            privileged: true
          volumeMounts:
            - name: config
              mountPath: /etc/promtail
            - name: varlog
              mountPath: /var/log
              readOnly: true
            - name: varlibdockercontainers
              mountPath: /var/lib/docker/containers
              readOnly: false
            - name: positions
              mountPath: /run/promtail
      volumes:
        - name: config
          configMap:
            name: promtail-config
        - name: varlog
          hostPath:
            path: /var/log
            type: Directory
        - name: varlibdockercontainers
          hostPath:
            path: /var/lib/docker/containers
            type: Directory
        - name: positions
          emptyDir: {}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: promtail
  namespace: grafana
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: promtail-role
rules:
  - apiGroups: [""]
    resources:
      - pods
      - namespaces
      - nodes
      - endpoints
    verbs:
      - get
      - list
      - watch
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: promtail-role-binding
subjects:
  - kind: ServiceAccount
    name: promtail
    namespace: grafana
roleRef:
  kind: ClusterRole
  name: promtail-role
  apiGroup: rbac.authorization.k8s.io
Not ready: Unable to find any logs to tail. Please verify permissions, volumes, scrape_config, etc.
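If it helps, a quick sanity check is to compare the __path__ produced by the relabel rule above with what actually exists under the mounted /var/log/pods (pod name is a placeholder):
$ kubectl exec -it -n grafana <promtail-pod> -- /bin/bash
# ls /var/log/pods/ | head
# ls /var/log/pods/*/*/ | head
If the directory layout doesn't match the /var/log/pods/$1_$2_$3/$4/0.log template, Promtail won't find anything to tail.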