loki-ingester load balancing problem.
Hi, I'm a beginner with Loki and I need some help. Apologies in advance, my English is not great.
I am running loki-distributed on EKS.
Chart version: 0.43.0, Loki version: 4.2.4
The log flow: Logstash (4 nodes) consumes logs from Kafka and pushes them to Loki's domain, which is exposed through an AWS ALB ingress.
I have 4 ingester pods, and the ring status is normal.
I thought the distributor would load-balance across all 4 ingesters, but only 2 of them are ever used. Those 2 consume too much memory, get OOM-killed, and restart, over and over again.
Why are the other 2 ingesters idle and never used? Is something wrong with my configuration?
My configuration is below, followed by a small diagnostic sketch. Could you help me, please?
config: |
  auth_enabled: false

  server:
    http_listen_port: 3100
    grpc_server_min_time_between_pings: 10s
    grpc_server_ping_without_stream_allowed: true
    grpc_server_max_recv_msg_size: 104857600
    grpc_server_max_send_msg_size: 104857600

  distributor:
    ring:
      kvstore:
        store: memberlist
      heartbeat_timeout: 30s

  memberlist:
    join_members:
      - loki-memberlist

  ingester:
    lifecycler:
      join_after: 0s
      ring:
        kvstore:
          store: memberlist
        replication_factor: 1
    chunk_idle_period: 1h
    chunk_target_size: 1536000
    chunk_block_size: 262144
    chunk_encoding: snappy
    chunk_retain_period: 1m
    max_transfer_retries: 0
    autoforget_unhealthy: false
    wal:
      dir: /var/loki/wal

  limits_config:
    enforce_metric_name: false
    reject_old_samples: true
    reject_old_samples_max_age: 168h
    max_cache_freshness_per_query: 10m
    max_streams_per_user: 0
    max_query_length: 720h
    max_query_parallelism: 24
    max_entries_limit_per_query: 10000
    ingestion_burst_size_mb: 32
    ingestion_rate_mb: 16
    cardinality_limit: 1000000

  schema_config:
    configs:
      - from: "2021-12-24"
        store: aws
        object_store: s3
        schema: v11
        index:
          prefix: {{ index_name }}
          period: 720h

  storage_config:
    aws:
      s3: s3://ap-northeast-2/{{ bucket_name }}
      dynamodb:
        dynamodb_url: dynamodb://ap-northeast-2
      http_config:
        response_header_timeout: 5s
    boltdb_shipper:
      shared_store: s3
      active_index_directory: /var/loki/index
      cache_location: /var/loki/cache
      cache_ttl: 168h
      index_gateway_client:
        server_address: dns://loki-index-gateway:9095
    index_cache_validity: 168h
    index_queries_cache_config:
      enable_fifocache: true
      default_validity: 168h
      fifocache:
        validity: 168h

  chunk_store_config:
    max_look_back_period: 0s
    chunk_cache_config:
      enable_fifocache: true
      default_validity: 168h
      fifocache:
        validity: 168h

  table_manager:
    retention_deletes_enabled: false
    throughput_updates_disabled: false
    retention_period: 0
    chunk_tables_provisioning:
      enable_ondemand_throughput_mode: true
      enable_inactive_throughput_on_demand_mode: true
      provisioned_write_throughput: 0
      provisioned_read_throughput: 0
      inactive_write_throughput: 0
      inactive_read_throughput: 0
    index_tables_provisioning:
      enable_ondemand_throughput_mode: true
      enable_inactive_throughput_on_demand_mode: true
      provisioned_write_throughput: 0
      provisioned_read_throughput: 0
      inactive_write_throughput: 0
      inactive_read_throughput: 0

  querier:
    query_timeout: 5m
    query_ingesters_within: 1h
    engine:
      timeout: 5m

  query_range:
    align_queries_with_step: true
    max_retries: 5
    split_queries_by_interval: 10m
    cache_results: true
    parallelise_shardable_queries: true
    results_cache:
      cache:
        enable_fifocache: true
        default_validity: 168h
        fifocache:
          validity: 168h

  frontend_worker:
    frontend_address: loki-query-frontend:9095
    #scheduler_address: loki-scheduler:9095
    grpc_client_config:
      max_recv_msg_size: 104857600
      max_send_msg_size: 104857600
    match_max_concurrent: false
    parallelism: 8

  frontend:
    log_queries_longer_than: 1m
    compress_responses: true
    tail_proxy_url: http://loki-querier:3100
    #scheduler_address: loki-scheduler:9095

  compactor:
    shared_store: filesystem

  ruler:
    enable_api: true
    storage:
      type: s3
      s3:
        s3: s3://ap-northeast-2/{{ rule-bucket-name }}
    rule_path: /tmp/loki/scratch
    alertmanager_url: http://alertmanager:9093
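A quick way to check whether only two ingesters are actually receiving data is to compare the in-memory stream counts each ingester pod exposes on its /metrics endpoint (the loki_ingester_memory_streams gauge). Below is a rough Python sketch of that check, not something from this thread; the addresses are placeholders and assume each ingester is reachable locally, for example via kubectl port-forward (adjust pod and port names to your release):

# Rough diagnostic sketch: compare in-memory stream counts across the four
# ingesters to see whether only two of them actually hold data.
# The addresses below are placeholders -- point them at your ingester pods,
# e.g. after `kubectl port-forward pod/<loki-ingester-N> 310N:3100`.
import re
import urllib.request

INGESTERS = {
    "ingester-0": "http://127.0.0.1:3101",
    "ingester-1": "http://127.0.0.1:3102",
    "ingester-2": "http://127.0.0.1:3103",
    "ingester-3": "http://127.0.0.1:3104",
}

# Matches lines like: loki_ingester_memory_streams{tenant="fake"} 1234
METRIC = re.compile(r'^loki_ingester_memory_streams(?:\{[^}]*\})?\s+([0-9.eE+-]+)$', re.M)

for name, base_url in INGESTERS.items():
    try:
        body = urllib.request.urlopen(f"{base_url}/metrics", timeout=5).read().decode()
    except OSError as exc:
        print(f"{name}: unreachable ({exc})")
        continue
    streams = sum(float(value) for value in METRIC.findall(body))
    print(f"{name}: {streams:.0f} in-memory streams")

If two pods report nearly all of the streams while the others sit near zero, one possible explanation is that the distributor assigns whole streams to ingesters by hashing their label sets, and with replication_factor: 1 each stream lives on exactly one ingester; very few distinct label sets coming from Logstash would then land on very few ingesters, regardless of the ALB or gateway in front.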
Did anyone find a solution for this problem?
Hi zerofive83. How did you set up the ALB ingress? Are you using the gateway component from the loki-distributed chart? Could you share more details about this configuration?
I also observed the same issue.
loki: 2.8.2
chart: 0.69.16
gateway:
  worker_processes 5;  ## Default: 1
  error_log /dev/stderr;
  pid /tmp/nginx.pid;
  worker_rlimit_nofile 8192;

  events {
    worker_connections 4096;  ## Default: 1024
  }

  http {
    client_body_temp_path /tmp/client_temp;
    proxy_temp_path /tmp/proxy_temp_path;
    fastcgi_temp_path /tmp/fastcgi_temp;
    uwsgi_temp_path /tmp/uwsgi_temp;
    scgi_temp_path /tmp/scgi_temp;

    proxy_http_version 1.1;
    default_type application/octet-stream;

    log_format main '$remote_addr - $remote_user [$time_local] $status '
      '"$request" $body_bytes_sent "$http_referer" '
      '"$http_user_agent" "$http_x_forwarded_for"';
    access_log /dev/stderr main;

    sendfile on;
    tcp_nopush on;
    resolver kube-dns.kube-system.svc.cluster.local;

    server {
      listen 8080;

      location = / {
        return 200 'OK';
        auth_basic off;
        access_log off;
      }

      location = /api/prom/push {
        set $api_prom_push_backend http://loki-distributed-distributor.monitoring.svc.cluster.local;
        proxy_pass $api_prom_push_backend:3100$request_uri;
        proxy_http_version 1.1;
      }

      location = /api/prom/tail {
        set $api_prom_tail_backend http://loki-distributed-querier.monitoring.svc.cluster.local;
        proxy_pass $api_prom_tail_backend:3100$request_uri;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_http_version 1.1;
      }

      # Ruler
      location ~ /prometheus/api/v1/alerts.* {
        proxy_pass http://loki-distributed-ruler.monitoring.svc.cluster.local:3100$request_uri;
      }
      location ~ /prometheus/api/v1/rules.* {
        proxy_pass http://loki-distributed-ruler.monitoring.svc.cluster.local:3100$request_uri;
      }
      location ~ /api/prom/rules.* {
        proxy_pass http://loki-distributed-ruler.monitoring.svc.cluster.local:3100$request_uri;
      }
      location ~ /api/prom/alerts.* {
        proxy_pass http://loki-distributed-ruler.monitoring.svc.cluster.local:3100$request_uri;
      }

      location ~ /api/prom/.* {
        set $api_prom_backend http://loki-distributed-query-frontend-headless.monitoring.svc.cluster.local;
        proxy_pass $api_prom_backend:3100$request_uri;
        proxy_http_version 1.1;
      }

      location = /loki/api/v1/push {
        set $loki_api_v1_push_backend http://loki-distributed-distributor.monitoring.svc.cluster.local;
        proxy_pass $loki_api_v1_push_backend:3100$request_uri;
        proxy_http_version 1.1;
      }

      location = /loki/api/v1/tail {
        set $loki_api_v1_tail_backend http://loki-distributed-querier.monitoring.svc.cluster.local;
        proxy_pass $loki_api_v1_tail_backend:3100$request_uri;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_http_version 1.1;
      }

      location ~ /loki/api/.* {
        set $loki_api_backend http://loki-distributed-query-frontend-headless.monitoring.svc.cluster.local;
        proxy_pass $loki_api_backend:3100$request_uri;
        proxy_http_version 1.1;
      }
    }
  }
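To rule out the gateway routing itself, it can help to push a single test line through it and confirm the distributor accepts it (Loki returns 204 No Content on a successful push). Here is a minimal Python sketch, not from this thread; the address and service name are placeholders, it assumes the gateway Service has been port-forwarded locally, and it assumes multi-tenancy is disabled (auth_enabled: false), otherwise an X-Scope-OrgID header would also be needed:

# Minimal push test (a sketch). Assumes the gateway Service is reachable locally,
# e.g. `kubectl port-forward svc/loki-distributed-gateway 8080:80`, and that
# multi-tenancy is disabled, so no X-Scope-OrgID header is sent.
import json
import time
import urllib.request

GATEWAY = "http://127.0.0.1:8080"  # placeholder address

payload = {
    "streams": [
        {
            "stream": {"job": "gateway-push-test"},
            # Each value is [<unix timestamp in nanoseconds, as a string>, <log line>].
            "values": [[str(time.time_ns()), "test line pushed through the nginx gateway"]],
        }
    ]
}

request = urllib.request.Request(
    f"{GATEWAY}/loki/api/v1/push",
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(request, timeout=10) as response:
    # Loki answers 204 No Content when the distributor accepted the push.
    print("push status:", response.status)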
Has anyone found a solution to this problem?