helm-charts
[loki-distributed] fix - add support for autoscaling ingester
I believe that if you set replication_factor: 3, you can autoscale the ingester up and down. Memory load seems to be evenly distributed across all of my replicas, with a maximum of 6. This is mostly copied from https://github.com/grafana/helm-charts/pull/667
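For reference, what this boils down to on the cluster side is a HorizontalPodAutoscaler for the ingester. The sketch below is a hand-written illustration using the values from my config further down, not the chart's actual template output; the object name and the autoscaling/v2beta2 API version are assumptions. Note that scaleTargetRef works the same whether it points at a Deployment or a StatefulSet.

# illustration only - the name and apiVersion are assumptions, not the chart's rendered output
apiVersion: autoscaling/v2beta2
kind: HorizontalPodAutoscaler
metadata:
  name: loki-loki-distributed-ingester
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: StatefulSet   # an HPA can target a Deployment or a StatefulSet alike
    name: loki-loki-distributed-ingester
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 60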
@joshuasimon-taulia Just to double-check: do you run your ingester as a Deployment or as a StatefulSet?
I'm currently running my ingester as a StatefulSet, but the included HPA logic will work with a Deployment, too. Is it problematic to autoscale an ingester deployed as a StatefulSet? If so, I can modify the logic accordingly. This is my (helmfile-templated) loki-distributed Helm chart config; the {{`...`}} wrapping escapes the chart's own Go-template expressions from helmfile's templating pass:
ingester:
  autoscaling:
    enabled: true
    minReplicas: 3
    maxReplicas: 10
    targetCPUUtilizationPercentage: null
    # this service seems memory-bound
    targetMemoryUtilizationPercentage: 60
  resources:
    requests:
      cpu: 25m
      memory: 512Mi
    limits:
      cpu: 100m
      memory: 896Mi
loki:
  # -- Check https://grafana.com/docs/loki/latest/configuration/#schema_config for more info on how to configure schemas
  schemaConfig:
    configs:
      - from: 2020-09-07
        store: boltdb-shipper
        object_store: gcs
        schema: v11
        index:
          prefix: loki_index_
          period: 24h
  # -- Check https://grafana.com/docs/loki/latest/configuration/#storage_config for more info on how to configure storages
  storageConfig:
    boltdb_shipper:
      shared_store: gcs
    gcs:
      bucket_name: "myorg-loki-{{ .Values.jxRequirements.cluster.clusterName }}"
  config: |
    auth_enabled: false
    server:
      http_listen_port: 3100
      # Max gRPC message size that can be received
      grpc_server_max_recv_msg_size: 7567796
      # Max gRPC message size that can be sent
      grpc_server_max_send_msg_size: 7567796
    distributor:
      ring:
        kvstore:
          store: memberlist
    memberlist:
      join_members:
        - {{`{{ include "loki.fullname" . }}`}}-memberlist
    ingester:
      lifecycler:
        ring:
          kvstore:
            store: memberlist
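          # with a replication factor of 3, each stream is written to three
          # ingesters, so single replicas can be scaled away without data loss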
          replication_factor: 3
      chunk_idle_period: 30m
      chunk_block_size: 262144
      chunk_encoding: snappy
      chunk_retain_period: 1m
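      # chunk hand-off on shutdown is disabled; the WAL below covers restarts instead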
      max_transfer_retries: 0
      wal:
        dir: /var/loki/wal
    limits_config:
      enforce_metric_name: false
      reject_old_samples: true
      reject_old_samples_max_age: 168h
      max_cache_freshness_per_query: 10m
    {{`{{- if .Values.loki.schemaConfig}}`}}
    schema_config:
      {{`{{- toYaml .Values.loki.schemaConfig | nindent 2}}`}}
    {{`{{- end}}`}}
    {{`{{- if .Values.loki.storageConfig}}`}}
    storage_config:
      {{`{{- toYaml .Values.loki.storageConfig | nindent 2}}`}}
      {{`{{- if .Values.indexGateway.enabled}}`}}
      index_gateway_client:
        server_address: dns:///{{`{{ include "loki.indexGatewayFullname" . }}`}}:9095
      {{`{{- end}}`}}
    {{`{{- end}}`}}
    chunk_store_config:
      max_look_back_period: 0s
    table_manager:
      retention_deletes_enabled: false
      retention_period: 0s
    query_range:
      align_queries_with_step: true
      max_retries: 5
      split_queries_by_interval: 15m
      cache_results: true
      results_cache:
        cache:
          enable_fifocache: true
          fifocache:
            max_size_items: 1024
            validity: 24h
    frontend_worker:
      frontend_address: {{`{{ include "loki.queryFrontendFullname" . }}`}}:9095
    frontend:
      log_queries_longer_than: 5s
      compress_responses: true
      tail_proxy_url: http://{{`{{ include "loki.querierFullname" . }}`}}:3100
    compactor:
      shared_store: gcs
    ruler:
      storage:
        type: local
        local:
          directory: /etc/loki/rules
      ring:
        kvstore:
          store: memberlist
      rule_path: /tmp/loki/scratch
      alertmanager_url: https://alertmanager.xx
      external_url: https://alertmanager.xx
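One thing worth noting: the HPA computes memory utilization against resources.requests.memory, so with the 512Mi request above, the 60% target kicks in at roughly 307Mi per pod, comfortably below the 896Mi limit.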