Middle-Managers disappearing from the cluster
Affected Version
24.0.0
Description
Cluster size:
- 5 Coordinators
- 5 Overlords
- 20 Middle-Managers
- 6 Historicals
- 4 Brokers
- 3 Routers
Configurations in use (possibly sensitive information redacted):
Middle-Manager
{
"awt.toolkit": "sun.awt.X11.XToolkit",
"druid.coordinator.loadqueuepeon.type": "http",
"druid.discovery.k8s.clusterIdentifier": "druid",
"druid.discovery.type": "k8s",
"druid.emitter": "noop",
"druid.emmiter.logging.logLevel": "warn",
"druid.extensions.loadList": "[ \"druid-kubernetes-extensions\", \"druid-basic-security\", \"prometheus-emitter\", \"postgresql-metadata-storage\", \"druid-google-extensions\", \"druid-pac4j\", \"druid-kafka-indexing-service\", \"druid-lookups-cached-global\", \"druid-virtual-columns\", \"druid-avro-extensions\", \"druid-datasketches\", \"druid-moving-average-query\", \"druid-time-min-max\" ]",
"druid.generic.useDefaultValueForNull": "true",
"druid.google.bucket": "***-druid-deepstorage",
"druid.host": "10.28.8.24",
"druid.indexer.fork.property.druid.processing.buffer.sizeBytes": "100MiB",
"druid.indexer.fork.property.druid.processing.numMergeBuffers": "2",
"druid.indexer.fork.property.druid.processing.numThreads": "1",
"druid.indexer.logs.bucket": "***-druid-deepstorage",
"druid.indexer.logs.kill.durationToRetain": "604800000",
"druid.indexer.logs.kill.enabled": "false",
"druid.indexer.logs.prefix": "taskLogs",
"druid.indexer.logs.type": "google",
"druid.indexer.runner.javaOptsArray": "[\"-XX:MaxDirectMemorySize=3072m\",\"-Xmx1024m\",\"-Xms128m\"]",
"druid.indexer.runner.type": "httpRemote",
"druid.javascript.enabled": "false",
"druid.lookup.enableLookupSyncOnStartup": "true",
"druid.lookup.lookupTier": "broker",
"druid.metadata.storage.connector.connectURI": "jdbc:postgresql://***.svc.cluster.local:5432/druid",
"druid.metadata.storage.connector.createTables": "true",
"druid.metadata.storage.type": "postgresql",
"druid.port": "8088",
"druid.processing.numMergeBuffers": "2",
"druid.processing.numThreads": "1",
"druid.query.groupBy.applyLimitPushDownToSegment": "true",
"druid.query.groupBy.maxOnDiskStorage": "1073741824",
"druid.realtime.cache.populateCache": "true",
"druid.realtime.cache.useCache": "true",
"druid.selectors.coordinator.serviceName": "druid/coordinator",
"druid.selectors.indexing.serviceName": "druid/overlord",
"druid.server.http.numThreads": "5",
"druid.serverview.type": "http",
"druid.service": "druid/middlemanager",
"druid.storage.type": "google",
"druid.worker.capacity": "1",
"druid.zk.service.enabled": "false",
"file.encoding": "UTF-8",
"file.encoding.pkg": "sun.io",
"file.separator": "/",
"java.awt.graphicsenv": "sun.awt.X11GraphicsEnvironment",
"java.awt.printerjob": "sun.print.PSPrinterJob",
"java.class.path": "/tmp/conf/druid/cluster/_common:/tmp/conf/druid/cluster/data/middleManager:lib/zstd-jni-1.5.2-3.jar:lib/jackson-dataformat-smile-2.10.5.jar:lib/commons-beanutils-1.9.4.jar:lib/classmate-1.1.0.jar:lib/log4j-jul-2.18.0.jar:lib/javax.inject-1.jar:lib/avatica-metrics-1.17.0.jar:lib/avatica-server-1.17.0.jar:lib/aopalliance-1.0.jar:lib/aether-util-0.9.0.M2.jar:lib/jetty-servlet-9.4.48.v20220622.jar:lib/spymemcached-2.12.3.jar:lib/zookeeper-jute-3.5.9.jar:lib/guava-16.0.1.jar:lib/google-oauth-client-1.26.0.jar:lib/druid-core-24.0.0.jar:lib/validation-api-1.1.0.Final.jar:lib/janino-3.0.11.jar:lib/httpclient-4.5.13.jar:lib/accessors-smart-1.2.jar:lib/async-http-client-2.5.3.jar:lib/joda-time-2.10.5.jar:lib/json-smart-2.3.jar:lib/guice-servlet-4.1.0.jar:lib/vavr-match-0.10.2.jar:lib/curator-client-4.3.0.jar:lib/calcite-linq4j-1.21.0.jar:lib/commons-collections4-4.2.jar:lib/curator-recipes-4.3.0.jar:lib/netty-transport-native-epoll-4.1.68.Final-linux-x86_64.jar:lib/aggdesigner-algorithm-6.0.jar:lib/jna-4.5.1.jar:lib/jackson-jaxrs-json-provider-2.10.5.jar:lib/asm-9.3.jar:lib/netty-reactive-streams-2.0.0.jar:lib/metrics-core-4.0.0.jar:lib/commons-collections-3.2.2.jar:lib/guice-assistedinject-4.1.0.jar:lib/derby-10.14.2.0.jar:lib/fastutil-8.5.4.jar:lib/google-api-client-1.26.0.jar:lib/netty-buffer-4.1.68.Final.jar:lib/druid-sql-24.0.0.jar:lib/log4j-slf4j-impl-2.18.0.jar:lib/commons-compiler-3.0.11.jar:lib/caffeine-2.8.0.jar:lib/jackson-module-guice-2.10.5.jar:lib/jetty-security-9.4.48.v20220622.jar:lib/maven-repository-metadata-3.1.1.jar:lib/commons-io-2.11.0.jar:lib/jackson-jq-0.0.10.jar:lib/asm-commons-9.3.jar:lib/compress-lzf-1.0.4.jar:lib/commons-codec-1.13.jar:lib/protobuf-java-3.11.0.jar:lib/config-magic-0.9.jar:lib/airline-io-2.8.4.jar:lib/audience-annotations-0.5.0.jar:lib/fastutil-extra-8.5.4.jar:lib/txw2-2.3.1.jar:lib/jersey-core-1.19.4.jar:lib/avatica-core-1.17.0.jar:lib/netty-3.10.6.Final.jar:lib/fastutil-core-8.5.4.jar:lib/druid-indexing-hadoop-
24.0.0.jar:lib/jetty-client-9.4.48.v20220622.jar:lib/druid-indexing-service-24.0.0.jar:lib/commons-compress-1.21.jar:lib/reactive-streams-1.0.2.jar:lib/maven-model-builder-3.1.1.jar:lib/javax.el-api-3.0.0.jar:lib/netty-resolver-dns-4.1.68.Final.jar:lib/hibernate-validator-5.2.5.Final.jar:lib/druid-aws-common-24.0.0.jar:lib/sigar-1.6.5.132.jar:lib/maven-settings-3.1.1.jar:lib/log4j-1.2-api-2.18.0.jar:lib/resilience4j-bulkhead-1.3.1.jar:lib/icu4j-55.1.jar:lib/jackson-jaxrs-base-2.10.5.jar:lib/stax-ex-1.8.jar:lib/aether-connector-okhttp-0.0.9.jar:lib/javax.activation-api-1.2.0.jar:lib/netty-codec-4.1.68.Final.jar:lib/calcite-core-1.21.0.jar:lib/netty-codec-http-4.1.68.Final.jar:lib/aws-java-sdk-sts-1.12.264.jar:lib/jersey-servlet-1.19.4.jar:lib/json-path-2.3.0.jar:lib/druid-server-24.0.0.jar:lib/jetty-proxy-9.4.48.v20220622.jar:lib/lz4-java-1.8.0.jar:lib/jaxb-runtime-2.3.1.jar:lib/maven-artifact-3.6.0.jar:lib/jackson-module-jaxb-annotations-2.10.5.jar:lib/antlr4-runtime-4.5.1.jar:lib/curator-framework-4.3.0.jar:lib/druid-console-24.0.0.jar:lib/datasketches-java-3.2.0.jar:lib/RoaringBitmap-0.9.0.jar:lib/guice-4.1.0.jar:lib/log4j-api-2.18.0.jar:lib/jakarta.inject-api-1.0.3.jar:lib/netty-codec-socks-4.1.68.Final.jar:lib/jackson-annotations-2.10.5.jar:lib/derbyclient-10.14.2.0.jar:lib/aether-connector-file-0.9.0.M2.jar:lib/aws-java-sdk-ec2-1.12.264.jar:lib/disruptor-3.3.6.jar:lib/datasketches-memory-2.0.0.jar:lib/wagon-provider-api-2.4.jar:lib/xz-1.8.jar:lib/jetty-util-ajax-9.4.48.v20220622.jar:lib/commons-dbcp2-2.0.1.jar:lib/netty-handler-proxy-4.1.68.Final.jar:lib/javax.activation-1.2.0.jar:lib/jcodings-1.0.43.jar:lib/cron-scheduler-0.1.jar:lib/jsr305-2.0.1.jar:lib/aws-java-sdk-kms-1.12.264.jar:lib/jakarta.xml.bind-api-2.3.2.jar:lib/FastInfoset-1.2.15.jar:lib/jackson-datatype-joda-2.10.5.jar:lib/jcl-over-slf4j-1.7.36.jar:lib/druid-services-24.0.0.jar:lib/jetty-server-9.4.48.v20220622.jar:lib/curator-x-discovery-4.3.0.jar:lib/jvm-attach-api-1.5.jar:lib/jackson-jaxrs-smile
-provider-2.10.5.jar:lib/maven-settings-builder-3.1.1.jar:lib/javax.el-3.0.0.jar:lib/google-http-client-1.26.0.jar:lib/rhino-1.7.11.jar:lib/jdbi-2.63.1.jar:lib/extendedset-24.0.0.jar:lib/netty-handler-4.1.68.Final.jar:lib/error_prone_annotations-2.11.0.jar:lib/commons-lang-2.6.jar:lib/jetty-util-9.4.48.v20220622.jar:lib/aether-api-0.9.0.M2.jar:lib/maven-aether-provider-3.1.1.jar:lib/commons-lang3-3.8.1.jar:lib/plexus-interpolation-1.19.jar:lib/httpcore-4.4.11.jar:lib/commons-pool2-2.2.jar:lib/druid-hll-24.0.0.jar:lib/jetty-continuation-9.4.48.v20220622.jar:lib/joni-2.1.27.jar:lib/istack-commons-runtime-3.0.7.jar:lib/aws-java-sdk-s3-1.12.264.jar:lib/ion-java-1.0.2.jar:lib/netty-common-4.1.68.Final.jar:lib/netty-transport-4.1.68.Final.jar:lib/jetty-rewrite-9.4.48.v20220622.jar:lib/jersey-server-1.19.4.jar:lib/derbynet-10.14.2.0.jar:lib/jetty-servlets-9.4.48.v20220622.jar:lib/tesla-aether-0.0.5.jar:lib/jmespath-java-1.12.264.jar:lib/zookeeper-3.5.9.jar:lib/async-http-client-netty-utils-2.5.3.jar:lib/druid-processing-24.0.0.jar:lib/jetty-http-9.4.48.v20220622.jar:lib/slf4j-api-1.7.36.jar:lib/jackson-dataformat-cbor-2.10.5.jar:lib/jakarta.activation-api-1.2.1.jar:lib/aws-java-sdk-core-1.12.264.jar:lib/jaxb-api-2.3.1.jar:lib/j2objc-annotations-1.1.jar:lib/aether-spi-0.9.0.M2.jar:lib/jsr311-api-1.1.1.jar:lib/commons-math3-3.6.1.jar:lib/ipaddress-5.3.4.jar:lib/jersey-guice-1.19.4.jar:lib/airline-2.8.4.jar:lib/jackson-datatype-guava-2.10.5.jar:lib/commons-logging-1.1.1.jar:lib/asm-tree-9.3.jar:lib/netty-resolver-4.1.68.Final.jar:lib/jackson-core-2.10.5.jar:lib/commons-text-1.3.jar:lib/resilience4j-core-1.3.1.jar:lib/maven-model-3.1.1.jar:lib/netty-transport-native-unix-common-4.1.68.Final.jar:lib/checker-qual-2.5.7.jar:lib/okhttp-1.0.2.jar:lib/guice-multibindings-4.1.0.jar:lib/javax.servlet-api-3.1.0.jar:lib/jetty-io-9.4.48.v20220622.jar:lib/vavr-0.10.2.jar:lib/google-http-client-jackson2-1.26.0.jar:lib/netty-codec-dns-4.1.68.Final.jar:lib/opencsv-4.6.jar:lib/jackson-databin
d-2.10.5.1.jar:lib/aether-impl-0.9.0.M2.jar:lib/plexus-utils-3.0.24.jar:lib/jboss-logging-3.2.1.Final.jar:lib/esri-geometry-api-2.2.0.jar:lib/asm-analysis-9.3.jar:lib/druid-gcp-common-24.0.0.jar:lib/log4j-core-2.18.0.jar:lib/shims-0.9.0.jar:",
"java.class.version": "52.0",
"java.endorsed.dirs": "/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/endorsed",
"java.ext.dirs": "/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/ext:/usr/java/packages/lib/ext",
"java.home": "/usr/lib/jvm/java-8-openjdk-amd64/jre",
"java.io.tmpdir": "/druid/data",
"java.library.path": "/usr/java/packages/lib/amd64:/usr/lib/x86_64-linux-gnu/jni:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/usr/lib/jni:/lib:/usr/lib",
"java.runtime.name": "OpenJDK Runtime Environment",
"java.runtime.version": "1.8.0_275-8u275-b01-1~deb9u1-b01",
"java.specification.name": "Java Platform API Specification",
"java.specification.vendor": "Oracle Corporation",
"java.specification.version": "1.8",
"java.util.logging.manager": "org.apache.logging.log4j.jul.LogManager",
"java.vendor": "Oracle Corporation",
"java.vendor.url": "http://java.oracle.com/",
"java.vendor.url.bug": "http://bugreport.sun.com/bugreport/",
"java.version": "1.8.0_275",
"java.vm.info": "mixed mode",
"java.vm.name": "OpenJDK 64-Bit Server VM",
"java.vm.specification.name": "Java Virtual Machine Specification",
"java.vm.specification.vendor": "Oracle Corporation",
"java.vm.specification.version": "1.8",
"java.vm.vendor": "Oracle Corporation",
"java.vm.version": "25.275-b01",
"line.separator": "\n",
"log4j.shutdownCallbackRegistry": "org.apache.druid.common.config.Log4jShutdown",
"log4j.shutdownHookEnabled": "true",
"log4j2.is.webapp": "false",
"net.spy.log.LoggerImpl": "net.spy.memcached.compat.log.SLF4JLogger",
"org.jboss.logging.provider": "slf4j",
"os.arch": "amd64",
"os.name": "Linux",
"os.version": "5.4.202+",
"path.separator": ":",
"sun.arch.data.model": "64",
"sun.boot.class.path": "/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/resources.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/rt.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/sunrsasign.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/jsse.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/jce.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/charsets.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/jfr.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/classes",
"sun.boot.library.path": "/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64",
"sun.cpu.endian": "little",
"sun.cpu.isalist": "",
"sun.io.unicode.encoding": "UnicodeLittle",
"sun.java.command": "org.apache.druid.cli.Main server middleManager",
"sun.java.launcher": "SUN_STANDARD",
"sun.jnu.encoding": "UTF-8",
"sun.management.compiler": "HotSpot 64-Bit Tiered Compilers",
"sun.os.patch.level": "unknown",
"user.dir": "/opt/druid",
"user.home": "/root",
"user.language": "en",
"user.name": "root",
"user.timezone": "UTC"
}
Coordinator/Overlord
{
"awt.toolkit": "sun.awt.X11.XToolkit",
"druid.coordinator.asOverlord.enabled": "true",
"druid.coordinator.asOverlord.overlordService": "druid/overlord",
"druid.coordinator.balancer.strategy": "cachingCost",
"druid.coordinator.kill.durationToRetain": "P180D",
"druid.coordinator.kill.on": "false",
"druid.coordinator.loadqueuepeon.type": "http",
"druid.coordinator.period": "PT30S",
"druid.coordinator.period.indexingPeriod": "PT5M",
"druid.coordinator.startDelay": "PT30S",
"druid.discovery.k8s.clusterIdentifier": "druid",
"druid.discovery.type": "k8s",
"druid.emitter": "prometheus",
"druid.emitter.prometheus.addServiceAsLabel": "true",
"druid.emitter.prometheus.port": "9999",
"druid.emitter.prometheus.strategy": "exporter",
"druid.emmiter.logging.logLevel": "warn",
"druid.extensions.loadList": "[ \"druid-kubernetes-extensions\", \"druid-basic-security\", \"prometheus-emitter\", \"postgresql-metadata-storage\", \"druid-google-extensions\", \"druid-pac4j\", \"druid-kafka-indexing-service\", \"druid-lookups-cached-global\", \"druid-virtual-columns\", \"druid-avro-extensions\", \"druid-datasketches\", \"druid-moving-average-query\", \"druid-time-min-max\" ]",
"druid.generic.useDefaultValueForNull": "true",
"druid.google.bucket": "***-druid-deepstorage",
"druid.host": "10.28.27.21",
"druid.indexer.logs.bucket": "***-druid-deepstorage",
"druid.indexer.logs.kill.durationToRetain": "604800000",
"druid.indexer.logs.kill.enabled": "false",
"druid.indexer.logs.prefix": "taskLogs",
"druid.indexer.logs.type": "google",
"druid.indexer.queue.startDelay": "PT30S",
"druid.indexer.runner.type": "httpRemote",
"druid.indexer.storage.type": "metadata",
"druid.javascript.enabled": "false",
"druid.lookup.enableLookupSyncOnStartup": "true",
"druid.lookup.lookupTier": "broker",
"druid.metadata.storage.connector.connectURI": "jdbc:postgresql://***.svc.cluster.local:5432/druid",
"druid.metadata.storage.connector.createTables": "true",
"druid.metadata.storage.type": "postgresql",
"druid.peon.mode": "remote",
"druid.port": "8081",
"druid.query.groupBy.applyLimitPushDownToSegment": "true",
"druid.query.groupBy.maxOnDiskStorage": "1073741824",
"druid.selectors.coordinator.serviceName": "druid/coordinator",
"druid.selectors.indexing.serviceName": "druid/overlord",
"druid.serverview.type": "http",
"druid.service": "druid/coordinator",
"druid.storage.type": "google",
"druid.zk.service.enabled": "false",
"file.encoding": "UTF-8",
"file.encoding.pkg": "sun.io",
"file.separator": "/",
"java.awt.graphicsenv": "sun.awt.X11GraphicsEnvironment",
"java.awt.printerjob": "sun.print.PSPrinterJob",
"java.class.path": "/tmp/conf/druid/cluster/_common:/tmp/conf/druid/cluster/master/coordinator-overlord:lib/zstd-jni-1.5.2-3.jar:lib/jackson-dataformat-smile-2.10.5.jar:lib/commons-beanutils-1.9.4.jar:lib/classmate-1.1.0.jar:lib/log4j-jul-2.18.0.jar:lib/javax.inject-1.jar:lib/avatica-metrics-1.17.0.jar:lib/avatica-server-1.17.0.jar:lib/aopalliance-1.0.jar:lib/aether-util-0.9.0.M2.jar:lib/jetty-servlet-9.4.48.v20220622.jar:lib/spymemcached-2.12.3.jar:lib/zookeeper-jute-3.5.9.jar:lib/guava-16.0.1.jar:lib/google-oauth-client-1.26.0.jar:lib/druid-core-24.0.0.jar:lib/validation-api-1.1.0.Final.jar:lib/janino-3.0.11.jar:lib/httpclient-4.5.13.jar:lib/accessors-smart-1.2.jar:lib/async-http-client-2.5.3.jar:lib/joda-time-2.10.5.jar:lib/json-smart-2.3.jar:lib/guice-servlet-4.1.0.jar:lib/vavr-match-0.10.2.jar:lib/curator-client-4.3.0.jar:lib/calcite-linq4j-1.21.0.jar:lib/commons-collections4-4.2.jar:lib/curator-recipes-4.3.0.jar:lib/netty-transport-native-epoll-4.1.68.Final-linux-x86_64.jar:lib/aggdesigner-algorithm-6.0.jar:lib/jna-4.5.1.jar:lib/jackson-jaxrs-json-provider-2.10.5.jar:lib/asm-9.3.jar:lib/netty-reactive-streams-2.0.0.jar:lib/metrics-core-4.0.0.jar:lib/commons-collections-3.2.2.jar:lib/guice-assistedinject-4.1.0.jar:lib/derby-10.14.2.0.jar:lib/fastutil-8.5.4.jar:lib/google-api-client-1.26.0.jar:lib/netty-buffer-4.1.68.Final.jar:lib/druid-sql-24.0.0.jar:lib/log4j-slf4j-impl-2.18.0.jar:lib/commons-compiler-3.0.11.jar:lib/caffeine-2.8.0.jar:lib/jackson-module-guice-2.10.5.jar:lib/jetty-security-9.4.48.v20220622.jar:lib/maven-repository-metadata-3.1.1.jar:lib/commons-io-2.11.0.jar:lib/jackson-jq-0.0.10.jar:lib/asm-commons-9.3.jar:lib/compress-lzf-1.0.4.jar:lib/commons-codec-1.13.jar:lib/protobuf-java-3.11.0.jar:lib/config-magic-0.9.jar:lib/airline-io-2.8.4.jar:lib/audience-annotations-0.5.0.jar:lib/fastutil-extra-8.5.4.jar:lib/txw2-2.3.1.jar:lib/jersey-core-1.19.4.jar:lib/avatica-core-1.17.0.jar:lib/netty-3.10.6.Final.jar:lib/fastutil-core-8.5.4.jar:lib/druid-indexin
g-hadoop-24.0.0.jar:lib/jetty-client-9.4.48.v20220622.jar:lib/druid-indexing-service-24.0.0.jar:lib/commons-compress-1.21.jar:lib/reactive-streams-1.0.2.jar:lib/maven-model-builder-3.1.1.jar:lib/javax.el-api-3.0.0.jar:lib/netty-resolver-dns-4.1.68.Final.jar:lib/hibernate-validator-5.2.5.Final.jar:lib/druid-aws-common-24.0.0.jar:lib/sigar-1.6.5.132.jar:lib/maven-settings-3.1.1.jar:lib/log4j-1.2-api-2.18.0.jar:lib/resilience4j-bulkhead-1.3.1.jar:lib/icu4j-55.1.jar:lib/jackson-jaxrs-base-2.10.5.jar:lib/stax-ex-1.8.jar:lib/aether-connector-okhttp-0.0.9.jar:lib/javax.activation-api-1.2.0.jar:lib/netty-codec-4.1.68.Final.jar:lib/calcite-core-1.21.0.jar:lib/netty-codec-http-4.1.68.Final.jar:lib/aws-java-sdk-sts-1.12.264.jar:lib/jersey-servlet-1.19.4.jar:lib/json-path-2.3.0.jar:lib/druid-server-24.0.0.jar:lib/jetty-proxy-9.4.48.v20220622.jar:lib/lz4-java-1.8.0.jar:lib/jaxb-runtime-2.3.1.jar:lib/maven-artifact-3.6.0.jar:lib/jackson-module-jaxb-annotations-2.10.5.jar:lib/antlr4-runtime-4.5.1.jar:lib/curator-framework-4.3.0.jar:lib/druid-console-24.0.0.jar:lib/datasketches-java-3.2.0.jar:lib/RoaringBitmap-0.9.0.jar:lib/guice-4.1.0.jar:lib/log4j-api-2.18.0.jar:lib/jakarta.inject-api-1.0.3.jar:lib/netty-codec-socks-4.1.68.Final.jar:lib/jackson-annotations-2.10.5.jar:lib/derbyclient-10.14.2.0.jar:lib/aether-connector-file-0.9.0.M2.jar:lib/aws-java-sdk-ec2-1.12.264.jar:lib/disruptor-3.3.6.jar:lib/datasketches-memory-2.0.0.jar:lib/wagon-provider-api-2.4.jar:lib/xz-1.8.jar:lib/jetty-util-ajax-9.4.48.v20220622.jar:lib/commons-dbcp2-2.0.1.jar:lib/netty-handler-proxy-4.1.68.Final.jar:lib/javax.activation-1.2.0.jar:lib/jcodings-1.0.43.jar:lib/cron-scheduler-0.1.jar:lib/jsr305-2.0.1.jar:lib/aws-java-sdk-kms-1.12.264.jar:lib/jakarta.xml.bind-api-2.3.2.jar:lib/FastInfoset-1.2.15.jar:lib/jackson-datatype-joda-2.10.5.jar:lib/jcl-over-slf4j-1.7.36.jar:lib/druid-services-24.0.0.jar:lib/jetty-server-9.4.48.v20220622.jar:lib/curator-x-discovery-4.3.0.jar:lib/jvm-attach-api-1.5.jar:lib/jackson-ja
xrs-smile-provider-2.10.5.jar:lib/maven-settings-builder-3.1.1.jar:lib/javax.el-3.0.0.jar:lib/google-http-client-1.26.0.jar:lib/rhino-1.7.11.jar:lib/jdbi-2.63.1.jar:lib/extendedset-24.0.0.jar:lib/netty-handler-4.1.68.Final.jar:lib/error_prone_annotations-2.11.0.jar:lib/commons-lang-2.6.jar:lib/jetty-util-9.4.48.v20220622.jar:lib/aether-api-0.9.0.M2.jar:lib/maven-aether-provider-3.1.1.jar:lib/commons-lang3-3.8.1.jar:lib/plexus-interpolation-1.19.jar:lib/httpcore-4.4.11.jar:lib/commons-pool2-2.2.jar:lib/druid-hll-24.0.0.jar:lib/jetty-continuation-9.4.48.v20220622.jar:lib/joni-2.1.27.jar:lib/istack-commons-runtime-3.0.7.jar:lib/aws-java-sdk-s3-1.12.264.jar:lib/ion-java-1.0.2.jar:lib/netty-common-4.1.68.Final.jar:lib/netty-transport-4.1.68.Final.jar:lib/jetty-rewrite-9.4.48.v20220622.jar:lib/jersey-server-1.19.4.jar:lib/derbynet-10.14.2.0.jar:lib/jetty-servlets-9.4.48.v20220622.jar:lib/tesla-aether-0.0.5.jar:lib/jmespath-java-1.12.264.jar:lib/zookeeper-3.5.9.jar:lib/async-http-client-netty-utils-2.5.3.jar:lib/druid-processing-24.0.0.jar:lib/jetty-http-9.4.48.v20220622.jar:lib/slf4j-api-1.7.36.jar:lib/jackson-dataformat-cbor-2.10.5.jar:lib/jakarta.activation-api-1.2.1.jar:lib/aws-java-sdk-core-1.12.264.jar:lib/jaxb-api-2.3.1.jar:lib/j2objc-annotations-1.1.jar:lib/aether-spi-0.9.0.M2.jar:lib/jsr311-api-1.1.1.jar:lib/commons-math3-3.6.1.jar:lib/ipaddress-5.3.4.jar:lib/jersey-guice-1.19.4.jar:lib/airline-2.8.4.jar:lib/jackson-datatype-guava-2.10.5.jar:lib/commons-logging-1.1.1.jar:lib/asm-tree-9.3.jar:lib/netty-resolver-4.1.68.Final.jar:lib/jackson-core-2.10.5.jar:lib/commons-text-1.3.jar:lib/resilience4j-core-1.3.1.jar:lib/maven-model-3.1.1.jar:lib/netty-transport-native-unix-common-4.1.68.Final.jar:lib/checker-qual-2.5.7.jar:lib/okhttp-1.0.2.jar:lib/guice-multibindings-4.1.0.jar:lib/javax.servlet-api-3.1.0.jar:lib/jetty-io-9.4.48.v20220622.jar:lib/vavr-0.10.2.jar:lib/google-http-client-jackson2-1.26.0.jar:lib/netty-codec-dns-4.1.68.Final.jar:lib/opencsv-4.6.jar:lib/jackso
n-databind-2.10.5.1.jar:lib/aether-impl-0.9.0.M2.jar:lib/plexus-utils-3.0.24.jar:lib/jboss-logging-3.2.1.Final.jar:lib/esri-geometry-api-2.2.0.jar:lib/asm-analysis-9.3.jar:lib/druid-gcp-common-24.0.0.jar:lib/log4j-core-2.18.0.jar:lib/shims-0.9.0.jar:",
"java.class.version": "52.0",
"java.endorsed.dirs": "/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/endorsed",
"java.ext.dirs": "/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/ext:/usr/java/packages/lib/ext",
"java.home": "/usr/lib/jvm/java-8-openjdk-amd64/jre",
"java.io.tmpdir": "/druid/data",
"java.library.path": "/usr/java/packages/lib/amd64:/usr/lib/x86_64-linux-gnu/jni:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/usr/lib/jni:/lib:/usr/lib",
"java.runtime.name": "OpenJDK Runtime Environment",
"java.runtime.version": "1.8.0_275-8u275-b01-1~deb9u1-b01",
"java.specification.name": "Java Platform API Specification",
"java.specification.vendor": "Oracle Corporation",
"java.specification.version": "1.8",
"java.util.logging.manager": "org.apache.logging.log4j.jul.LogManager",
"java.vendor": "Oracle Corporation",
"java.vendor.url": "http://java.oracle.com/",
"java.vendor.url.bug": "http://bugreport.sun.com/bugreport/",
"java.version": "1.8.0_275",
"java.vm.info": "mixed mode",
"java.vm.name": "OpenJDK 64-Bit Server VM",
"java.vm.specification.name": "Java Virtual Machine Specification",
"java.vm.specification.vendor": "Oracle Corporation",
"java.vm.specification.version": "1.8",
"java.vm.vendor": "Oracle Corporation",
"java.vm.version": "25.275-b01",
"line.separator": "\n",
"log4j.shutdownCallbackRegistry": "org.apache.druid.common.config.Log4jShutdown",
"log4j.shutdownHookEnabled": "true",
"log4j2.is.webapp": "false",
"net.spy.log.LoggerImpl": "net.spy.memcached.compat.log.SLF4JLogger",
"org.jboss.logging.provider": "slf4j",
"os.arch": "amd64",
"os.name": "Linux",
"os.version": "5.4.202+",
"path.separator": ":",
"sun.arch.data.model": "64",
"sun.boot.class.path": "/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/resources.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/rt.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/sunrsasign.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/jsse.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/jce.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/charsets.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/jfr.jar:/usr/lib/jvm/java-8-openjdk-amd64/jre/classes",
"sun.boot.library.path": "/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64",
"sun.cpu.endian": "little",
"sun.cpu.isalist": "",
"sun.io.unicode.encoding": "UnicodeLittle",
"sun.java.command": "org.apache.druid.cli.Main server coordinator",
"sun.java.launcher": "SUN_STANDARD",
"sun.jnu.encoding": "UTF-8",
"sun.management.compiler": "HotSpot 64-Bit Tiered Compilers",
"sun.os.patch.level": "unknown",
"user.dir": "/opt/druid",
"user.home": "/root",
"user.language": "en",
"user.name": "root",
"user.timezone": "UTC"
}
There are no error messages, but this is the log from the Middle-Manager and the Coordinator/Overlord leader. The other Middle-Managers have the same logs.
Middle-Manager
2022-11-08T17:56:41+0000 startup service middleManager
Setting druid.host=10.28.1.28 in /tmp/conf/druid/cluster/data/middleManager/runtime.properties
2022-11-08T17:56:49,272 WARN [main] org.apache.druid.k8s.discovery.K8sDiscoveryConfig - IF coordinator pods run in multiple namespaces, then you MUST provide coordinatorLeaderElectionConfigMapNamespace
2022-11-08T17:56:49,276 WARN [main] org.apache.druid.k8s.discovery.K8sDiscoveryConfig - IF overlord pods run in multiple namespaces, then you MUST provide overlordLeaderElectionConfigMapNamespace
2022-11-08T17:56:52,417 WARN [main] org.eclipse.jetty.server.handler.gzip.GzipHandler - minGzipSize of 0 is inefficient for short content, break even is size 23
2022-11-08T17:56:56,269 WARN [qtp546936087-66] org.apache.druid.indexing.common.config.TaskConfig - Batch processing mode argument value is null or not valid:[null], defaulting to[CLOSED_SEGMENTS]
Coordinator/Overlord
2022-11-08T14:21:20+0000 startup service coordinator
Setting druid.host=10.28.27.21 in /tmp/conf/druid/cluster/master/coordinator-overlord/runtime.properties
2022-11-08T14:21:25,787 WARN [main] org.apache.druid.k8s.discovery.K8sDiscoveryConfig - IF coordinator pods run in multiple namespaces, then you MUST provide coordinatorLeaderElectionConfigMapNamespace
2022-11-08T14:21:25,792 WARN [main] org.apache.druid.k8s.discovery.K8sDiscoveryConfig - IF overlord pods run in multiple namespaces, then you MUST provide overlordLeaderElectionConfigMapNamespace
2022-11-08T14:21:28,176 WARN [main] org.eclipse.jetty.server.handler.gzip.GzipHandler - minGzipSize of 0 is inefficient for short content, break even is size 23
2022-11-08T14:21:29,694 WARN [K8sDruidNodeDiscoveryProvider-ListenerExecutor] org.apache.druid.discovery.DruidNodeDiscoveryProvider$ServiceDruidNodeDiscovery - Node[DiscoveryDruidNode{druidNode=DruidNode{serviceName='druid/broker', host='10.28.4.25', bindOnHost=false, port=-1, plaintextPort=8088, enablePlaintextPort=true, tlsPort=-1, enableTlsPort=false}, nodeRole='BROKER', services={lookupNodeService=LookupNodeService{lookupTier='broker'}}}] discovered but doesn't have service[dataNodeService]. Ignored.
2022-11-08T14:21:29,695 WARN [K8sDruidNodeDiscoveryProvider-ListenerExecutor] org.apache.druid.discovery.DruidNodeDiscoveryProvider$ServiceDruidNodeDiscovery - Node[DiscoveryDruidNode{druidNode=DruidNode{serviceName='druid/broker', host='10.28.8.20', bindOnHost=false, port=-1, plaintextPort=8088, enablePlaintextPort=true, tlsPort=-1, enableTlsPort=false}, nodeRole='BROKER', services={lookupNodeService=LookupNodeService{lookupTier='broker'}}}] discovered but doesn't have service[dataNodeService]. Ignored.
2022-11-08T14:21:29,695 WARN [K8sDruidNodeDiscoveryProvider-ListenerExecutor] org.apache.druid.discovery.DruidNodeDiscoveryProvider$ServiceDruidNodeDiscovery - Node[DiscoveryDruidNode{druidNode=DruidNode{serviceName='druid/broker', host='10.28.15.3', bindOnHost=false, port=-1, plaintextPort=8088, enablePlaintextPort=true, tlsPort=-1, enableTlsPort=false}, nodeRole='BROKER', services={lookupNodeService=LookupNodeService{lookupTier='broker'}}}] discovered but doesn't have service[dataNodeService]. Ignored.
2022-11-08T14:21:29,695 WARN [K8sDruidNodeDiscoveryProvider-ListenerExecutor] org.apache.druid.discovery.DruidNodeDiscoveryProvider$ServiceDruidNodeDiscovery - Node[DiscoveryDruidNode{druidNode=DruidNode{serviceName='druid/broker', host='10.28.2.24', bindOnHost=false, port=-1, plaintextPort=8088, enablePlaintextPort=true, tlsPort=-1, enableTlsPort=false}, nodeRole='BROKER', services={lookupNodeService=LookupNodeService{lookupTier='broker'}}}] discovered but doesn't have service[dataNodeService]. Ignored.
2022-11-08T14:21:29,696 WARN [K8sDruidNodeDiscoveryProvider-ListenerExecutor] org.apache.druid.discovery.DruidNodeDiscoveryProvider$ServiceDruidNodeDiscovery - Node[DiscoveryDruidNode{druidNode=DruidNode{serviceName='druid/broker', host='10.28.0.12', bindOnHost=false, port=-1, plaintextPort=8088, enablePlaintextPort=true, tlsPort=-1, enableTlsPort=false}, nodeRole='BROKER', services={lookupNodeService=LookupNodeService{lookupTier='broker'}}}] discovered but doesn't have service[dataNodeService]. Ignored.
This seems to be the same symptom as this thread on the forum.
This error doesn't happen when using ZooKeeper for leader election, but using ZooKeeper brings our whole cluster down whenever something forcefully breaks the connection between the Coordinators and ZooKeeper due to #13167
Related prints
There were supposed to be 20 middle_managers
We call the Middle Manager pods indexers

If a task is lucky enough to be assigned to a Middle Manager, it completes fine. Otherwise, it stays there until either we restart the Middle Managers so that they reappear or the task times out with this message:

PS: The message is truncated on the JSON object for the task, not on the UI. I couldn't find the whole message anywhere.
Just to be clear, you are saying this only happens when using the ZK-less discovery in druid-kubernetes-extensions, right? I'm not familiar with how that extension works, although if you are having a problem with ZK-based discovery too then that lack of familiarity isn't as important 🙂
Yes, this only happens with ZK-less discovery.
PR #13295 fixed the problem with the ZK-based discovery, so we are using a custom image built from that PR and waiting for a release now that it was merged.
It looks like the issue happens because the peon unannounces the middlemanager it runs on when it terminates, which removes the druidDiscoveryAnnouncement-cluster-identifier and druidDiscoveryAnnouncement-id-hash labels from the middlemanager pod.
Labels with no peons:
druidDiscoveryAnnouncement-cluster-identifier: dev
druidDiscoveryAnnouncement-id-hash: "1276105065"
druidDiscoveryAnnouncement-middleManager: "true"
Labels with peon:
druidDiscoveryAnnouncement-cluster-identifier: dev
druidDiscoveryAnnouncement-id-hash: "1276108075"
druidDiscoveryAnnouncement-middleManager: "true"
druidDiscoveryAnnouncement-peon: "true"
Labels after peon finishes:
druidDiscoveryAnnouncement-middleManager: "true"
Hello, I am facing the same issue, where Middle Managers disappear for every index_kafka task in a Kubernetes deployment of Druid. Has any fix or workaround been found for the issue? Please let me know.
This issue basically makes the kubernetes druid extension broken and unusable.
This issue has been marked as stale due to 280 days of inactivity. It will be closed in 4 weeks if no further activity occurs. If this issue is still relevant, please simply write any comment. Even if closed, you can still revive the issue at any time or discuss it on the dev@druid.apache.org list. Thank you for your contributions.
/fresh
I'm having the exact same issue here with the latest version of Druid (v32.0.1). I have a Kafka job running and after the job completes successfully the MiddleManager disappears.
As Sh1ftry pointed out previously, the Kubernetes extension seems to remove the Kubernetes labels from the MiddleManager after the peon has completed.
Before:
Labels: app=druid
apps.kubernetes.io/pod-index=0
component=middleManager
controller-revision-hash=druid-cluster-middlemanagers-7fcff47567
druidDiscoveryAnnouncement-cluster-identifier=cluster
druidDiscoveryAnnouncement-id-hash=837274055
druidDiscoveryAnnouncement-middleManager=true
druid_cr=cluster
nodeSpecUniqueStr=druid-cluster-middlemanagers
statefulset.kubernetes.io/pod-name=druid-cluster-middlemanagers-0
Annotations: druidNodeInfo-middleManager:
{"druidNode":{"service":"druid/middleManager","host":"10.0.164.21","bindOnHost":false,"plaintextPort":8088,"port":-1,"tlsPort":-1,"enableP...
After Kafka job is completed successfully:
Labels: app=druid
apps.kubernetes.io/pod-index=0
component=middleManager
controller-revision-hash=druid-cluster-middlemanagers-7fcff47567
druidDiscoveryAnnouncement-middleManager=true
druid_cr=cluster
nodeSpecUniqueStr=druid-cluster-middlemanagers
statefulset.kubernetes.io/pod-name=druid-cluster-middlemanagers-0
Annotations: druidNodeInfo-middleManager:
{"druidNode":{"service":"druid/middleManager","host":"10.0.154.9","bindOnHost":false,"plaintextPort":8088,"port":-1,"tlsPort":-1,"enablePl...
This leads to errors in the cluster not being able to find the MiddleManager:
2025-04-15T11:56:43,927 ERROR [org.apache.druid.k8s.discovery.K8sDruidNodeDiscoveryProvider$NodeRoleWatchermiddleManager] org.apache.druid.discovery.BaseNodeRoleWatcher - Noticed disappearance of unknown druid node [http://10.0.154.9:8088] of role [middleManager].
The labels seem to get removed by the K8sDruidNodeAnnouncer class:
2025-04-15T11:52:25,333 INFO [task-runner-0-priority-0] org.apache.druid.k8s.discovery.K8sDruidNodeAnnouncer - Unannouncing DiscoveryDruidNode[DiscoveryDruidNode{druidNode=DruidNode{serviceName='druid/middleManager', host='10.0.154.9', bindOnHost=false, port=-1, plaintextPort=8100, enablePlaintextPort=true, tlsPort=-1, enableTlsPort=false}, nodeRole='PEON', services={dataNodeService=DataNodeService{tier='_default_tier', maxSize=0, serverType=indexer-executor, priority=0}, lookupNodeService=LookupNodeService{lookupTier='__default'}}', startTime=2025-04-15T11:51:24.122Z}]
The jobs in the cluster which have finished successfully are still displayed as "Running".
Is there any fix planned for this as of now, or any kind of configuration that can mitigate this issue? As pointed out previously, this basically still makes the whole Kubernetes extension unusable.
Thanks a lot! :)
EDIT: In case anyone else is experiencing the same problem and is looking for a solution to get Druid working without Zookeeper, check out the druid-kubernetes-overlord-extensions extension - that extension uses Kubernetes Jobs as workers and hence doesn't experience that issue. I used it together with the Kubernetes operator and found a working example here: https://github.com/iunera/druid-cluster-config/tree/main/kubernetes/druid/druidcluster
Also running into this issue on v33.0.0.
@com98, I was trying the druid-kubernetes-overlord-extensions, but it seems not to work when using Google Cloud Storage for indexer logs. Can you share what you were using for log storage?
java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
at org.apache.druid.indexing.overlord.DruidOverlord$1.becomeLeader(DruidOverlord.java:193) ~[druid-indexing-service-33.0.0.jar:33.0.0]
at org.apache.druid.k8s.discovery.K8sDruidLeaderSelector.lambda$startLeaderElector$0(K8sDruidLeaderSelector.java:77) ~[?:?]
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:539) [?:?]
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) [?:?]
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) [?:?]
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) [?:?]
at java.base/java.lang.Thread.run(Thread.java:840) [?:?]
Caused by: java.lang.reflect.InvocationTargetException
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:?]
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) ~[?:?]
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:?]
at java.base/java.lang.reflect.Method.invoke(Method.java:569) ~[?:?]
at org.apache.druid.java.util.common.lifecycle.Lifecycle$AnnotationBasedHandler.start(Lifecycle.java:446) ~[druid-processing-33.0.0.jar:33.0.0]
at org.apache.druid.java.util.common.lifecycle.Lifecycle.start(Lifecycle.java:341) ~[druid-processing-33.0.0.jar:33.0.0]
at org.apache.druid.indexing.overlord.DruidOverlord$1.becomeLeader(DruidOverlord.java:190) ~[druid-indexing-service-33.0.0.jar:33.0.0]
... 6 more
Caused by: org.apache.commons.lang3.NotImplementedException: this druid.indexer.logs.type [class org.apache.druid.storage.google.GoogleTaskLogs] does not support managing task payloads yet. You will have to switch to using environment variables
This is still current for us in v34.0.0.
I'm having the same issue in v33.0.0