elasticsearch icon indicating copy to clipboard operation
elasticsearch copied to clipboard

[CI] DataStreamLifecycleDownsampleDisruptionIT testDataStreamLifecycleDownsampleRollingRestart failing

Open DaveCTurner opened this issue 1 year ago • 7 comments

Build scan: https://gradle-enterprise.elastic.co/s/dpndfunqwdkgi/tests/:x-pack:plugin:downsample:internalClusterTest/org.elasticsearch.xpack.downsample.DataStreamLifecycleDownsampleDisruptionIT/testDataStreamLifecycleDownsampleRollingRestart

Reproduction line:

./gradlew ':x-pack:plugin:downsample:internalClusterTest' --tests "org.elasticsearch.xpack.downsample.DataStreamLifecycleDownsampleDisruptionIT.testDataStreamLifecycleDownsampleRollingRestart" -Dtests.seed=C9F9364FCAA9D709 -Dtests.locale=en -Dtests.timezone=America/Argentina/Ushuaia -Druntime.java=17

Applicable branches: main

Reproduces locally?: Didn't try

Failure history: https://gradle-enterprise.elastic.co/scans/tests?tests.container=org.elasticsearch.xpack.downsample.DataStreamLifecycleDownsampleDisruptionIT&tests.test=testDataStreamLifecycleDownsampleRollingRestart

Failure excerpt:

org.elasticsearch.node.NodeClosedException: node closed {node_t1}{2xl5JabcT_CLxtbNFu-Wkw}{6FSOPBFgT46ckf_D_OIBYQ}{node_t1}{127.0.0.1}{127.0.0.1:21644}{d}{8.11.0}{7000099-8500001}{xpack.installed=true}

  at __randomizedtesting.SeedInfo.seed([C9F9364FCAA9D709:1E60A1FF9CA43247]:0)
  at org.elasticsearch.transport.TransportService.sendRequestInternal(TransportService.java:937)
  at org.elasticsearch.transport.TransportService.sendRequest(TransportService.java:841)
  at org.elasticsearch.transport.TransportService.sendRequest(TransportService.java:751)
  at org.elasticsearch.transport.TransportService.sendRequest(TransportService.java:739)
  at org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction.doStart(TransportMasterNodeAction.java:244)
  at org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction$2.onNewClusterState(TransportMasterNodeAction.java:305)
  at org.elasticsearch.cluster.ClusterStateObserver$ContextPreservingListener.onNewClusterState(ClusterStateObserver.java:341)
  at org.elasticsearch.cluster.ClusterStateObserver.waitForNextChange(ClusterStateObserver.java:171)
  at org.elasticsearch.cluster.ClusterStateObserver.waitForNextChange(ClusterStateObserver.java:119)
  at org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction.retry(TransportMasterNodeAction.java:301)
  at org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction.retryOnNextState(TransportMasterNodeAction.java:282)
  at org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction$1.handleException(TransportMasterNodeAction.java:262)
  at org.elasticsearch.transport.TransportService$UnregisterChildTransportResponseHandler.handleException(TransportService.java:1703)
  at org.elasticsearch.transport.TransportService$ContextRestoreResponseHandler.handleException(TransportService.java:1420)
  at org.elasticsearch.transport.TransportService$7.doRun(TransportService.java:1304)
  at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
  at org.elasticsearch.common.util.concurrent.EsExecutors$DirectExecutorService.execute(EsExecutors.java:238)
  at org.elasticsearch.transport.TransportService.onConnectionClosed(TransportService.java:1300)
  at org.elasticsearch.transport.ConnectionManager$DelegatingNodeConnectionListener.onConnectionClosed(ConnectionManager.java:85)
  at org.elasticsearch.transport.ClusterConnectionManager.lambda$internalOpenConnection$8(ClusterConnectionManager.java:369)
  at org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:128)
  at org.elasticsearch.action.support.SubscribableListener$SuccessResult.complete(SubscribableListener.java:273)
  at org.elasticsearch.action.support.SubscribableListener.tryComplete(SubscribableListener.java:193)
  at org.elasticsearch.action.support.SubscribableListener.setResult(SubscribableListener.java:238)
  at org.elasticsearch.action.support.SubscribableListener.onResponse(SubscribableListener.java:136)
  at org.elasticsearch.transport.CloseableConnection.close(CloseableConnection.java:46)
  at org.elasticsearch.transport.TcpTransport$NodeChannels.close(TcpTransport.java:298)
  at org.elasticsearch.core.IOUtils.close(IOUtils.java:71)
  at org.elasticsearch.core.IOUtils.closeWhileHandlingException(IOUtils.java:169)
  at org.elasticsearch.transport.ClusterConnectionManager.pendingConnectionsComplete(ClusterConnectionManager.java:351)
  at org.elasticsearch.core.AbstractRefCounted$1.closeInternal(AbstractRefCounted.java:117)
  at org.elasticsearch.core.AbstractRefCounted.decRef(AbstractRefCounted.java:69)
  at org.elasticsearch.transport.ClusterConnectionManager.internalClose(ClusterConnectionManager.java:334)
  at org.elasticsearch.transport.ClusterConnectionManager.close(ClusterConnectionManager.java:323)
  at org.elasticsearch.core.IOUtils.close(IOUtils.java:71)
  at org.elasticsearch.core.IOUtils.close(IOUtils.java:87)
  at org.elasticsearch.core.IOUtils.close(IOUtils.java:63)
  at org.elasticsearch.transport.TransportService.doStop(TransportService.java:348)
  at org.elasticsearch.common.component.AbstractLifecycleComponent.stop(AbstractLifecycleComponent.java:80)
  at org.elasticsearch.node.Node.stopIfStarted(Node.java:1649)
  at org.elasticsearch.node.Node.stopIfStarted(Node.java:1643)
  at org.elasticsearch.node.Node.stop(Node.java:1633)
  at org.elasticsearch.node.Node.close(Node.java:1661)
  at org.elasticsearch.test.InternalTestCluster$NodeAndClient.close(InternalTestCluster.java:1060)
  at org.elasticsearch.test.InternalTestCluster$NodeAndClient.closeForRestart(InternalTestCluster.java:1004)
  at org.elasticsearch.test.InternalTestCluster.restartNode(InternalTestCluster.java:1870)
  at org.elasticsearch.test.InternalTestCluster.rollingRestart(InternalTestCluster.java:1857)
  at org.elasticsearch.xpack.downsample.DataStreamLifecycleDownsampleDisruptionIT.lambda$testDataStreamLifecycleDownsampleRollingRestart$0(DataStreamLifecycleDownsampleDisruptionIT.java:105)
  at org.elasticsearch.xpack.downsample.DataStreamLifecycleDownsampleDisruptionIT$Disruptor.run(DataStreamLifecycleDownsampleDisruptionIT.java:178)
  at java.lang.Thread.run(Thread.java:833)

DaveCTurner avatar Sep 13 '23 10:09 DaveCTurner

Pinging @elastic/es-data-management (Team:Data Management)

elasticsearchmachine avatar Sep 13 '23 10:09 elasticsearchmachine

Medium risk, as it's a new feature

andreidan avatar Oct 12 '23 13:10 andreidan

https://gradle-enterprise.elastic.co/s/u7d4p4muduxgo

joegallo avatar Feb 08 '24 15:02 joegallo

This failure might be spurious, I'm not sure -- I figured there was no harm in reopening the ticket to have somebody take a look. For example, it's possible the fix really is in the 8.12 branch, but perhaps it didn't make it in time for the 8.12.1 release? 🤷

joegallo avatar Feb 08 '24 15:02 joegallo

One more failure instance https://gradle-enterprise.elastic.co/s/r26qd5stdyq3u

volodk85 avatar Feb 08 '24 22:02 volodk85

One more failure at elasticsearch / periodic / platform-support / 8.12 / oraclelinux-8 / platform-support-unix

  2> REPRODUCE WITH: ./gradlew ':x-pack:plugin:downsample:internalClusterTest' --tests "org.elasticsearch.xpack.downsample.DataStreamLifecycleDownsampleDisruptionIT.testDataStreamLifecycleDownsampleRollingRestart" -Dtests.seed=2752E6E3BBEB194B -Dtests.locale=nl -Dtests.timezone=Canada/Saskatchewan -Druntime.java=21	
  2> org.elasticsearch.node.NodeClosedException: node closed {node_t1}{99Wtm4GqSlqx33fGEt7ZRQ}{cHR1C01QQ2SUhL8k_UqAlg}{node_t1}{127.0.0.1}{127.0.0.1:14922}{d}{8.12.2}{7000099-8500010}{xpack.installed=true}	
        at __randomizedtesting.SeedInfo.seed([2752E6E3BBEB194B:F0CB7153EDE6FC05]:0)	
        at app//org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction$2.onClusterServiceClose(TransportMasterNodeAction.java:311)	
        at app//org.elasticsearch.cluster.ClusterStateObserver$ContextPreservingListener.onClusterServiceClose(ClusterStateObserver.java:348)	
        at app//org.elasticsearch.cluster.ClusterStateObserver$ObserverClusterStateListener.onClose(ClusterStateObserver.java:275)	
        at app//org.elasticsearch.cluster.service.ClusterApplierService.addTimeoutListener(ClusterApplierService.java:254)	
        at app//org.elasticsearch.cluster.ClusterStateObserver.waitForNextChange(ClusterStateObserver.java:178)	
        at app//org.elasticsearch.cluster.ClusterStateObserver.waitForNextChange(ClusterStateObserver.java:119)	
        at app//org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction.retry(TransportMasterNodeAction.java:302)	
        at app//org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction.retryOnNextState(TransportMasterNodeAction.java:283)	
        at app//org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction$1.handleException(TransportMasterNodeAction.java:263)	
        at app//org.elasticsearch.transport.TransportService.handleSendRequestException(TransportService.java:857)	
        at app//org.elasticsearch.transport.TransportService.getConnectionOrFail(TransportService.java:779)	
        at app//org.elasticsearch.transport.TransportService.sendRequest(TransportService.java:755)	
        at app//org.elasticsearch.transport.TransportService.sendRequest(TransportService.java:745)	
        at app//org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction.doStart(TransportMasterNodeAction.java:245)	
        at app//org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction$2.onNewClusterState(TransportMasterNodeAction.java:306)	
        at app//org.elasticsearch.cluster.ClusterStateObserver$ContextPreservingListener.onNewClusterState(ClusterStateObserver.java:341)	
        at app//org.elasticsearch.cluster.ClusterStateObserver.waitForNextChange(ClusterStateObserver.java:171)	
        at app//org.elasticsearch.cluster.ClusterStateObserver.waitForNextChange(ClusterStateObserver.java:119)	
        at app//org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction.retry(TransportMasterNodeAction.java:302)	
        at app//org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction.retryOnNextState(TransportMasterNodeAction.java:283)	
        at app//org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction$1.handleException(TransportMasterNodeAction.java:263)	
        at app//org.elasticsearch.transport.TransportService$UnregisterChildTransportResponseHandler.handleException(TransportService.java:1718)	
        at app//org.elasticsearch.transport.TransportService$ContextRestoreResponseHandler.handleException(TransportService.java:1435)	
        at app//org.elasticsearch.transport.TransportService$7.doRun(TransportService.java:1319)	
        at app//org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:983)	
        at app//org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)	
        at [email protected]/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)	
        at [email protected]/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)	
        at [email protected]/java.lang.Thread.run(Thread.java:1583)

kingherc avatar Feb 14 '24 10:02 kingherc

Still failing in main it seems: https://gradle-enterprise.elastic.co/s/uqld27x6h52bo. Looks like one or more genuine bugs in how we handle shutdowns; NB https://github.com/elastic/elasticsearch/issues/105549 isn't really the bug here but it makes it harder to see what's going on at least.

DaveCTurner avatar Feb 15 '24 12:02 DaveCTurner

The problem here seems to be an @After method being called prematurely, which wipes the cluster. I'm not terribly sure why it was called, as there doesn't seem to be any timeout or assertion being tripped.

[2024-02-15T08:57:19,095][INFO ][o.e.x.d.DataStreamLifecycleDownsampleDisruptionIT] [testDataStreamLifecycleDownsampleRollingRestart] [DataStreamLifecycleDownsampleDisruptionIT#testDataStreamLifecycleDownsampleRollingRestart]: cleaning up after test


[2024-02-15T08:57:22,843][ERROR][o.e.x.d.DownsampleShardIndexer] [node_t3] Shard [[.ds-metrics-foo-2024.02.15-000001][0]] failed to populate downsample index.
org.elasticsearch.common.util.concurrent.EsRejectedExecutionException: rejected execution of TimedRunnable{original=org.elasticsearch.action.bulk.TransportBulkAction$2/org.elasticsearch.action.ActionListenerImplementations$RunBeforeActionListener/org.elasticsearch.action.ActionListenerImplementations$RunBeforeActionListener/org.elasticsearch.tasks.TaskManager$1{org.elasticsearch.action.bulk.Retry2$RetryHandler@744d4860}{Task{id=226, type='transport', action='indices:data/write/bulk', description='requests[6595], indices[downsample-5m-.ds-metrics-foo-2024.02.15-000001]', parentTask=unset, startTime=1707998242839, startTimeNanos=601323583773}}/org.elasticsearch.action.support.TransportAction$$Lambda/0x00007fb3bc9e1638@5c21a006/org.elasticsearch.action.bulk.TransportBulkAction$$Lambda/0x00007fb3bcb119a8@650d5947, creationTimeNanos=601326990197, startTimeNanos=0, finishTimeNanos=-1, failedOrRejected=false} on TaskExecutionTimeTrackingEsThreadPoolExecutor[name = node_t3/write, queue capacity = 10000, task execution EWMA = 1.6s, total task execution time = 20s, org.elasticsearch.common.util.concurrent.TaskExecutionTimeTrackingEsThreadPoolExecutor@852613c[Shutting down, pool size = 1, active threads = 1, queued tasks = 0, completed tasks = 3]] (shutdown)
	at org.elasticsearch.common.util.concurrent.EsRejectedExecutionHandler.newRejectedException(EsRejectedExecutionHandler.java:51) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.common.util.concurrent.EsAbortPolicy.rejectedExecution(EsAbortPolicy.java:35) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at java.util.concurrent.ThreadPoolExecutor.reject(ThreadPoolExecutor.java:841) ~[?:?]
	at java.util.concurrent.ThreadPoolExecutor.execute(ThreadPoolExecutor.java:1376) ~[?:?]
	at org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor.execute(EsThreadPoolExecutor.java:72) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.action.bulk.TransportBulkAction.forkAndExecute(TransportBulkAction.java:294) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.action.bulk.TransportBulkAction.ensureClusterStateThenForkAndExecute(TransportBulkAction.java:289) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.action.bulk.TransportBulkAction.doExecute(TransportBulkAction.java:248) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.action.bulk.TransportBulkAction.doExecute(TransportBulkAction.java:84) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.action.support.TransportAction$RequestFilterChain.proceed(TransportAction.java:96) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.action.support.TransportAction.execute(TransportAction.java:68) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.tasks.TaskManager.registerAndExecute(TaskManager.java:196) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]




[2024-02-15T08:57:22,999][ERROR][o.e.x.d.DownsampleShardPersistentTaskExecutor] [node_t3] Downsampling task [downsample-downsample-5m-.ds-metrics-foo-2024.02.15-000001-0-5m non-retriable failure [Rejecting request because bulk add has been cancelled by the caller]
[2024-02-15T08:57:23,001][TRACE][o.e.d.l.DataStreamLifecycleService] [node_t0] Data stream lifecycle job triggered: data_stream_lifecycle, 1707998243000, 1707998243001
[2024-02-15T08:57:23,001][TRACE][o.e.d.l.DataStreamLifecycleService] [node_t0] Data stream lifecycle issues rollover request for data stream [metrics-foo]
[2024-02-15T08:57:23,001][WARN ][o.e.p.AllocatedPersistentTask] [node_t3] task [downsample-downsample-5m-.ds-metrics-foo-2024.02.15-000001-0-5m] failed with an exception
org.elasticsearch.common.util.concurrent.EsRejectedExecutionException: Rejecting request because bulk add has been cancelled by the caller
	at org.elasticsearch.action.bulk.BulkProcessor2.addWithBackpressure(BulkProcessor2.java:336) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.action.bulk.BulkProcessor2.addWithBackpressure(BulkProcessor2.java:303) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.xpack.downsample.DownsampleShardIndexer$TimeSeriesBucketCollector.indexBucket(DownsampleShardIndexer.java:448) ~[main/:?]



[2024-02-15T08:57:23,003][WARN ][o.e.p.AllocatedPersistentTask] [node_t3] notification for task [rollup-shard[c]] with id [downsample-downsample-5m-.ds-metrics-foo-2024.02.15-000001-0-5m] failed
org.elasticsearch.node.NodeClosedException: node closed {node_t3}{34xjImOAT_2nOt22zrHDXg}{4aK8biOvRLqEE9BbuKW7kg}{node_t3}{127.0.0.1}{127.0.0.1:20683}{d}{8.14.0}{7000099-8502001}{xpack.installed=true}
	at org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction$2.onClusterServiceClose(TransportMasterNodeAction.java:311) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.ClusterStateObserver$ContextPreservingListener.onClusterServiceClose(ClusterStateObserver.java:387) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.ClusterStateObserver$ObserverClusterStateListener.onClose(ClusterStateObserver.java:285) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.addTimeoutListener(ClusterApplierService.java:254) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.ClusterStateObserver.waitForNextChange(ClusterStateObserver.java:179) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.ClusterStateObserver.waitForNextChange(ClusterStateObserver.java:120) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction.retry(TransportMasterNodeAction.java:302) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction.retryOnNextState(TransportMasterNodeAction.java:283) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.action.support.master.TransportMasterNodeAction$AsyncSingleAction$1.handleException(TransportMasterNodeAction.java:263) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.transport.TransportService.handleSendRequestException(TransportService.java:858) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]	



// node_t3 restarted 


[2024-02-15T08:57:23,541][INFO ][o.e.c.s.MasterService    ] [node_t0] node-join[{node_t3}{34xjImOAT_2nOt22zrHDXg}{B-PheGS_Sii14FFu1bn5yg}{node_t3}{127.0.0.1}{127.0.0.1:20683}{d}{8.14.0}{7000099-8502001} joining after restart, removed [801ms] ago with reason [disconnected]], term: 2, version: 51, delta: added {{node_t3}{34xjImOAT_2nOt22zrHDXg}{B-PheGS_Sii14FFu1bn5yg}{node_t3}{127.0.0.1}{127.0.0.1:20683}{d}{8.14.0}{7000099-8502001}}
[2024-02-15T08:57:23,566][INFO ][o.e.c.s.ClusterApplierService] [node_t2] added {{node_t3}{34xjImOAT_2nOt22zrHDXg}{B-PheGS_Sii14FFu1bn5yg}{node_t3}{127.0.0.1}{127.0.0.1:20683}{d}{8.14.0}{7000099-8502001}}, term: 2, version: 51, reason: ApplyCommitRequest{term=2, version=51, 	



// as part of the test cleanup, the data stream was deleted

[2024-02-15T08:57:24,239][INFO ][o.e.d.a.DeleteDataStreamTransportAction] [node_t0] removing data stream [metrics-foo]
[2024-02-15T08:57:24,240][INFO ][o.e.c.m.MetadataDeleteIndexService] [node_t0] [.ds-metrics-foo-2024.02.15-000001/EwtGzqPUQoCSR4GX2VARdA] deleting index
[2024-02-15T08:57:24,240][INFO ][o.e.c.m.MetadataDeleteIndexService] [node_t0] [.ds-metrics-foo-2024.02.15-000002/lHhFJKOJSV-VR91AI7Tl3g] deleting index


// downsampling fails on node_t3 because the source index was deleted

[2024-02-15T08:57:24,275][ERROR][o.e.x.d.DownsampleShardPersistentTaskExecutor] [node_t3] Downsampling task [downsample-downsample-5m-.ds-metrics-foo-2024.02.15-000001-0-5m failing because source index not assigned
[2024-02-15T08:57:24,348][ERROR][o.e.x.d.TransportDownsampleAction] [node_t0] error while waiting for downsampling persistent task
org.elasticsearch.ElasticsearchException: downsample task [downsample-downsample-5m-.ds-metrics-foo-2024.02.15-000001-0-5m] failed
	at org.elasticsearch.xpack.downsample.TransportDownsampleAction$2.onResponse(TransportDownsampleAction.java:425) ~[main/:?]
	at org.elasticsearch.xpack.downsample.TransportDownsampleAction$2.onResponse(TransportDownsampleAction.java:417) ~[main/:?]
	at org.elasticsearch.persistent.PersistentTasksService$1.onNewClusterState(PersistentTasksService.java:166) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.ClusterStateObserver$ContextPreservingListener.onNewClusterState(ClusterStateObserver.java:380) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.ClusterStateObserver$ObserverClusterStateListener.clusterChanged(ClusterStateObserver.java:231) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.callClusterStateListener(ClusterApplierService.java:560) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.callClusterStateListeners(ClusterApplierService.java:547) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.applyChanges(ClusterApplierService.java:505) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.runTask(ClusterApplierService.java:429) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService$UpdateTask.run(ClusterApplierService.java:154) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:917) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:217) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:183) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) ~[?:?]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) ~[?:?]
	at java.lang.Thread.run(Thread.java:1583) ~[?:?]
[2024-02-15T08:57:24,356][ERROR][o.e.d.l.DataStreamLifecycleService] [node_t0] Data stream lifecycle encountered an error trying to downsample index [.ds-metrics-foo-2024.02.15-000001]. Data stream lifecycle will attempt to downsample the index on its next run.
org.elasticsearch.ElasticsearchException: downsample task [downsample-downsample-5m-.ds-metrics-foo-2024.02.15-000001-0-5m] failed
	at org.elasticsearch.xpack.downsample.TransportDownsampleAction$2.onResponse(TransportDownsampleAction.java:425) ~[main/:?]
	at org.elasticsearch.xpack.downsample.TransportDownsampleAction$2.onResponse(TransportDownsampleAction.java:417) ~[main/:?]
	at org.elasticsearch.persistent.PersistentTasksService$1.onNewClusterState(PersistentTasksService.java:166) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.ClusterStateObserver$ContextPreservingListener.onNewClusterState(ClusterStateObserver.java:380) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.ClusterStateObserver$ObserverClusterStateListener.clusterChanged(ClusterStateObserver.java:231) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.callClusterStateListener(ClusterApplierService.java:560) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.callClusterStateListeners(ClusterApplierService.java:547) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.applyChanges(ClusterApplierService.java:505) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.runTask(ClusterApplierService.java:429) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService$UpdateTask.run(ClusterApplierService.java:154) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:917) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:217) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:183) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) ~[?:?]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) ~[?:?]
	at java.lang.Thread.run(Thread.java:1583) ~[?:?]
[2024-02-15T08:57:24,357][TRACE][o.e.d.l.DataStreamLifecycleService] [node_t0] Data stream lifecycle encountered an error trying to downsample index [.ds-metrics-foo-2024.02.15-000001]. Data stream lifecycle will attempt to downsample the index on its next run.
Failing since [1707998244343], operation retried [1] times
org.elasticsearch.ElasticsearchException: downsample task [downsample-downsample-5m-.ds-metrics-foo-2024.02.15-000001-0-5m] failed
	at org.elasticsearch.xpack.downsample.TransportDownsampleAction$2.onResponse(TransportDownsampleAction.java:425) ~[main/:?]
	at org.elasticsearch.xpack.downsample.TransportDownsampleAction$2.onResponse(TransportDownsampleAction.java:417) ~[main/:?]
	at org.elasticsearch.persistent.PersistentTasksService$1.onNewClusterState(PersistentTasksService.java:166) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.ClusterStateObserver$ContextPreservingListener.onNewClusterState(ClusterStateObserver.java:380) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.ClusterStateObserver$ObserverClusterStateListener.clusterChanged(ClusterStateObserver.java:231) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.callClusterStateListener(ClusterApplierService.java:560) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.callClusterStateListeners(ClusterApplierService.java:547) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.applyChanges(ClusterApplierService.java:505) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService.runTask(ClusterApplierService.java:429) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.cluster.service.ClusterApplierService$UpdateTask.run(ClusterApplierService.java:154) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]
	at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:917) ~[elasticsearch-8.14.0-SNAPSHOT.jar:8.14.0-SNAPSHOT]

andreidan avatar Mar 22 '24 09:03 andreidan