cadence icon indicating copy to clipboard operation
cadence copied to clipboard

Convert replicator panics into errors, rather than crashing the process

Open Groxx opened this issue 5 months ago • 0 comments

Sample crash that led to this PR:

panic: runtime error: invalid memory address or nil pointer dereference [recovered]
	panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x855b880]

goroutine 14136 [running]:
github.com/uber/cadence/service/history/ndc.(*historyReplicatorImpl).applyEvents.func1()
	external/com_github_uber_cadence/service/history/ndc/history_replicator.go:345 +0x7c
panic({0x163ea00?, 0xabb9e50?})
	GOROOT/src/runtime/panic.go:792 +0x132
github.com/uber/cadence/common/persistence/serialization.workflowExecutionInfoFromThrift(0xc007521208)
	external/com_github_uber_cadence/common/persistence/serialization/thrift_mapper.go:299 +0xea0
github.com/uber/cadence/common/persistence/serialization.(*thriftDecoder).workflowExecutionInfoFromBlob(0xc009071e00?, {0xc005e68000, 0xde3, 0xde3})
	external/com_github_uber_cadence/common/persistence/serialization/thrift_decoder.go:70 +0x69
github.com/uber/cadence/common/persistence/serialization.(*parser).WorkflowExecutionInfoFromBlob(0x0?, {0xc005e68000, 0xde3, 0xde3}, {0x20b4794?, 0xc0059f25e8?})
	external/com_github_uber_cadence/common/persistence/serialization/parser.go:253 +0x5c
github.com/uber/cadence/common/persistence/sql.(*sqlExecutionStore).populateWorkflowMutableState(0xc0059f27c8?, {0xe3, {0xc00a0d6530, 0x10, 0x10}, {0xc0054c6ff0, 0x22}, {0xc00a0d6540, 0x10, 0x10}, ...})
	external/com_github_uber_cadence/common/persistence/sql/sql_execution_store.go:1069 +0x53
github.com/uber/cadence/common/persistence/sql.(*sqlExecutionStore).GetWorkflowExecution(0xc008ccba70, {0x2b3d918, 0xc007e05110}, 0xc00ba42c00)
	external/com_github_uber_cadence/common/persistence/sql/sql_execution_store.go:378 +0xc2c
github.com/uber/cadence/common/persistence.(*executionManagerImpl).GetWorkflowExecution(0xc00aa1d3c0, {0x2b3d918, 0xc007e05110}, 0xc008a76780)
	external/com_github_uber_cadence/common/persistence/execution_manager.go:83 +0xd9
github.com/uber/cadence/common/persistence/wrappers/ratelimited.(*ratelimitedExecutionManager).GetWorkflowExecution(0xc008cd1440, {0x2b3d918, 0xc007e05110}, 0xc008a76780)
	external/com_github_uber_cadence/common/persistence/wrappers/ratelimited/execution_generated.go:154 +0x58
github.com/uber/cadence/common/persistence/wrappers/metered.(*meteredExecutionManager).GetWorkflowExecution.func1()
	external/com_github_uber_cadence/common/persistence/wrappers/metered/execution_generated.go:380 +0x4f
github.com/uber/cadence/common/persistence/wrappers/metered.(*base).callWithDomainAndShardScope(0xc008ce5130, 0x4, 0xc0059f2c18, {0x2af9dc8, 0xc00a396e20}, {0x2af9dc8, 0xc00a396e40}, {0xc0059f2c50, 0x1, 0x1})
	external/com_github_uber_cadence/common/persistence/wrappers/metered/base.go:184 +0x35f
github.com/uber/cadence/common/persistence/wrappers/metered.(*meteredExecutionManager).GetWorkflowExecution(0xc008ce5130, {0x2b3d918?, 0xc007e05110?}, 0xc008a76780)
	external/com_github_uber_cadence/common/persistence/wrappers/metered/execution_generated.go:390 +0x785
github.com/uber/cadence/service/history/shard.(*contextImpl).GetWorkflowExecution(0x615a8e5?, {0x2b3d918?, 0xc007e05110?}, 0x1377d20?)
	external/com_github_uber_cadence/service/history/shard/context.go:618 +0xdd
github.com/uber/cadence/service/history/execution.getWorkflowExecutionWithRetry.func1({0x2b3d918?, 0xc007e05110?})
	external/com_github_uber_cadence/service/history/execution/context.go:1225 +0x3f
github.com/uber/cadence/common/backoff.(*ThrottleRetry).Do(0xc00ba42ac0, {0x2b3d9c0, 0xc00a6f3ab0}, 0xc005ccaee8)
	external/com_github_uber_cadence/common/backoff/retry.go:186 +0x129
github.com/uber/cadence/service/history/execution.getWorkflowExecutionWithRetry({0x2b3d9c0, 0xc00a6f3ab0}, {0x2ba65c0, 0xc00903c5a0}, {0x2b67730, 0xc00b400a68}, {0x2ad24a0, 0xc005346480}, 0xc008a76780)
	external/com_github_uber_cadence/service/history/execution/context.go:1233 +0x16a
github.com/uber/cadence/service/history/execution.NewContext.func2({0x2b3d9c0, 0xc00a6f3ab0}, 0xc008a76780)
	external/com_github_uber_cadence/service/history/execution/context.go:236 +0xd1
github.com/uber/cadence/service/history/execution.(*contextImpl).LoadWorkflowExecutionWithTaskVersion(0xc00ab9ab00, {0x2b3d9c0, 0xc00a6f3ab0}, 0x67)
	external/com_github_uber_cadence/service/history/execution/context.go:361 +0x1cd
github.com/uber/cadence/service/history/ndc.(*historyReplicatorImpl).applyEvents(0xc008b54340, {0x2b3d9c0, 0xc00a6f3ab0}, {0x2b7d810, 0xc00c4d7440})
	external/com_github_uber_cadence/service/history/ndc/history_replicator.go:359 +0x1b9
github.com/uber/cadence/service/history/ndc.(*historyReplicatorImpl).ApplyEvents(0xc008b54340, {0x2b3d9c0, 0xc00a6f3ab0}, 0xc00ba42980)
	external/com_github_uber_cadence/service/history/ndc/history_replicator.go:324 +0xd8
github.com/uber/cadence/service/history/engine/engineimpl.(*historyEngineImpl).ReplicateEventsV2(0x6fc23ac00?, {0x2b3d9c0?, 0xc00a6f3ab0?}, 0xc005346420?)
	external/com_github_uber_cadence/service/history/engine/engineimpl/history_engine.go:352 +0x2c
github.com/uber/cadence/service/history/replication.(*taskExecutorImpl).handleHistoryReplicationTaskV2.func2()
	external/com_github_uber_cadence/service/history/replication/task_executor.go:235 +0x2f
github.com/uber/cadence/service/history/replication.(*taskExecutorImpl).handleHistoryReplicationTaskV2(0xc00960c900, 0xc00eb93700?, 0xf?)
	external/com_github_uber_cadence/service/history/replication/task_executor.go:239 +0x445
github.com/uber/cadence/service/history/replication.(*taskExecutorImpl).execute(0x0?, 0x0?, 0xd8?)
	external/com_github_uber_cadence/service/history/replication/task_executor.go:96 +0x7d
github.com/uber/cadence/service/history/replication.(*taskProcessorImpl).processTaskOnce(0xc009612000, 0xc00eb806c0)
	external/com_github_uber_cadence/service/history/replication/task_processor.go:465 +0x89
github.com/uber/cadence/service/history/replication.(*taskProcessorImpl).processSingleTask.func1.1({0x2b3d918?, 0xc007e04750?})
	external/com_github_uber_cadence/service/history/replication/task_processor.go:408 +0x4b
github.com/uber/cadence/common/backoff.(*ThrottleRetry).Do(0xc00ba42840, {0x2b3d918, 0xc007e04750}, 0xc005ccb940)
	external/com_github_uber_cadence/common/backoff/retry.go:186 +0x129
github.com/uber/cadence/service/history/replication.(*taskProcessorImpl).processSingleTask.func1({0x2b3d918, 0xc007e04750})
	external/com_github_uber_cadence/service/history/replication/task_processor.go:401 +0xd6
github.com/uber/cadence/common/backoff.(*ThrottleRetry).Do(0xc00ba42780, {0x2b3d7c8, 0xae08580}, 0xc005ccbcd8)
	external/com_github_uber_cadence/common/backoff/retry.go:186 +0x129
github.com/uber/cadence/service/history/replication.(*taskProcessorImpl).processSingleTask(0xc009612000, 0xc00eb806c0)
	external/com_github_uber_cadence/service/history/replication/task_processor.go:418 +0x12b
github.com/uber/cadence/service/history/replication.(*taskProcessorImpl).processResponse(0xc009612000, 0xc00eb836e0)
	external/com_github_uber_cadence/service/history/replication/task_processor.go:316 +0x1fe
github.com/uber/cadence/service/history/replication.(*taskProcessorImpl).processorLoop(0xc009612000)
	external/com_github_uber_cadence/service/history/replication/task_processor.go:233 +0x6ec
created by github.com/uber/cadence/service/history/replication.(*taskProcessorImpl).Start in goroutine 2421
	external/com_github_uber_cadence/service/history/replication/task_processor.go:167 +0x85

The stack trace above is a bit over 7 KB, so I've set 16 KB as the buffer size for capturing the stack trace. Happy to increase it; it just doesn't seem all that likely to be necessary.

Groxx avatar Jul 24 '25 19:07 Groxx