crash: race condition between RENAME and JSON.SET causes memory tracking underflow
Concurrent `RENAME` and `JSON.SET` operations can crash the server with `Check failed: false Can't decrease 0 from X` in `compact_object.cc`.
Reproduction:
```python
import asyncio
import random

from redis import asyncio as aioredis

PORT = 6379
NUM_KEYS = 10
NUM_ITERATIONS = 500
NUM_WORKERS = 20


async def create_key(client, name):
    t = random.choice(["string", "json", "list", "set", "hash"])
    try:
        if t == "string":
            await client.set(name, "x" * 100)
        elif t == "json":
            await client.execute_command("JSON.SET", name, "$", '{"a":1}')
        elif t == "list":
            await client.rpush(name, "a", "b", "c")
        elif t == "set":
            await client.sadd(name, "m1", "m2")
        elif t == "hash":
            await client.hset(name, "f", "v")
    except Exception:
        pass


async def rename_worker(port):
    client = aioredis.Redis(port=port)
    for _ in range(NUM_ITERATIONS):
        src, dst = f"k{random.randint(0, NUM_KEYS-1)}", f"k{random.randint(0, NUM_KEYS-1)}"
        if random.random() < 0.3:
            await create_key(client, src)
        if src != dst:
            try:
                await client.rename(src, dst)
            except Exception:
                pass
    await client.aclose()


async def json_worker(port):
    client = aioredis.Redis(port=port)
    for _ in range(NUM_ITERATIONS):
        try:
            await client.execute_command("JSON.SET", f"k{random.randint(0, NUM_KEYS-1)}", "$", '{"a":1}')
        except Exception:
            pass
    await client.aclose()


async def main():
    client = aioredis.Redis(port=PORT)
    for i in range(NUM_KEYS):
        await create_key(client, f"k{i}")
    await client.aclose()

    tasks = []
    for _ in range(NUM_WORKERS // 2):
        tasks.append(asyncio.create_task(rename_worker(PORT)))
        tasks.append(asyncio.create_task(json_worker(PORT)))
    await asyncio.gather(*tasks)


asyncio.run(main())
```
Stack trace:

```
F20251208 13:41:04.492736 100273 json_family.cc:99] Check failed: GetPrimeValue().MallocUsed() != 0
*** Check failure stack trace: ***
@ 0x6125226434ef google::LogMessage::Fail()
@ 0x612522643435 google::LogMessage::SendToLog()
@ 0x612522642be8 google::LogMessage::Flush()
@ 0x612522646ce0 google::LogMessageFatal::~LogMessageFatal()
@ 0x612521a0e8d8 dfly::(anonymous namespace)::JsonAutoUpdater::SetJsonSize()
@ 0x612521a10e02 dfly::(anonymous namespace)::SetFullJson()
@ 0x612521a17f6b dfly::(anonymous namespace)::OpSet()
@ 0x612521a19220 _ZZN4dfly10JsonFamily3SetEN4absl12lts_202505124SpanIKSt17basic_string_viewIcSt11char_traitsIcEEEERKNS_14CommandContextEENKUlPNS_11TransactionEPNS_11EngineShardEE_clESE_SG_
@ 0x612521a2565a _ZZN4dfly11Transaction18ScheduleSingleHopTIZNS_10JsonFamily3SetEN4absl12lts_202505124SpanIKSt17basic_string_viewIcSt11char_traitsIcEEEERKNS_14CommandContextEEUlPS0_PNS_11EngineShardEE_EEDTclfp_fpTLDnEEEOT_ENKUlSF_SH_E_clESF_SH_
@ 0x612521a323bf _ZSt13__invoke_implIN6facade8OpStatusERKZN4dfly11Transaction18ScheduleSingleHopTIZNS2_10JsonFamily3SetEN4absl12lts_202505124SpanIKSt17basic_string_viewIcSt11char_traitsIcEEEERKNS2_14CommandContextEEUlPS3_PNS2_11EngineShardEE_EEDTclfp_fpTLDnEEEOT_EUlSI_SK_E_JSI_SK_EESN_St14__invoke_otherOT0_DpOT1_
@ 0x612521a31772 _ZSt8__invokeIRKZN4dfly11Transaction18ScheduleSingleHopTIZNS0_10JsonFamily3SetEN4absl12lts_202505124SpanIKSt17basic_string_viewIcSt11char_traitsIcEEEERKNS0_14CommandContextEEUlPS1_PNS0_11EngineShardEE_EEDTclfp_fpTLDnEEEOT_EUlSG_SI_E_JSG_SI_EENSt15__invoke_resultISL_JDpT0_EE4typeESM_DpOSR_
@ 0x612521a2f716 _ZSt6invokeIRKZN4dfly11Transaction18ScheduleSingleHopTIZNS0_10JsonFamily3SetEN4absl12lts_202505124SpanIKSt17basic_string_viewIcSt11char_traitsIcEEEERKNS0_14CommandContextEEUlPS1_PNS0_11EngineShardEE_EEDTclfp_fpTLDnEEEOT_EUlSG_SI_E_JSG_SI_EENSt13invoke_resultISL_JDpT0_EE4typeESM_DpOSR_
@ 0x612521a2c690 _ZN4absl12lts_2025051219functional_internal12InvokeObjectIZN4dfly11Transaction18ScheduleSingleHopTIZNS3_10JsonFamily3SetENS0_4SpanIKSt17basic_string_viewIcSt11char_traitsIcEEEERKNS3_14CommandContextEEUlPS4_PNS3_11EngineShardEE_EEDTclfp_fpTLDnEEEOT_EUlSH_SJ_E_NS4_14RunnableResultEJSH_SJ_EEET0_NS1_7VoidPtrEDpNS1_8ForwardTIT1_E4typeE
@ 0x61252205a4f6 absl::lts_20250512::FunctionRef<>::operator()()
@ 0x612522046e7d dfly::Transaction::RunCallback()
@ 0x61252204bf2e dfly::Transaction::ScheduleInShard()
@ 0x612522048004 dfly::Transaction::ScheduleInternal()
@ 0x6125220492ed dfly::Transaction::Execute()
@ 0x6125220491bd dfly::Transaction::ScheduleSingleHop()
@ 0x612521a25730 _ZN4dfly11Transaction18ScheduleSingleHopTIZNS_10JsonFamily3SetEN4absl12lts_202505124SpanIKSt17basic_string_viewIcSt11char_traitsIcEEEERKNS_14CommandContextEEUlPS0_PNS_11EngineShardEE_EEDTclfp_fpTLDnEEEOT_
@ 0x612521a195db dfly::JsonFamily::Set()
@ 0x6125218b9922 _ZN3fu27abi_4006detail10invocation6invokeIRKPFvN4absl12lts_202505124SpanIKSt17basic_string_viewIcSt11char_traitsIcEEEERKN4dfly14CommandContextEEJSC_SG_EEEDTclcl7forwardIT_Efp_Espcl7forwardIT0_Efp0_EEEOSL_DpOSM_
@ 0x6125218b8ea9 fu2::abi_400::detail::type_erasure::invocation_table::function_trait<>::internal_invoker<>::invoke()
@ 0x612521ff214f _ZNK3fu27abi_4006detail12type_erasure6tables6vtableINS1_8propertyILb0ELb0EJKFvN4absl12lts_202505124SpanIKSt17basic_string_viewIcSt11char_traitsIcEEEERKN4dfly14CommandContextEEEEEE6invokeILm0EJPKNS2_13data_accessorERKmSE_SI_EEEDcDpOT0_
@ 0x612521ff21e8 _ZN3fu27abi_4006detail12type_erasure7erasureILb1ENS1_6configILb1ELb1ENS_16capacity_defaultEEENS1_8propertyILb0ELb0EJKFvN4absl12lts_202505124SpanIKSt17basic_string_viewIcSt11char_traitsIcEEEERKN4dfly14CommandContextEEEEEE6invokeILm0ERKSN_JSG_SK_EEEDcOT0_DpOT1_
@ 0x612521ff2266 fu2::abi_400::detail::type_erasure::invocation_table::operator_impl<>::operator()()
@ 0x612521fee357 dfly::CommandId::Invoke()
@ 0x612521b3880d dfly::Service::InvokeCmd()
@ 0x612521b375fd dfly::Service::DispatchCommand()
@ 0x6125223468d4 _ZZN6facade10Connection10ParseRedisEjENKUlvE_clEv
@ 0x612522353297 _ZSt13__invoke_implIvRKZN6facade10Connection10ParseRedisEjEUlvE_JEET_St14__invoke_otherOT0_DpOT1_
@ 0x61252235275c _ZSt8__invokeIRKZN6facade10Connection10ParseRedisEjEUlvE_JEENSt15__invoke_resultIT_JDpT0_EE4typeEOS6_DpOS7_
*** SIGABRT received at time=1765194064 on cpu 0 ***
PC: @ 0x72d43fa3bb2c (unknown) pthread_kill
@ 0x6125226d4823 64 absl::lts_20250512::WriteFailureInfo()
@ 0x6125226d4aa7 96 absl::lts_20250512::AbslFailureSignalHandler()
@ 0x72d43f9e2330 1760 (unknown)
@ 0x72d43f9e227e 32 raise
@ 0x72d43f9c58ff 192 abort
@ 0x61252264e23c 176 google::DumpStackTraceAndExit()
@ 0x6125226434ef 16 google::LogMessage::Fail()
@ 0x612522643435 160 google::LogMessage::SendToLog()
@ 0x612522642be8 80 google::LogMessage::Flush()
@ 0x612522646ce0 32 google::LogMessageFatal::~LogMessageFatal()
@ 0x612521a0e8d8 192 dfly::(anonymous namespace)::JsonAutoUpdater::SetJsonSize()
@ 0x612521a10e02 1072 dfly::(anonymous namespace)::SetFullJson()
@ 0x612521a17f6b 192 dfly::(anonymous namespace)::OpSet()
@ 0x612521a19220 176 dfly::JsonFamily::Set()::{lambda()#1}::operator()()
@ 0x612521a2565a 64 dfly::Transaction::ScheduleSingleHopT<>()::{lambda()#1}::operator()()
@ 0x612521a323bf 64 std::__invoke_impl<>()
@ 0x612521a31772 64 std::__invoke<>()
@ 0x612521a2f716 64 std::invoke<>()
@ 0x612521a2c690 96 absl::lts_20250512::functional_internal::InvokeObject<>()
@ 0x61252205a4f6 64 absl::lts_20250512::FunctionRef<>::operator()()
@ 0x612522046e7d 240 dfly::Transaction::RunCallback()
@ 0x61252204bf2e 448 dfly::Transaction::ScheduleInShard()
@ 0x612522048004 336 dfly::Transaction::ScheduleInternal()
@ 0x6125220492ed 64 dfly::Transaction::Execute()
@ 0x6125220491bd 48 dfly::Transaction::ScheduleSingleHop()
@ 0x612521a25730 128 dfly::Transaction::ScheduleSingleHopT<>()
@ 0x612521a195db 480 dfly::JsonFamily::Set()
@ 0x6125218b9922 64 fu2::abi_400::detail::invocation::invoke<>()
@ 0x6125218b8ea9 96 fu2::abi_400::detail::type_erasure::invocation_table::function_trait<>::internal_invoker<>::invoke()
@ 0x612521ff214f 112 fu2::abi_400::detail::type_erasure::tables::vtable<>::invoke<>()
@ 0x612521ff21e8 112 fu2::abi_400::detail::type_erasure::erasure<>::invoke<>()
@ 0x612521ff2266 80 fu2::abi_400::detail::type_erasure::invocation_table::operator_impl<>::operator()()
    @ ... and at least 28 more frames
```
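A reduced variant that interleaves only the two suspected commands may be easier to reason about. This is just a sketch: I have not confirmed that this narrower version hits the same check failure, and the key names and loop counts are arbitrary.

```python
# Sketch: only RENAME and JSON.SET racing over two keys on the same local
# server. Increase the loop counts if it doesn't trigger within one run.
import asyncio

from redis import asyncio as aioredis

PORT = 6379


async def renamer():
    client = aioredis.Redis(port=PORT)
    for _ in range(10_000):
        try:
            await client.rename("j0", "j1")
            await client.rename("j1", "j0")
        except Exception:
            pass  # the source key may not exist at this instant
    await client.aclose()


async def setter():
    client = aioredis.Redis(port=PORT)
    for _ in range(10_000):
        for key in ("j0", "j1"):
            try:
                await client.execute_command("JSON.SET", key, "$", '{"a":1}')
            except Exception:
                pass
    await client.aclose()


async def focused_main():
    # Seed one JSON key, then race the two workers against each other.
    client = aioredis.Redis(port=PORT)
    await client.execute_command("JSON.SET", "j0", "$", '{"a":1}')
    await client.aclose()
    await asyncio.gather(renamer(), setter())


asyncio.run(focused_main())
```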
Reproduces with `--proactor_threads=1`.
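For reference, this is roughly how the single-thread run can be wired up; it is a sketch, and the binary path `./dragonfly` and the script name `repro.py` (the reproduction script above) are placeholders.

```python
# Sketch: start the server pinned to one proactor thread, then run the
# reproducer against it. "./dragonfly" and "repro.py" are placeholder paths.
import subprocess
import time

server = subprocess.Popen(["./dragonfly", "--proactor_threads=1", "--port=6379"])
time.sleep(1)  # give the server a moment to start listening
try:
    subprocess.run(["python", "repro.py"], check=False)
finally:
    server.terminate()
    server.wait()
```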
Maybe this is not a race condition, but it is still a bug.
@vyavdoshenko do I need to set some specific number of proactor threads? With 1 and 2 threads I cannot reproduce this locally. I also tried higher key and iteration counts (see the sweep sketch below):

```
NUM_KEYS = 100
NUM_ITERATIONS = 9000
```
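If it helps, a sweep along these lines could automate trying progressively larger settings. This is a sketch: it assumes the reproduction script is saved as `repro.py` with its final `asyncio.run(main())` guarded by `if __name__ == "__main__":` so importing it does not run it immediately.

```python
# Sketch: rerun the reproducer with larger and larger settings to raise the
# odds of hitting the race. Assumes repro.py exposes NUM_KEYS, NUM_ITERATIONS
# and main() as module-level names and does not auto-run main() on import.
import asyncio

import repro  # the reproduction script above, saved as repro.py

for keys, iterations in [(10, 500), (100, 9000), (1000, 20000)]:
    repro.NUM_KEYS = keys
    repro.NUM_ITERATIONS = iterations
    print(f"NUM_KEYS={keys} NUM_ITERATIONS={iterations}")
    asyncio.run(repro.main())
```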
I tried to reproduce the issue on the fresh main branch (Intel and ARM builds). It no longer reproduces.