valkey icon indicating copy to clipboard operation
valkey copied to clipboard

[CRASH] Valkey 7.2 crashes with io-threads and a module that uses ValkeyModule_FreeThreadSafeContext

Open dmitrypol opened this issue 1 month ago • 2 comments

Crash report

=== REDIS BUG REPORT START: Cut & paste starting from here ===
19110:M 06 Nov 2025 14:56:21.541 # === ASSERTION FAILED ===
19110:M 06 Nov 2025 14:56:21.541 # ==> networking.c:1495 'io_threads_op == IO_THREADS_OP_IDLE' is not true

------ STACK TRACE ------

Backtrace:
0   valkey-server                       0x0000000102c42b53 unlinkClient + 1107
1   valkey-server                       0x0000000102c41ed7 freeClient + 2183
2   valkey-server                       0x0000000102d1182e VM_FreeThreadSafeContext + 14
3   module1.so                          0x0000000102e9fb72 my_thread_function + 34
4   libsystem_pthread.dylib             0x00007ff8189b6e05 _pthread_start + 115
5   libsystem_pthread.dylib             0x00007ff8189b2857 thread_start + 15

------ INFO OUTPUT ------
# Server
redis_version:7.2.4
server_name:valkey
valkey_version:7.2.11
redis_git_sha1:94e48ee3
redis_git_dirty:1
redis_build_id:2540c66b6e521100
redis_mode:standalone
os:Darwin 24.6.0 x86_64
arch_bits:64
monotonic_clock:POSIX clock_gettime
multiplexing_api:kqueue
atomicvar_api:c11-builtin
gcc_version:4.2.1
process_id:19110
process_supervised:no
run_id:c3f199431102a9dadd3e83a9b1950fcbf08e9ebe
tcp_port:6379
server_time_usec:1762469781542022
uptime_in_seconds:11
uptime_in_days:0
hz:10
configured_hz:10
lru_clock:862101
config_file:
io_threads_active:0
listener0:name=tcp,bind=*,bind=-::*,port=6379

# Clients
connected_clients:51
cluster_connections:0
maxclients:10000
client_recent_max_input_buffer:16896
client_recent_max_output_buffer:0
blocked_clients:0
tracking_clients:0
clients_in_timeout_table:0
total_blocking_keys:0
total_blocking_keys_on_nokey:0

# Memory
used_memory:2716368
used_memory_human:2.59M
used_memory_rss:4558848
used_memory_rss_human:4.35M
used_memory_peak:3481328
used_memory_peak_human:3.32M
used_memory_peak_perc:78.03%
used_memory_overhead:2086024
used_memory_startup:1131680
used_memory_dataset:630344
used_memory_dataset_perc:39.78%
allocator_allocated:2713280
allocator_active:4526080
allocator_resident:4526080
total_system_memory:34359738368
total_system_memory_human:32.00G
used_memory_lua:32768
used_memory_vm_eval:32768
used_memory_lua_human:32.00K
used_memory_scripts_eval:0
number_of_cached_scripts:0
number_of_functions:0
number_of_libraries:0
used_memory_vm_functions:33792
used_memory_vm_total:66560
used_memory_vm_total_human:65.00K
used_memory_functions:216
used_memory_scripts:216
used_memory_scripts_human:216B
maxmemory:0
maxmemory_human:0B
maxmemory_policy:noeviction
allocator_frag_ratio:1.67
allocator_frag_bytes:1812800
allocator_rss_ratio:1.00
allocator_rss_bytes:0
rss_overhead_ratio:1.01
rss_overhead_bytes:32768
mem_fragmentation_ratio:1.68
mem_fragmentation_bytes:1845568
mem_not_counted_for_evict:0
mem_replication_backlog:0
mem_total_replication_buffers:0
mem_clients_slaves:0
mem_clients_normal:953904
mem_cluster_links:0
mem_aof_buffer:0
mem_allocator:libc
active_defrag_running:0
lazyfree_pending_objects:0
lazyfreed_objects:0

# Persistence
loading:0
async_loading:0
current_cow_peak:0
current_cow_size:0
current_cow_size_age:0
current_fork_perc:0.00
current_save_keys_processed:0
current_save_keys_total:0
rdb_changes_since_last_save:0
rdb_bgsave_in_progress:0
rdb_last_save_time:1762469770
rdb_last_bgsave_status:ok
rdb_last_bgsave_time_sec:-1
rdb_current_bgsave_time_sec:-1
rdb_saves:0
rdb_last_cow_size:0
rdb_last_load_keys_expired:0
rdb_last_load_keys_loaded:4
aof_enabled:0
aof_rewrite_in_progress:0
aof_rewrite_scheduled:0
aof_last_rewrite_time_sec:-1
aof_current_rewrite_time_sec:-1
aof_last_bgrewrite_status:ok
aof_rewrites:0
aof_rewrites_consecutive_failures:0
aof_last_write_status:ok
aof_last_cow_size:0
module_fork_in_progress:0
module_fork_last_cow_size:0

# Stats
total_connections_received:52
total_commands_processed:74311
instantaneous_ops_per_sec:34539
total_net_input_bytes:446043
total_net_output_bytes:520443
total_net_repl_input_bytes:0
total_net_repl_output_bytes:0
instantaneous_input_kbps:202.40
instantaneous_output_kbps:236.11
instantaneous_input_repl_kbps:0.00
instantaneous_output_repl_kbps:0.00
rejected_connections:0
sync_full:0
sync_partial_ok:0
sync_partial_err:0
expired_keys:0
expired_stale_perc:0.00
expired_time_cap_reached_count:0
expire_cycle_cpu_milliseconds:0
evicted_keys:0
evicted_clients:0
total_eviction_exceeded_time:0
current_eviction_exceeded_time:0
keyspace_hits:0
keyspace_misses:0
pubsub_channels:0
pubsub_patterns:0
pubsubshard_channels:0
latest_fork_usec:0
total_forks:0
migrate_cached_sockets:0
slave_expires_tracked_keys:0
active_defrag_hits:0
active_defrag_misses:0
active_defrag_key_hits:0
active_defrag_key_misses:0
total_active_defrag_time:0
current_active_defrag_time:0
tracking_total_keys:0
tracking_total_items:0
tracking_total_prefixes:0
unexpected_error_replies:0
total_error_replies:0
dump_payload_sanitizations:0
total_reads_processed:74312
total_writes_processed:74310
io_threaded_reads_processed:0
io_threaded_writes_processed:3724
reply_buffer_shrinks:51
reply_buffer_expands:0
eventloop_cycles:59399
eventloop_duration_sum:1708852
eventloop_duration_cmd_sum:11443
instantaneous_eventloop_cycles_per_sec:26719
instantaneous_eventloop_duration_usec:30
acl_access_denied_auth:0
acl_access_denied_cmd:0
acl_access_denied_key:0
acl_access_denied_channel:0

# Replication
role:master
connected_slaves:0
master_failover_state:no-failover
master_replid:16847150e616fc5e44ed4991ede3df9ba3c4f8fd
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:0
second_repl_offset:-1
repl_backlog_active:0
repl_backlog_size:1048576
repl_backlog_first_byte_offset:0
repl_backlog_histlen:0

# CPU
used_cpu_sys:1.496825
used_cpu_user:0.688168
used_cpu_sys_children:0.000000
used_cpu_user_children:0.000000

# Modules
module:name=module1,ver=1,api=1,filters=0,usedby=[],using=[],options=[]

# Commandstats
cmdstat_hello:calls=1,usec=14,usec_per_call=14.00,rejected_calls=0,failed_calls=0
cmdstat_cmd1:calls=12,usec=393,usec_per_call=32.75,rejected_calls=0,failed_calls=0
cmdstat_config|get:calls=2,usec=13,usec_per_call=6.50,rejected_calls=0,failed_calls=0
cmdstat_ping:calls=74298,usec=11023,usec_per_call=0.15,rejected_calls=0,failed_calls=0

# Errorstats

# Latencystats
latency_percentiles_usec_hello:p50=14.015,p99=14.015,p99.9=14.015
latency_percentiles_usec_cmd1:p50=25.087,p99=84.479,p99.9=84.479
latency_percentiles_usec_config|get:p50=2.007,p99=11.007,p99.9=11.007
latency_percentiles_usec_ping:p50=0.001,p99=1.003,p99.9=1.003

# Cluster
cluster_enabled:0

# Keyspace
db0:keys=4,expires=0,avg_ttl=0

------ CLIENT LIST OUTPUT ------
id=19 addr=127.0.0.1:52109 laddr=127.0.0.1:6379 fd=16 name= age=2 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=16890 argv-mem=0 multi-mem=0 rbs=1024 rbp=7 obl=0 oll=0 omem=0 tot-mem=18704 events=r cmd=ping user=default redir=-1 resp=2 lib-name= lib-ver=
...

------ MODULES INFO OUTPUT ------

------ CONFIG DEBUG OUTPUT ------
activedefrag no
io-threads-do-reads no
lazyfree-lazy-expire no
lazyfree-lazy-eviction no
repl-diskless-load disabled
lazyfree-lazy-server-del no
sanitize-dump-payload no
list-compress-depth 0
lazyfree-lazy-user-del no
io-threads 2
replica-read-only yes
client-query-buffer-limit 1gb
proto-max-bulk-len 512mb
slave-read-only yes
lazyfree-lazy-user-flush no
repl-diskless-sync yes

=== REDIS BUG REPORT END. Make sure to include from START to END. ===

Additional information

Create "src/modules/module1.c" with the following code:

#include <pthread.h>
#include "../valkeymodule.h"

ValkeyModuleCtx *reply_ctx;

void *my_thread_function(void *arg) {
    VALKEYMODULE_NOT_USED(arg);
    reply_ctx = ValkeyModule_GetThreadSafeContext(NULL);
    ValkeyModule_FreeThreadSafeContext(reply_ctx);
    return NULL;
}

int cmd1(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) {
    VALKEYMODULE_NOT_USED(argv);
    VALKEYMODULE_NOT_USED(argc);
    pthread_t thread;
    pthread_create(&thread, NULL, my_thread_function, NULL);
    return ValkeyModule_ReplyWithSimpleString(ctx, "OK");
}

int ValkeyModule_OnLoad(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, int argc) {
    VALKEYMODULE_NOT_USED(argv);
    VALKEYMODULE_NOT_USED(argc);
    ValkeyModule_Init(ctx,"module1",1,VALKEYMODULE_APIVER_1);
    ValkeyModule_CreateCommand(ctx, "cmd1", cmd1, "", 0, 0, 0);
    return VALKEYMODULE_OK;
}


Update src/modules/Makefile
module1.xo: ../valkeymodule.h
module1.so: module1.xo
	$(LD) -o $@ $^ $(SHOBJ_LDFLAGS) $(LIBS) -lc

cd src/modules && make module1.so

src/valkey-server --io-threads 2 --loadmodule src/modules/module1.so

run valkey-benchmark
run valkey-cli and execute cmd1 a number of times to crash Valkey 7.2

Valkey 8 does not crash and the line in networking.c changed to:
    serverAssert(c->io_read_state != CLIENT_PENDING_IO && c->io_write_state != CLIENT_PENDING_IO);

dmitrypol avatar Nov 06 '25 23:11 dmitrypol

Do later versions also crash?

madolson avatar Nov 07 '25 22:11 madolson

@madolson Valkey 8 does not crash and the line in networking.c changed to: serverAssert(c->io_read_state != CLIENT_PENDING_IO && c->io_write_state != CLIENT_PENDING_IO);

dmitrypol avatar Nov 08 '25 14:11 dmitrypol