freeswitch icon indicating copy to clipboard operation
freeswitch copied to clipboard

* [Core] Fix possible deadlock in switch_core_media_set_codec() This commit results in a deadlock

Open cdevelop opened this issue 1 year ago • 8 comments

SHA-1: b4e43214f7c29e658e9a44239a45343d28186779

  • [Core] Fix possible deadlock in switch_core_media_set_codec()

This commit results in a deadlock

Thread 3 switch_core_session_read_frame switch_mutex_lock(session->codec_read_mutex); sofia_receive_message switch_mutex_lock(tech_pvt->sofia_mutex); switch_core_media_set_codec switch_core_session_lock_codec_write(session); switch_core_session_lock_codec_read(session);

Thread 2 switch_core_session_write_frame switch_mutex_lock(session->codec_write_mutex); sofia_receive_message switch_mutex_lock(tech_pvt->sofia_mutex);

Thread 3 (Thread 0x7f8700ddc700 (LWP 44877)):
#0  0x00007f8d324e854d in __lll_lock_wait () from /lib64/libpthread.so.0
#1  0x00007f8d324e3eb6 in _L_lock_941 () from /lib64/libpthread.so.0
#2  0x00007f8d324e3daf in pthread_mutex_lock () from /lib64/libpthread.so.0
#3  0x00007f8d351eeb49 in fspr_thread_mutex_lock (mutex=<optimized out>) at locks/unix/thread_mutex.c:92
#4  0x00007f8d3507ca25 in switch_mutex_lock (lock=<optimized out>) at src/switch_apr.c:310
#5  0x00007f8d3509a98c in switch_core_session_lock_codec_write (session=<optimized out>) at src/switch_core_codec.c:64
#6  0x00007f8d350d0746 in switch_core_media_set_codec (session=session@entry=0x7f87302945b8, force=force@entry=0, codec_flags=0) at src/switch_core_media.c:3604
#7  0x00007f8d350d604e in switch_core_media_activate_rtp (session=0x7f87302945b8) at src/switch_core_media.c:8589
#8  0x00007f8d198103ce in sofia_media_activate_rtp (tech_pvt=tech_pvt@entry=0x7f873045e8e8) at sofia_media.c:58
#9  0x00007f8d1981044e in sofia_media_tech_media (tech_pvt=tech_pvt@entry=0x7f873045e8e8, r_sdp=<optimized out>, type=type@entry=SDP_TYPE_REQUEST) at sofia_media.c:189
#10 0x00007f8d197e3b4a in sofia_handle_sip_i_state (de=0x7f8d083ecdd0, tags=0x7f86987aa770, sip=0x0, sofia_private=<optimized out>, nh=0x7f8ad06dc470, profile=0x26cb1e0, nua=0x7f8d0802b090, phrase=0x7f86987aab3c "Session Progress", status=183, session=0x7f87302945b8) at sofia.c:7683
#11 our_sofia_event_callback (event=nua_i_state, status=<optimized out>, phrase=0x7f86987aab3c "Session Progress", nua=0x7f8d0802b090, profile=0x26cb1e0, nh=0x7f8ad06dc470, sofia_private=0x7f8ad01d70d0, sip=0x0, de=de@entry=0x7f8d083ecdd0, tags=0x7f86987aa770) at sofia.c:1813
#12 0x00007f8d197e7a4b in sofia_process_dispatch_event (dep=0x7f8700dd6a20) at sofia.c:2253
#13 0x00007f8d197a8fc9 in sofia_receive_message (session=0x7f87302945b8, msg=0x7f8700dd7190) at mod_sofia.c:1347
#14 0x00007f8d350adee5 in switch_core_session_perform_receive_message (session=session@entry=0x7f87302945b8, message=message@entry=0x7f8700dd7190, file=file@entry=0x7f8d3522d6c5 "src/switch_ivr.c", func=func@entry=0x7f8d3522e750 <__func__.18977> "switch_ivr_parse_signal_data", line=line@entry=893) at src/switch_core_session.c:854
#15 0x00007f8d35175c5c in switch_ivr_parse_signal_data (session=0x7f87302945b8, all=all@entry=SWITCH_FALSE, only_session_thread=only_session_thread@entry=SWITCH_TRUE) at src/switch_ivr.c:893
#16 0x00007f8d35082921 in switch_channel_check_signal (channel=<optimized out>, in_thread_only=in_thread_only@entry=SWITCH_TRUE) at src/switch_channel.c:2266
#17 0x00007f8d350ec74c in switch_core_media_receive_message (session=session@entry=0x7f87302945b8, msg=<optimized out>) at src/switch_core_media.c:13211
#18 0x00007f8d350ade3f in switch_core_session_perform_receive_message (session=session@entry=0x7f87302945b8, message=message@entry=0x7f8700dd7620, file=file@entry=0x7f8d3521107d "src/switch_core_io.c", func=func@entry=0x7f8d352113d0 <__func__.19321> "switch_core_session_read_frame", line=line@entry=416) at src/switch_core_session.c:927
#19 0x00007f8d350b951b in switch_core_session_read_frame (session=session@entry=0x7f87302945b8, frame=frame@entry=0x7f8700dd9758, flags=flags@entry=0, stream_id=stream_id@entry=0) at src/switch_core_io.c:416
#20 0x00007f8d35147119 in audio_bridge_thread (obj=obj@entry=0x7f8ad0094ea0, thread=0x0) at src/switch_ivr_bridge.c:799
#21 0x00007f8d35148092 in audio_bridge_on_exchange_media (session=0x7f87302945b8) at src/switch_ivr_bridge.c:979
#22 0x00007f8d350b657a in switch_core_session_run (session=0x7f87302945b8) at src/switch_core_state_machine.c:650
#23 0x00007f8d350afb4e in switch_core_session_thread (thread=<optimized out>, obj=0x7f87302945b8) at src/switch_core_session.c:1727
#24 0x00007f8d350ab22a in switch_core_session_thread_pool_worker (thread=0x7f86d5d68a70, obj=<optimized out>) at src/switch_core_session.c:1791
#25 0x00007f8d351f4c30 in dummy_worker (opaque=0x7f86d5d68a70) at threadproc/unix/thread.c:151
#26 0x00007f8d324e1ea5 in start_thread () from /lib64/libpthread.so.0
#27 0x00007f8d31b35b0d in clone () from /lib64/libc.so.6

Thread 2 (Thread 0x7f88a9cbb700 (LWP 10894)):
#0  0x00007f8d324e854d in __lll_lock_wait () from /lib64/libpthread.so.0
#1  0x00007f8d324e3eb6 in _L_lock_941 () from /lib64/libpthread.so.0
#2  0x00007f8d324e3daf in pthread_mutex_lock () from /lib64/libpthread.so.0
#3  0x00007f8d351eeb49 in fspr_thread_mutex_lock (mutex=<optimized out>) at locks/unix/thread_mutex.c:92
#4  0x00007f8d3507ca25 in switch_mutex_lock (lock=<optimized out>) at src/switch_apr.c:310
#5  0x00007f8d197a929c in sofia_receive_message (session=0x7f87302945b8, msg=0x7f88a9cb8030) at mod_sofia.c:1526
#6  0x00007f8d350ae16b in switch_core_session_perform_receive_message (session=session@entry=0x7f87302945b8, message=message@entry=0x7f88a9cb8030, file=file@entry=0x7f8d352142a2 "src/switch_core_media.c", func=func@entry=0x7f8d35218fe0 <__func__.40598> "switch_core_session_write_frame", line=line@entry=15951) at src/switch_core_session.c:931
#7  0x00007f8d350f428f in switch_core_session_write_frame (session=session@entry=0x7f87302945b8, frame=0x7f86d6eb88a8, flags=flags@entry=0, stream_id=stream_id@entry=0) at src/switch_core_media.c:15951
#8  0x00007f8d351471e3 in audio_bridge_thread (obj=obj@entry=0x7f87307df670, thread=0x0) at src/switch_ivr_bridge.c:818
#9  0x00007f8d35149296 in switch_ivr_multi_threaded_bridge (session=session@entry=0x7f86d6eb3d68, peer_session=0x7f87302945b8, input_callback=<optimized out>, session_data=session_data@entry=0x0, peer_session_data=peer_session_data@entry=0x0) at src/switch_ivr_bridge.c:1794
#10 0x00007f8cfb179012 in audio_bridge_function (session=<optimized out>, data=<optimized out>) at mod_dptools.c:3673
#11 0x00007f8d350b2413 in switch_core_session_exec (session=session@entry=0x7f86d6eb3d68, application_interface=application_interface@entry=0x1f5b708, arg=arg@entry=0x7f86e908f868 "sofia/out/${destination_number}@39.164.247.5:2090") at src/switch_core_session.c:2965
#12 0x00007f8d350b2aa9 in switch_core_session_execute_application_get_flags (session=session@entry=0x7f86d6eb3d68, app=0x7f86e908f860 "bridge", arg=0x7f86e908f868 "sofia/out/${destination_number}@39.164.247.5:2090", flags=flags@entry=0x0) at src/switch_core_session.c:2825
#13 0x00007f8d350b69ac in switch_core_standard_on_execute (session=0x7f86d6eb3d68) at src/switch_core_state_machine.c:350
#14 switch_core_session_run (session=0x7f86d6eb3d68) at src/switch_core_state_machine.c:647
#15 0x00007f8d350afb4e in switch_core_session_thread (thread=<optimized out>, obj=0x7f86d6eb3d68) at src/switch_core_session.c:1727
#16 0x00007f8d350ab22a in switch_core_session_thread_pool_worker (thread=0x7f8720418810, obj=<optimized out>) at src/switch_core_session.c:1791
#17 0x00007f8d351f4c30 in dummy_worker (opaque=0x7f8720418810) at threadproc/unix/thread.c:151
#18 0x00007f8d324e1ea5 in start_thread () from /lib64/libpthread.so.0
#19 0x00007f8d31b35b0d in clone () from /lib64/libc.so.6

Thread 1 (Thread 0x7f8d3590d8c0 (LWP 35705)):
#0  0x00007f8d31b2cb43 in select () from /lib64/libc.so.6
#1  0x00007f8d351f6275 in fspr_sleep (t=<optimized out>) at time/unix/time.c:246
#2  0x00007f8d350bdef2 in switch_core_runtime_loop (bg=bg@entry=1) at src/switch_core.c:1201
#3  0x0000000000402f3b in main (argc=4, argv=<optimized out>) at src/switch.c:1229

cdevelop avatar Oct 25 '23 14:10 cdevelop

SWITCH_DECLARE(switch_core_session_t *) switch_core_session_request_uuid(switch_endpoint_interface_t *endpoint_interface, switch_call_direction_t direction, switch_originate_flag_t originate_flags, switch_memory_pool_t **pool, const char *use_uuid)

...

switch_mutex_init(&session->codec_init_mutex, SWITCH_MUTEX_NESTED, session->pool);
switch_mutex_init(&session->codec_read_mutex, SWITCH_MUTEX_NESTED, session->pool);
session->codec_write_mutex = session->codec_read_mutex;
//switch_mutex_init(&session->codec_write_mutex, SWITCH_MUTEX_NESTED, session->pool);

This will solve the problem

cdevelop avatar Oct 31 '23 11:10 cdevelop

It seems you can reproduce it on demand. Can you please provide a way to reproduce this (dialplan, maybe unit-tests, etc) ?

dragos-oancea avatar Oct 31 '23 11:10 dragos-oancea

The reasons for this problem are:
Thread 2 switch_core_session_write_frame lock session->codec_write_mutex switch_mutex_lock(session->codec_write_mutex) Call an external function switch_core_session_receive_message sofia_receive_message Blocked switch_mutex_lock(tech_pvt->sofia_mutex)

Thread 3 sofia_receive_message switch_mutex_lock(tech_pvt->sofia_mutex) switch_core_media_set_codec
Blocked switch_mutex_lock(session->codec_write_mutex)

The solution to this problem is to change switch_core_session_receive_message to switch_core_session_queue_message

https://github.com/signalwire/freeswitch/pull/2300

cdevelop avatar Nov 05 '23 02:11 cdevelop

It seems you can reproduce it on demand. Can you please provide a way to reproduce this (dialplan, maybe unit-tests, etc) ?

he told me it happens every day 3-10 locked channels on a production system. #2300 fixed his problem, but if we have new patch we can ask him to test.

seven1240 avatar Nov 05 '23 03:11 seven1240

I'm experiencing the very same issue and my backtrace looks exactly the same as described by @cdevelop. I didn't know that this was already reported, so I opened too an issue https://github.com/signalwire/freeswitch/issues/2372 reporting it.

greenbea avatar Jan 26 '24 07:01 greenbea

I just pushed a patch and created a PR that at least fixes the deadlock. I cannot say with all certainty that we should even be in sofia_receive_message() when this deadlock occurs, but at least this prevents unnecessary locking of the tech_pvt->sofia_mutex.

mishehu avatar Feb 15 '24 02:02 mishehu

SHA-1: b4e4321

  • [Core] Fix possible deadlock in switch_core_media_set_codec()

This commit results in a deadlock

Thread 3 switch_core_session_read_frame switch_mutex_lock(session->codec_read_mutex); sofia_receive_message switch_mutex_lock(tech_pvt->sofia_mutex); switch_core_media_set_codec switch_core_session_lock_codec_write(session); switch_core_session_lock_codec_read(session);

Thread 2 switch_core_session_write_frame switch_mutex_lock(session->codec_write_mutex); sofia_receive_message switch_mutex_lock(tech_pvt->sofia_mutex);

Thread 3 (Thread 0x7f8700ddc700 (LWP 44877)):
#0  0x00007f8d324e854d in __lll_lock_wait () from /lib64/libpthread.so.0
#1  0x00007f8d324e3eb6 in _L_lock_941 () from /lib64/libpthread.so.0
#2  0x00007f8d324e3daf in pthread_mutex_lock () from /lib64/libpthread.so.0
#3  0x00007f8d351eeb49 in fspr_thread_mutex_lock (mutex=<optimized out>) at locks/unix/thread_mutex.c:92
#4  0x00007f8d3507ca25 in switch_mutex_lock (lock=<optimized out>) at src/switch_apr.c:310
#5  0x00007f8d3509a98c in switch_core_session_lock_codec_write (session=<optimized out>) at src/switch_core_codec.c:64
#6  0x00007f8d350d0746 in switch_core_media_set_codec (session=session@entry=0x7f87302945b8, force=force@entry=0, codec_flags=0) at src/switch_core_media.c:3604
#7  0x00007f8d350d604e in switch_core_media_activate_rtp (session=0x7f87302945b8) at src/switch_core_media.c:8589
#8  0x00007f8d198103ce in sofia_media_activate_rtp (tech_pvt=tech_pvt@entry=0x7f873045e8e8) at sofia_media.c:58
#9  0x00007f8d1981044e in sofia_media_tech_media (tech_pvt=tech_pvt@entry=0x7f873045e8e8, r_sdp=<optimized out>, type=type@entry=SDP_TYPE_REQUEST) at sofia_media.c:189
#10 0x00007f8d197e3b4a in sofia_handle_sip_i_state (de=0x7f8d083ecdd0, tags=0x7f86987aa770, sip=0x0, sofia_private=<optimized out>, nh=0x7f8ad06dc470, profile=0x26cb1e0, nua=0x7f8d0802b090, phrase=0x7f86987aab3c "Session Progress", status=183, session=0x7f87302945b8) at sofia.c:7683
#11 our_sofia_event_callback (event=nua_i_state, status=<optimized out>, phrase=0x7f86987aab3c "Session Progress", nua=0x7f8d0802b090, profile=0x26cb1e0, nh=0x7f8ad06dc470, sofia_private=0x7f8ad01d70d0, sip=0x0, de=de@entry=0x7f8d083ecdd0, tags=0x7f86987aa770) at sofia.c:1813
#12 0x00007f8d197e7a4b in sofia_process_dispatch_event (dep=0x7f8700dd6a20) at sofia.c:2253
#13 0x00007f8d197a8fc9 in sofia_receive_message (session=0x7f87302945b8, msg=0x7f8700dd7190) at mod_sofia.c:1347
#14 0x00007f8d350adee5 in switch_core_session_perform_receive_message (session=session@entry=0x7f87302945b8, message=message@entry=0x7f8700dd7190, file=file@entry=0x7f8d3522d6c5 "src/switch_ivr.c", func=func@entry=0x7f8d3522e750 <__func__.18977> "switch_ivr_parse_signal_data", line=line@entry=893) at src/switch_core_session.c:854
#15 0x00007f8d35175c5c in switch_ivr_parse_signal_data (session=0x7f87302945b8, all=all@entry=SWITCH_FALSE, only_session_thread=only_session_thread@entry=SWITCH_TRUE) at src/switch_ivr.c:893
#16 0x00007f8d35082921 in switch_channel_check_signal (channel=<optimized out>, in_thread_only=in_thread_only@entry=SWITCH_TRUE) at src/switch_channel.c:2266
#17 0x00007f8d350ec74c in switch_core_media_receive_message (session=session@entry=0x7f87302945b8, msg=<optimized out>) at src/switch_core_media.c:13211
#18 0x00007f8d350ade3f in switch_core_session_perform_receive_message (session=session@entry=0x7f87302945b8, message=message@entry=0x7f8700dd7620, file=file@entry=0x7f8d3521107d "src/switch_core_io.c", func=func@entry=0x7f8d352113d0 <__func__.19321> "switch_core_session_read_frame", line=line@entry=416) at src/switch_core_session.c:927
#19 0x00007f8d350b951b in switch_core_session_read_frame (session=session@entry=0x7f87302945b8, frame=frame@entry=0x7f8700dd9758, flags=flags@entry=0, stream_id=stream_id@entry=0) at src/switch_core_io.c:416
#20 0x00007f8d35147119 in audio_bridge_thread (obj=obj@entry=0x7f8ad0094ea0, thread=0x0) at src/switch_ivr_bridge.c:799
#21 0x00007f8d35148092 in audio_bridge_on_exchange_media (session=0x7f87302945b8) at src/switch_ivr_bridge.c:979
#22 0x00007f8d350b657a in switch_core_session_run (session=0x7f87302945b8) at src/switch_core_state_machine.c:650
#23 0x00007f8d350afb4e in switch_core_session_thread (thread=<optimized out>, obj=0x7f87302945b8) at src/switch_core_session.c:1727
#24 0x00007f8d350ab22a in switch_core_session_thread_pool_worker (thread=0x7f86d5d68a70, obj=<optimized out>) at src/switch_core_session.c:1791
#25 0x00007f8d351f4c30 in dummy_worker (opaque=0x7f86d5d68a70) at threadproc/unix/thread.c:151
#26 0x00007f8d324e1ea5 in start_thread () from /lib64/libpthread.so.0
#27 0x00007f8d31b35b0d in clone () from /lib64/libc.so.6

Thread 2 (Thread 0x7f88a9cbb700 (LWP 10894)):
#0  0x00007f8d324e854d in __lll_lock_wait () from /lib64/libpthread.so.0
#1  0x00007f8d324e3eb6 in _L_lock_941 () from /lib64/libpthread.so.0
#2  0x00007f8d324e3daf in pthread_mutex_lock () from /lib64/libpthread.so.0
#3  0x00007f8d351eeb49 in fspr_thread_mutex_lock (mutex=<optimized out>) at locks/unix/thread_mutex.c:92
#4  0x00007f8d3507ca25 in switch_mutex_lock (lock=<optimized out>) at src/switch_apr.c:310
#5  0x00007f8d197a929c in sofia_receive_message (session=0x7f87302945b8, msg=0x7f88a9cb8030) at mod_sofia.c:1526
#6  0x00007f8d350ae16b in switch_core_session_perform_receive_message (session=session@entry=0x7f87302945b8, message=message@entry=0x7f88a9cb8030, file=file@entry=0x7f8d352142a2 "src/switch_core_media.c", func=func@entry=0x7f8d35218fe0 <__func__.40598> "switch_core_session_write_frame", line=line@entry=15951) at src/switch_core_session.c:931
#7  0x00007f8d350f428f in switch_core_session_write_frame (session=session@entry=0x7f87302945b8, frame=0x7f86d6eb88a8, flags=flags@entry=0, stream_id=stream_id@entry=0) at src/switch_core_media.c:15951
#8  0x00007f8d351471e3 in audio_bridge_thread (obj=obj@entry=0x7f87307df670, thread=0x0) at src/switch_ivr_bridge.c:818
#9  0x00007f8d35149296 in switch_ivr_multi_threaded_bridge (session=session@entry=0x7f86d6eb3d68, peer_session=0x7f87302945b8, input_callback=<optimized out>, session_data=session_data@entry=0x0, peer_session_data=peer_session_data@entry=0x0) at src/switch_ivr_bridge.c:1794
#10 0x00007f8cfb179012 in audio_bridge_function (session=<optimized out>, data=<optimized out>) at mod_dptools.c:3673
#11 0x00007f8d350b2413 in switch_core_session_exec (session=session@entry=0x7f86d6eb3d68, application_interface=application_interface@entry=0x1f5b708, arg=arg@entry=0x7f86e908f868 "sofia/out/${destination_number}@39.164.247.5:2090") at src/switch_core_session.c:2965
#12 0x00007f8d350b2aa9 in switch_core_session_execute_application_get_flags (session=session@entry=0x7f86d6eb3d68, app=0x7f86e908f860 "bridge", arg=0x7f86e908f868 "sofia/out/${destination_number}@39.164.247.5:2090", flags=flags@entry=0x0) at src/switch_core_session.c:2825
#13 0x00007f8d350b69ac in switch_core_standard_on_execute (session=0x7f86d6eb3d68) at src/switch_core_state_machine.c:350
#14 switch_core_session_run (session=0x7f86d6eb3d68) at src/switch_core_state_machine.c:647
#15 0x00007f8d350afb4e in switch_core_session_thread (thread=<optimized out>, obj=0x7f86d6eb3d68) at src/switch_core_session.c:1727
#16 0x00007f8d350ab22a in switch_core_session_thread_pool_worker (thread=0x7f8720418810, obj=<optimized out>) at src/switch_core_session.c:1791
#17 0x00007f8d351f4c30 in dummy_worker (opaque=0x7f8720418810) at threadproc/unix/thread.c:151
#18 0x00007f8d324e1ea5 in start_thread () from /lib64/libpthread.so.0
#19 0x00007f8d31b35b0d in clone () from /lib64/libc.so.6

Thread 1 (Thread 0x7f8d3590d8c0 (LWP 35705)):
#0  0x00007f8d31b2cb43 in select () from /lib64/libc.so.6
#1  0x00007f8d351f6275 in fspr_sleep (t=<optimized out>) at time/unix/time.c:246
#2  0x00007f8d350bdef2 in switch_core_runtime_loop (bg=bg@entry=1) at src/switch_core.c:1201
#3  0x0000000000402f3b in main (argc=4, argv=<optimized out>) at src/switch.c:1229

I've encountered the same issue. Could you please provide a minimal test case to reproduce the bug?Thask you!

0xFF88 avatar Jun 14 '24 11:06 0xFF88

Ok silly question time. Our install is on Debian via apt-get, how can I implement the fix

Thanks

glennvos avatar Jun 27 '24 11:06 glennvos