isolated-vm icon indicating copy to clipboard operation
isolated-vm copied to clipboard

Isolate deadlocks since 4.3.7

Open marcusdarmstrong opened this issue 2 years ago • 8 comments

Hi there,

Since the fix for #302 instead of main thread deadlocks in our application we've been observing a slow stream of isolate deadlocks, reproducible in the application by sending it concurrent traffic (i.e. the situation is always 2+ isolates running in parallel, with interleaved interaction with the main thread). That's a big improvement since it constitutes a recoverable failure, but is still unfortunate obviously.

I've been unable thus far to produce a small test case that identifies a root cause, though I am working on it—but at this point I figured filing the issue and seeing if the cause was perhaps obvious to you might be a faster path forward while I work to produce a simple test case.

What I can provide at this point is a process sample from a hung isolate:

    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, ivm::thread_pool_t::new_thread(std::__1::lock_guard<std::__1::mutex>&)::$_1> >(void*)  (in isolated_vm.node) + 164  [0x110342524]
    +       2820 ivm::IsolatedScheduler::SendWake()::$_2::__invoke(bool, void*)  (in isolated_vm.node) + 49  [0x1103387b1]
    +         2820 ivm::IsolateEnvironment::AsyncEntry()  (in isolated_vm.node) + 2405  [0x11032bda5]
    +           2820 ivm::(anonymous namespace)::TransferablePromiseHolder::ResolveTask::Run()  (in isolated_vm.node) + 195  [0x1103917f3]
    +             2820 v8::Promise::Resolver::Resolve(v8::Local<v8::Context>, v8::Local<v8::Value>)  (in node) + 379  [0x10381b48b]
    +               2820 v8::internal::Isolate::FireCallCompletedCallback(v8::internal::MicrotaskQueue*)  (in node) + 56  [0x10394d178]
    +                 2820 v8::internal::MicrotaskQueue::PerformCheckpoint(v8::Isolate*)  (in node) + 97  [0x103959cd1]
    +                   2820 v8::internal::MicrotaskQueue::RunMicrotasks(v8::internal::Isolate*)  (in node) + 322  [0x103959e62]
    +                     2820 v8::internal::Execution::TryRunMicrotasks(v8::internal::Isolate*, v8::internal::MicrotaskQueue*, v8::internal::MaybeHandle<v8::internal::Object>*)  (in node) + 81  [0x103932ae1]
    +                       2820 v8::internal::(anonymous namespace)::InvokeWithTryCatch(v8::internal::Isolate*, v8::internal::(anonymous namespace)::InvokeParams const&)  (in node) + 83  [0x103932a03]
    +                         2820 v8::internal::(anonymous namespace)::Invoke(v8::internal::Isolate*, v8::internal::(anonymous namespace)::InvokeParams const&)  (in node) + 2448  [0x103932540]
    +                           2820 Builtins_JSRunMicrotasksEntry  (in node) + 131  [0x10406f683]
    +                             2820 Builtins_RunMicrotasks  (in node) + 614  [0x104093166]
    +                               2820 Builtins_PromiseFulfillReactionJob  (in node) + 46  [0x10412432e]
    +                                 2820 Builtins_InterpreterEntryTrampoline  (in node) + 202  [0x10407152a]
    +                                   2820 Builtins_InterpreterEntryTrampoline  (in node) + 202  [0x10407152a]
    +                                     2820 Builtins_InterpreterEntryTrampoline  (in node) + 202  [0x10407152a]
    +                                       2820 Builtins_CEntry_Return1_DontSaveFPRegs_ArgvOnStack_BuiltinExit  (in node) + 57  [0x1040ddfb9]
    +                                         2820 v8::internal::Builtin_Impl_HandleApiCall(v8::internal::BuiltinArguments, v8::internal::Isolate*)  (in node) + 255  [0x10385cf8f]
    +                                           2820 v8::internal::MaybeHandle<v8::internal::Object> v8::internal::(anonymous namespace)::HandleApiCallHelper<false>(v8::internal::Isolate*, v8::internal::Handle<v8::internal::HeapObject>, v8::internal::Handle<v8::internal::HeapObject>, v8::internal::Handle<v8::internal::FunctionTemplateInfo>, v8::internal::Handle<v8::internal::Object>, v8::internal::BuiltinArguments)  (in node) + 550  [0x10385d836]
    +                                             2820 v8::internal::FunctionCallbackArguments::Call(v8::internal::CallHandlerInfo)  (in node) + 462  [0x10385e13e]
    +                                               2820 void ivm::detail::RunBarrier<ivm::detail::CallbackMaker<v8::Local<v8::Value> (*)(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>), &(v8::Local<v8::Value> ivm::detail::unbind_member_function<v8::Local<v8::Value> (ivm::ReferenceHandle::*)(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)>::invoke<&(v8::Local<v8::Value> ivm::ReferenceHandle::Apply<0>(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>))>(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)), -1, v8::FunctionCallbackInfo<v8::Value> const&>::Callback(v8::FunctionCallbackInfo<v8::Value> const&)::'lambda'()>(0)  (in isolated_vm.node) + 27  [0x110386ccb]
    +                                                 2820 void ivm::detail::CallbackMaker<v8::Local<v8::Value> (*)(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>), &(v8::Local<v8::Value> ivm::detail::unbind_member_function<v8::Local<v8::Value> (ivm::ReferenceHandle::*)(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)>::invoke<&(v8::Local<v8::Value> ivm::ReferenceHandle::Apply<0>(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>))>(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)), -1, v8::FunctionCallbackInfo<v8::Value> const&>::Spread<0ul, 1ul, 2ul, 3ul>(v8::FunctionCallbackInfo<v8::Value> const&, std::__1::integer_sequence<unsigned long, 0ul, 1ul, 2ul, 3ul>)::'lambda'()::operator()() const  (in isolated_vm.node) + 524  [0x110386f5c]
    +                                                   2820 ivm::ThreePhaseTask::RunSync(ivm::IsolateHolder&, bool)  (in isolated_vm.node) + 597  [0x11033ca35]
    +                                                     2820 ivm::Scheduler::AsyncWait::Wait()  (in isolated_vm.node) + 111  [0x1103384bf]
    +                                                       2820 std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&)  (in libc++.1.dylib) + 18  [0x7fff202e8d72]
    +                                                         2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
    +                                                           2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]

Thanks for all the help and I'll be back with a minimal reproduction as soon as I have one.

--Marcus

marcusdarmstrong avatar May 24 '22 14:05 marcusdarmstrong

If you have stack traces for the rest of the threads in a single process that would be helpful. Since many threads are interacting with each other, a single stack trace is only part of the picture.

This stack trace shows an isolate being woken up by a resolved promise (probably via apply {promise:true} argument). The woken isolate then calls applySync on a function reference which belongs to the default nodejs isolate, and it appears that the default nodejs isolate doesn't respond to the invocation for some reason.

laverdet avatar May 24 '22 15:05 laverdet

If you have stack traces for the rest of the threads in a single process that would be helpful. Since many threads are interacting with each other, a single stack trace is only part of the picture.

Certainly. We're running an HTTP server here with 16 isolates spun up to handle individual requests. Isolates only ever interact with the main thread. The reproduction model I'm using here consists of spamming the server with two parallel curl loops. After triggering a deadlock I've stopped the curl loops, allowed the service to settle down, and taken this process dump:

Call graph:
    2820 Thread_619154   DispatchQueue_1: com.apple.main-thread  (serial)
    + 2820 start  (in libdyld.dylib) + 1  [0x7fff2039af3d]
    +   2820 node::Start(int, char**)  (in node) + 200  [0x1036415a8]
    +     2820 node::NodeMainInstance::Run(node::EnvSerializeInfo const*)  (in node) + 121  [0x1036b7cd9]
    +       2820 node::NodeMainInstance::Run(int*, node::Environment*)  (in node) + 97  [0x1036b8051]
    +         2820 node::SpinEventLoop(node::Environment*)  (in node) + 260  [0x1035aa074]
    +           2791 uv_run  (in node) + 401  [0x104052901]
    +           ! 2786 uv__io_poll  (in node) + 947  [0x104065b43]
    +           ! : 2786 kevent  (in libsystem_kernel.dylib) + 10  [0x7fff2034ec4a]
    +           ! 3 uv__io_poll  (in node) + 1,947,...  [0x104065791,0x104065b43,...]
    +           ! 2 uv__io_poll  (in node) + 987  [0x104065b6b]
    +           !   1 uv__hrtime  (in node) + 23  [0x104062fe7]
    +           !   | 1 uv_once  (in node) + 9  [0x104060999]
    +           !   |   1 pthread_once  (in libsystem_pthread.dylib) + 43  [0x7fff2037c170]
    +           !   1 uv__hrtime  (in node) + 29  [0x104062fed]
    +           !     1 mach_continuous_time  (in libsystem_kernel.dylib) + 20  [0x7fff2034a72e]
    +           !       1 _mach_continuous_time  (in libsystem_kernel.dylib) + 32  [0x7fff20354dd7]
    +           21 uv_run  (in node) + 417  [0x104052911]
    +           ! 21 uv__run_check  (in node) + 164  [0x104059634]
    +           !   14 node::Environment::CheckImmediate(uv_check_s*)  (in node) + 90  [0x10361233a]
    +           !   : 5 node::Environment::RunAndClearNativeImmediates(bool)  (in node) + 80  [0x1036128b0]
    +           !   : | 4 v8::Object::New(v8::Isolate*)  (in node) + 159  [0x10381784f]
    +           !   : | + 2 v8::internal::Factory::NewJSObject(v8::internal::Handle<v8::internal::JSFunction>, v8::internal::AllocationType)  (in node) + 31  [0x1039971cf]
    +           !   : | + ! 2 v8::internal::JSFunction::EnsureHasInitialMap(v8::internal::Handle<v8::internal::JSFunction>)  (in node) + 0,22  [0x103bbe300,0x103bbe316]
    +           !   : | + 1 v8::internal::Factory::NewJSObject(v8::internal::Handle<v8::internal::JSFunction>, v8::internal::AllocationType)  (in node) + 121  [0x103997229]
    +           !   : | + ! 1 v8::internal::Factory::AllocateRawWithAllocationSite(v8::internal::Handle<v8::internal::Map>, v8::internal::AllocationType, v8::internal::Handle<v8::internal::AllocationSite>)  (in node) + 142  [0x10398cf7e]
    +           !   : | + 1 v8::internal::Factory::NewJSObject(v8::internal::Handle<v8::internal::JSFunction>, v8::internal::AllocationType)  (in node) + 145  [0x103997241]
    +           !   : | +   1 v8::internal::Factory::InitializeJSObjectFromMap(v8::internal::JSObject, v8::internal::Object, v8::internal::Map)  (in node) + 55  [0x103997797]
    +           !   : | 1 v8::Object::New(v8::Isolate*)  (in node) + 23  [0x1038177c7]
    +           !   : 3 node::Environment::RunAndClearNativeImmediates(bool)  (in node) + 108  [0x1036128cc]
    +           !   : | 2 node::InternalCallbackScope::InternalCallbackScope(node::Environment*, v8::Local<v8::Object>, node::async_context const&, int)  (in node) + 286  [0x1035a89ce]
    +           !   : | 1 node::InternalCallbackScope::InternalCallbackScope(node::Environment*, v8::Local<v8::Object>, node::async_context const&, int)  (in node) + 304  [0x1035a89e0]
    +           !   : |   1 node::AsyncHooks::push_async_context(double, double, v8::Local<v8::Object>)  (in node) + 367  [0x1035a8c3f]
    +           !   : |     1 v8::internal::GlobalHandles::Create(v8::internal::Object)  (in node) + 296  [0x10396a1b8]
    +           !   : 3 node::Environment::RunAndClearNativeImmediates(bool)  (in node) + 171  [0x10361290b]
    +           !   : | 2 node::Environment::RunAndClearNativeImmediates(bool)::$_7::operator()(node::CallbackQueue<void, node::Environment*>*) const  (in node) + 45  [0x10361309d]
    +           !   : | + 2 v8::TryCatch::TryCatch(v8::Isolate*)  (in node) + 53  [0x103801a65]
    +           !   : | +   2 v8::internal::GetCurrentStackPosition()  (in node) + 0  [0x103d95330]
    +           !   : | 1 node::Environment::RunAndClearNativeImmediates(bool)::$_7::operator()(node::CallbackQueue<void, node::Environment*>*) const  (in node) + 289  [0x103613191]
    +           !   : |   1 node::errors::TryCatchScope::~TryCatchScope()  (in node) + 6  [0x103676b36]
    +           !   : 3 node::Environment::RunAndClearNativeImmediates(bool)  (in node) + 434  [0x103612a12]
    +           !   :   3 node::InternalCallbackScope::~InternalCallbackScope()  (in node) + 14  [0x1035a87de]
    +           !   :     1 node::InternalCallbackScope::Close()  (in node) + 137  [0x1035a8d19]
    +           !   :     + 1 node::AsyncHooks::pop_async_context(double)  (in node) + 140  [0x1035a8f7c]
    +           !   :     1 node::InternalCallbackScope::Close()  (in node) + 494  [0x1035a8e7e]
    +           !   :     + 1 node::Environment::RunWeakRefCleanup()  (in node) + 0  [0x10361dfd0]
    +           !   :     1 node::InternalCallbackScope::Close()  (in node) + 241  [0x1035a8d81]
    +           !   4 node::Environment::CheckImmediate(uv_check_s*)  (in node) + 80  [0x103612330]
    +           !   : 4 v8::Context::Enter()  (in node) + 34,30  [0x1037f85a2,0x1037f859e]
    +           !   1 node::Environment::CheckImmediate(uv_check_s*)  (in node) + 65  [0x103612321]
    +           !   : 1 v8::HandleScope::HandleScope(v8::Isolate*)  (in node) + 41  [0x1037f7f99]
    +           !   :   1 v8::internal::ThreadId::GetCurrentThreadId()  (in node) + 13  [0x10395ce5d]
    +           !   :     1 tlv_get_addr  (in libdyld.dylib) + 4  [0x7fff203914d0]
    +           !   1 v8::Isolate::SetIdle(bool)  (in node) + 0  [0x103820800]
    +           !   1 v8::internal::Isolate::SetIdle(bool)  (in node) + 40  [0x10394eb88]
    +           7 uv_run  (in node) + 205  [0x10405283d]
    +           ! 5 uv__run_timers  (in node) + 103  [0x10404df87]
    +           ! : 3 node::performance::ELDHistogram::OnInterval()  (in node) + 87  [0x1036e0677]
    +           ! : | 3 hdr_record_value  (in node) + 16,121  [0x1043ac8d0,0x1043ac939]
    +           ! : 1 node::performance::ELDHistogram::OnInterval()  (in node) + 50  [0x1036e0652]
    +           ! : | 1 uv_mutex_lock  (in node) + 9  [0x104060409]
    +           ! : |   1 pthread_mutex_lock  (in libsystem_pthread.dylib) + 1  [0x7fff2037ad83]
    +           ! : 1 node::performance::ELDHistogram::OnInterval()  (in node) + 55  [0x1036e0657]
    +           ! :   1 uv__hrtime  (in node) + 50  [0x104063002]
    +           ! 1 uv__run_timers  (in node) + 57  [0x10404df59]
    +           ! : 1 uv_timer_stop  (in node) + 4  [0x10404da94]
    +           ! 1 uv__run_timers  (in node) + 97  [0x10404df81]
    +           !   1 uv_timer_start  (in node) + 0  [0x10404d7c0]
    +           1 uv_run  (in node) + 409  [0x104052909]
    +             1 uv__metrics_update_idle_time  (in node) + 14  [0x104051d6e]
    2820 Thread_619165
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 node::WorkerThreadsTaskRunner::DelayedTaskScheduler::Run()  (in node) + 361  [0x1036e5529]
    +       2820 uv_run  (in node) + 401  [0x104052901]
    +         2820 uv__io_poll  (in node) + 947  [0x104065b43]
    +           2820 kevent  (in libsystem_kernel.dylib) + 10  [0x7fff2034ec4a]
    2820 Thread_619166
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 node::(anonymous namespace)::PlatformWorkerThread(void*)  (in node) + 440  [0x1036e2758]
    +       2820 node::TaskQueue<v8::Task>::BlockingPop()  (in node) + 72  [0x1036e5738]
    +         2820 uv_cond_wait  (in node) + 9  [0x104060449]
    +           2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
    +             2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]
    2820 Thread_619167
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 node::(anonymous namespace)::PlatformWorkerThread(void*)  (in node) + 440  [0x1036e2758]
    +       2820 node::TaskQueue<v8::Task>::BlockingPop()  (in node) + 72  [0x1036e5738]
    +         2820 uv_cond_wait  (in node) + 9  [0x104060449]
    +           2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
    +             2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]
    2820 Thread_619168
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 node::(anonymous namespace)::PlatformWorkerThread(void*)  (in node) + 440  [0x1036e2758]
    +       2820 node::TaskQueue<v8::Task>::BlockingPop()  (in node) + 72  [0x1036e5738]
    +         2820 uv_cond_wait  (in node) + 9  [0x104060449]
    +           2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
    +             2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]
    2820 Thread_619169
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 node::(anonymous namespace)::PlatformWorkerThread(void*)  (in node) + 440  [0x1036e2758]
    +       2820 node::TaskQueue<v8::Task>::BlockingPop()  (in node) + 72  [0x1036e5738]
    +         2820 uv_cond_wait  (in node) + 9  [0x104060449]
    +           2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
    +             2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]
    2820 Thread_619187
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 node::inspector::(anonymous namespace)::StartIoThreadMain(void*)  (in node) + 19  [0x103753de3]
    +       2820 uv_sem_wait  (in node) + 16  [0x104060a50]
    +         2820 semaphore_wait_trap  (in libsystem_kernel.dylib) + 10  [0x7fff2034a2f6]
    2820 Thread_620430
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 worker  (in node) + 89  [0x10404d569]
    +       2820 uv_cond_wait  (in node) + 9  [0x104060449]
    +         2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
    +           2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]
    2820 Thread_620431
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 worker  (in node) + 89  [0x10404d569]
    +       2820 uv_cond_wait  (in node) + 9  [0x104060449]
    +         2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
    +           2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]
    2820 Thread_620432
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 worker  (in node) + 89  [0x10404d569]
    +       2820 uv_cond_wait  (in node) + 9  [0x104060449]
    +         2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
    +           2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]
    2820 Thread_620433
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 worker  (in node) + 89  [0x10404d569]
    +       2820 uv_cond_wait  (in node) + 9  [0x104060449]
    +         2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
    +           2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]
    2820 Thread_625376
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, ivm::thread_pool_t::new_thread(std::__1::lock_guard<std::__1::mutex>&)::$_1> >(void*)  (in isolated_vm.node) + 108  [0x1103424ec]
    +       2820 std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&)  (in libc++.1.dylib) + 18  [0x7fff202e8d72]
    +         2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
    +           2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]
    2820 Thread_625377
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, ivm::thread_pool_t::new_thread(std::__1::lock_guard<std::__1::mutex>&)::$_1> >(void*)  (in isolated_vm.node) + 164  [0x110342524]
    +       2820 ivm::IsolatedScheduler::SendWake()::$_2::__invoke(bool, void*)  (in isolated_vm.node) + 49  [0x1103387b1]
    +         2820 ivm::IsolateEnvironment::AsyncEntry()  (in isolated_vm.node) + 2405  [0x11032bda5]
    +           2820 ivm::(anonymous namespace)::TransferablePromiseHolder::ResolveTask::Run()  (in isolated_vm.node) + 195  [0x1103917f3]
    +             2820 v8::Promise::Resolver::Resolve(v8::Local<v8::Context>, v8::Local<v8::Value>)  (in node) + 379  [0x10381b48b]
    +               2820 v8::internal::Isolate::FireCallCompletedCallback(v8::internal::MicrotaskQueue*)  (in node) + 56  [0x10394d178]
    +                 2820 v8::internal::MicrotaskQueue::PerformCheckpoint(v8::Isolate*)  (in node) + 97  [0x103959cd1]
    +                   2820 v8::internal::MicrotaskQueue::RunMicrotasks(v8::internal::Isolate*)  (in node) + 322  [0x103959e62]
    +                     2820 v8::internal::Execution::TryRunMicrotasks(v8::internal::Isolate*, v8::internal::MicrotaskQueue*, v8::internal::MaybeHandle<v8::internal::Object>*)  (in node) + 81  [0x103932ae1]
    +                       2820 v8::internal::(anonymous namespace)::InvokeWithTryCatch(v8::internal::Isolate*, v8::internal::(anonymous namespace)::InvokeParams const&)  (in node) + 83  [0x103932a03]
    +                         2820 v8::internal::(anonymous namespace)::Invoke(v8::internal::Isolate*, v8::internal::(anonymous namespace)::InvokeParams const&)  (in node) + 2448  [0x103932540]
    +                           2820 Builtins_JSRunMicrotasksEntry  (in node) + 131  [0x10406f683]
    +                             2820 Builtins_RunMicrotasks  (in node) + 614  [0x104093166]
    +                               2820 Builtins_PromiseFulfillReactionJob  (in node) + 46  [0x10412432e]
    +                                 2820 Builtins_InterpreterEntryTrampoline  (in node) + 202  [0x10407152a]
    +                                   2820 Builtins_InterpreterEntryTrampoline  (in node) + 202  [0x10407152a]
    +                                     2820 Builtins_InterpreterEntryTrampoline  (in node) + 202  [0x10407152a]
    +                                       2820 Builtins_CEntry_Return1_DontSaveFPRegs_ArgvOnStack_BuiltinExit  (in node) + 57  [0x1040ddfb9]
    +                                         2820 v8::internal::Builtin_Impl_HandleApiCall(v8::internal::BuiltinArguments, v8::internal::Isolate*)  (in node) + 255  [0x10385cf8f]
    +                                           2820 v8::internal::MaybeHandle<v8::internal::Object> v8::internal::(anonymous namespace)::HandleApiCallHelper<false>(v8::internal::Isolate*, v8::internal::Handle<v8::internal::HeapObject>, v8::internal::Handle<v8::internal::HeapObject>, v8::internal::Handle<v8::internal::FunctionTemplateInfo>, v8::internal::Handle<v8::internal::Object>, v8::internal::BuiltinArguments)  (in node) + 550  [0x10385d836]
    +                                             2820 v8::internal::FunctionCallbackArguments::Call(v8::internal::CallHandlerInfo)  (in node) + 462  [0x10385e13e]
    +                                               2820 void ivm::detail::RunBarrier<ivm::detail::CallbackMaker<v8::Local<v8::Value> (*)(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>), &(v8::Local<v8::Value> ivm::detail::unbind_member_function<v8::Local<v8::Value> (ivm::ReferenceHandle::*)(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)>::invoke<&(v8::Local<v8::Value> ivm::ReferenceHandle::Apply<0>(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>))>(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)), -1, v8::FunctionCallbackInfo<v8::Value> const&>::Callback(v8::FunctionCallbackInfo<v8::Value> const&)::'lambda'()>(0)  (in isolated_vm.node) + 27  [0x110386ccb]
    +                                                 2820 void ivm::detail::CallbackMaker<v8::Local<v8::Value> (*)(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>), &(v8::Local<v8::Value> ivm::detail::unbind_member_function<v8::Local<v8::Value> (ivm::ReferenceHandle::*)(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)>::invoke<&(v8::Local<v8::Value> ivm::ReferenceHandle::Apply<0>(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>))>(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)), -1, v8::FunctionCallbackInfo<v8::Value> const&>::Spread<0ul, 1ul, 2ul, 3ul>(v8::FunctionCallbackInfo<v8::Value> const&, std::__1::integer_sequence<unsigned long, 0ul, 1ul, 2ul, 3ul>)::'lambda'()::operator()() const  (in isolated_vm.node) + 524  [0x110386f5c]
    +                                                   2820 ivm::ThreePhaseTask::RunSync(ivm::IsolateHolder&, bool)  (in isolated_vm.node) + 597  [0x11033ca35]
    +                                                     2820 ivm::Scheduler::AsyncWait::Wait()  (in isolated_vm.node) + 111  [0x1103384bf]
    +                                                       2820 std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&)  (in libc++.1.dylib) + 18  [0x7fff202e8d72]
    +                                                         2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
    +                                                           2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]
    2820 Thread_625378
    + 2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
    +   2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
    +     2820 void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, ivm::thread_pool_t::new_thread(std::__1::lock_guard<std::__1::mutex>&)::$_1> >(void*)  (in isolated_vm.node) + 108  [0x1103424ec]
    +       2820 std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&)  (in libc++.1.dylib) + 18  [0x7fff202e8d72]
    +         2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
    +           2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]
    2820 Thread_673758
      2820 thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
        2820 _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
          2820 void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, ivm::thread_pool_t::new_thread(std::__1::lock_guard<std::__1::mutex>&)::$_1> >(void*)  (in isolated_vm.node) + 108  [0x1103424ec]
            2820 std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&)  (in libc++.1.dylib) + 18  [0x7fff202e8d72]
              2820 _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
                2820 __psynch_cvwait  (in libsystem_kernel.dylib) + 10  [0x7fff2034ccde]

Total number in stack (recursive counted multiple, when >=5):
        14       _pthread_start  (in libsystem_pthread.dylib) + 224  [0x7fff2037f8fc]
        14       thread_start  (in libsystem_pthread.dylib) + 15  [0x7fff2037b443]
        12       __psynch_cvwait  (in libsystem_kernel.dylib) + 0  [0x7fff2034ccd4]
        12       _pthread_cond_wait  (in libsystem_pthread.dylib) + 1298  [0x7fff2037fe49]
        8       uv_cond_wait  (in node) + 9  [0x104060449]

Sort by top of stack, same collapsed (when >= 5):
        __psynch_cvwait  (in libsystem_kernel.dylib)        33840
        kevent  (in libsystem_kernel.dylib)        5606
        semaphore_wait_trap  (in libsystem_kernel.dylib)        2820

This stack trace shows an isolate being woken up by a resolved promise (probably via apply {promise:true} argument). The woken isolate then calls applySync on a function reference which belongs to the default nodejs isolate, and it appears that the default nodejs isolate doesn't respond to the invocation for some reason.

Yeah, we use the promise proxying mechanism to provide isolates access to a fetch-like interface for network egress, which is the general "halting location". That's currently implemented with a copySync on the response, which would presumably be the synchronous invocation you mention, though I have speculatively tested a changeset that switched this over to async operations in suspicion that the sync calls were to blame, with no success.

marcusdarmstrong avatar May 24 '22 15:05 marcusdarmstrong

Instead of a call graph do you have a stack trace? I think the output you're sharing here is from lldb, in which case you can run bt all to get stack traces.

laverdet avatar May 24 '22 16:05 laverdet

I don't have a detailed analysis like the OP, but I think we're impacted by the same bug, as we're seeing (concurrent) isolates reaching their pre-set timeout since upgrading to 4.3.7, which we've never seen before.

everhardt avatar May 25 '22 07:05 everhardt

Sorry for the delay here, had to deal with some other issues in the mean time, but backtrace provided:

Executable module set to "/Users/marmstrong/.nvm/versions/node/v16.10.0/bin/node".
Architecture set to: x86_64h-apple-macosx-.
(lldb) bt all
* thread #1, queue = 'com.apple.main-thread', stop reason = signal SIGSTOP
  * frame #0: 0x00007fff2034ec4a libsystem_kernel.dylib`kevent + 10
    frame #1: 0x000000010c15db43 node`uv__io_poll + 947
    frame #2: 0x000000010c14a901 node`uv_run + 401
    frame #3: 0x000000010b6a2074 node`node::SpinEventLoop(node::Environment*) + 260
    frame #4: 0x000000010b7b0051 node`node::NodeMainInstance::Run(int*, node::Environment*) + 97
    frame #5: 0x000000010b7afcd9 node`node::NodeMainInstance::Run(node::EnvSerializeInfo const*) + 121
    frame #6: 0x000000010b7395a8 node`node::Start(int, char**) + 200
    frame #7: 0x00007fff2039af3d libdyld.dylib`start + 1
    frame #8: 0x00007fff2039af3d libdyld.dylib`start + 1
  thread #2
    frame #0: 0x00007fff2034ec4a libsystem_kernel.dylib`kevent + 10
    frame #1: 0x000000010c15db43 node`uv__io_poll + 947
    frame #2: 0x000000010c14a901 node`uv_run + 401
    frame #3: 0x000000010b7dd529 node`node::WorkerThreadsTaskRunner::DelayedTaskScheduler::Run() + 361
    frame #4: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #5: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #3
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x000000010c158449 node`uv_cond_wait + 9
    frame #3: 0x000000010b7dd738 node`node::TaskQueue<v8::Task>::BlockingPop() + 72
    frame #4: 0x000000010b7da758 node`node::(anonymous namespace)::PlatformWorkerThread(void*) + 440
    frame #5: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #6: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #4
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x000000010c158449 node`uv_cond_wait + 9
    frame #3: 0x000000010b7dd738 node`node::TaskQueue<v8::Task>::BlockingPop() + 72
    frame #4: 0x000000010b7da758 node`node::(anonymous namespace)::PlatformWorkerThread(void*) + 440
    frame #5: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #6: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #5
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x000000010c158449 node`uv_cond_wait + 9
    frame #3: 0x000000010b7dd738 node`node::TaskQueue<v8::Task>::BlockingPop() + 72
    frame #4: 0x000000010b7da758 node`node::(anonymous namespace)::PlatformWorkerThread(void*) + 440
    frame #5: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #6: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #6
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x000000010c158449 node`uv_cond_wait + 9
    frame #3: 0x000000010b7dd738 node`node::TaskQueue<v8::Task>::BlockingPop() + 72
    frame #4: 0x000000010b7da758 node`node::(anonymous namespace)::PlatformWorkerThread(void*) + 440
    frame #5: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #6: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #7
    frame #0: 0x00007fff2034a2f6 libsystem_kernel.dylib`semaphore_wait_trap + 10
    frame #1: 0x000000010c158a50 node`uv_sem_wait + 16
    frame #2: 0x000000010b84bde3 node`node::inspector::(anonymous namespace)::StartIoThreadMain(void*) + 19
    frame #3: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #4: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #8
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x000000010c158449 node`uv_cond_wait + 9
    frame #3: 0x000000010c145569 node`worker + 89
    frame #4: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #5: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #9
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x000000010c158449 node`uv_cond_wait + 9
    frame #3: 0x000000010c145569 node`worker + 89
    frame #4: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #5: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #10
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x000000010c158449 node`uv_cond_wait + 9
    frame #3: 0x000000010c145569 node`worker + 89
    frame #4: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #5: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #11
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x000000010c158449 node`uv_cond_wait + 9
    frame #3: 0x000000010c145569 node`worker + 89
    frame #4: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #5: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #12
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x00007fff202e8d72 libc++.1.dylib`std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&) + 18
    frame #3: 0x00000001104304bf isolated_vm.node`ivm::Scheduler::AsyncWait::Wait() + 111
    frame #4: 0x0000000110434a35 isolated_vm.node`ivm::ThreePhaseTask::RunSync(ivm::IsolateHolder&, bool) + 597
    frame #5: 0x000000011047ef5c isolated_vm.node`void ivm::detail::CallbackMaker<v8::Local<v8::Value> (*)(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>), &(v8::Local<v8::Value> ivm::detail::unbind_member_function<v8::Local<v8::Value> (ivm::ReferenceHandle::*)(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)>::invoke<&(v8::Local<v8::Value> ivm::ReferenceHandle::Apply<0>(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>))>(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)), -1, v8::FunctionCallbackInfo<v8::Value> const&>::Spread<0ul, 1ul, 2ul, 3ul>(v8::FunctionCallbackInfo<v8::Value> const&, std::__1::integer_sequence<unsigned long, 0ul, 1ul, 2ul, 3ul>)::'lambda'()::operator()() const + 524
    frame #6: 0x000000011047eccb isolated_vm.node`void ivm::detail::RunBarrier<ivm::detail::CallbackMaker<v8::Local<v8::Value> (*)(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>), &(v8::Local<v8::Value> ivm::detail::unbind_member_function<v8::Local<v8::Value> (ivm::ReferenceHandle::*)(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)>::invoke<&(v8::Local<v8::Value> ivm::ReferenceHandle::Apply<0>(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>))>(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)), -1, v8::FunctionCallbackInfo<v8::Value> const&>::Callback(v8::FunctionCallbackInfo<v8::Value> const&)::'lambda'()>(0) + 27
    frame #7: 0x000000010b95613e node`v8::internal::FunctionCallbackArguments::Call(v8::internal::CallHandlerInfo) + 462
    frame #8: 0x000000010b955836 node`v8::internal::MaybeHandle<v8::internal::Object> v8::internal::(anonymous namespace)::HandleApiCallHelper<false>(v8::internal::Isolate*, v8::internal::Handle<v8::internal::HeapObject>, v8::internal::Handle<v8::internal::HeapObject>, v8::internal::Handle<v8::internal::FunctionTemplateInfo>, v8::internal::Handle<v8::internal::Object>, v8::internal::BuiltinArguments) + 550
    frame #9: 0x000000010b954f8f node`v8::internal::Builtin_Impl_HandleApiCall(v8::internal::BuiltinArguments, v8::internal::Isolate*) + 255
    frame #10: 0x000000010c1d5fb9 node`Builtins_CEntry_Return1_DontSaveFPRegs_ArgvOnStack_BuiltinExit + 57
    frame #11: 0x000000010c16952a node`Builtins_InterpreterEntryTrampoline + 202
    frame #12: 0x000000010c16952a node`Builtins_InterpreterEntryTrampoline + 202
    frame #13: 0x000000010c16952a node`Builtins_InterpreterEntryTrampoline + 202
    frame #14: 0x000000010c21c32e node`Builtins_PromiseFulfillReactionJob + 46
    frame #15: 0x000000010c18b166 node`Builtins_RunMicrotasks + 614
    frame #16: 0x000000010c167683 node`Builtins_JSRunMicrotasksEntry + 131
    frame #17: 0x000000010ba2a540 node`v8::internal::(anonymous namespace)::Invoke(v8::internal::Isolate*, v8::internal::(anonymous namespace)::InvokeParams const&) + 2448
    frame #18: 0x000000010ba2aa03 node`v8::internal::(anonymous namespace)::InvokeWithTryCatch(v8::internal::Isolate*, v8::internal::(anonymous namespace)::InvokeParams const&) + 83
    frame #19: 0x000000010ba2aae1 node`v8::internal::Execution::TryRunMicrotasks(v8::internal::Isolate*, v8::internal::MicrotaskQueue*, v8::internal::MaybeHandle<v8::internal::Object>*) + 81
    frame #20: 0x000000010ba51e62 node`v8::internal::MicrotaskQueue::RunMicrotasks(v8::internal::Isolate*) + 322
    frame #21: 0x000000010ba51cd1 node`v8::internal::MicrotaskQueue::PerformCheckpoint(v8::Isolate*) + 97
    frame #22: 0x000000010ba45178 node`v8::internal::Isolate::FireCallCompletedCallback(v8::internal::MicrotaskQueue*) + 56
    frame #23: 0x000000010b91348b node`v8::Promise::Resolver::Resolve(v8::Local<v8::Context>, v8::Local<v8::Value>) + 379
    frame #24: 0x00000001104897f3 isolated_vm.node`ivm::(anonymous namespace)::TransferablePromiseHolder::ResolveTask::Run() + 195
    frame #25: 0x0000000110423da5 isolated_vm.node`ivm::IsolateEnvironment::AsyncEntry() + 2405
    frame #26: 0x00000001104307b1 isolated_vm.node`ivm::IsolatedScheduler::SendWake()::$_2::__invoke(bool, void*) + 49
    frame #27: 0x000000011043a524 isolated_vm.node`void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, ivm::thread_pool_t::new_thread(std::__1::lock_guard<std::__1::mutex>&)::$_1> >(void*) + 164
    frame #28: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #29: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #13
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x00007fff202e8d72 libc++.1.dylib`std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&) + 18
    frame #3: 0x000000011043a4ec isolated_vm.node`void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, ivm::thread_pool_t::new_thread(std::__1::lock_guard<std::__1::mutex>&)::$_1> >(void*) + 108
    frame #4: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #5: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #14
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x00007fff202e8d72 libc++.1.dylib`std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&) + 18
    frame #3: 0x00000001104304bf isolated_vm.node`ivm::Scheduler::AsyncWait::Wait() + 111
    frame #4: 0x0000000110434a35 isolated_vm.node`ivm::ThreePhaseTask::RunSync(ivm::IsolateHolder&, bool) + 597
    frame #5: 0x000000011047ef5c isolated_vm.node`void ivm::detail::CallbackMaker<v8::Local<v8::Value> (*)(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>), &(v8::Local<v8::Value> ivm::detail::unbind_member_function<v8::Local<v8::Value> (ivm::ReferenceHandle::*)(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)>::invoke<&(v8::Local<v8::Value> ivm::ReferenceHandle::Apply<0>(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>))>(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)), -1, v8::FunctionCallbackInfo<v8::Value> const&>::Spread<0ul, 1ul, 2ul, 3ul>(v8::FunctionCallbackInfo<v8::Value> const&, std::__1::integer_sequence<unsigned long, 0ul, 1ul, 2ul, 3ul>)::'lambda'()::operator()() const + 524
    frame #6: 0x000000011047eccb isolated_vm.node`void ivm::detail::RunBarrier<ivm::detail::CallbackMaker<v8::Local<v8::Value> (*)(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>), &(v8::Local<v8::Value> ivm::detail::unbind_member_function<v8::Local<v8::Value> (ivm::ReferenceHandle::*)(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)>::invoke<&(v8::Local<v8::Value> ivm::ReferenceHandle::Apply<0>(v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>))>(ivm::ReferenceHandle&, v8::MaybeLocal<v8::Value>, v8::Maybe<ivm::ArrayRange>, v8::MaybeLocal<v8::Object>)), -1, v8::FunctionCallbackInfo<v8::Value> const&>::Callback(v8::FunctionCallbackInfo<v8::Value> const&)::'lambda'()>(0) + 27
    frame #7: 0x000000010b95613e node`v8::internal::FunctionCallbackArguments::Call(v8::internal::CallHandlerInfo) + 462
    frame #8: 0x000000010b955836 node`v8::internal::MaybeHandle<v8::internal::Object> v8::internal::(anonymous namespace)::HandleApiCallHelper<false>(v8::internal::Isolate*, v8::internal::Handle<v8::internal::HeapObject>, v8::internal::Handle<v8::internal::HeapObject>, v8::internal::Handle<v8::internal::FunctionTemplateInfo>, v8::internal::Handle<v8::internal::Object>, v8::internal::BuiltinArguments) + 550
    frame #9: 0x000000010b954f8f node`v8::internal::Builtin_Impl_HandleApiCall(v8::internal::BuiltinArguments, v8::internal::Isolate*) + 255
    frame #10: 0x000000010c1d5fb9 node`Builtins_CEntry_Return1_DontSaveFPRegs_ArgvOnStack_BuiltinExit + 57
    frame #11: 0x000000010c16952a node`Builtins_InterpreterEntryTrampoline + 202
    frame #12: 0x000000010c16952a node`Builtins_InterpreterEntryTrampoline + 202
    frame #13: 0x000000010c16952a node`Builtins_InterpreterEntryTrampoline + 202
    frame #14: 0x000000010c21c32e node`Builtins_PromiseFulfillReactionJob + 46
    frame #15: 0x000000010c18b166 node`Builtins_RunMicrotasks + 614
    frame #16: 0x000000010c167683 node`Builtins_JSRunMicrotasksEntry + 131
    frame #17: 0x000000010ba2a540 node`v8::internal::(anonymous namespace)::Invoke(v8::internal::Isolate*, v8::internal::(anonymous namespace)::InvokeParams const&) + 2448
    frame #18: 0x000000010ba2aa03 node`v8::internal::(anonymous namespace)::InvokeWithTryCatch(v8::internal::Isolate*, v8::internal::(anonymous namespace)::InvokeParams const&) + 83
    frame #19: 0x000000010ba2aae1 node`v8::internal::Execution::TryRunMicrotasks(v8::internal::Isolate*, v8::internal::MicrotaskQueue*, v8::internal::MaybeHandle<v8::internal::Object>*) + 81
    frame #20: 0x000000010ba51e62 node`v8::internal::MicrotaskQueue::RunMicrotasks(v8::internal::Isolate*) + 322
    frame #21: 0x000000010ba51cd1 node`v8::internal::MicrotaskQueue::PerformCheckpoint(v8::Isolate*) + 97
    frame #22: 0x000000010ba45178 node`v8::internal::Isolate::FireCallCompletedCallback(v8::internal::MicrotaskQueue*) + 56
    frame #23: 0x000000010b91348b node`v8::Promise::Resolver::Resolve(v8::Local<v8::Context>, v8::Local<v8::Value>) + 379
    frame #24: 0x00000001104897f3 isolated_vm.node`ivm::(anonymous namespace)::TransferablePromiseHolder::ResolveTask::Run() + 195
    frame #25: 0x0000000110423da5 isolated_vm.node`ivm::IsolateEnvironment::AsyncEntry() + 2405
    frame #26: 0x00000001104307b1 isolated_vm.node`ivm::IsolatedScheduler::SendWake()::$_2::__invoke(bool, void*) + 49
    frame #27: 0x000000011043a524 isolated_vm.node`void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, ivm::thread_pool_t::new_thread(std::__1::lock_guard<std::__1::mutex>&)::$_1> >(void*) + 164
    frame #28: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #29: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #15
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x00007fff202e8d72 libc++.1.dylib`std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&) + 18
    frame #3: 0x00000001104304bf isolated_vm.node`ivm::Scheduler::AsyncWait::Wait() + 111
    frame #4: 0x0000000110434a35 isolated_vm.node`ivm::ThreePhaseTask::RunSync(ivm::IsolateHolder&, bool) + 597
    frame #5: 0x00000001104791e1 isolated_vm.node`v8::Local<v8::Value> ivm::ReferenceHandle::Get<0>(v8::Local<v8::Value>, v8::MaybeLocal<v8::Object>) + 49
    frame #6: 0x00000001104794cd isolated_vm.node`void ivm::detail::CallbackMaker<v8::Local<v8::Value> (*)(ivm::ReferenceHandle&, v8::Local<v8::Value>, v8::MaybeLocal<v8::Object>), &(v8::Local<v8::Value> ivm::detail::unbind_member_function<v8::Local<v8::Value> (ivm::ReferenceHandle::*)(v8::Local<v8::Value>, v8::MaybeLocal<v8::Object>)>::invoke<&(v8::Local<v8::Value> ivm::ReferenceHandle::Get<0>(v8::Local<v8::Value>, v8::MaybeLocal<v8::Object>))>(ivm::ReferenceHandle&, v8::Local<v8::Value>, v8::MaybeLocal<v8::Object>)), -1, v8::FunctionCallbackInfo<v8::Value> const&>::Spread<0ul, 1ul, 2ul>(v8::FunctionCallbackInfo<v8::Value> const&, std::__1::integer_sequence<unsigned long, 0ul, 1ul, 2ul>)::'lambda'()::operator()() const + 237
    frame #7: 0x000000011047935b isolated_vm.node`void ivm::detail::RunBarrier<ivm::detail::CallbackMaker<v8::Local<v8::Value> (*)(ivm::ReferenceHandle&, v8::Local<v8::Value>, v8::MaybeLocal<v8::Object>), &(v8::Local<v8::Value> ivm::detail::unbind_member_function<v8::Local<v8::Value> (ivm::ReferenceHandle::*)(v8::Local<v8::Value>, v8::MaybeLocal<v8::Object>)>::invoke<&(v8::Local<v8::Value> ivm::ReferenceHandle::Get<0>(v8::Local<v8::Value>, v8::MaybeLocal<v8::Object>))>(ivm::ReferenceHandle&, v8::Local<v8::Value>, v8::MaybeLocal<v8::Object>)), -1, v8::FunctionCallbackInfo<v8::Value> const&>::Callback(v8::FunctionCallbackInfo<v8::Value> const&)::'lambda'()>(0) + 27
    frame #8: 0x000000010b95613e node`v8::internal::FunctionCallbackArguments::Call(v8::internal::CallHandlerInfo) + 462
    frame #9: 0x000000010b955836 node`v8::internal::MaybeHandle<v8::internal::Object> v8::internal::(anonymous namespace)::HandleApiCallHelper<false>(v8::internal::Isolate*, v8::internal::Handle<v8::internal::HeapObject>, v8::internal::Handle<v8::internal::HeapObject>, v8::internal::Handle<v8::internal::FunctionTemplateInfo>, v8::internal::Handle<v8::internal::Object>, v8::internal::BuiltinArguments) + 550
    frame #10: 0x000000010b954f8f node`v8::internal::Builtin_Impl_HandleApiCall(v8::internal::BuiltinArguments, v8::internal::Isolate*) + 255
    frame #11: 0x000000010c1d5fb9 node`Builtins_CEntry_Return1_DontSaveFPRegs_ArgvOnStack_BuiltinExit + 57
    frame #12: 0x000000010c16952a node`Builtins_InterpreterEntryTrampoline + 202
    frame #13: 0x000000010c16952a node`Builtins_InterpreterEntryTrampoline + 202
    frame #14: 0x000000010c21c32e node`Builtins_PromiseFulfillReactionJob + 46
    frame #15: 0x000000010c18b166 node`Builtins_RunMicrotasks + 614
    frame #16: 0x000000010c167683 node`Builtins_JSRunMicrotasksEntry + 131
    frame #17: 0x000000010ba2a540 node`v8::internal::(anonymous namespace)::Invoke(v8::internal::Isolate*, v8::internal::(anonymous namespace)::InvokeParams const&) + 2448
    frame #18: 0x000000010ba2aa03 node`v8::internal::(anonymous namespace)::InvokeWithTryCatch(v8::internal::Isolate*, v8::internal::(anonymous namespace)::InvokeParams const&) + 83
    frame #19: 0x000000010ba2aae1 node`v8::internal::Execution::TryRunMicrotasks(v8::internal::Isolate*, v8::internal::MicrotaskQueue*, v8::internal::MaybeHandle<v8::internal::Object>*) + 81
    frame #20: 0x000000010ba51e62 node`v8::internal::MicrotaskQueue::RunMicrotasks(v8::internal::Isolate*) + 322
    frame #21: 0x000000010ba51cd1 node`v8::internal::MicrotaskQueue::PerformCheckpoint(v8::Isolate*) + 97
    frame #22: 0x000000010ba45178 node`v8::internal::Isolate::FireCallCompletedCallback(v8::internal::MicrotaskQueue*) + 56
    frame #23: 0x000000010b91348b node`v8::Promise::Resolver::Resolve(v8::Local<v8::Context>, v8::Local<v8::Value>) + 379
    frame #24: 0x00000001104897f3 isolated_vm.node`ivm::(anonymous namespace)::TransferablePromiseHolder::ResolveTask::Run() + 195
    frame #25: 0x0000000110423da5 isolated_vm.node`ivm::IsolateEnvironment::AsyncEntry() + 2405
    frame #26: 0x00000001104307b1 isolated_vm.node`ivm::IsolatedScheduler::SendWake()::$_2::__invoke(bool, void*) + 49
    frame #27: 0x000000011043a524 isolated_vm.node`void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, ivm::thread_pool_t::new_thread(std::__1::lock_guard<std::__1::mutex>&)::$_1> >(void*) + 164
    frame #28: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #29: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #16
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x00007fff202e8d72 libc++.1.dylib`std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&) + 18
    frame #3: 0x000000011043a4ec isolated_vm.node`void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, ivm::thread_pool_t::new_thread(std::__1::lock_guard<std::__1::mutex>&)::$_1> >(void*) + 108
    frame #4: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #5: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15
  thread #17
    frame #0: 0x00007fff2034ccde libsystem_kernel.dylib`__psynch_cvwait + 10
    frame #1: 0x00007fff2037fe49 libsystem_pthread.dylib`_pthread_cond_wait + 1298
    frame #2: 0x00007fff202e8d72 libc++.1.dylib`std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&) + 18
    frame #3: 0x000000011043a4ec isolated_vm.node`void* std::__1::__thread_proxy<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct> >, ivm::thread_pool_t::new_thread(std::__1::lock_guard<std::__1::mutex>&)::$_1> >(void*) + 108
    frame #4: 0x00007fff2037f8fc libsystem_pthread.dylib`_pthread_start + 224
    frame #5: 0x00007fff2037b443 libsystem_pthread.dylib`thread_start + 15

marcusdarmstrong avatar May 25 '22 19:05 marcusdarmstrong

And just to note: that trace is pulled from a service instance that had three isolates lock up; those locks were, however, independent events unrelated to one another.

marcusdarmstrong avatar May 25 '22 19:05 marcusdarmstrong

I guess it's about time for an update. I've still not managed to create a completely minimal reproduction, though I've gotten closer: given sufficient time (<15 minutes) I have a script that consistently produces a deadlocked isolate. Unfortunately, this requires spamming an unrelated machine with http requests—I've not been able to trigger a deadlock connecting to a localhost endpoint, or without a request at all, which would obviously be the end goal.

The important bits:

  • It doesn't actually matter if there are multiple isolates involved. My script can reproduce the issue with a single isolate.
  • The script clearly is relying on some randomness in timing to trigger the underlying issue, but I haven't been able to establish what that is.
  • The network request appears to be critical to exposing whatever the timing issue happens to be. My best guess is that we need the callback into the main thread from the isolate to invoke some IO to get the scheduler in the bad state.
  • The particular host being connected to is also, unfortunately, critical to this timing. I've been able to reproduce the deadlocks while connecting to our production and test environment servers, but not my local linux box. I've put google.com in the attached snippet so as to not induce random readers of this ticket to loop http requests to our servers, but google.com doesn't seem to cause the deadlock.
  • The number of bytes does matter. I've only seen the script reproduce the issue with >500.
  • I've checked for both TLS and DNS being factors and they aren't.
const ivm = require('isolated-vm');
const { request } = require('http');
const crypto = require('crypto');

(async () => {
  const context = new ivm.Isolate().createContextSync();
  const body = crypto.randomBytes(1200).toString('hex');
  let reqId = 0;
  while (true) {
    try {
      const start = Date.now();
      await new Promise((resolve, reject) => {
        context.global.setSync(
          'makeRequest',
          new ivm.Reference(() => {
            request(
              'http://www.google.com',
              {
                method: 'POST',
              },
              resolve,
            )
              .on('error', reject)
              .end(body);
          }),
        );
        context.eval('makeRequest.apply(undefined, []);');
      });
      console.log('Handled Req', reqId++, `in ${Date.now() - start}ms`);
    } catch (e) {
      console.error('Error', e);
    }
  }
})();

marcusdarmstrong avatar Jul 13 '22 21:07 marcusdarmstrong

And of course, immediately after hitting the comment button I found that yahoo.com is a viable public server for reproducing the issue... at least for my network connection.

marcusdarmstrong avatar Jul 13 '22 21:07 marcusdarmstrong

Any news on this? We would love to upgrade to 4.4.1 but are hesitant as long as this remains an issue.

srieding avatar Oct 05 '22 07:10 srieding

Hi there please let me know if v4.4.2 works for you.

laverdet avatar Oct 29 '22 18:10 laverdet

It's running on production for a while now without any problems! 🎉

everhardt avatar Nov 01 '22 10:11 everhardt

Speaking on behalf of @marcusdarmstrong, we've had this deployed for a bit over a day now and are likewise seeing no problems 🌞.

Thanks for your continued support!

isker avatar Nov 03 '22 20:11 isker