dmlc-core icon indicating copy to clipboard operation
dmlc-core copied to clipboard

Perform thorough audit to address concurrency problems

Open hcho3 opened this issue 5 years ago • 3 comments

When I compiled dmlc-core with Thread Sanitizer, I got a massive heap of concurrency errors: log.txt.

This is bad news: the multi-threaded code in dmlc-core (threaded iterator, thread groups, concurrent queues, etc.) has concurrency issues such as data races.

Related: dmlc/dmlc-core#550, dmlc/dmlc-core#505

cc @trivialfis

hcho3 avatar Aug 01 '19 22:08 hcho3

Excellent resource: C++ Concurrency in Action

hcho3 avatar Aug 01 '19 22:08 hcho3

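Potential deadlock reported while running the lock-free queue test (the lock-order inversion is inside ThreadGroup: `Thread::launch` → `add_thread` versus `join_all` → `Thread::get_id`):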

[----------] 2 tests from Lockfree
[ RUN      ] Lockfree.ConcurrentQueue
==================
WARNING: ThreadSanitizer: lock-order-inversion (potential deadlock) (pid=12822)
  Cycle in lock order graph: M2609 (0x7b2400002f68) => M210 (0x7ffd8acc5a58) => M2609

  Mutex M210 acquired here while holding mutex M2609 in main thread:
    #0 pthread_mutex_lock <null> (libtsan.so.0+0x3fadb)
    #1 __gthread_mutex_lock /usr/include/x86_64-linux-gnu/c++/7/bits/gthr-default.h:748 (dmlc_unit_tests+0x6c2bf)
    #2 __gthread_recursive_mutex_lock /usr/include/x86_64-linux-gnu/c++/7/bits/gthr-default.h:810 (dmlc_unit_tests+0x6c345)
    #3 std::recursive_mutex::lock() <null> (dmlc_unit_tests+0x6fd48)
    #4 std::unique_lock<std::recursive_mutex>::lock() <null> (dmlc_unit_tests+0x75067)
    #5 std::unique_lock<std::recursive_mutex>::unique_lock(std::recursive_mutex&) /usr/include/c++/7/bits/std_mutex.h:197 (dmlc_unit_tests+0x72d04)
    #6 dmlc::ThreadGroup::add_thread(std::shared_ptr<dmlc::ThreadGroup::Thread>) /home/ubuntu/dmlc-core/include/dmlc/thread_group.h:370 (dmlc_unit_tests+0x71732)
    #7 bool dmlc::ThreadGroup::Thread::launch<int (*)(int, std::shared_ptr<LFQThreadData<dmlc::moodycamel::ConcurrentQueue<int, dmlc::moodycamel::ConcurrentQueueDefaultTraits> > >), unsigned long, std::shared_ptr<LFQThreadData<dmlc::moodycamel::ConcurrentQueue<int, dmlc::moodycamel::ConcurrentQueueDefaultTraits> > > >(std::shared_ptr<dmlc::ThreadGroup::Thread>, bool, int (*)(int, std::shared_ptr<LFQThreadData<dmlc::moodycamel::ConcurrentQueue<int, dmlc::moodycamel::ConcurrentQueueDefaultTraits> > >), unsigned long, std::shared_ptr<LFQThreadData<dmlc::moodycamel::ConcurrentQueue<int, dmlc::moodycamel::ConcurrentQueueDefaultTraits> > >) /home/ubuntu/dmlc-core/include/dmlc/thread_group.h:776 (dmlc_unit_tests+0x76669)
    #8 bool dmlc::ThreadGroup::create<int (*)(int, std::shared_ptr<LFQThreadData<dmlc::moodycamel::ConcurrentQueue<int, dmlc::moodycamel::ConcurrentQueueDefaultTraits> > >), dmlc::ThreadGroup::Thread, unsigned long, std::shared_ptr<LFQThreadData<dmlc::moodycamel::ConcurrentQueue<int, dmlc::moodycamel::ConcurrentQueueDefaultTraits> > > >(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool, int (*)(int, std::shared_ptr<LFQThreadData<dmlc::moodycamel::ConcurrentQueue<int, dmlc::moodycamel::ConcurrentQueueDefaultTraits> > >), unsigned long, std::shared_ptr<LFQThreadData<dmlc::moodycamel::ConcurrentQueue<int, dmlc::moodycamel::ConcurrentQueueDefaultTraits> > >) /home/ubuntu/dmlc-core/include/dmlc/thread_group.h:493 (dmlc_unit_tests+0x7411c)
    #9 Lockfree_ConcurrentQueue_Test::TestBody() /home/ubuntu/dmlc-core/test/unittest/unittest_lockfree.cc:83 (dmlc_unit_tests+0x6cd4d)
    #10 void testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2443 (dmlc_unit_tests+0x1597ea)
    #11 void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2479 (dmlc_unit_tests+0x15121b)
    #12 testing::Test::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2517 (dmlc_unit_tests+0x12805b)
    #13 testing::TestInfo::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2693 (dmlc_unit_tests+0x128c34)
    #14 testing::TestCase::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2811 (dmlc_unit_tests+0x1295be)
    #15 testing::internal::UnitTestImpl::RunAllTests() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:5177 (dmlc_unit_tests+0x1363f3)
    #16 bool testing::internal::HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x15b18e)
    #17 bool testing::internal::HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x1526bd)
    #18 testing::UnitTest::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:4786 (dmlc_unit_tests+0x13479f)
    #19 RUN_ALL_TESTS() /home/ubuntu/dmlc-core/build/googletest-src/googletest/include/gtest/gtest.h:2341 (dmlc_unit_tests+0x8d1f6)
    #20 main /home/ubuntu/dmlc-core/test/unittest/unittest_main.cc:7 (dmlc_unit_tests+0x8d10a)

    Hint: use TSAN_OPTIONS=second_deadlock_stack=1 to get more informative warning message

  Mutex M2609 acquired here while holding mutex M210 in main thread:
    #0 pthread_mutex_lock <null> (libtsan.so.0+0x3fadb)
    #1 __gthread_mutex_lock /usr/include/x86_64-linux-gnu/c++/7/bits/gthr-default.h:748 (dmlc_unit_tests+0x6c2bf)
    #2 __gthread_recursive_mutex_lock /usr/include/x86_64-linux-gnu/c++/7/bits/gthr-default.h:810 (dmlc_unit_tests+0x6c345)
    #3 std::recursive_mutex::lock() <null> (dmlc_unit_tests+0x6fd48)
    #4 std::unique_lock<std::recursive_mutex>::lock() <null> (dmlc_unit_tests+0x75067)
    #5 std::unique_lock<std::recursive_mutex>::unique_lock(std::recursive_mutex&) /usr/include/c++/7/bits/std_mutex.h:197 (dmlc_unit_tests+0x72d04)
    #6 dmlc::ThreadGroup::Thread::get_id() const /home/ubuntu/dmlc-core/include/dmlc/thread_group.h:247 (dmlc_unit_tests+0x709f2)
    #7 dmlc::ThreadGroup::is_this_thread_in() const /home/ubuntu/dmlc-core/include/dmlc/thread_group.h:337 (dmlc_unit_tests+0x715e2)
    #8 dmlc::ThreadGroup::join_all() /home/ubuntu/dmlc-core/include/dmlc/thread_group.h:409 (dmlc_unit_tests+0x71e06)
    #9 Lockfree_ConcurrentQueue_Test::TestBody() /home/ubuntu/dmlc-core/test/unittest/unittest_lockfree.cc:86 (dmlc_unit_tests+0x6cdd7)
    #10 void testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2443 (dmlc_unit_tests+0x1597ea)
    #11 void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2479 (dmlc_unit_tests+0x15121b)
    #12 testing::Test::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2517 (dmlc_unit_tests+0x12805b)
    #13 testing::TestInfo::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2693 (dmlc_unit_tests+0x128c34)
    #14 testing::TestCase::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:2811 (dmlc_unit_tests+0x1295be)
    #15 testing::internal::UnitTestImpl::RunAllTests() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:5177 (dmlc_unit_tests+0x1363f3)
    #16 bool testing::internal::HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x15b18e)
    #17 bool testing::internal::HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*) <null> (dmlc_unit_tests+0x1526bd)
    #18 testing::UnitTest::Run() /home/ubuntu/dmlc-core/build/googletest-src/googletest/src/gtest.cc:4786 (dmlc_unit_tests+0x13479f)
    #19 RUN_ALL_TESTS() /home/ubuntu/dmlc-core/build/googletest-src/googletest/include/gtest/gtest.h:2341 (dmlc_unit_tests+0x8d1f6)
    #20 main /home/ubuntu/dmlc-core/test/unittest/unittest_main.cc:7 (dmlc_unit_tests+0x8d10a)

SUMMARY: ThreadSanitizer: lock-order-inversion (potential deadlock) (/usr/lib/x86_64-linux-gnu/libtsan.so.0+0x3fadb) in __interceptor_pthread_mutex_lock

hcho3 avatar Aug 01 '19 22:08 hcho3

#554 fixes #550 and, in addition, adds a CMake option to compile with Sanitizer.

hcho3 avatar Aug 02 '19 01:08 hcho3