rt_mutex_take 函数 BUG:多个互斥锁嵌套使用时,“thread == rt_thread_self()” 断言检查无法通过
使用的 RT-Thread 版本(commit SHA-1):aaf5462c6d4ae9466a0a8cd2a4bb00cfe1fef2f2
下面是我自己测试时能够 100% 复现该问题的测试代码:
/* Created in main(); taken by threadA and, while holding mutex2, by threadB. */
static rt_mutex_t mutex1 = RT_NULL;
/* Created in main(); taken first by threadB and later requested by threadC. */
static rt_mutex_t mutex2 = RT_NULL;
/*
 * Entry function of "threadA" (created with priority 10 in main()).
 *
 * Waits briefly, takes mutex1, keeps it across a long delay and then
 * releases it. The numbered trace lines ("threadA 1" .. "threadA 4") mark
 * each step so the console log shows how far the thread got.
 *
 * pvParameter is unused.
 */
static void threadA(void *pvParameter)
{
    /* Delay values passed to rt_thread_delay(). */
    enum { START_DELAY = 300, HOLD_DELAY = 2000 };

    /* Let the other threads get created before touching the mutex. */
    rt_thread_delay(START_DELAY);
    rt_kprintf("%s 1\n", __func__);

    /* First taker of mutex1: nobody else has requested it yet at this point. */
    rt_mutex_take(mutex1, RT_WAITING_FOREVER);
    rt_kprintf("%s 2\n", __func__);

    /* Hold mutex1 long enough for threadB to request it in the meantime. */
    rt_thread_delay(HOLD_DELAY);
    rt_kprintf("%s 3\n", __func__);

    rt_mutex_release(mutex1);
    rt_kprintf("%s 4\n", __func__);
}
/*
 * Entry function of "threadB" (created with priority 9 in main()).
 *
 * Takes mutex2 and then, while still holding it, mutex1 -- this is the
 * nested acquisition the report is about. Both mutexes are released in the
 * reverse order after a long delay. The numbered trace lines mark progress.
 *
 * pvParameter is unused.
 */
static void threadB(void *pvParameter)
{
    /* Delay values passed to rt_thread_delay(). */
    enum { START_DELAY = 600, HOLD_DELAY = 2000 };

    /* Start after threadA (300) so that mutex1 is already owned by it. */
    rt_thread_delay(START_DELAY);
    rt_kprintf("%s 1\n", __func__);

    /* Nested locking: outer mutex2 first, then inner mutex1. */
    rt_mutex_take(mutex2, RT_WAITING_FOREVER);
    rt_mutex_take(mutex1, RT_WAITING_FOREVER);
    rt_kprintf("%s 2\n", __func__);

    rt_thread_delay(HOLD_DELAY);
    rt_kprintf("%s 3\n", __func__);

    /* Release in the opposite order of acquisition. */
    rt_mutex_release(mutex1);
    rt_mutex_release(mutex2);
    rt_kprintf("%s 4\n", __func__);
}
/*
 * Entry function of "threadC" (created with priority 8 in main()).
 *
 * Waits, then requests mutex2, holds it across a long delay and releases
 * it. According to the log in the report, the assertion fires right after
 * the "threadC 1" trace line, i.e. during this thread's rt_mutex_take().
 *
 * pvParameter is unused.
 */
static void threadC(void *pvParameter)
{
    /* Delay values passed to rt_thread_delay(). */
    enum { START_DELAY = 900, HOLD_DELAY = 2000 };

    /* Start last (after threadA at 300 and threadB at 600). */
    rt_thread_delay(START_DELAY);
    rt_kprintf("%s 1\n", __func__);

    rt_mutex_take(mutex2, RT_WAITING_FOREVER);
    rt_kprintf("%s 2\n", __func__);

    rt_thread_delay(HOLD_DELAY);
    rt_kprintf("%s 3\n", __func__);

    rt_mutex_release(mutex2);
    rt_kprintf("%s 4\n", __func__);
}
/*
 * Test driver: creates the two mutexes, then creates and starts threadA,
 * threadB and threadC, and finally idles forever so the three threads can
 * run the reproduction scenario.
 *
 * Never returns.
 */
int main(void)
{
    /* Arguments shared by all three rt_thread_create() calls. */
    enum { STACK_SIZE = 2048, TIME_SLICE = 5 };
    /* Per-thread priorities; in RT-Thread a smaller value is a higher priority. */
    enum { PRIO_A = 10, PRIO_B = 9, PRIO_C = 8 };
    /* Delay used by the idle loop at the end. */
    enum { IDLE_DELAY = 1000 };

    rt_thread_t tid;

    /* Both mutexes must exist before any thread is started. */
    mutex1 = rt_mutex_create("mutex1", RT_IPC_FLAG_FIFO);
    RT_ASSERT(mutex1);
    mutex2 = rt_mutex_create("mutex2", RT_IPC_FLAG_FIFO);
    RT_ASSERT(mutex2);

    tid = rt_thread_create("threadA", threadA, RT_NULL,
                           STACK_SIZE, PRIO_A, TIME_SLICE);
    RT_ASSERT(tid != RT_NULL);
    rt_thread_startup(tid);

    tid = rt_thread_create("threadB", threadB, RT_NULL,
                           STACK_SIZE, PRIO_B, TIME_SLICE);
    RT_ASSERT(tid != RT_NULL);
    rt_thread_startup(tid);

    tid = rt_thread_create("threadC", threadC, RT_NULL,
                           STACK_SIZE, PRIO_C, TIME_SLICE);
    RT_ASSERT(tid != RT_NULL);
    rt_thread_startup(tid);

    printf("main test ......\n");

    /* Keep the main thread alive without consuming CPU time. */
    for (;;)
    {
        rt_thread_delay(IDLE_DELAY);
    }
}
出现问题时的控制台输出日志:
threadA 1
threadA 2
threadB 1
Enter mutex->owner = threadA
Exit mutex->owner = threadA
threadC 1
Enter mutex->owner = threadB
(thread == rt_thread_self()) assertion failed at function:_rt_thread_suspend, line number:885
Enter mutex->owner = threadA Exit mutex->owner = threadA Enter mutex->owner = threadB
上面这三行是我自己在内核代码里加的输出,用于定位出问题的位置。具体做法是在 rt_mutex_take 函数中添加了两行输出:
/*
 * Abridged excerpt of the kernel's rt_mutex_take(); every "..." line stands
 * for code the reporter left out. Only the branch relevant to the report is
 * shown in full: the mutex has an owner other than `thread` and timeout is
 * non-zero.
 *
 * The two rt_kprintf() calls around _thread_update_priority() are not part of
 * the kernel; the reporter added them to trace the failure.
 */
rt_err_t rt_mutex_take(rt_mutex_t mutex, rt_int32_t timeout)
{
...
/* NOTE(review): `thread` is assigned in the omitted code above; presumably it
 * is the calling thread -- confirm against the full source. */
if (mutex->owner == thread)
{
...
}
else
{
/* whether the mutex has owner thread. */
if (mutex->owner == RT_NULL)
{
...
}
else
{
/* no waiting, return with timeout */
if (timeout == 0)
{
...
}
else
{
...
/* update the priority level of mutex */
/* NOTE(review): `priority` comes from the omitted code above; presumably it
 * is the priority of the thread requesting the mutex -- confirm. */
if (priority < mutex->priority)
{
mutex->priority = priority;
/* The owner's current priority value is larger than the mutex's new one, so
 * the owner's priority is updated to `priority`. */
if (mutex->priority < mutex->owner->current_priority)
{
/* Debug trace added by the reporter: printed only when the owner is not the
 * thread executing this code. */
if(mutex->owner != rt_thread_self())
rt_kprintf("Enter mutex->owner = %s\n",mutex->owner->name);
/* NOTE(review): per the report, this call can end up in rt_thread_suspend()
 * with mutex->owner as the argument, which trips the
 * "thread == rt_thread_self()" assertion when the owner is another thread. */
_thread_update_priority(mutex->owner, priority);
/* Debug trace added by the reporter: a missing "Exit" line in the log means
 * the call above did not return. */
if(mutex->owner != rt_thread_self())
rt_kprintf("Exit mutex->owner = %s\n",mutex->owner->name);
}
}
...
}
}
}
...
return RT_EOK;
}
问题分析:调用链为 rt_mutex_take → _thread_update_priority(ipc.c 第 1202 行调用)→ _ipc_list_suspend(ipc.c 第 770 行调用)→ rt_thread_suspend(ipc.c 第 129 行调用)。到这里已经能看出问题了:rt_thread_suspend 不允许挂起其他线程,只能挂起当前线程自己;而经过这一串调用,最后传入的参数并不是当前线程,导致 “thread == rt_thread_self()” 检查无法通过,程序死锁。
补充一个问题说明:当我使用三个线程、两个互斥锁的时候,程序总是死锁,日志输出 “(thread == rt_thread_self()) assertion failed at function:_rt_thread_suspend, line number:885”。三个线程和两个互斥锁的具体用法,请看上面能 100% 复现问题的代码。
@BernardXiong
@mysterywolf
好的,感谢反馈,我们看一下~
谢谢,这部分后续也加dlog分析下
谢谢,这部分后续也加dlog分析下
我提交了一个 PR 来修复这个问题,不知道你们有没有更好的方法?
我基于最新的代码测试了一下,好像没有复现。

我基于最新的代码测试了一下,好像没有复现。
我刚用 qemu 工程测试,问题依旧,测试代码就是上面那份。你的测试代码不够新;4.1.1 是可以的,我现有的项目已经退回到 4.1.1 工程了。这个问题是后面加入新特性时导致的。

@BernardXiong
@mysterywolf
你好,这个问题还没有更新吗?我认为这个问题挺严重的:凡是涉及内核互斥锁的工程,嵌套使用互斥锁之后都很容易出问题。问题只出现在 4.1.1 之后的版本;经过测试,4.1.1 是没问题的。
我用最新代码复现出来了
我用最新代码复现出来了
什么时候能解决这个问题呢?我曾经提交过一个 PR 来解决这个问题,但是那个 PR 只是简单地针对这个问题,可能考虑得不是很全面。
感谢 已经有人在看了
查看到 ipc 有更新,使用 master 最新代码再次测试,未复现:
stm32f103-atk-nano,qemu
