rt-thread icon indicating copy to clipboard operation
rt-thread copied to clipboard

Should an "rt_hw_spin_trylock" attempt be made before spinning on a spinlock, to avoid CPU busy-waiting?【是否考虑在spinlock之前先尝试获取锁trylock,避免CPU忙等】

Open brainengineerX opened this issue 2 years ago • 3 comments

The Linux kernel makes a trylock attempt before it starts spinning on a spinlock. This attempt can improve performance under heavy lock contention. Refer to the following kernel code:

/*
 * Make one attempt to take the lock without waiting for it to be released.
 *
 * Returns 1 if the lock word was updated (after issuing smp_mb()), and 0 if
 * the lock word indicated contention. The do/while loop only retries when
 * the exclusive store itself fails; it does not spin on a held lock.
 */
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
	unsigned long contended, res;
	u32 slock;

	/* Presumably a prefetch-for-write hint on the lock word. */
	prefetchw(&lock->slock);
	do {
		/*
		 * Operands: %0 = slock, %1 = contended, %2 = res,
		 *           %3 = &lock->slock, %4 = 1 << TICKET_SHIFT.
		 *
		 *   ldrex   - exclusive-load the lock word into slock
		 *   mov     - res = 0, so the loop ends if strexeq is skipped
		 *   subs    - contended = slock - (slock rotated right by 16);
		 *             zero only when both 16-bit halves are equal
		 *             (presumably the owner/next tickets - confirm
		 *             against the arch_spinlock_t layout)
		 *   addeq   - only if that result was zero: add 1 << TICKET_SHIFT
		 *   strexeq - only if that result was zero: exclusive-store the
		 *             new value; res is non-zero if the store failed
		 */
		__asm__ __volatile__(
		"	ldrex	%0, [%3]\n"
		"	mov	%2, #0\n"
		"	subs	%1, %0, %0, ror #16\n"
		"	addeq	%0, %0, %4\n"
		"	strexeq	%2, %0, [%3]"
		: "=&r" (slock), "=&r" (contended), "=&r" (res)
		: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
		: "cc");
	} while (res);	/* retry only when the exclusive store failed */

	if (!contended) {
		/* Barrier is issued only on the success path, before returning. */
		smp_mb();
		return 1;
	} else {
		return 0;
	}
}

brainengineerX avatar Dec 07 '23 03:12 brainengineerX

Thanks. 有什么方法可以做改动后的性能测试方法吗?

BernardXiong avatar Dec 10 '23 07:12 BernardXiong

此函数在aarch64上已经有了,只不过其他架构还未添加

xqyjlj avatar Dec 11 '23 08:12 xqyjlj

Thanks. 有什么方法可以做改动后的性能测试方法吗?

多线程同时调同一把锁,用计时器观察不同情况下完成任务的时间。 也可以调整临界区的占用时长,模拟不同大小临界区情况下,锁的表现性能; 锁可以增加计数器来观察锁的竞争情况、失败情况、等待时长 以下是测试锁的例子

#include <chrono>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>

// Mutex that every benchmark worker contends for.
std::mutex myMutex;
// Not touched by threadFunction; kept because code outside this block may use it.
int sharedVariable = 0;

/**
 * @brief Benchmark worker: repeatedly acquires myMutex, holds it for a
 *        simulated critical section, then releases it and backs off.
 *
 * @param competitionCount        Number of lock/unlock rounds to perform.
 * @param criticalSectionDuration Time, in milliseconds, spent sleeping while
 *                                the lock is held.
 * @param lockYieldTime           Time, in milliseconds, spent sleeping after
 *                                the lock has been released.
 */
void threadFunction(int competitionCount, int criticalSectionDuration, int lockYieldTime) {
    for (int i = 0; i < competitionCount; ++i) {
        {
            // Hold the lock only for the simulated critical section; the
            // guard's scope ends here so the mutex is released before the
            // back-off below.
            std::lock_guard<std::mutex> lock(myMutex);
            std::this_thread::sleep_for(std::chrono::milliseconds(criticalSectionDuration));
        }

        // Lock hand-over window: the mutex is free here, so other workers
        // can acquire it while this thread yields and sleeps.
        std::this_thread::yield();
        std::this_thread::sleep_for(std::chrono::milliseconds(lockYieldTime));
    }
}

/**
 * @brief Runs the lock-contention benchmark: starts the worker threads,
 *        waits for all of them, and prints the total wall-clock time.
 *
 * @return 0 after the elapsed time has been printed.
 */
int main() {
    const int competitionCount = 1000;
    const int criticalSectionDuration = 10;  // in milliseconds
    const int lockYieldTime = 5;  // in milliseconds

    // hardware_concurrency() may return 0 when the value cannot be
    // determined, and a single worker produces no contention at all, so
    // always run at least two workers.
    const unsigned int detectedThreads = std::thread::hardware_concurrency();
    const int numThreads = detectedThreads < 2 ? 2 : static_cast<int>(detectedThreads);

    const auto startTime = std::chrono::steady_clock::now();

    std::vector<std::thread> threads;
    threads.reserve(numThreads);

    for (int i = 0; i < numThreads; ++i) {
        threads.emplace_back(threadFunction, competitionCount, criticalSectionDuration, lockYieldTime);
    }

    for (auto& thread : threads) {
        thread.join();
    }

    const auto endTime = std::chrono::steady_clock::now();
    const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();

    std::cout << "总执行时间:" << duration << " 毫秒。" << '\n';

    return 0;
}

可以参考下Linux的spinlock_api_smp.h:


/*
 * LOCK_CONTENDED(_lock, try, lock)
 *
 * First calls try(_lock). Only if that returns zero does it report the
 * contention via lock_contended() and then call lock(_lock). In both cases
 * lock_acquired() is called afterwards. The do { } while (0) wrapper makes
 * the macro usable as a single statement.
 */
#define LOCK_CONTENDED(_lock, try, lock)			\
do {								\
	if (!try(_lock)) {					\
		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
		lock(_lock);					\
	}							\
	lock_acquired(&(_lock)->dep_map, _RET_IP_);			\
} while (0)

....

/*
 * Takes the lock after calling local_irq_save(flags) and preempt_disable(),
 * and returns flags to the caller (presumably so the matching unlock can
 * restore them - that path is not shown here).
 *
 * The acquisition goes through LOCK_CONTENDED, so do_raw_spin_trylock is
 * attempted first and do_raw_spin_lock is called only if that attempt fails.
 */
static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
{
	unsigned long flags;

	local_irq_save(flags);
	preempt_disable();
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	/* trylock first; fall back to the blocking lock on failure */
	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
	return flags;
}

/*
 * Takes the lock after calling local_irq_disable() and preempt_disable().
 * Unlike the irqsave variant, nothing is returned to the caller.
 *
 * The acquisition goes through LOCK_CONTENDED, so do_raw_spin_trylock is
 * attempted first and do_raw_spin_lock is called only if that attempt fails.
 */
static inline void __raw_spin_lock_irq(raw_spinlock_t *lock)
{
	local_irq_disable();
	preempt_disable();
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	/* trylock first; fall back to the blocking lock on failure */
	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
}

/*
 * Takes the lock after calling
 * __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET). This variant does
 * not call preempt_disable() itself.
 *
 * The acquisition goes through LOCK_CONTENDED, so do_raw_spin_trylock is
 * attempted first and do_raw_spin_lock is called only if that attempt fails.
 */
static inline void __raw_spin_lock_bh(raw_spinlock_t *lock)
{
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	/* trylock first; fall back to the blocking lock on failure */
	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
}

/*
 * Takes the lock after calling preempt_disable(); no interrupt or
 * bottom-half call is made in this variant.
 *
 * The acquisition goes through LOCK_CONTENDED, so do_raw_spin_trylock is
 * attempted first and do_raw_spin_lock is called only if that attempt fails.
 */
static inline void __raw_spin_lock(raw_spinlock_t *lock)
{
	preempt_disable();
	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
	/* trylock first; fall back to the blocking lock on failure */
	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
}

LOCK_CONTENDED 在自旋之前都会先执行 trylock

brainengineerX avatar Jan 03 '24 07:01 brainengineerX