喜欢coding,因为那是一件伟大的事情,是将无生命的IC赋予灵魂的过程,让我拥有了和上帝一样的成就感。(w1c2g3@163.com)
全部博文(60)
分类: LINUX
2013-11-01 10:47:29
spin_lock是linux中最简单的锁机制,semaphore、mutex、seqlock都是基于spin_lock来实现。
spin_lock包括两个基本操作spin_lock、spin_unlock,以arm为例,看下其具体实现:
[include/linux/spinlock_types.h]
- typedef struct spinlock {
- union {
- struct raw_spinlock rlock;
- #ifdef CONFIG_DEBUG_LOCK_ALLOC
- # define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
- struct {
- u8 __padding[LOCK_PADSIZE];
- struct lockdep_map dep_map;
- };
- #endif
- };
- } spinlock_t;
先不care debug的部分,那么spinlock只有一个成员rlock,其type为raw_spinlock:
[include/linux/spinlock_types.h]
同样,raw_spinlock中只有一个成员raw_lock,其type为arch_spinlock_t:
- typedef struct raw_spinlock {
- arch_spinlock_t raw_lock;
- #ifdef CONFIG_GENERIC_LOCKBREAK
- unsigned int break_lock;
- #endif
- #ifdef CONFIG_DEBUG_SPINLOCK
- unsigned int magic, owner_cpu;
- void *owner;
- #endif
- #ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
- #endif
- } raw_spinlock_t;
[arch/arm/include/asm/spinlock_types.h]
arch_spinlock_t是架构相关的type,其中包括slock和__raw_tickets两个成员。
- typedef struct {
- union {
- u32 slock;
- struct __raw_tickets {
- #ifdef __ARMEB__
- u16 next;
- u16 owner;
- #else
- u16 owner;
- u16 next;
- #endif
- } tickets;
- };
- } arch_spinlock_t;
__ARMEB__代表Big-Endian,这样slock被划分成两部分:
u32 slock
----------------------------------------------------------
| u16 next | u16 owner |
----------------------------------------------------------
bit31 bit16 bit15 bit0
初始化spinlock
[include/linux/spinlock.h]
- #define spin_lock_init(_lock) \
- do { \
- spinlock_check(_lock); \
- raw_spin_lock_init(&(_lock)->rlock); \
- } while (0)
spinlock_check是用来check参数是否是spinlock_t类型,通过rlock成员变量:
[include/linux/spinlock.h]
- static inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
- {
- return &lock->rlock;
- }
[include/linux/spinlock_types.h]
- # define raw_spin_lock_init(lock) \
- do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0)
- #endif
[include/linux/spinlock_types.h]
- #define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
- (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
[arch/arm/include/asm/spinlock_types.h]
- #define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
- { \
- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
- SPIN_DEBUG_INIT(lockname) \
- SPIN_DEP_MAP_INIT(lockname) }
- #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
初始化完成后,slock = tickets.next = tickets.owner = 0
[include/linux/spinlock.h]
[include/linux/spinlock.h]
- static inline void spin_lock(spinlock_t *lock)
- {
- raw_spin_lock(&lock->rlock);
- }
[kernel/spinlock.c]
- #define raw_spin_lock(lock) _raw_spin_lock(lock)
[include/linux/spinlock_api_smp.h]
- void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
- {
- __raw_spin_lock(lock);
- }
- EXPORT_SYMBOL(_raw_spin_lock);
- static inline void __raw_spin_lock(raw_spinlock_t *lock)
- {
- preempt_disable();
- spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
- LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
- }
call到__raw_spin_lock后,首先会利用preempt_disable关闭调度器的抢占性,原因是避免在当前进程持有lock的情况下,CPU被中断后,会调到执行另一个进程,这样,当另一进程也试图spin_lock,因为spin_lock本身不会sleep,就会产生死锁现象。所以要暂时关闭调度器抢占性。
[include/linux/lockdep.h]
- #ifdef CONFIG_DEBUG_LOCK_ALLOC
- # ifdef CONFIG_PROVE_LOCKING
- # define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i)
- # define spin_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i)
- # else
- # define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i)
- # define spin_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, NULL, i)
- # endif
- # define spin_release(l, n, i) lock_release(l, n, i)
- #else
- # define spin_acquire(l, s, t, i) do { } while (0)
- # define spin_release(l, n, i) do { } while (0)
- #endif
spin_acquire是供debug时使用,正常时定义为空。
[include/linux/lockdep.h]
- #define LOCK_CONTENDED(_lock, try, lock) \
- do { \
- if (!try(_lock)) { \
- lock_contended(&(_lock)->dep_map, _RET_IP_); \
- lock(_lock); \
- } \
- lock_acquired(&(_lock)->dep_map, _RET_IP_); \
- } while (0)
- #else /* CONFIG_LOCK_STAT */
- #define lock_contended(lockdep_map, ip) do {} while (0)
- #define lock_acquired(lockdep_map, ip) do {} while (0)
- #define LOCK_CONTENDED(_lock, try, lock) \
- lock(_lock)
- #endif /* CONFIG_LOCK_STAT */
LOCK_CONTENDED macro会依照CONFIG_LOCK_STAT有两种不同的定义:
若定义了 CONFIG_LOCK_STAT:
首先会利用do_raw_spin_trylock来尝试lock一次,如果没有拿到的话,再调用do_raw_spin_lock来进行加lock。
若未定义 CONFIG_LOCK_STAT:
直接调用do_raw_spin_lock来进行加lock。
在目前arm config里,没有定义CONFIG_LOCK_STAT,所以,我们直接跳到do_raw_spin_lock。
[include/linux/spinlock.h]
[arch/arm/include/asm/spinlock.h]
- static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
- {
- __acquire(lock);
- arch_spin_lock(&lock->raw_lock);
- }
- static inline void arch_spin_lock(arch_spinlock_t *lock)
- {
- unsigned long tmp;
- u32 newval;
- arch_spinlock_t lockval;
- __asm__ __volatile__(
- "1: ldrex %0, [%3]\n"
- " add %1, %0, %4\n"
- " strex %2, %1, [%3]\n"
- " teq %2, #0\n"
- " bne 1b"
- : "=&r" (lockval), "=&r" (newval), "=&r" (tmp)
- : "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
- : "cc");
- while (lockval.tickets.next != lockval.tickets.owner) {
- wfe();
- lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner);
- }
- smp_mb();
- }
arch_spin_lock的核心部分是通过内嵌汇编的方式完成的,因为要保证指令的原子性。其中,
%0 -> lockval
%1 -> newval
%2 -> tmp
%3 -> &lock->slock
%4 -> 1 << TICKET_SHIFT = (1<<16)
line 8:lockval = *lock
line 9:newval = lockval.slock + (1<<16)(lockval是结构体局部变量,不是指针)
line 10:这是整个函数的重点,若是exclusive access 则将tmp 设为 0 并且把newval存储到lock->slock,若是 non-exclusive access(有其他 CPU 来动过) 则 tmp设为 1并且不做存储lock->slock的动作。
line 11:判断tmp是否为0,置cpsr->Z
line 12:bne在cpsr->Z为0(即tmp != 0,表示strex未能独占写入)时,跳转回标号1重试
在成功update lock->slock之后,因为union的关系,lock->tickets.next会增加1。在初始化后第一次加锁时,lockval.tickets.next = lockval.tickets.owner = 0,两者相等,所以不会进入while循环,直接获得锁。
smp_mb仅仅用于保证SMP 的memory读写顺序。
[include/linux/spinlock.h]
[include/linux/spinlock.h]
- static inline void spin_unlock(spinlock_t *lock)
- {
- raw_spin_unlock(&lock->rlock);
- }
[kernel/spinlock.c]
- #define raw_spin_unlock(lock) _raw_spin_unlock(lock)
[include/linux/spinlock_api_smp.h]
- void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
- {
- __raw_spin_unlock(lock);
- }
- EXPORT_SYMBOL(_raw_spin_unlock);
- #endif
- static inline void __raw_spin_unlock(raw_spinlock_t *lock)
- {
- spin_release(&lock->dep_map, 1, _RET_IP_);
- do_raw_spin_unlock(lock);
- preempt_enable();
- }
spin_release用于debug,暂时不care。preempt_enable是重新打开调度器的抢占特性。
真正unlock的是do_raw_spin_unlock:
[include/linux/spinlock.h]
[arch/arm/include/asm/spinlock.h]
- static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
- {
- arch_spin_unlock(&lock->raw_lock);
- __release(lock);
- }
- static inline void arch_spin_unlock(arch_spinlock_t *lock)
- {
- smp_mb();
- lock->tickets.owner++;
- dsb_sev();
- }
arch_spin_unlock里仅仅将lock->tickets.owner+1。
dsb_sev中也会内嵌汇编实现:
[arch/arm/include/asm/spinlock.h]
- static inline void dsb_sev(void)
- {
- #if __LINUX_ARM_ARCH__ >= 7
- __asm__ __volatile__ (
- "dsb\n"
- SEV
- );
- #else
- __asm__ __volatile__ (
- "mcr p15, 0, %0, c7, c10, 4\n"
- SEV
- : : "r" (0)
- );
- #endif
- }
对于armv7架构,是调用dsb命令,保证它执行完毕后,才会执行程序中位于此指令后的指令。
可以看出,如果有其他的task再次调用spin_lock,就会因为lock时的lockval.tickets.next++,
从而,lockval.tickets.next != lockval.tickets.owner一直在loop中。直到持锁的task调用spin_unlock,lockval.tickets.owner++后,才会跳出loop。
这个机制是在Linux-2.6.25中加入内核,Linux-3.6中加入ARM架构的,也就是接下来介绍的,大名鼎鼎的 Ticket spinlocks。
Kernel中还提供了一种无等待的lock api,称为spin_trylock:
[include/linux/spinlock.h]
[include/linux/spinlock.h]
- static inline int spin_trylock(spinlock_t *lock)
- {
- return raw_spin_trylock(&lock->rlock);
- }
[kernel/spinlock.c]
- #define raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock))
[include/linux/spinlock_api_smp.h]
- int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
- {
- return __raw_spin_trylock(lock);
- }
- EXPORT_SYMBOL(_raw_spin_trylock);
- static inline int __raw_spin_trylock(raw_spinlock_t *lock)
- {
- preempt_disable();
- if (do_raw_spin_trylock(lock)) {
- spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
- return 1;
- }
- preempt_enable();
- return 0;
- }
__raw_spin_trylock会在禁止调度器抢占的条件下运行,调用do_raw_spin_trylock来获取spinlock:
[include/linux/spinlock.h]
[arch/arm/include/asm/spinlock.h]
- static inline int do_raw_spin_trylock(raw_spinlock_t *lock)
- {
- return arch_spin_trylock(&(lock)->raw_lock);
- }
- static inline int arch_spin_trylock(arch_spinlock_t *lock)
- {
- unsigned long contended, res;
- u32 slock;
- do {
- __asm__ __volatile__(
- " ldrex %0, [%3]\n"
- " mov %2, #0\n"
- " subs %1, %0, %0, ror #16\n"
- " addeq %0, %0, %4\n"
- " strexeq %2, %0, [%3]"
- : "=&r" (slock), "=&r" (contended), "=&r" (res)
- : "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
- : "cc");
- } while (res);
- if (!contended) {
- smp_mb();
- return 1;
- } else {
- return 0;
- }
- }
同样,会调到汇编来实现原子操作,变量标号:
%0 -> slock
%1 -> contended
%2 -> res
%3 -> &lock->slock
%4 -> 1 << TICKET_SHIFT = (1<<16)
line 8:slock = lock->slock
line 9:res = 0
line 10:contended = slock - ror(slock, 16),即把slock的高低16位互换后与原值相减;当且仅当tickets.next == tickets.owner(锁空闲)时结果为0
line 11: if(contended == 0) slock += (1 << 16),即把tickets.next加1(局部变量slock是u32,加1<<16等效于next++)
line 12: 若是exclusive access 则将res 设为 0 并且把局部变量slock存储到lock->slock,若是 non-exclusive access(有其他 CPU 来动过) 则res设为 1并且不做存储lock->slock的动作。
line 16: 若是 non-exclusive access,do these again
如果成功lock返回1,失败返回0。
在arm架构,linux-3.6之前版本的kernel中,spinlock是通过一个整型数值来表示。值0表示lock可用。spin_lock通过原子方式设置这个数值为1,表示lock已被占用,如果有其他cpu想要再次lock,会一直循环check这个数值。当spin_unlock时,会将数值置0。假设有多个CPU在等待lock,并不能保证等待最久的CPU可以最先拿到lock。这就是传统spin_lock的“不公平”性。
传统自旋锁的“不公平”问题在锁竞争激烈的服务器系统中尤为严重,因此 Linux 内核开发者 Nick Piggin 在 Linux 内核 2.6.25 中引入了排队自旋锁:通过保存执行线程申请锁的顺序信息来解决“不公平”问题。
排队自旋锁仍然使用原有的 raw_spinlock_t 数据结构,但是赋予 slock 域新的含义。为了保存顺序信息,slock 域被分成两部分,分别保存锁持有者和未来锁申请者的票据序号(Ticket Number),如下图所示:
u32 slock
----------------------------------------------------------
| u16 next | u16 owner |
----------------------------------------------------------
bit31 bit16 bit15 bit0
owner 和 next 域均为 16 位,其中 owner 域为 slock 的低 16 位。可见排队自旋锁最多支持 2^16=65536 个处理器。
只有 next 域与 owner 域相等时,才表明锁处于未使用状态(此时也无人申请该锁)。排队自旋锁初始化时 slock 被置为 0,即 owner 和 next 置为0。内核执行线程申请自旋锁时,原子地将 next 域加 1,并将原值返回作为自己的票据序号。如果返回的票据序号等于申请时的 owner 值,说明自旋锁处于未使用状态,则直接获得锁;否则,该线程忙等待检查 owner 域是否等于自己持有的票据序号,一旦相等,则表明锁轮到自己获取。线程释放锁时,原子地将 owner 域加 1 即可,下一个线程将会发现这一变化,从忙等待状态中退出。线程将严格地按照申请顺序依次获取排队自旋锁,从而完全解决了“不公平”问题。
[1] 《深入Linux设备驱动程序内核机制》
[2] 《Linux Kernel Development》3rd Edition
[3]
[4] http://www.ibm.com/developerworks/cn/linux/l-cn-spinlock/