#include <stdio.h>
#include <stdlib.h>

#include <pthread.h>
#include <sys/time.h>
-
-
/*
 * Lock selection for the benchmark.
 *
 * The spinlock variant is the default. Define USE_MUTEX on the compiler
 * command line (-DUSE_MUTEX) to build the mutex variant without editing this
 * file. If both USE_SPINLOCK and USE_MUTEX are defined, USE_SPINLOCK wins,
 * because the rest of the file only tests USE_SPINLOCK.
 */
#if !defined(USE_SPINLOCK) && !defined(USE_MUTEX)
#define USE_SPINLOCK
#endif

#ifdef USE_SPINLOCK
/* Lock taken by worker() around the critical section (spinlock build). */
pthread_spinlock_t spinlock;
#else
/* Lock taken by worker() around the critical section (mutex build). */
pthread_mutex_t mutex;
#endif
-
-
-
/*
 * Number of worker threads started by main().
 * Defaults to 2; override at build time with -DNR_THREAD=<n> to produce the
 * other variants of the benchmark without editing this file.
 */
#ifndef NR_THREAD
#define NR_THREAD 2
#endif

/* Microseconds per second; used to convert a tv_sec difference to microseconds. */
#define MILLION 1000000L

/* Number of counter increments performed by a single big() call. */
#define TIMES 100000

/* Number of lock/unlock rounds each worker thread performs. */
#define EXEC_TIMES 1000000
-
-
/* Shared counter incremented inside the critical section by the workers. */
unsigned long long counter = 0;

/*
 * Small critical section: increment the shared counter exactly once.
 *
 * The increment is a plain (non-atomic) read-modify-write, so the caller is
 * expected to hold the benchmark lock while calling this.
 *
 * Declared `static inline`: a plain `inline` definition provides no external
 * definition under C99/C11, so the call fails to link whenever the compiler
 * chooses not to inline it (e.g. at -O0).
 *
 * Returns 0 always; the value carries no information and exists only
 * because the function is declared to return int.
 */
static inline int little(void)
{
    counter++;
    return 0;
}
-
-
inline int big()
-
{
-
int j;
-
for(j = 0;j<TIMES;j++)
-
{
- counter++;
-
}
-
}
-
-
void * worker(void* arg)
-
{
-
int i;
-
for(i = 0;i<EXEC_TIMES;i++)
-
{
-
#ifdef USE_SPINLOCK
-
pthread_spin_lock(&spinlock);
-
#else
-
pthread_mutex_lock(&mutex);
-
#endif
-
-
little();
-
//big();
-
-
#ifdef USE_SPINLOCK
-
pthread_spin_unlock(&spinlock);
-
#else
-
pthread_mutex_unlock(&mutex);
-
#endif
-
-
}
-
-
return NULL;
-
}
-
int main()
-
{
-
int i;
-
struct timeval tv_start,tv_end;
-
unsigned long long interval = 0;
-
#ifdef USE_SPINLOCK
-
pthread_spin_init(&spinlock,0);
-
#else
-
pthread_mutex_init(&mutex,NULL);
-
#endif
-
pthread_t Tid[NR_THREAD];
-
-
gettimeofday(&tv_start,NULL);
-
for(i = 0;i<NR_THREAD;i++)
-
{
- if(pthread_create(&Tid[i],NULL,worker,NULL) != 0)
-
{
- fprintf(stderr,"pthread create failed
- when i = %d\n",i);
- return -1;
-
}
-
}
-
-
for(i = 0;i<NR_THREAD;i++)
-
{
- if(pthread_join(Tid[i],NULL))
-
{
- fprintf(stderr,"pthread join failed
- when i = %d\n",i);
- return -2;
-
}
-
}
-
-
gettimeofday(&tv_end,NULL);
- interval = MILLION*(tv_end.tv_sec - tv_start.tv_sec )
-
+ (tv_end.tv_usec - tv_start.tv_usec);
-
-
#ifdef USE_SPINLOCK
-
fprintf(stderr,"thread num %d spinlock version
- cost time %llu\n",NR_THREAD,interval);
-
#else
-
fprintf(stderr,"thread num %d mutex version
- cost time %llu\n",NR_THREAD,interval);
-
#endif
-
-
return 0;
-
}
1 临界区小,线程个数为2
root@libin:~/program/C/thread/thread_lock_cmp# time ./mutex_2_comp
thread num 2 mutex version cost time 193155

real 0m0.195s
user 0m0.208s
sys 0m0.172s
root@libin:~/program/C/thread/thread_lock_cmp# time ./spinlock_2_comp
thread num 2 spinlock version cost time 179761

real 0m0.181s
user 0m0.360s
sys 0m0.000s
性能上看差不多,这是由于线程数比较少,竞争不激烈。关注下sys时间:mutex锁版本的sys时间较大,因为线程争不到锁时会通过系统调用(futex)进入内核睡眠等待。
2 临界区小,线程个数为10
root@libin:~/program/C/thread/thread_lock_cmp# time ./mutex_10_comp
thread num 10 mutex version cost time 1456112

real 0m1.458s
user 0m1.840s
sys 0m3.808s
root@libin:~/program/C/thread/thread_lock_cmp# time ./spinlock_10_comp
thread num 10 spinlock version cost time 2425690

real 0m2.427s
user 0m9.577s
sys 0m0.016s
root@libin:~/program/C/thread/thread_lock_cmp#
看下10个线程的情况,自旋锁性能已经明显不如线程锁(mutex)了,因为竞争变得激烈了。我使用systemtap观察了进程调度的频繁程度,每秒统计一次上下文切换的次数:
root@libin:~/program/systemtap# cat sched.stp
global cnt;
probe scheduler.cpu_on {cnt<<<1;}

probe timer.s(1){printf("%d\n", @count(cnt)); delete cnt;}

probe timer.s(40){exit();}
root@libin:~/program/systemtap#
线程锁上下文切换的情况:
2393
2275
2156
122098
72827
2741
4760
3159
看到中间有两个比较大的值,这是因为我在这期间执行了mutex版本的程序,而程序执行时间只有1.5秒左右,所以只有两个比较大的值。这就证明了mutex锁在竞争激烈的情况下,会出现大量的上下文切换。
自旋锁版本执行期间,上下文切换次数没有明显变化,表明自旋锁不会引发上下文切换,它只是在原地忙等(自旋)。
3 临界区小,竞争特别激烈 100个线程。
先说mutex锁的情况:
root@libin:~/program/C/thread/thread_lock_cmp# time ./mutex_100_comp
thread num 100 mutex version cost time 15101059

real 0m15.103s
user 0m18.337s
sys 0m40.827s
执行systemtap脚本的输出:
3567
2245
2291
82863
122166
110381
126612
126960
124175
126085
126417
120905
119271
120717
125181
124713
126694
125177
51845
4633
2633
就像10个线程的情况一样,在执行mutex版本期间,发生了大量的上下文切换。
top的输出如下:
top - 12:46:38 up 2:27, 4 users, load average: 16.72, 7.52, 3.88
Tasks: 223 total, 3 running, 220 sleeping, 0 stopped, 0 zombie
Cpu0 : 35.0%us, 64.7%sy, 0.0%ni, 0.3%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu1 : 32.4%us, 67.0%sy, 0.3%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.3%si, 0.0%st
Cpu2 : 35.0%us, 65.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu3 : 34.8%us, 64.5%sy, 0.0%ni, 0.3%id, 0.0%wa, 0.0%hi, 0.3%si, 0.0%st
Mem: 1985648k total, 1441328k used, 544320k free, 110548k buffers
Swap: 1951736k total, 0k used, 1951736k free, 521980k cached

PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
5774 root 20 0 802m 860 368 S 392 0.0 0:39.27 mutex_100_comp
可以看出,系统时间占60%以上,这是因为有进程调度。
下面看自旋锁,自旋锁就比较悲惨了,我启动自旋锁版本后,先去泡了壶茶,太慢了。
root@libin:~/program/C/thread/thread_lock_cmp# time ./spinlock_100_comp
thread num 100 spinlock version cost time 233026239

real 3m53.028s
user 15m18.985s
sys 0m1.712s
上下文切换的情况我就不贴了,没有超过3000次/s的。
贴下top的情况
top - 12:49:45 up 2:30, 4 users, load average: 45.98, 15.50, 7.02
Tasks: 230 total, 1 running, 229 sleeping, 0 stopped, 0 zombie
Cpu0 :100.0%us, 0.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu1 : 99.4%us, 0.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.6%si, 0.0%st
Cpu2 :100.0%us, 0.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu3 : 98.4%us, 1.3%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.3%si, 0.0%st
Mem: 1985648k total, 1471804k used, 513844k free, 111164k buffers
Swap: 1951736k total, 0k used, 1951736k free, 523212k cached

PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
5875 root 20 0 802m 860 368 S 395 0.0 2:24.78 spinlock_100_co
程序执行期间,CPU被白白浪费,我执行其他的任务时,电脑特别卡,卡得不能忍受。
临界区大的情况我就不继续写了。有兴趣的同学可以自己测试一下:
结论:
1 自旋锁适用于竞争不激烈,线程数较少,并且临界区小的情况。
2 线程锁在竞争激烈的情况下,会引发大量的上下文切换。所以由于竞争的存在,并不是线程越多,效率越高。
3 保险情况下使用线程锁,因为,极端情况下,自旋锁不停的自旋,浪费CPU,影响效率。
参考文献:
1 Pthreads并行编程之spin lock与mutex性能对比分析
2 latencytop深度了解你的Linux系统的延迟
3 UNIX系统编程