softirq原理以及源码分析-wzzushx-ChinaUnix博客

plainer

首页　| 　博文目录　| 　关于我

wzzushx

博客访问： 830973
博文数量： 247
博客积分： 0
博客等级：民兵
技术积分： 501
用户组：普通用户
注册时间： 2013-07-12 21:53

个人简介

系统未建立

文章分类

全部博文（247）

language（5）

java（4）
arch（1）
opensrc（9）

dbus（3）

lib（4）
code-error（1）
NET（4）

dns（1）
android（1）
eCos（1）
软件工程（7）
timer（4）
linux（174）

memory（1）

tools（4）

技术原理（33）

sched（4）

compile（24）

android（1）

android（0）

cgroups（6）

socket（14）

诊断（5）

cache（5）

内存（14）
signal（0）
基础概念（39）

voip（1）
modutils（0）
powerpc（1）

epapr（1）
未分配的博文（0）

文章存档

2021年（1）

2020年（3）

2019年（5）

2018年（3）

2017年（44）

2016年（75）

2015年（52）

2014年（63）

2013年（1）

我的朋友

相关博文

softirq原理以及源码分析

分类： LINUX

2019-01-25 09:25:44

原文地址：softirq原理以及源码分析作者：liujunwei1234

Linux 的softirq机制是与SMP紧密不可分的。为此，整个softirq机制的设计与实现中自始至终都贯彻了一个思想：“谁触发，谁执行”（Who marks，Who runs），也即触发软中断的那个CPU负责执行它所触发的软中断，而且每个CPU都有它自己的软中断触发与控制机制。这个设计思想也使得softirq 机制充分利用了SMP系统的性能和特点。多个softirq可以并行执行，甚至同一个softirq可以在多个processor上同时执行。

一、softirq的实现

每个softirq在内核中通过struct softirq_action来表示，另外，通过全局数组softirq_vec标识当前内核支持的所有的softirq。

/* softirq mask and active fields moved to irq_cpustat_t in
* asm/hardirq.h to get better cache usage. KAO
*/
/*
 * Descriptor for one softirq. It holds only the handler callback, which
 * takes a pointer to a struct softirq_action as its single argument.
 */
struct softirq_action
{
void (*action)(struct softirq_action *);
};
/* Table of softirq descriptors with NR_SOFTIRQS entries. */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

Linux内核最多可以支持32个softirq（思考：为什么是32个？），但当前只实现了10个，如下：

/*
 * Softirq indices. Values are assigned sequentially starting from
 * HI_SOFTIRQ = 0; NR_SOFTIRQS is the trailing entry and therefore equals
 * the number of softirqs listed before it (10 here).
 */
enum
{
HI_SOFTIRQ=0,
TIMER_SOFTIRQ,
NET_TX_SOFTIRQ,
NET_RX_SOFTIRQ,
BLOCK_SOFTIRQ,
BLOCK_IOPOLL_SOFTIRQ,
TASKLET_SOFTIRQ,
SCHED_SOFTIRQ,
HRTIMER_SOFTIRQ,
RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
NR_SOFTIRQS
};

二、softirq处理函数

struct softirq_action结构体中，只有一个函数指针成员action，即指向用户定义的softirq处理函数。当执行时，可以通过如下代码：

softirq_vec[i]->action(i);

一个注册的softirq在执行之前必须被激活，术语称为"raise the softirq"。被激活的softirq通常并不会立即执行，一般会在之后的某个时刻检查当前系统中是否有被pending的softirq，如果有就去执行，Linux内核中检查是否有softirq挂起的检查点主要有以下三类：

（1）硬件中断代码返回的时候

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 *
 * Pending softirqs are invoked here only when in_interrupt() is zero after
 * IRQ_EXIT_OFFSET has been subtracted from the preempt count.
 */
void irq_exit(void)
{
account_system_vtime(current);
trace_hardirq_exit();
sub_preempt_count(IRQ_EXIT_OFFSET);
/* Run softirqs only if no interrupt-related count remains and some are pending. */
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
rcu_irq_exit();
#ifdef CONFIG_NO_HZ
/* Make sure that timer wheel updates are propagated */
if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
tick_nohz_stop_sched_tick(0);
#endif
preempt_enable_no_resched();
}

（2）ksoftirqd内核服务线程运行的时候

/*
 * Excerpt of the ksoftirqd thread function (the "... ..." lines mark code
 * omitted by the article, so the braces shown here do not balance).
 * __bind_cpu carries the CPU number, recovered below with a cast to long.
 */
static int run_ksoftirqd(void * __bind_cpu)
{
... ...
/* Keep calling do_softirq() for as long as softirqs remain pending. */
while (local_softirq_pending()) {
/* Preempt disable stops cpu going offline.
If already offline, we'll be on wrong CPU:
don't process */
if (cpu_is_offline((long)__bind_cpu))
goto wait_to_die;
do_softirq();
/* Re-enable preemption, offer a reschedule, then disable preemption again. */
preempt_enable_no_resched();
cond_resched();
preempt_disable();
rcu_note_context_switch((long)__bind_cpu);
}
preempt_enable();
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
return 0;
... ...
}

（3）在一些内核子系统中显式地去检查挂起的softirq

/*
 * Hand skb to netif_rx() and then, still with preemption disabled, run any
 * softirqs that are pending. Returns whatever netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
int err;
preempt_disable();
err = netif_rx(skb);
/* Explicit check point: process pending softirqs right away. */
if (local_softirq_pending())
do_softirq();
preempt_enable();
return err;
}

下面重点分析一下do_softirq()，了解Linux内核到底是怎么来处理softirq的。

/*
 * Entry point for softirq processing. Returns immediately when
 * in_interrupt() is non-zero. Otherwise, with local interrupts saved and
 * disabled, checks for pending softirqs and, if there are any, runs
 * __do_softirq() on the per-CPU softirq_ctx stack.
 */
asmlinkage void do_softirq(void)
{
unsigned long flags;
struct thread_info *curctx;
union irq_ctx *irqctx;
u32 *isp;
if (in_interrupt()) /* NOTE: this check deserves careful study */
return;
local_irq_save(flags);
if (local_softirq_pending()) {
curctx = current_thread_info();
irqctx = __get_cpu_var(softirq_ctx);
/* Record the current task and stack pointer in irqctx->tinfo. */
irqctx->tinfo.task = curctx->task;
irqctx->tinfo.previous_esp = current_stack_pointer;
/* build the stack frame on the softirq stack */
/* isp points just past the end of *irqctx. */
isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
call_on_stack(__do_softirq, isp);
/*
* Shouldn't happen, we returned above if in_interrupt():
*/
WARN_ON_ONCE(softirq_count());
}
local_irq_restore(flags);
}

do_softirq主要是完成了以下几个功能：

（1）检查当前processor上是否有pending的softirq

（2）如果有pending的softirq，为softirq的处理建立新的堆栈，即建立新的软中断上下文环境

（3）处理软中断__do_softirq

这里需要重点分析一下in_interrupt（）函数的含义。在linux内核中，为了方便判断当前执行路径在哪个上下文环境中，定义了几个接口：

/* Each *_count() macro extracts one field of preempt_count() using its mask. */
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
/* Non-zero when any hardirq, softirq or NMI bit of preempt_count() is set. */
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
| NMI_MASK))
/*
* Are we doing bottom half or hardware interrupt processing?
* Are we in a softirq context? Interrupt context?
*/
#define in_irq() (hardirq_count())
#define in_softirq() (softirq_count())
#define in_interrupt() (irq_count())
/*
* Are we in NMI context?
*/
#define in_nmi() (preempt_count() & NMI_MASK)

从注释可以看出包括：硬件中断上下文，软中断上下文，不可屏蔽中断（NMI）上下文等。在这些宏中，都涉及到了preempt_count()这个宏，这个宏是一个比较重要的宏，在Linux源码中对其做了详细的注释：

/*
* We put the hardirq and softirq counter into the preemption
* counter. The bitmask has the following meaning:
*
* - bits 0-7 are the preemption count (max preemption depth: 256)
* - bits 8-15 are the softirq count (max # of softirqs: 256)
*
* The hardirq count can in theory reach the same as NR_IRQS.
* In reality, the number of nested IRQS is limited to the stack
* size as well. For archs with over 1000 IRQS it is not practical
* to expect that they will all nest. We give a max of 10 bits for
* hardirq nesting. An arch may choose to give less than 10 bits.
* m68k expects it to be 8.
*
* - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
* - bit 26 is the NMI_MASK
* - bit 28 is the PREEMPT_ACTIVE flag
*
* PREEMPT_MASK: 0x000000ff
* SOFTIRQ_MASK: 0x0000ff00
* HARDIRQ_MASK: 0x03ff0000
* NMI_MASK: 0x04000000
*/

从注释可以看出，preempt_count各个bit位的含义：

（1）bit0~7位表示抢占计数，即支持最大的抢占深度为256

（2）bit8~15位表示软中断计数，即支持最大的软中断的个数为256，需要注意的是，由于软中断还受制于pending状态，一个32位的变量，因此实际最大只能支持32个软中断。

（3）bit16~25位表示硬件中断嵌套层数，即最大可支持的嵌套层次为1024，实际情况下这是不可能的，因为中断的嵌套层数还受制于中断处理的栈空间的大小。

介绍了这么多，现在来重点分析一下上面提到的in_interrupt到底表示什么意思？

/*
 * in_interrupt() expands to irq_count(), which is non-zero whenever any
 * hardirq, softirq or NMI bit of preempt_count() is set.
 */
#define in_interrupt() (irq_count())
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
| NMI_MASK))

从其宏定义可以看出，in_interrupt宏的值是从preempt_count中按位取出的硬件中断嵌套计数、软中断计数以及不可屏蔽中断（NMI）标志这三个位域的组合，只要其中任何一个非零，in_interrupt的结果就非零。回到do_softirq的代码中，如果in_interrupt的值非零，就不会处理软中断，意思是当处于硬件中断（含嵌套）、其他软中断处理过程或者不可屏蔽中断的情况下，不会去处理软中断。对于中断的嵌套层数以及不可屏蔽中断是比较好理解的，对于软中断，应该去分析一下，在什么地方软中断的计数会增加：

/* Call site: the caller's return address is passed as the ip argument. */
__local_bh_disable((unsigned long)__builtin_return_address(0));
/*
 * Add SOFTIRQ_OFFSET to the preempt count, then issue barrier().
 * The ip argument is not used in the body shown here.
 */
static inline void __local_bh_disable(unsigned long ip)
{
add_preempt_count(SOFTIRQ_OFFSET);
barrier();
}
/* Adds val to preempt_count(); the do { } while (0) wrapper makes it a single statement. */
# define add_preempt_count(val) do { preempt_count() += (val); } while (0)

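从代码可以看出，禁止中断下半部分的函数会增加软中断的计数。于是产生一个疑问：当有软中断的do_softirq在进行处理时，如果此时被硬件中断打断，而且在硬件中断中又激活了优先级更高的软中断，那么当硬件中断退出、再去执行do_softirq时，此时in_interrupt > 0，岂不是死锁了？希望大家指教。其实这种情况并不会死锁：由于__do_softirq一开始就调用了__local_bh_disable增加软中断计数，硬件中断退出时irq_exit()中的条件“!in_interrupt() && local_softirq_pending()”不成立，invoke_softirq()不会被调用，新激活的软中断只是保持在pending状态；中断返回后，被打断的__do_softirq在处理完本轮之后会重新读取local_softirq_pending()，如果仍有挂起的软中断就跳回restart继续处理，重启次数用完（MAX_SOFTIRQ_RESTART）后则唤醒ksoftirqd去处理。也就是说，in_interrupt这个检查的作用是避免软中断在同一个CPU上嵌套执行，新触发的软中断只是被推迟处理，而不会丢失。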

实际的处理函数为__do_softirq：

/*
 * Run the softirqs pending on this CPU.
 *
 * Takes a snapshot of the pending mask, clears the per-CPU mask, and then,
 * with interrupts enabled, walks softirq_vec[] invoking the handler of every
 * set bit, starting from bit 0. Afterwards, with interrupts disabled again,
 * the mask is re-read: if more softirqs were raised in the meantime the walk
 * is restarted, for at most MAX_SOFTIRQ_RESTART passes in total. Anything
 * still pending after that is handed over via wakeup_softirqd().
 *
 * do_softirq() calls this with local interrupts disabled.
 */
asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	/* Passes allowed here before deferring to ksoftirqd (empirical value). */
	int max_restart = MAX_SOFTIRQ_RESTART;
	int cpu;

	/*
	 * Snapshot the softirqs currently pending. The mask is 32 bits wide,
	 * which is why at most 32 softirqs can be supported.
	 */
	pending = local_softirq_pending();
	account_system_vtime(current);

	/* Adds SOFTIRQ_OFFSET to the preempt count for the duration of the run. */
	__local_bh_disable((unsigned long)__builtin_return_address(0));
	lockdep_softirq_enter();

	cpu = smp_processor_id();
restart:
	/*
	 * Reset the pending bitmask before enabling irqs: the snapshot in
	 * 'pending' is what gets processed; softirqs raised from now on are
	 * picked up by the re-read below.
	 */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	do {
		/* Bit 0 of the shifted mask belongs to the entry h points at. */
		if (pending & 1) {
			int prev_count = preempt_count();

			kstat_incr_softirqs_this_cpu(h - softirq_vec);

			trace_softirq_entry(h, softirq_vec);
			h->action(h); /* run this softirq's handler */
			trace_softirq_exit(h, softirq_vec);

			/* A handler must leave preempt_count() as it found it. */
			if (unlikely(prev_count != preempt_count())) {
				/*
				 * The space before "with" is required: without
				 * it the adjacent literals concatenate to
				 * "%pwith".
				 */
				printk(KERN_ERR "huh, entered softirq %td %s %p"
				       " with preempt_count %08x,"
				       " exited with %08x?\n", h - softirq_vec,
				       softirq_to_name[h - softirq_vec],
				       h->action, prev_count, preempt_count());
				preempt_count() = prev_count;
			}

			rcu_bh_qs(cpu);
		}
		h++;
		pending >>= 1; /* move on to the next softirq bit */
	} while (pending);

	local_irq_disable();

	/* Were more softirqs raised while the handlers were running? */
	pending = local_softirq_pending();
	if (pending && --max_restart) /* restart budget not yet exhausted */
		goto restart;

	/*
	 * Still pending after the last allowed pass: softirqs keep being
	 * raised while we process them, so wake ksoftirqd to handle the rest.
	 */
	if (pending)
		wakeup_softirqd();

	lockdep_softirq_exit();

	account_system_vtime(current);
	_local_bh_enable();
}

三、使用softirq

softirq一般用在对实时性要求比较强的地方，当前的Linux内核中，只有两个子系统直接使用了softirq:网络子系统和块设备子系统。另外，增加新的softirq需要重新编译内核，因此，除非必须需要，最好考虑tasklet和kernel timer是否适合当前需要。

如果必须需要使用softirq，那么需要考虑的一个重要的问题就是新增加的softirq的优先级，默认情况下，softirq的数值越小优先级越高，根据实际经验，新增加的softirq最好在BLOCK_SOFTIRQ和TASKLET_SOFTIRQ之间。

softirq的处理函数通过open_softirq进行注册，此函数接收两个参数，一个是softirq的整数索引，另一个是该softirq对应的处理函数。例如在网络子系统中，注册了如下两个softirq及其处理函数：

/* Register the handlers for the network transmit and receive softirqs. */
open_softirq(NET_TX_SOFTIRQ, net_tx_action);

open_softirq(NET_RX_SOFTIRQ, net_rx_action);

前面提到，软中断处理函数注册后，还需要将该软中断激活，此软中断才能被执行，激活操作是通过raise_softirq函数来实现，在网络子系统中激活代码如下：

/*
 * Append napi to the tail of sd->poll_list and raise NET_RX_SOFTIRQ.
 * Called with irq disabled, hence the __raise_softirq_irqoff() variant.
 */
static inline void ____napi_schedule(struct softnet_data *sd,
struct napi_struct *napi)
{
list_add_tail(&napi->poll_list, &sd->poll_list);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}

这里的__raise_softirq_irqoff和raise_softirq的区别是，前者在事先已经关中断的情况下可以被使用，后者自己完成中断的关闭和恢复。

阅读(4310) | 评论(0) | 转发(0) |

上一篇：Linux安全模块（LSM）

下一篇：为什么irq绑定单核后NET_RX_SOFTIRQ统计在所有CPU上

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6