1. Linux hrtimer的实现方案
Linux hrtimer的实现是依赖硬件(通过可编程定时器来实现)的支持的,而且此定时器有自己的专用寄存器, 硬中断和频率。比如我的板子上的对应参数如下:
Timer at Vir:0xE0100200 = Phy:0xE0100200, using Irq:27, at Freq:250000000,由此可见,其频率为250MHz,所以其精度为:1/250000000=4ns,比系统时钟jiffy(HZ=100,精度为10ms)的精度高得太多了。可是支持此高精度timer是需要付出硬件成本的。即它是一个硬件时钟。这里所说的硬件时钟特指的是硬件计时器时钟。
2. 硬件时钟 数据结构
和硬件计时器(本文又称作硬件时钟,区别于软件时钟)相关的数据结构主要有两个:
struct clocksource :对硬件设备的抽象,描述时钟源信息
- struct clocksource {
-
-
-
- char *name;
- struct list_head list;
- int rating;
- cycle_t (*read)(struct clocksource *cs);
- int (*enable)(struct clocksource *cs);
- void (*disable)(struct clocksource *cs);
- cycle_t mask;
- u32 mult;
- u32 shift;
- u64 max_idle_ns;
- unsigned long flags;
- cycle_t (*vread)(void);
- void (*suspend)(struct clocksource *cs);
- void (*resume)(struct clocksource *cs);
- #ifdef CONFIG_IA64
- void *fsys_mmio;
- #define CLKSRC_FSYS_MMIO_SET(mmio, addr) ((mmio) = (addr))
- #else
- #define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0)
- #endif
-
-
-
-
-
- cycle_t cycle_last ____cacheline_aligned_in_smp;
- #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
-
- struct list_head wd_list;
- cycle_t wd_last;
- #endif
- };
/**
 * struct clocksource - hardware abstraction describing a clock source
 * @name: ptr to clocksource name
 * @list: list head for registration
 * @rating: rating value for selection (higher is better)
 * @read: returns a cycle value, passes clocksource as argument
 * @enable: optional function to enable the clocksource
 * @disable: optional function to disable the clocksource
 * @mask: bitmask for two's complement subtraction of non 64 bit counters
 * @mult: cycle to nanosecond multiplier
 * @shift: cycle to nanosecond divisor (power of two)
 * @max_idle_ns: max idle time permitted by the clocksource (nsecs)
 * @flags: flags describing special properties
 * @vread: vsyscall based read
 * @suspend: suspend function for the clocksource, if necessary
 * @resume: resume function for the clocksource, if necessary
 * @cycle_last: written at each timer interrupt; kept on its own cache line
 * @wd_list: watchdog list linkage (CONFIG_CLOCKSOURCE_WATCHDOG only)
 * @wd_last: watchdog related cycle value (CONFIG_CLOCKSOURCE_WATCHDOG only)
 */
struct clocksource {
/*
* First part of structure is read mostly
*/
char *name;
struct list_head list;
int rating;
cycle_t (*read)(struct clocksource *cs);
int (*enable)(struct clocksource *cs);
void (*disable)(struct clocksource *cs);
cycle_t mask;
u32 mult;
u32 shift;
u64 max_idle_ns;
unsigned long flags;
cycle_t (*vread)(void);
void (*suspend)(struct clocksource *cs);
void (*resume)(struct clocksource *cs);
#ifdef CONFIG_IA64
void *fsys_mmio; /* used by fsyscall asm code */
/* On IA64 the macro stores addr into the mmio field; elsewhere it is a no-op. */
#define CLKSRC_FSYS_MMIO_SET(mmio, addr) ((mmio) = (addr))
#else
#define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0)
#endif
/*
* Second part is written at each timer interrupt
* Keep it in a different cache line to dirty no
* more than one cache line.
*/
cycle_t cycle_last ____cacheline_aligned_in_smp;
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
/* Watchdog related data, used by the framework */
struct list_head wd_list;
cycle_t wd_last;
#endif
};
struct clock_event_device :时钟的事件信息,包括当硬件时钟中断发生时要执行哪些操作(实际上保存了相应函数的指针)。本文将该结构称作“时钟事件设备”。
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- struct clock_event_device {
- const char *name;
- unsigned int features;
- u64 max_delta_ns;
- u64 min_delta_ns;
- u32 mult;
- u32 shift;
- int rating;
- int irq;
- const struct cpumask *cpumask;
- int (*set_next_event)(unsigned long evt,
- struct clock_event_device *);
- void (*set_mode)(enum clock_event_mode mode,
- struct clock_event_device *);
- void (*event_handler)(struct clock_event_device *);
- void (*broadcast)(const struct cpumask *mask);
- struct list_head list;
- enum clock_event_mode mode;
- ktime_t next_event;
- unsigned long retries;
- };
/**
 * struct clock_event_device - clock event device descriptor
 *
 * Holds the identity and limits of a clock event device together with
 * the function pointers used to program it and to handle its events.
 *
 * @name: ptr to clock event name
 * @features: features
 * @max_delta_ns: maximum delta value in ns
 * @min_delta_ns: minimum delta value in ns
 * @mult: nanosecond to cycles multiplier
 * @shift: nanoseconds to cycles divisor (power of two)
 * @rating: variable to rate clock event devices
 * @irq: IRQ number (only for non CPU local devices)
 * @cpumask: cpumask to indicate for which CPUs this device works
 * @set_next_event: set next event function
 * @set_mode: set mode function
 * @event_handler: Assigned by the framework to be called by the low
 *                 level handler of the event source
 * @broadcast: function to broadcast events
 * @list: list head for the management code
 * @mode: operating mode assigned by the management code
 * @next_event: local storage for the next event in oneshot mode
 * @retries: number of forced programming retries
 */
struct clock_event_device {
const char *name;
unsigned int features;
u64 max_delta_ns;
u64 min_delta_ns;
u32 mult;
u32 shift;
int rating;
int irq;
const struct cpumask *cpumask;
int (*set_next_event)(unsigned long evt,
struct clock_event_device *);
void (*set_mode)(enum clock_event_mode mode,
struct clock_event_device *);
void (*event_handler)(struct clock_event_device *);
void (*broadcast)(const struct cpumask *mask);
struct list_head list;
enum clock_event_mode mode;
ktime_t next_event;
unsigned long retries;
};
上述两个结构在内核源代码中有较详细的注解,分别位于文件 clocksource.h 和 clockchips.h 中。需要特别注意的是结构 clock_event_device 的成员 event_handler ,它指定了当硬件时钟中断发生时,内核应该执行哪些操作,也就是真正的时钟中断处理函数。
Linux 内核维护了两个链表,分别存储了系统中所有时钟源的信息和时钟事件设备的信息。这两个链表的表头在内核中分别是 clocksource_list 和 clockevent_devices 。
3. hrtimer是如何实现的呢?
下文就为之一一描述。
3.1 初始化hrtimer硬件定时器
3.1.1 设置硬件中断
前面已经看到,此定时器有一个硬件中断。为了使该中断能正常工作,需要为它注册中断处理函数,其参考代码如下:
- static unsigned long my_timer_irqnbr = 25;
- static struct irqaction my_timer_irqaction = {
- .name = "My HrTimer",
- .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
- .handler = my_timer_interrupt_handler,
- };
- setup_irq(my_timer_irqnbr, &my_timer_irqaction);
/*
 * NOTE(review): the board log quoted at the top of this article reports
 * "using Irq:27", while this example uses 25 -- confirm the correct IRQ
 * number for the target board.
 */
static unsigned long my_timer_irqnbr = 25; // hardware IRQ number
/* irqaction binding the timer IRQ to my_timer_interrupt_handler. */
static struct irqaction my_timer_irqaction = {
.name = "My HrTimer",
.flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
.handler = my_timer_interrupt_handler, // interrupt handler
};
/* Register the irqaction for the timer IRQ. */
setup_irq(my_timer_irqnbr, &my_timer_irqaction);
设置中断之后,中断处理函数也有了。
3.1.2 初始化硬件时钟相关寄存器并注册此硬件时钟到系统中
- static struct clocksource myclocksource = {
- .name = "my_hrtimer_src",
- .rating = 300,
- .read = my_get_cycles,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
- };
- static void __init my_clocksource_init(void)
- {
- unsigned long ctrl = 0;
- unsigned long count = (my_timer_freq / HZ);
- ...
- writel(count, my_timer_vaddr + MY_TIMER_COMPARATOR_LOW);
- writel(count, my_timer_vaddr + MY_TIMER_AUTO_INCREMENT);
- ctrl = (MY_TIMER_CTRL_IRQ_ENA | MY_TIMER_CTRL_COMP_ENA |
- MY_TIMER_CTRL_TIMER_ENA | MY_TIMER_CTRL_AUTO_INC);
- writel(ctrl, my_timer_vaddr + MY_TIMER_CONTROL);
- ...
- clocksource_calc_mult_shift(&myclocksource, my_timer_freq, 4);
-
- clocksource_register(&myclocksource);
- }
/* Clocksource descriptor for the hardware timer used in this example. */
static struct clocksource myclocksource = {
.name = "my_hrtimer_src",
.rating = 300,
.read = my_get_cycles, // reads the COUNT register to obtain the cycle value
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
/*
 * Program the hardware timer registers and register myclocksource
 * with the system.
 *
 * The "..." lines are elisions made by the article, not C code.
 */
static void __init my_clocksource_init(void)
{
unsigned long ctrl = 0;
/* Timer frequency divided by HZ; presumably timer cycles per jiffy -- confirm units of my_timer_freq. */
unsigned long count = (my_timer_freq / HZ);
...
/* Write the same count to the comparator and to the auto-increment register. */
writel(count, my_timer_vaddr + MY_TIMER_COMPARATOR_LOW);
writel(count, my_timer_vaddr + MY_TIMER_AUTO_INCREMENT);
/* Set the IRQ-enable, comparator-enable, timer-enable and auto-increment control bits. */
ctrl = (MY_TIMER_CTRL_IRQ_ENA | MY_TIMER_CTRL_COMP_ENA |
MY_TIMER_CTRL_TIMER_ENA | MY_TIMER_CTRL_AUTO_INC);
writel(ctrl, my_timer_vaddr + MY_TIMER_CONTROL);
...
/*
 * Fill in myclocksource's mult/shift for my_timer_freq.
 * NOTE(review): the last argument (4) is presumably the conversion
 * range in seconds -- confirm against the kernel version in use.
 */
clocksource_calc_mult_shift(&myclocksource, my_timer_freq, 4);
/* Register the hardware clock with the system, i.e. add it to clocksource_list. */
clocksource_register(&myclocksource);
}
3.1.3 初始化时钟事件设备并注册到系统中
- static struct clock_event_device myclockevent = {
- .name = "my_timer_evt",
- .features = CLOCK_EVT_FEAT_PERIODIC,
- .set_mode = my_set_mode,
- .set_next_event = my_set_next_event,
- .rating = 300,
- .cpumask = cpu_all_mask,
- };
- static void __init my_clockevents_init(unsigned int timer_irq)
- {
- myclockevent.irq = timer_irq;
- clockevents_calc_mult_shift(&myclockevent, my_timer_freq, 4);
- myclockevent.max_delta_ns = clockevent_delta2ns(0xffffffff, &myclockevent);
- myclockevent.min_delta_ns = clockevent_delta2ns(0xf, &myclockevent);
-
- clockevents_register_device(&myclockevent);
- }
static struct clock_event_device myclockevent = {
.name = "my_timer_evt",
.features = CLOCK_EVT_FEAT_PERIODIC,
.set_mode = my_set_mode, //通过写寄存器设置clock_event_mode
.set_next_event = my_set_next_event, // 通过写寄存器写下一个事件
.rating = 300,
.cpumask = cpu_all_mask,
};
/*
 * Finish initialising myclockevent and register it with the system.
 *
 * @timer_irq: IRQ number stored in myclockevent.irq
 */
static void __init my_clockevents_init(unsigned int timer_irq)
{
myclockevent.irq = timer_irq;
/*
 * Fill in mult/shift for my_timer_freq before the delta conversions
 * below, which take the device as an argument.
 * NOTE(review): the last argument (4) is presumably a range in
 * seconds -- confirm against the kernel version in use.
 */
clockevents_calc_mult_shift(&myclockevent, my_timer_freq, 4);
/* Delta limits in ns, derived from 0xffffffff and 0xf (presumably timer cycles). */
myclockevent.max_delta_ns = clockevent_delta2ns(0xffffffff, &myclockevent);
myclockevent.min_delta_ns = clockevent_delta2ns(0xf, &myclockevent);
/* Register the clock event device, i.e. add it to the clockevent_devices list. */
clockevents_register_device(&myclockevent);
}
3.2 硬件中断处理函数my_timer_interrupt_handler
- static irqreturn_t my_timer_interrupt_handler(int irq, void *dev_id)
- {
- struct clock_event_device *evt = &myclockevent;
-
- writel(value, register_addr);
- evt->event_handler(evt);
- return IRQ_HANDLED;
- }
/*
 * Hard-IRQ handler for the hardware timer.
 *
 * Acknowledges the interrupt, then dispatches to whatever event_handler
 * is currently set on myclockevent. Neither irq nor dev_id is used.
 * Always returns IRQ_HANDLED.
 */
static irqreturn_t my_timer_interrupt_handler(int irq, void *dev_id)
{
struct clock_event_device *evt = &myclockevent;
/* clear the interrupt */
/* NOTE(review): value and register_addr are placeholders not defined in this snippet. */
writel(value, register_addr);
/* event_handler is not set by the static initialiser of myclockevent; it is assigned at run time. */
evt->event_handler(evt);
return IRQ_HANDLED;
}
硬件中断处理函数很简单,它直接调用clockevent的event_handler函数。前面的初始化中并没有初始化此event_handler,很显然是在使用过程中进行动态初始化的。下面看看hrtimer中是如何初始化此event_handler的。
4. hrtimer如何初始化clock_event_device的event_handler?
说到hrtimer的中断处理函数,很自然地会想到hrtimer_interrupt,那么它与clock_event_device有关系吗?有关系:它正是在时钟软中断TIMER_SOFTIRQ的处理过程中被赋值给event_handler的。
此软中断TIMER_SOFTIRQ在run_local_timers函数中通过调用raise_softirq(TIMER_SOFTIRQ);来触发。(注:raise_softirq->raise_softirq_irqoff->__raise_softirq_irqoff)
init_timers()中调用open_softirq(TIMER_SOFTIRQ, run_timer_softirq);将run_timer_softirq设置为该软中断的处理函数,其后的调用路径如下:
run_timer_softirq->
hrtimer_run_pending(Called from timer softirq every jiffy, expire hrtimers,check如果hrtimer_hres_enabled is on<=1>,则执行下面的代码切换到高精度模式)->
hrtimer_switch_to_hres->
tick_init_highres->
tick_switch_to_oneshot(hrtimer_interrupt)
<把hrtimer_interrupt赋值给dev->event_handler,即dev->event_handler = handler;>
可以看到,每一次时钟软中断处理都会经由hrtimer_run_pending尝试把hrtimer切换到高精度模式:如果条件满足就进行切换,切换之后高精度模式即被激活。hrtimer_run_pending会先检查高精度模式是否已被激活,如果已经激活,其后面的切换代码就不再执行。
5. hrtimer高精度模式下真正的中断处理函数
hrtimer_interrupt
6. hrtimer高精度模式的触发过程
以下以nanosleep为例:
SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, struct timespec __user *, rmtp)->
hrtimer_nanosleep->
do_nanosleep->
hrtimer_start_expires->
hrtimer_start_range_ns->
__hrtimer_start_range_ns->
enqueue_hrtimer(insert into rb_tree) then hrtimer_enqueue_reprogram-> hrtimer_reprogram->
tick_program_event->
tick_dev_program_event->
clockevents_program_event->
dev->set_next_event((unsigned long) clc, dev)<调用my clock_event_device的set_next_event方法设置register>