最近接到一个客户bug,说是运行perf fuzzer的时候,手机会crash掉。当时我懵了。Perf
fuzzer是什么鬼。
经过坚持不懈的google之后,终于找到了一些资料。
perf counters added to the mainline
fuzzing perf events
或者看kernel/tools/perf/design.txt文档
Perf event这东西本来的名字perf counter更加贴切。Counter也就是计数器,无非是统计一些软件事件或者硬件事件发生的次数。
软件事件就是指software event 或者tracepoint,其实在我看来,这两者大同小异。不知道为啥分为两类。
硬件事件需要借助于硬件设备,不管是PMU还是CORE DEBUG(watchpoint/breakpoint),用于统计一段时间内硬件事件发生的次数。
每一种类型在perf event框架中抽象成为一个PMU设备。(既可以是硬件,也可以是软件)。
先从perf_sw_event这个函数入手吧。其实这个函数的目的就是根据event_id
找到相应的事件,然后增加计数器。记住我说的是计数器,也就是刚才说到的perf counter.
perf_pmu_register(&perf_swevent, "software",
PERF_TYPE_SOFTWARE);
perf_pmu_register(&perf_cpu_clock, NULL, -1);
perf_pmu_register(&perf_task_clock, NULL,
-1);
perf_tp_register();
ret = init_hw_breakpoint();
这边一共注册了5种PMU设备,分别是software event, cpu clock,
task clock, tracepoint, breakpoint.
struct pmu定义在kernel/include/linux/perf_event.h文件中,其中包含很多的函数指针,这些函数指针是在调用perf_pmu_register注册时随struct pmu一起传入的。
之前一直没搞清楚这个perf event到底是怎么工作的,直到我看到了perf_sw_event函数。
简单的在内核中搜了一下这个函数,你们居然是这么玩的。
-
./arch/arm/mm/fault.c:335: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
-
./arch/arm/mm/fault.c:339: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-
./arch/arm/mm/fault.c:343: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
-
./arch/arm/kernel/swp_emulate.c:190: perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);
-
./arch/mips/mm/fault.c:156: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-
./arch/mips/mm/fault.c:168: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-
./arch/mips/mm/fault.c:172: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
好吧,都是在事件发生的时候默默的给计数器加一。相应的还有DECLARE_EVENT_CLASS跟perf_bp_event。
另外除了上面的pmu之外,还有真实的硬件PMU。
kernel/arch/arm64/kernel/perf_event.c
init_hw_perf_events 硬件PMU在软件PMU的基础上进行了再一次的封装。
-
struct arm_pmu {
-
struct pmu pmu;
-
cpumask_t active_irqs;
-
const char *name;
-
irqreturn_t (*handle_irq)(int irq_num, void *dev);
-
void (*enable)(struct hw_perf_event *evt, int idx);
-
void (*disable)(struct hw_perf_event *evt, int idx);
-
int (*get_event_idx)(struct pmu_hw_events *hw_events,
-
struct hw_perf_event *hwc);
-
int (*set_event_filter)(struct hw_perf_event *evt,
-
struct perf_event_attr *attr);
-
u32 (*read_counter)(int idx);
-
void (*write_counter)(int idx, u32 val);
-
void (*start)(void);
-
void (*stop)(void);
-
void (*reset)(void *);
-
int (*request_irq)(struct arm_pmu *,
-
irq_handler_t handler);
-
void (*free_irq)(struct arm_pmu *);
-
int (*map_event)(struct perf_event *event);
-
int num_events;
-
int pmu_state;
-
atomic_t active_events;
-
struct mutex reserve_mutex;
-
u64 max_period;
-
struct platform_device *plat_device;
-
struct pmu_hw_events *(*get_hw_events)(void);
-
void (*save_pm_registers)(void *hcpu);
-
void (*restore_pm_registers)(void *hcpu);
-
};
添加了很多函数指针,比如map_event用于查询某一种硬件操作是否支持。
-
static struct arm_pmu *__init armv8_pmuv3_pmu_init(void)
-
{
-
armv8pmu.name = "arm/armv8-pmuv3";
-
armv8pmu.map_event = armv8_pmuv3_map_event;
-
armv8pmu.num_events = armv8pmu_read_num_pmnc_events();
-
armv8pmu.set_event_filter = armv8pmu_set_event_filter;
-
return &armv8pmu;
-
}
-
static struct arm_pmu armv8pmu = {
-
-
.handle_irq = armv8pmu_handle_irq,
-
.enable = armv8pmu_enable_event,
-
.disable = armv8pmu_disable_event,
-
.read_counter = armv8pmu_read_counter,
-
.write_counter = armv8pmu_write_counter,
-
.get_event_idx = armv8pmu_get_event_idx,
-
.start = armv8pmu_start,
-
.stop = armv8pmu_stop,
-
.reset = armv8pmu_reset,
-
.request_irq = armv8pmu_request_irq,
-
.free_irq = armv8pmu_free_irq,
-
.save_pm_registers = armv8pmu_save_pm_registers,
-
.restore_pm_registers = armv8pmu_restore_pm_registers,
-
.max_period = (1LLU << 32) - 1,
-
};
这么多新添加的函数指针,吓得我赶紧从arm官网上下载文档看了一遍。
还好还好,都是对于硬件操作的函数。
再次看一下perf_fuzzer是怎么工作的。strace看了一下
perf_event_open(0x5577398f28, 15581, 3, 0, PERF_FLAG_PID_CGROUP) = -1 EINVAL (Invalid argument)
perf_event_open(0x5577398f28, 0, 7, 0, PERF_FLAG_FD_NO_GROUP|0x80628200) = -1 EINVAL (Invalid argument)
perf_event_open(0x5577398f28, 0, 4, 0, 0x6060e610 /* PERF_FLAG_??? */) = -1 EINVAL (Invalid argument)
perf_event_open(0x5577398f28, 0, 3, -1, PERF_FLAG_FD_NO_GROUP|0x8) = -1 EINVAL (Invalid argument)
perf_event_open(0x5577398f28, 0, 7, -1, 0x8000 /* PERF_FLAG_??? */) = -1 EINVAL (Invalid argument)
perf_event_open(0x5577398f28, 0, 1, -1, 0) = 3
mmap(NULL, 10172, PROT_READ|PROT_WRITE, MAP_SHARED, 3, 0) = 0x7fb762a000
rt_sigaction(SIGRT_5, {0x5577168f5c, [], SA_SIGINFO}, NULL, 8) = 0
fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC) = 0
fcntl(3, F_SETSIG, 0x25) = 0
fcntl(3, F_SETOWN, 3774) = 0
ioctl(3, _IOC(_IOC_READ, 0x24, 0x07, 0x08), 0x7ffffe9578) = 0
close(3) = 0
munmap(0x7fb762a000, 10172) = 0
ppoll([{fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd
ppoll([{fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fb7997ff8) = 3780
prctl(PR_TASK_PERF_EVENTS_ENABLE) = 0
ppoll([{fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd
kill(3780, SIGKILL) = 0
wait4(3780, [{WIFSIGNALED(s) && WTERMSIG(s) == SIGKILL}], 0, NULL) = 3780
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=3780, si_uid=2000, si_status=SIGKILL, si_utime=1, si_stime=0} ---
ppoll([{fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd=0, events=POLLIN}, {fd
openat(AT_FDCWD, "/proc/sys/kernel/perf_cpu_time_max_percent", O_RDONLY) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
翻来覆去就是这几个系统调用,最重要的还是传进来的perf_event_attr结构体。
如果perf fuzzer的代码看起来有点复杂的话,可以直接参考perf_event_open(2) man page中的例子。
头疼的是这个perf_event_attr结构体在不同的kernel版本上定义的不同(
天哪……所以不要告诉我这玩意在3.10上跑的好好的,在3.18上不能跑),感觉这是将来的一个大坑。
其二,这玩意之前在intel的芯片上用的比较多,想想intel的台式机是什么性能,所以内核的默认参数perf_event_max_sample_rate设为每秒10万次。
但是到了ARM平台上,如果用上ARM硬件PMU的话,每秒10万次的中断,而且PMU本来就是per-CPU的,用来监测每个单独的CPU,这些中断不能在CPU间
balance,感觉整个世界要炸了。然后watchdog bark或者NMI watchdog、soft lockup之类的问题一大堆。