0 前言
本周,我们尝试通过qemu虚拟机启动一个真实的Linux内核,并看一下Linux在启动时到底做了什么事情。
1 实验
提示,本节前半部分与作业内容关系较小。。。
首先尝试使用自己的环境生成内核版本文件。 在编译内核之前,需要对内核进行配置,这里使用32位x86的默认配置。生成.config文件。
按照课程中的提示,配置内核时,加入内核调试信息。
给虚拟机(我的Linux Mint运行在虚拟机中)多分配了一些资源,增加了CPU的数量和内存,开始编译内核。生成内核版本文件。
后来到这里尴尬了,由于时间关系暂时没有解决(郁闷,每次作业都拖到很晚,这个后续解决后写到评论里)。 编译生成32位可执行文件时,由于一些原因失败,看似是缺少32位的编译环境。顺手编了一个64位的,但此时我忘记我的内核是32位的了。。
使用qemu加载内核时,果然出现了问题,提示体系结构不匹配。有些不明白的是,按说我内核是32位的啊,感觉应该往前走一些再挂啊。。
好吧,由于时间不够,换回到实验楼了。。。(尴尬)
按照实验要求,开启对qemu虚拟机的远程gdb调试。对start_kernel设置断点。
Continue执行到断点处,准备逐行分析。
通过step(单步执行并进入调用的函数)和next命令进行逐行分析。
2 分析
1) 关于start_kernel
调用者: 对于32位X86系统来说,是i386_start_kernel。从这里也可以看出,start_kernel是第一个与体系结构无关的初始化函数,所有类型的CPU都需要执行start_kernel,完成相同的操作。
函数内容: 确实有点复杂,只能挑几个意义明显的函数进行简单说明,其他具体的分析只能看各种大牛的博客了。
简单分析见双斜线// 后的注释。
-
/*
 * start_kernel() as quoted from the kernel source for this article.
 *
 * Visible flow: early setup runs after local interrupts are disabled
 * (local_irq_disable), a long series of subsystem init calls follows,
 * interrupts are re-enabled (local_irq_enable), more init calls run, and
 * the function finishes by calling rest_init().
 *
 * The trailing // notes are the article author's annotations, translated
 * to English.
 * NOTE(review): the lone "-" lines look like artifacts of the web page's
 * code listing rather than kernel source; they are left untouched here.
 */
asmlinkage void __init start_kernel(void)
-
{
-
char * command_line; // command line handed to the kernel at boot (filled in by setup_arch below)
-
extern const struct kernel_param __start___param[], __stop___param[];
-
-
smp_setup_processor_id();
-
-
/*
-
* Need to run as early as possible, to initialize the
-
* lockdep hash:
-
*/
-
lockdep_init();
-
debug_objects_early_init();
-
-
/*
-
* Set up the initial canary ASAP:
-
*/
-
boot_init_stack_canary();
-
-
cgroup_init_early();
-
-
local_irq_disable(); // disable all interrupts on the local CPU
-
early_boot_irqs_disabled = true;
-
-
/*
-
* Interrupts are still disabled. Do necessary setups, then
-
* enable them
-
*/
-
tick_init();
-
boot_cpu_init();
-
page_address_init();
-
printk(KERN_NOTICE "%s", linux_banner);
-
setup_arch(&command_line);
-
mm_init_owner(&init_mm, &init_task);
-
mm_init_cpumask(&init_mm);
-
setup_command_line(command_line); // process the kernel command-line parameters
-
setup_nr_cpu_ids();
-
setup_per_cpu_areas();
-
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
-
-
build_all_zonelists(NULL);
-
page_alloc_init();
-
-
printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
-
parse_early_param();
-
parse_args("Booting kernel", static_command_line, __start___param,
-
__stop___param - __start___param,
-
&unknown_bootoption);
-
-
jump_label_init();
-
-
/*
-
* These use large bootmem allocations and must precede
-
* kmem_cache_init()
-
*/
-
setup_log_buf(0);
-
pidhash_init();
-
vfs_caches_init_early(); // virtual file system caches
-
sort_main_extable();
-
trap_init();
-
mm_init(); // memory management initialization
-
-
/*
-
* Set up the scheduler prior starting any interrupts (such as the
-
* timer interrupt). Full topology setup happens at smp_init()
-
* time - but meanwhile we still have a functioning scheduler.
-
*/
-
sched_init();
-
/*
-
* Disable preemption - early bootup scheduling is extremely
-
* fragile until we cpu_idle() for the first time.
-
*/
-
preempt_disable();
-
if (!irqs_disabled()) {
-
printk(KERN_WARNING "start_kernel(): bug: interrupts were "
-
"enabled *very* early, fixing it\n");
-
local_irq_disable();
-
}
-
idr_init_cache();
-
perf_event_init();
-
rcu_init(); // RCU (read-copy-update) initialization
-
radix_tree_init();
-
/* init some links before init_ISA_irqs() */
-
early_irq_init();
-
init_IRQ(); // interrupt initialization
-
prio_tree_init();
-
init_timers(); // timer-related initialization
-
hrtimers_init();
-
softirq_init(); // softirq (Linux's implementation of interrupt bottom halves) initialization
-
timekeeping_init();
-
time_init();
-
profile_init();
-
call_function_init();
-
if (!irqs_disabled())
-
printk(KERN_CRIT "start_kernel(): bug: interrupts were "
-
"enabled early\n");
-
early_boot_irqs_disabled = false;
-
local_irq_enable(); // enable interrupts
-
-
kmem_cache_init_late();
-
-
/*
-
* HACK This is early. We're enabling the console before
-
* we've done PCI setups etc, and console_init() must be aware of
-
* this. But we do want output early, in case something goes wrong.
-
*/
-
console_init(); // console initialization
-
if (panic_later)
-
panic(panic_later, panic_param);
-
-
lockdep_info();
-
-
/*
-
* Need to run this when irqs are enabled, because it wants
-
* to self-test [hard/soft]-irqs on/off lock inversion bugs
-
* too:
-
*/
-
locking_selftest();
-
-
#ifdef CONFIG_BLK_DEV_INITRD
-
if (initrd_start && !initrd_below_start_ok &&
-
page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
-
printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
-
"disabling it.\n",
-
page_to_pfn(virt_to_page((void *)initrd_start)),
-
min_low_pfn);
-
initrd_start = 0;
-
}
-
#endif
-
page_cgroup_init();
-
enable_debug_pagealloc();
-
debug_objects_mem_init();
-
kmemleak_init();
-
setup_per_cpu_pageset();
-
numa_policy_init(); // NUMA (non-uniform memory access) policy initialization
-
if (late_time_init)
-
late_time_init();
-
sched_clock_init();
-
calibrate_delay();
-
pidmap_init();
-
anon_vma_init();
-
#ifdef CONFIG_X86
-
if (efi_enabled(EFI_RUNTIME_SERVICES))
-
efi_enter_virtual_mode();
-
#endif
-
thread_info_cache_init();
-
cred_init();
-
fork_init(totalram_pages);
-
proc_caches_init();
-
buffer_init();
-
key_init();
-
security_init();
-
dbg_late_init();
-
vfs_caches_init(totalram_pages);
-
signals_init();
-
/* rootfs populating might need page-writeback */
-
page_writeback_init();
-
#ifdef CONFIG_PROC_FS
-
proc_root_init();
-
#endif
-
cgroup_init();
-
cpuset_init();
-
taskstats_init_early();
-
delayacct_init();
-
-
check_bugs();
-
-
acpi_early_init(); /* before LAPIC and SMP init */
-
sfi_init_late();
-
-
ftrace_init();
-
-
/* Do the rest non-__init'ed, we're now alive */
-
rest_init(); // important; analyzed later in this article
-
}
2) 关于0号进程与1号进程
其实,我理解0号进程就是执行start_kernel->rest_init的这个内核态进程。而在rest_init中,通过调用kernel_thread创建新任务,新任务的入口函数是
kernel_init,这次任务创建分配的pid应为1,这就是1号进程的最开始部分。回到0号进程,它最终会执行到
cpu_idle,进入一个while(1)循环,处理cpu空闲时的工作(?没仔细分析,从名字看大致是这样吧)。
-
/*
 * rest_init() - final step of start_kernel().
 *
 * Creates two kernel threads with kernel_thread(): first kernel_init,
 * then kthreadd. Looks up kthreadd's task from the returned pid, signals
 * kthreadd_done, calls schedule() once, and finally enters cpu_idle()
 * with preemption disabled.
 *
 * NOTE(review): the lone "-" lines look like artifacts of the web page's
 * code listing rather than kernel source; they are left untouched here.
 */
static noinline void __init_refok rest_init(void)
-
{
-
int pid;
-
-
rcu_scheduler_starting();
-
/*
-
* We need to spawn init first so that it obtains pid 1, however
-
* the init task will end up wanting to create kthreads, which, if
-
* we schedule it before we create kthreadd, will OOPS.
-
*/
-
kernel_thread(kernel_init,NULL, CLONE_FS | CLONE_SIGHAND);
-
numa_default_policy();
-
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
-
rcu_read_lock();
-
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
-
rcu_read_unlock();
-
/* kernel_init() blocks on this completion before doing its work */
complete(&kthreadd_done);
-
-
/*
-
* The boot idle thread must execute schedule()
-
* at least once to get things moving:
-
*/
-
init_idle_bootup_task(current);
-
preempt_enable_no_resched();
-
schedule();
-
-
/* Call into cpu_idle with preempt disabled */
-
preempt_disable();
-
cpu_idle();
-
}
再来看1号进程后续做了什么。 初始化了用户接口,最后调用init_post。(其他的暂时不关注。。不关注)
-
/*
 * kernel_init() - thread function that rest_init() hands to kernel_thread().
 *
 * Waits for kthreadd_done, runs the SMP and basic setup calls, opens
 * /dev/console and duplicates descriptor 0 twice, decides whether an
 * early-userspace init (default "/init") is accessible, and then calls
 * init_post(). Returns 0 if init_post() returns.
 *
 * @unused: not referenced in the body.
 *
 * NOTE(review): the lone "-" lines look like artifacts of the web page's
 * code listing rather than kernel source; they are left untouched here.
 */
static int __init kernel_init(void * unused)
-
{
-
/*
-
* Wait until kthreadd is all set-up.
-
*/
-
wait_for_completion(&kthreadd_done);
-
-
/* Now the scheduler is fully set up and can do blocking allocations */
-
gfp_allowed_mask = __GFP_BITS_MASK;
-
-
/*
-
* init can allocate pages on any node
-
*/
-
set_mems_allowed(node_states[N_HIGH_MEMORY]);
-
/*
-
* init can run on any cpu.
-
*/
-
set_cpus_allowed_ptr(current, cpu_all_mask);
-
-
cad_pid = task_pid(current);
-
-
smp_prepare_cpus(setup_max_cpus);
-
-
do_pre_smp_initcalls();
-
lockup_detector_init();
-
-
smp_init();
-
sched_init_smp();
-
-
do_basic_setup();
-
-
/* Open the /dev/console on the rootfs, this should never fail */
-
if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) // open the console on which the user can type commands
-
printk(KERN_WARNING "Warning: unable to open an initial console.\n");
-
-
/* Duplicate descriptor 0 twice -- presumably yielding fds 1 and 2; TODO confirm */
(void) sys_dup(0);
-
(void) sys_dup(0);
-
/*
-
* check if there is an early userspace init. If yes, let it do all
-
* the work
-
*/
-
-
if (!ramdisk_execute_command)
-
ramdisk_execute_command = "/init";
-
-
if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
-
ramdisk_execute_command = NULL;
-
prepare_namespace();
-
}
-
-
/*
-
* Ok, we have completed the initial bootup, and
-
* we're essentially up and running. Get rid of the
-
* initmem segments and start the user-mode stuff..
-
*/
-
-
init_post();
-
return 0;
-
}
-
/*
 * init_post() - last stage of kernel_init().
 *
 * Waits for outstanding async init code, frees init memory, marks the
 * system as SYSTEM_RUNNING, and then tries to start an init program in
 * this order: ramdisk_execute_command, execute_command, "/sbin/init",
 * "/etc/init", "/bin/init", "/bin/sh". If control gets past every
 * attempt, it calls panic().
 *
 * NOTE(review): the lone "-" lines look like artifacts of the web page's
 * code listing rather than kernel source; they are left untouched here.
 */
static noinline int init_post(void)
-
{
-
/* need to finish all async __init code before freeing the memory */
-
async_synchronize_full();
-
free_initmem();
-
mark_rodata_ro();
-
system_state = SYSTEM_RUNNING;
-
numa_default_policy();
-
-
-
current->signal->flags |= SIGNAL_UNKILLABLE;
-
-
if (ramdisk_execute_command) {
-
run_init_process(ramdisk_execute_command);
-
/* reached only if run_init_process() returned */
printk(KERN_WARNING "Failed to execute %s\n",
-
ramdisk_execute_command);
-
}
-
-
/*
-
* We try each of these until one succeeds.
-
*
-
* The Bourne shell can be used instead of init if we are
-
* trying to recover a really broken machine.
-
*/
-
if (execute_command) {
-
run_init_process(execute_command); // if the kernel command line gave a direct path to init (or a substitute program), try to execute it here
-
printk(KERN_WARNING "Failed to execute %s. Attempting "
-
"defaults...\n", execute_command);
-
}
-
run_init_process("/sbin/init"); // if the attempts above failed, try these four paths in order; if no init is found, a shell is run as init
-
run_init_process("/etc/init");
-
run_init_process("/bin/init");
-
run_init_process("/bin/sh");
-
-
panic("No init found. Try passing init= option to kernel. "
-
"See Linux Documentation/init.txt for guidance.");
-
}
-
/*
 * run_init_process() - try to execute one candidate init program.
 *
 * Stores init_filename in argv_init[0] and calls kernel_execve() with
 * argv_init and envp_init. The result of kernel_execve() is not checked.
 *
 * @init_filename: path of the program to execute.
 */
static void run_init_process(const char *init_filename)
-
{
-
argv_init[0] = init_filename;
-
kernel_execve(init_filename, argv_init, envp_init); // author's note: no coming back to kernel mode from here -- presumably only on success, since init_post() has "Failed to execute" handling after this call
-
}
3 总结
简单来说,
1) start_kernel是第一个与体系结构无关的内核初始化函数,所有类型的cpu都要执行相同的步骤;执行start_kernel的这条执行流本身就是0号进程。0号进程最终会进入死循环,处理空闲事件。
2) 1号进程由0号进程创建,最终会进入用户态进行相关初始化工作,是第一个用户态进程。
作者:胡川
原创作品转载请注明出处
《Linux内核分析》MOOC课程
参考:
《创建一号进程》 http://blog.csdn.net/yunsongice/article/details/6171336
《0号进程与1号进程的区别》 http://blog.csdn.net/yjzl1911/article/details/5613569
《start_kernel》 http://www.cnitblog.com/zouzheng/archive/2008/08/04/47574.html
《init_post》函数
阅读(643) | 评论(0) | 转发(0) |