我们知道所有要包含进内核的目录都由顶层makefile来调控,所以体系结构相关的目录(即arch/arm)肯定会被包含进顶层makefile中,果然我们在顶层makefile中发现了这么一句:
对于 607 vmlinux-main := $(core-y) $(libs-y) $(drivers-y) $(net-y)我们在顶层目录找到了相应的对应关系:
这两行就决定了makefile文件中的一些编译规则,我们知道落实到具体的编译要到底层makefile,所以我们来到了:arch/arm/mach-s3c2410/Makefile中,并找到了下面一句话:obj-$(CONFIG_CPU_S3C2410) += s3c2410.o
#include
#include
#if (PHYS_OFFSET & 0x001fffff)
#error "PHYS_OFFSET must be at an even 2MiB boundary!"
#endif
#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET)
#define KERNEL_RAM_PADDR (PHYS_OFFSET + TEXT_OFFSET)
/*
* swapper_pg_dir is the virtual address of the initial page table.
* We place the page tables 16K below KERNEL_RAM_VADDR. Therefore, we must
* make sure that KERNEL_RAM_VADDR is correctly set. Currently, we expect
* the least significant 16 bits to be 0x8000, but we could probably
* relax this restriction to KERNEL_RAM_VADDR >= PAGE_OFFSET + 0x4000.
*/
#if (KERNEL_RAM_VADDR & 0xffff) != 0x8000 /*内核地址必须是32字节对齐*/
#error KERNEL_RAM_VADDR must start at 0xXXXX8000
#endif
.globl swapper_pg_dir @定义一个全局变量
.equ swapper_pg_dir, KERNEL_RAM_VADDR - 0x4000 @页表的起始地址
.macro pgtbl, rd @宏定义,建立页表时有用
ldr \rd, =(KERNEL_RAM_PADDR - 0x4000)
.endm
#ifdef CONFIG_XIP_KERNEL
#define KERNEL_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
#define KERNEL_END _edata_loc
#else
#define KERNEL_START KERNEL_RAM_VADDR
#define KERNEL_END _end @内核镜像结束地址(虚拟地址,在vmlinux.lds.S中定义)
#endif
/*
* Kernel startup entry point.
* ---------------------------
*
* This is normally called from the decompressor code. The requirements
* are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0,
* r1 = machine nr.这里告诉你了机器码在r1里面
*
* This code is mostly position independent, so if you link the kernel at
* 0xc0008000, you call this at __pa(0xc0008000).
*
* See linux/arch/arm/tools/mach-types for the complete list of machine
* numbers for r1.
*
* We're trying to keep crap to a minimum; DO NOT add any machine specific
* crap here - that's what the boot loader (or in extreme, well justified
* circumstances, zImage) is for.
*/
.section ".text.head", "ax" @定义一个.text.head段,段的属性a是允许段,x可执行v
.type stext, %function
ENTRY(stext) /*入口点*/
msr cpsr_c, #PSR_F_BIT | PSR_I_BIT | SVC_MODE @ ensure svc mode and irqs disabled
mrc p15, 0, r9, c0, c0 @ 通过协处理器CP15的寄存器c0来get processor id
bl __lookup_processor_type @ r5=procinfo r9=cpuid,判断内核是否支持处理器CPU
movs r10, r5 @ invalid processor (r5=0)?
beq __error_p @ yes, error 'p'
bl __lookup_machine_type @ r5=machinfo,通
过机器码判断是否支持该单板,还记得吗?我们在u-boot里面提到过机器码。关于这个
@函数请看注释1
movs r8, r5 @ invalid machine (r5=0)?
beq __error_a @ yes, error 'a'
bl __create_page_tables//创建页表,注释2里有稍微详细一点的解释,更详细的解释在MMU分析里来说
/*
* The following calls CPU specific code in a position independent
* manner. See arch/arm/mm/proc-*.S for details. r10 = base of
* xxx_proc_info structure selected by __lookup_machine_type
* above. On return, the CPU will be ready for the MMU to be
* turned on, and r0 will hold the CPU control register value.
*/
ldr r13, __switch_data @ address to jump to after mmu has been enabled,当mmu使能后将调到这个地址去,详见注释3
adr lr, __enable_mmu @ return (PIC) address
add pc, r10, #PROCINFO_INITFUNC
#if defined(CONFIG_SMP)
.type secondary_startup, #function
ENTRY(secondary_startup)
/*
* Common entry point for secondary CPUs.
*
* Ensure that we're in SVC mode, and IRQs are disabled. Lookup
* the processor type - there is no need to check the machine type
* as it has already been validated by the primary processor.
*/
msr cpsr_c, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
mrc p15, 0, r9, c0, c0 @ get processor id
bl __lookup_processor_type
movs r10, r5 @ invalid processor?
moveq r0, #'p' @ yes, error 'p'
beq __error
/*
* Use the page tables supplied from __cpu_up.
*/
adr r4, __secondary_data
ldmia r4, {r5, r7, r13} @ address to jump to after
sub r4, r4, r5 @ mmu has been enabled
ldr r4, [r7, r4] @ get secondary_data.pgdir
adr lr, __enable_mmu @ return address
add pc, r10, #PROCINFO_INITFUNC @ initialise processor
@ (return control reg)
/*
* r6 = &secondary_data
*/
ENTRY(__secondary_switched)
ldr sp, [r7, #4] @ get secondary_data.stack
mov fp, #0
b secondary_start_kernel
.type __secondary_data, %object
__secondary_data:
.long .
.long secondary_data
.long __secondary_switched
#endif /* defined(CONFIG_SMP) */
/*
* Setup common bits before finally enabling the MMU. Essentially
* this is just loading the page table pointer and domain access
* registers.
*/
.type __enable_mmu, %function
__enable_mmu:
#ifdef CONFIG_ALIGNMENT_TRAP
orr r0, r0, #CR_A
#else
bic r0, r0, #CR_A
#endif
#ifdef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CR_C
#endif
#ifdef CONFIG_CPU_BPREDICT_DISABLE
bic r0, r0, #CR_Z
#endif
#ifdef CONFIG_CPU_ICACHE_DISABLE
bic r0, r0, #CR_I
#endif
mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
domain_val(DOMAIN_IO, DOMAIN_CLIENT))
mcr p15, 0, r5, c3, c0, 0 @ load domain access register
mcr p15, 0, r4, c2, c0, 0 @ load page table pointer
b __turn_mmu_on
/*
* Enable the MMU. This completely changes the structure of the visible
* memory space. You will not be able to trace execution through this.
* If you have an enquiry about this, *please* check the linux-arm-kernel
* mailing list archives BEFORE sending another post to the list.
*
* r0 = cp#15 control register
* r13 = *virtual* address to jump to upon completion
*
* other registers depend on the function called upon completion
*/
.align 5
.type __turn_mmu_on, %function
__turn_mmu_on:
mov r0, r0
mcr p15, 0, r0, c1, c0, 0 @ write control reg
mrc p15, 0, r3, c0, c0, 0 @ read id reg
mov r3, r3
mov r3, r3
mov pc, r13
/*
* Setup the initial page tables. We only setup the barest
* amount which are required to get the kernel running, which
* generally means mapping in the kernel code.
*
* r8 = machinfo
* r9 = cpuid
* r10 = procinfo
*
* Returns:
* r0, r3, r6, r7 corrupted
* r4 = physical page table address
*/
.type __create_page_tables, %function
__create_page_tables:
pgtbl r4 @ page table address
/*
* Clear the 16K level 1 swapper page table
*/
mov r0, r4
mov r3, #0
add r6, r0, #0x4000
1: str r3, [r0], #4
str r3, [r0], #4
str r3, [r0], #4
str r3, [r0], #4
teq r0, r6
bne 1b
ldr r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags
/*
* Create identity mapping for first MB of kernel to
* cater for the MMU enable. This identity mapping
* will be removed by paging_init(). We use our current program
* counter to determine corresponding section base address.
*/
mov r6, pc, lsr #20 @ start of kernel section
orr r3, r7, r6, lsl #20 @ flags + kernel base
str r3, [r4, r6, lsl #2] @ identity mapping
/*
* Now setup the pagetables for our kernel direct
* mapped region.
*/
add r0, r4, #(KERNEL_START & 0xff000000) >> 18
str r3, [r0, #(KERNEL_START & 0x00f00000) >> 18]!
ldr r6, =(KERNEL_END - 1)
add r0, r0, #4
add r6, r4, r6, lsr #18
1: cmp r0, r6
add r3, r3, #1 << 20
strls r3, [r0], #4
bls 1b
#ifdef CONFIG_XIP_KERNEL
/*
* Map some ram to cover our .data and .bss areas.
*/
orr r3, r7, #(KERNEL_RAM_PADDR & 0xff000000)
.if (KERNEL_RAM_PADDR & 0x00f00000)
orr r3, r3, #(KERNEL_RAM_PADDR & 0x00f00000)
.endif
add r0, r4, #(KERNEL_RAM_VADDR & 0xff000000) >> 18
str r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> 18]!
ldr r6, =(_end - 1)
add r0, r0, #4
add r6, r4, r6, lsr #18
1: cmp r0, r6
add r3, r3, #1 << 20
strls r3, [r0], #4
bls 1b
#endif
/*
* Then map first 1MB of ram in case it contains our boot params.
*/
add r0, r4, #PAGE_OFFSET >> 18
orr r6, r7, #(PHYS_OFFSET & 0xff000000)
.if (PHYS_OFFSET & 0x00f00000)
orr r6, r6, #(PHYS_OFFSET & 0x00f00000)
.endif
str r6, [r0]
#ifdef CONFIG_DEBUG_LL
ldr r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
/*
* Map in IO space for serial debugging.
* This allows debug messages to be output
* via a serial console before paging_init.
*/
ldr r3, [r8, #MACHINFO_PGOFFIO]
add r0, r4, r3
rsb r3, r3, #0x4000 @ PTRS_PER_PGD*sizeof(long)
cmp r3, #0x0800 @ limit to 512MB
movhi r3, #0x0800
add r6, r0, r3
ldr r3, [r8, #MACHINFO_PHYSIO]
orr r3, r3, r7
1: str r3, [r0], #4
add r3, r3, #1 << 20
teq r0, r6
bne 1b
#if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)
/*
* If we're using the NetWinder or CATS, we also need to map
* in the 16550-type serial port for the debug messages
*/
add r0, r4, #0xff000000 >> 18
orr r3, r7, #0x7c000000
str r3, [r0]
#endif
#ifdef CONFIG_ARCH_RPC
/*
* Map in screen at 0x02000000 & SCREEN2_BASE
* Similar reasons here - for debug. This is
* only for Acorn RiscPC architectures.
*/
add r0, r4, #0x02000000 >> 18
orr r3, r7, #0x02000000
str r3, [r0]
add r0, r4, #0xd8000000 >> 18
str r3, [r0]
#endif
#endif
mov pc, lr
.ltorg
#include "head-common.S"
注释1:这个注释比较长,要静下心来看
3: .long .
.long __arch_info_begin
.long __arch_info_end
..........................................................................................
..........................................................................................
__lookup_machine_type:
adr r3, 3b @r3存放这标号3处的物理地址
ldmia r3, {r4, r5, r6} @r4存放标号3处的虚拟地址,r5=__arch_info_begin的虚拟地址,r6=long __arch_info_end的虚拟地址
sub r3, r3, r4 @ r3=物理地址与虚拟地址间的偏移量
add r5, r5, r3 @ r5=__arch_info_begin的物理地址
add r6, r6, r3 @ r6=__arch_info_end的物理地址
1: ldr r3, [r5, #MACHINFO_TYPE] @ 获取机器码
teq r3, r1 @ 比较
beq 2f @ 匹配了就返回,好的那么我们就返回head.S了,拜拜!!!
add r5, r5, #SIZEOF_MACHINE_DESC @ next machine_desc
cmp r5, r6 //判断是不是到了最后一个machine_desc
blo 1b //不是就跳回标号1
mov r5, #0 @ unknown machine
2: mov pc, lr
上面为什么要经虚拟地址改变成物理地址呢?这是因为MMU还没有启动,不能使用虚拟地址!关于MMU的问题我们还会再分析。
上面提到了:
__arch_info_begin
__arch_info_end
这是什么东西呢?我们在连接文件arch/arm/kernel/vmlinux.lds中发现了它的源头:
__arch_info_begin = .;
*(.arch.info.init)
__arch_info_end = .;
我们看到在__arch_info_begin与__arch_info_end之间存放的是*(.arch.info.init),那么*(.arch.info.init)又是什么东西呢?我们收索内核文件,最后在include/asm-arm/mach中发现了arch.info.init的蛛丝马迹:
#define MACHINE_START(_type,_name) \
static const struct machine_desc __mach_desc_##_type \
__used \
__attribute__((__section__(".arch.info.init"))) = { \
.nr = MACH_TYPE_##_type, \
.name = _name,
#define MACHINE_END \
};
原来.arch.info.init和其它信息一起被定义在了一个宏MACHINE_START里面,那么是谁在使用这个宏呢?我们收索内核文件,发现在arch/arm/mach-s3c2410/mach-smdk2410.c里使用了这个宏,我们还是贴出相关代码:
MACHINE_START(SMDK2410, "SMDK2410")
.phys_io = S3C2410_PA_UART,
.io_pg_offst = (((u32)S3C24XX_VA_UART) >> 18) & 0xfffc,
.boot_params = S3C2410_SDRAM_PA + 0x100,
.map_io = smdk2410_map_io,
.init_irq = s3c24xx_init_irq,
.init_machine = smdk2410_init,
.timer = &s3c24xx_timer,
MACHINE_END
我们将宏展开,得到:
static const struct machine_desc __mach_desc_SMDK2410
__used
__attribute__((__section__(".arch.info.init"))) = {
.nr = MACH_TYPE_SMDK2410 ,
.name = "SMDK2410" ,
. phys_io = S3C2410_PA_UART,
.io_pg_offst = (((u32)S3C24XX_VA_UART) >> 18) & 0xfffc,
.boot_params = S3C2410_SDRAM_PA + 0x100,
.map_io = smdk2410_map_io,
.init_irq = s3c24xx_init_irq,
.init_machine = smdk2410_init,
.timer = &s3c24xx_timer,
};
那好现在我们的任务就是来分析上面这段代码,这段代码不就是定义了一个struct machine_desc结构体吗,我们找到这个结构体的定义:
struct machine_desc {
unsigned int nr; /* architecture number,这个就是机器码了 */
unsigned int phys_io; /* start of physical io */
unsigned int io_pg_offst; /* byte offset for io
* page tabe entry */
const char *name; /* architecture name,体系结构名字*/
unsigned long boot_params; /* tagged list */
unsigned int video_start; /* start of video RAM */
unsigned int video_end; /* end of video RAM */
unsigned int reserve_lp0 :1; /* never has lp0 */
unsigned int reserve_lp1 :1; /* never has lp1 */
unsigned int reserve_lp2 :1; /* never has lp2 */
unsigned int soft_reboot :1; /* soft reboot */
void (*fixup)(struct machine_desc *,
struct tag *, char **,
struct meminfo *);
void (*map_io)(void);/* IO mapping function */
void (*init_irq)(void);
struct sys_timer *timer; /* system tick timer */
void (*init_machine)(void);
};
一个linux内核会在它所支持的单板所对应的单板文件里定义一个struct machine_desc 结构体,当然是用宏定义的,不信的话可以查一下其它的单板文件。在这个结构体里就会对机器码赋值,比如上面例子里nr= MACH_TYPE_SMDK2410,那么这个 MACH_TYPE_SMDK2410到底是多少呢?这在arch/arm/tools/mach-types里有定义,比如:
smdk2410 ARCH_SMDK2410 SMDK2410 193
这些机器码的值就和u-boot里面传进来的参数(机器码)相对应,u-boot传进来的机器码放在了r1寄存器里。
而 __attribute__((__section__(".arch.info.init"))) 就是对这个结构体定义了一个属性,将它的段强制转化为.arch.info.init,这样就可以放在__arch_info_begin与__arch_info_end之间了。
现在我们就很清楚__lookup_machine_type要做的事情了,就是从__arch_info_begin与__arch_info_end之间取出struct machine_desc结构体,并将其机器码与r1寄存器值(就是u-boot传进来的机器码)相比较,如果机器码相同的话,就说明内核支持相应的单板,如果不相同就取下一个struct machine_desc结构体,如果比较完还没有发现匹配项,就说明内核不支持相应的单板,出错返回。
好的,现在可以回去了!回哪儿去?呵呵,别忘了我们的任务是分析head.S,这里只是head.S中的一小段代码,任重道远呢!
注释2:问什么要创建页表呢?
我们知道在连接脚本arch/arm/kernel/vmlinux.lds.S中定义了内核的连接地址是:. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)或者
. = PAGE_OFFSET + TEXT_OFFSET这是虚拟地址,并不对应真正的内存地址。真正的内存地址是从0x30000000开始的,因此要创建页表,来进行虚拟地址与物理地址的映射。
注释3:MMU启动后跳转到哪里呢?
MMU启动之后会根据地址__switch_data跳转到arch/arm/kernel中__switch_data标号处。然后进行了没几步就通过
b start_kernel
跳转到start_kernel开始执行。我们找到这个函数,发现是个c函数,有点兴奋!
关于start_kernel的代码我们就不全部贴出来了,只是做一些必要的分析,先简述一下它的功能:
printk(linux_banner);输出内核版本信息
setup_arch(&command_line);设置与体系结构相关的环境,下面将着重分析
setup_command_line(command_line);
console_init();初始化控制台
rest_init();启动init进程,下面还会分析
我们着重分析一下setup_arch(&command_line)和setup_command_line(command_line):
还记不记得u-boot启动内核的命令:theKernel (0, bd->bi_arch_number, bd->bi_boot_params)
这里面有两个参数 bd->bi_arch_number和bd->bi_boot_params,前者代表机器码,我们在前面已经对它进行了处理,后者是其它一些参数,我们还没有处理,其实setup_arch(&command_line)和setup_command_line(command_line)就是来处理这些参数的,我们现在来分析一下:
先贴出setup_arch的代码
void __init setup_arch(char **cmdline_p)
{
struct tag *tags = (struct tag *)&init_tags;
struct machine_desc *mdesc; //详见注释1
char *from = default_command_line;//内核启动时没有传入命令行参数的话会采用默认的命令行参数
setup_processor();
mdesc = setup_machine(machine_arch_type);
machine_name = mdesc->name;//单板的名字
if (mdesc->soft_reboot)
reboot_setup("s");
if (mdesc->boot_params)
tags = phys_to_virt(mdesc->boot_params);
/*
* If we have the old style parameters, convert them to
* a tag list.
*/
if (tags->hdr.tag != ATAG_CORE)
convert_to_tag_list(tags);
if (tags->hdr.tag != ATAG_CORE)
tags = (struct tag *)&init_tags;
if (mdesc->fixup)
mdesc->fixup(mdesc, tags, &from, &meminfo);
if (tags->hdr.tag == ATAG_CORE) {
if (meminfo.nr_banks != 0)
squash_mem_tags(tags);
parse_tags(tags);
}
init_mm.start_code = (unsigned long) &_text;
init_mm.end_code = (unsigned long) &_etext;
init_mm.end_data = (unsigned long) &_edata;
init_mm.brk = (unsigned long) &_end;
memcpy(boot_command_line, from, COMMAND_LINE_SIZE);
boot_command_line[COMMAND_LINE_SIZE-1] = '\0';
parse_cmdline(cmdline_p, from);//解析命令行参数
paging_init(&meminfo, mdesc);
request_standard_resources(&meminfo, mdesc);
#ifdef CONFIG_SMP
smp_init_cpus();
#endif
cpu_init();
/*
* Set up various architecture-specific pointers
*/
init_arch_irq = mdesc->init_irq;
system_timer = mdesc->timer;
init_machine = mdesc->init_machine;
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
#endif
#endif
}
注释1:
这个结构体没忘记吧,由u-boot传进来的机器码就可以找到相对应的结构体。还记的我们上面分析的东西吗?这行代码还记不记的:.boot_params = S3C2410_SDRAM_PA + 0x100,其中S3C2410_SDRAM_PA=0x30000000,那么boot_params=0x30000100,再想想我们在u-boot分析里分析出来的启动参数放在哪里!想起来了吧,就是放在0x30000100地址开始处。这样就找到了u-boot传进来的参数,接下来就是对其进行处理
再来贴出的代码:
static void __init setup_command_line(char *command_line)
{
saved_command_line = alloc_bootmem(strlen (boot_command_line)+1);
static_command_line = alloc_bootmem(strlen (command_line)+1);
strcpy (saved_command_line, boot_command_line);
strcpy (static_command_line, command_line);
}
这个函数很简单,只是将相应的启动参数保存
我们再来分析一下rest_init()
首先要明确一下,u-boot的最终目的是为了启动内核,那么内核启动的目的又是什么呢?那就是运行应用程序,而应用程序是挂载在跟文件系统上的。其实rest_init()就是来启动应用程序的,好的,来看代码:
rest_init()里面调用kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND),而这个函数的含义可以理解为调用函数kernel_init()----->prepare_namespace()------>mount_root(),
这个函数就用来挂接跟文件系统。挂载好跟文件系统后会回到kernel_init()函数里调用函数:init_post(),在init_post()
函数里我们贴出一些代码来说明它的作用:
sys_open((const char __user *) "/dev/console", O_RDWR, 0) //打开控制台
run_init_process("/sbin/init");//执行应用程序
run_init_process("/etc/init");
run_init_process("/bin/init");
run_init_process("/bin/sh");
上面prepare_namespace()调用mount_root()来挂载跟文件系统,其实在挂载文件系统之前肯定还要根据具体的参数来确定挂载哪些文件系统,所以我们还是要在来分析一下prepare_namespace()函数:
我们贴出init/do_mounts.c文件里的下面几行代码
static int __init root_dev_setup(char *line)
{
strlcpy(saved_root_name, line, sizeof(saved_root_name));
return 1;
}
__setup("root=", root_dev_setup);
__setup的定义在include/linux/init.h中:
#define __setup(str, fn)
__setup_param(str, fn, fn, 0)
#define __setup_param(str, unique_id, fn, early)
static char __setup_str_##unique_id[] __initdata = str;
static struct obs_kernel_param __setup_##unique_id
__attribute_used__
__attribute__((__section__(".init.setup")))
__attribute__((aligned((sizeof(long)))))
= { __setup_str_##unique_id, fn, early }
以上就是定义了一个结构体,这个结构体里有一个字符串,一个函数,还有一个early,且这个结构体的段属性为.init.setup,查看连接脚本发现如下代码:
__setup_start = .;
*(.init.setup)
__setup_end = .;
说明所有的结构体被放在__setup_start 到__setup_end 之间。其实在解析命令行参数是,就会根据root=?来调用相应的处理程序!
好的,就分析到这里了,最后一点分析的比较酱油,今后估计在移植的时候会进一步分析