1. 内核的生成过程
1.1 vmlinux的生成过程
-
ld -m elf_i386 -T./arch/i386/vmlinux.lds -e stext arch/i386/kernel/head.o arch/i386/kernel/init_task.o init/main.o init/version.o \
-
--start-group \
-
arch/i386/kernel/kernel.o arch/i386/mm/mm.o kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o \
-
drivers/char/char.o drivers/block/block.o drivers/misc/misc.o drivers/net/net.o drivers/media/media.o drivers/char/agp/agp.o drivers/char/drm/drm.o drivers/ide/idedriver.o drivers/scsi/scsidrv.o drivers/cdrom/driver.o drivers/sound/sounddrivers.o drivers/pci/driver.o drivers/pcmcia/pcmcia.o drivers/net/pcmcia/pcmcia_net.o drivers/video/video.o drivers/usb/usbdrv.o \
-
net/network.o \
-
/work/os/linux-2.4.12/arch/i386/lib/lib.a /work/os/linux-2.4.12/lib/lib.a /work/os/linux-2.4.12/arch/i386/lib/lib.a \
-
--end-group \
-
-o vmlinux
1.2 链接脚本./arch/i386/vmlinux.lds
-
/* ld script to make i386 Linux kernel
-
* Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
-
*/
-
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-
OUTPUT_ARCH(i386)
-
ENTRY(_start)
-
SECTIONS
-
{
-
. = 0xC0000000 + 0x100000;
-
_text = .; /* Text and read-only data */
-
.text : {
-
*(.text)
-
*(.fixup)
-
*(.gnu.warning)
-
} = 0x9090
-
.text.lock : { *(.text.lock) } /* out-of-line lock text */
-
-
_etext = .; /* End of text section */
-
-
.rodata : { *(.rodata) *(.rodata.*) }
-
.kstrtab : { *(.kstrtab) }
-
-
. = ALIGN(16); /* Exception table */
-
__start___ex_table = .;
-
__ex_table : { *(__ex_table) }
-
__stop___ex_table = .;
-
-
__start___ksymtab = .; /* Kernel symbol table */
-
__ksymtab : { *(__ksymtab) }
-
__stop___ksymtab = .;
-
-
.data : { /* Data */
-
*(.data)
-
CONSTRUCTORS
-
}
-
-
_edata = .; /* End of data section */
-
-
. = ALIGN(8192); /* init_task */
-
.data.init_task : { *(.data.init_task) }
-
-
. = ALIGN(4096); /* Init code and data */
-
__init_begin = .;
-
.text.init : { *(.text.init) }
-
.data.init : { *(.data.init) }
-
. = ALIGN(16);
-
__setup_start = .;
-
.setup.init : { *(.setup.init) }
-
__setup_end = .;
-
__initcall_start = .;
-
.initcall.init : { *(.initcall.init) }
-
__initcall_end = .;
-
. = ALIGN(4096);
-
__init_end = .;
-
-
. = ALIGN(4096);
-
.data.page_aligned : { *(.data.idt) }
-
-
. = ALIGN(32);
-
.data.cacheline_aligned : { *(.data.cacheline_aligned) }
-
-
__bss_start = .; /* BSS */
-
.bss : {
-
*(.bss)
-
}
-
_end = . ;
-
-
/* Sections to be discarded */
-
/DISCARD/ : {
-
*(.text.exit)
-
*(.data.exit)
-
*(.exitcall.exit)
-
}
-
-
/* Stabs debugging sections. */
-
.stab 0 : { *(.stab) }
-
.stabstr 0 : { *(.stabstr) }
-
.stab.excl 0 : { *(.stab.excl) }
-
.stab.exclstr 0 : { *(.stab.exclstr) }
-
.stab.index 0 : { *(.stab.index) }
-
.stab.indexstr 0 : { *(.stab.indexstr) }
-
.comment 0 : { *(.comment) }
-
}
linux-2.4.12/arch/i386/kernel/head.S
-
/*
-
* linux/arch/i386/head.S -- the 32-bit startup code.
-
*
-
* Copyright (C) 1991, 1992 Linus Torvalds
-
*
-
* Enhanced CPU detection and feature setting code by Mike Jagdis
-
* and Martin Mares, November 1997.
-
*/
-
-
.text
-
#include <linux/config.h>
-
#include <linux/threads.h>
-
#include <linux/linkage.h>
-
#include <asm/segment.h>
-
#include <asm/page.h>
-
#include <asm/pgtable.h>
-
#include <asm/desc.h>
-
-
#define OLD_CL_MAGIC_ADDR 0x90020
-
#define OLD_CL_MAGIC 0xA33F
-
#define OLD_CL_BASE_ADDR 0x90000
-
#define OLD_CL_OFFSET 0x90022
-
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
-
-
/*
-
* References to members of the boot_cpu_data structure.
-
*/
-
-
#define CPU_PARAMS SYMBOL_NAME(boot_cpu_data)
-
#define X86 CPU_PARAMS+0
-
#define X86_VENDOR CPU_PARAMS+1
-
#define X86_MODEL CPU_PARAMS+2
-
#define X86_MASK CPU_PARAMS+3
-
#define X86_HARD_MATH CPU_PARAMS+6
-
#define X86_CPUID CPU_PARAMS+8
-
#define X86_CAPABILITY CPU_PARAMS+12
-
#define X86_VENDOR_ID CPU_PARAMS+28
-
-
startup_32: -->看head.S时一定要记住,head.S被链接在了0xc0100000
-
cld -->0010:00100000 (unk. ctxt): cld ; fc
-
//ds=es=fs=gs=内核数据段(__KERNEL_DS=0x18)
-
movl $(__KERNEL_DS),%eax
-
movl %eax,%ds
-
movl %eax,%es
-
movl %eax,%fs
-
movl %eax,%gs
-
#ifdef CONFIG_SMP
-
orw %bx,%bx -->测试bx是否为0:引导CPU(BSP)进入时bx=0;其它CPU(AP)经trampoline.S进入时bx!=0,AP不需要重新初始化页表
-
jz 1f -->这儿跳过
-
-
1:
-
#endif
-
//在include/asm-i386/page.h中定义了PAGE_OFFSET=0xC0000000
-
//pg0=0xC0102000,empty_zero_page=0xC0104000
-
//下面一段的作用是在物理地址0x102000-0x104000建立两个页表(pg0、pg1),映射了[0-8M]的内存空间
-
movl $pg0-__PAGE_OFFSET,%edi
-
movl $007,%eax -->0x07的意思是PRESENT+RW+USER(存在+可读写+任意用户都可访问)
-
2: stosl
-
add $0x1000,%eax
-
cmp $empty_zero_page-__PAGE_OFFSET,%edi
-
jne 2b
-
-
//swapper_pg_dir=c0101000,设置页目录表基地址cr3=0x101000,并开启分页机制
-
//这儿注意swapper_pg_dir可不是空的,是预先设置好了页目录表项的值0x102007与0x103007
-
3:
-
movl $swapper_pg_dir-__PAGE_OFFSET,%eax
-
movl %eax,%cr3 -->cr3=0x101000
-
movl %cr0,%eax -->将cr0最高位设1,打开分页
-
orl $0x80000000,%eax
-
movl %eax,%cr0
-
jmp 1f -->刷新流水线
-
1:
-
movl $1f,%eax
-
jmp *%eax -->0010:0010005c (unk. ctxt): jmp eax -->还是用的物理地址
-
1: -->下面有详细分析虚拟地址到物理地址的变换过程
-
lss stack_start,%esp -->0010:c010005e (unk. ctxt): lss esp, ds:0xc0100224 -->从此开始使用虚拟地址了
-
-
#ifdef CONFIG_SMP
-
orw %bx,%bx
-
jz 1f -->这儿跳过
-
pushl $0
-
popfl
-
jmp checkCPUtype
-
1:
-
#endif CONFIG_SMP
-
-
/*
-
* Clear BSS first so that there are no surprises...
-
* No need to cld as DF is already clear from cld above...
-
*/
-
//清BSS
-
xorl %eax,%eax
-
movl $ SYMBOL_NAME(__bss_start),%edi
-
movl $ SYMBOL_NAME(_end),%ecx
-
subl %edi,%ecx
-
rep
-
stosb
-
-
//将中断处理函数全部设为ignore_int
-
call setup_idt
-
/*
-
* Initialize eflags. Some BIOS's leave bits like NT set. This would
-
* confuse the debugger if this code is traced.
-
* XXX - best to initialize before switching to protected mode.
-
*/
-
pushl $0
-
popfl
-
/*
-
* Copy bootup parameters out of the way. First 2kB of
-
* _empty_zero_page is for boot parameters, second 2kB
-
* is for the command line.
-
*
-
* Note: %esi still has the pointer to the real-mode data.
-
*/
-
//将esi指向的实模式数据(0x90000处)的前2K字节(512个双字)复制到empty_zero_page(虚拟地址0xc0104000,物理地址0x104000),esi是从compressed/head.S进来时带入的,esi=0x00090000
-
//然后把empty_zero_page的后2K字节清零,再把command_line复制到后2K:即前2K字节是boot的参数,后2K字节是command_line
-
movl $ SYMBOL_NAME(empty_zero_page),%edi
-
movl $512,%ecx
-
cld
-
rep
-
movsl
-
xorl %eax,%eax
-
movl $512,%ecx
-
rep -->rep stosd dword ptr es:[edi], eax (与下一行stosl合起来,把empty_zero_page的后2K字节清零)
-
stosl
-
movl SYMBOL_NAME(empty_zero_page)+NEW_CL_POINTER,%esi
-
andl %esi,%esi
-
jnz 2f # New command line protocol
-
cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
-
jne 1f
-
movzwl OLD_CL_OFFSET,%esi
-
addl $(OLD_CL_BASE_ADDR),%esi
-
2:
-
movl $ SYMBOL_NAME(empty_zero_page)+2048,%edi
-
movl $512,%ecx
-
rep
-
movsl
-
1:
-
checkCPUtype:
-
//中间省略这个checkCPUtype的过程
-
-
call check_x87
-
incb ready
-
lgdt gdt_descr //重新加载gdt
-
lidt idt_descr //重新加载idt
-
ljmp $(__KERNEL_CS),$1f //使gdt生效
-
1: movl $(__KERNEL_DS),%eax //修改了gdt之后重新初始化各个段寄存器# reload all the segment registers
-
movl %eax,%ds # after changing gdt.
-
movl %eax,%es
-
movl %eax,%fs
-
movl %eax,%gs
-
#ifdef CONFIG_SMP
-
movl $(__KERNEL_DS), %eax -->重新设置ss,搞不懂为什么不能用lss了?
-
movl %eax,%ss # Reload the stack pointer (segment only)
-
#else
-
lss stack_start,%esp # Load processor stack
-
#endif
-
xorl %eax,%eax
-
lldt %ax -->ldt_base=0x00000000
-
cld # gcc2 wants the direction flag cleared at all times
-
#ifdef CONFIG_SMP
-
movb ready, %cl
-
cmpb $1,%cl
-
je 1f -->跳到1f # the first CPU calls start_kernel all other CPUs call initialize_secondary
-
call SYMBOL_NAME(initialize_secondary) -->不执行这个
-
jmp L6
-
1:
-
#endif
-
call SYMBOL_NAME(start_kernel) -->终于到C语言的start_kernel了
-
L6:
-
jmp L6 # main should never return here, but
-
# just in case, we know what happens.
-
-
ready: .byte 0
-
-
/*
-
* setup_idt
-
*
-
* sets up a idt with 256 entries pointing to
-
* ignore_int, interrupt gates. It doesn't actually load
-
* idt - that can be done only after paging has been enabled
-
* and the kernel moved to PAGE_OFFSET. Interrupts
-
* are enabled elsewhere, when we can be relatively
-
* sure everything is ok.
-
*/
-
setup_idt:
-
lea ignore_int,%edx
-
movl $(__KERNEL_CS << 16),%eax
-
movw %dx,%ax /* selector = 0x0010 = cs */
-
movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
-
-
lea SYMBOL_NAME(idt_table),%edi
-
mov $256,%ecx
-
rp_sidt:
-
movl %eax,(%edi)
-
movl %edx,4(%edi)
-
addl $8,%edi
-
dec %ecx
-
jne rp_sidt
-
ret
-
//lss 后esp: 0xc027a000
-
ENTRY(stack_start) -->c0265a80 D init_tasks
-
.long SYMBOL_NAME(init_task_union)+8192 -->c0278000 D init_task_union -->c027a000 A __init_begin
-
.long __KERNEL_DS
-
-
/* This is the default interrupt "handler" :-) */
-
int_msg:
-
.asciz "Unknown interrupt\n"
-
ALIGN
-
ignore_int:
-
cld
-
pushl %eax
-
pushl %ecx
-
pushl %edx
-
pushl %es
-
pushl %ds
-
movl $(__KERNEL_DS),%eax
-
movl %eax,%ds
-
movl %eax,%es
-
pushl $int_msg
-
call SYMBOL_NAME(printk)
-
popl %eax
-
popl %ds
-
popl %es
-
popl %edx
-
popl %ecx
-
popl %eax
-
iret
-
-
/*
-
* The interrupt descriptor table has room for 256 idt's,
-
* the global descriptor table is dependent on the number
-
* of tasks we can have..
-
*/
-
#define IDT_ENTRIES 256
-
#define GDT_ENTRIES (__TSS(NR_CPUS))
-
-
-
.globl SYMBOL_NAME(idt)
-
.globl SYMBOL_NAME(gdt)
-
-
ALIGN
-
.word 0
-
idt_descr:
-
.word IDT_ENTRIES*8-1 # idt contains 256 entries
-
SYMBOL_NAME(idt):
-
.long SYMBOL_NAME(idt_table)
-
-
.word 0
-
gdt_descr:
-
.word GDT_ENTRIES*8-1
-
SYMBOL_NAME(gdt):
-
.long SYMBOL_NAME(gdt_table)
-
-
/*
-
* This is initialized to create an identity-mapping at 0-8M (for bootup
-
* purposes) and another mapping of the 0-8M area at virtual address
-
* PAGE_OFFSET.
-
*/
-
//页目录表size=4096,有1024项=768+256
-
.org 0x1000
-
ENTRY(swapper_pg_dir)
-
.long 0x00102007
-
.long 0x00103007
-
.fill BOOT_USER_PGD_PTRS-2,4,0
-
/* default: 766 entries */
-
.long 0x00102007 -->页目录表的第768项就是0xC0000000开始的虚拟地址
-
.long 0x00103007
-
/* default: 254 entries */
-
.fill BOOT_KERNEL_PGD_PTRS-2,4,0
-
-
//页表size=4096*2,1个页表能映射4M,2个页表映射8M内存,以上代码看出是映射了物理地址[0-8M]的内存
-
.org 0x2000
-
ENTRY(pg0)
-
-
.org 0x3000
-
ENTRY(pg1)
-
-
/*
-
* empty_zero_page must immediately follow the page tables ! (The
-
* initialization loop counts until empty_zero_page)
-
*/
-
-
.org 0x4000
-
ENTRY(empty_zero_page)
-
-
.org 0x5000
-
-
/*
-
* Real beginning of normal "text" segment
-
*/
-
ENTRY(stext)
-
ENTRY(_stext)
-
-
/*
-
* This starts the data section. Note that the above is all
-
* in the text section because it has alignment requirements
-
* that we cannot fulfill any other way.
-
*/
-
.data
-
-
ALIGN
-
/*
-
* This contains typically 140 quadwords, depending on NR_CPUS.
-
*
-
* NOTE! Make sure the gdt descriptor in head.S matches this if you
-
* change anything.
-
*/
-
ENTRY(gdt_table)
-
.quad 0x0000000000000000 /* NULL descriptor */
-
.quad 0x0000000000000000 /* not used */
-
.quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */
-
.quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */
-
.quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
-
.quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
-
.quad 0x0000000000000000 /* not used */
-
.quad 0x0000000000000000 /* not used */
-
/*
-
* The APM segments have byte granularity and their bases
-
* and limits are set at run time.
-
*/
-
.quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */
-
.quad 0x00409a0000000000 /* 0x48 APM CS code */
-
.quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
-
.quad 0x0040920000000000 /* 0x58 APM DS data */
-
.fill NR_CPUS*4,8,0 /* space for TSS's and LDT's */
-
-
/*
-
* This is to aid debugging, the various locking macros will be putting
-
* code fragments here. When an oops occurs we'd rather know that it's
-
* inside the .text.lock section rather than as some offset from whatever
-
* function happens to be last in the .text segment.
-
*/
-
.section .text.lock
-
ENTRY(stext_lock)
附录1.bochs调试kernel/head.S中断点的设置
-
<bochs:1> lb 0x100000 -->在1M处设断点
-
<bochs:2> c
-
(0) Breakpoint 1, 0x00100000 in ?? ()
-
Next at t=92855571
-
(0) [0x000000100000] 0010:00100000 (unk. ctxt): cld ; fc -->第1次的cld是compressed/head.S的内容
-
<bochs:3> c
-
(0) Breakpoint 1, 0x00100000 in ?? ()
-
Next at t=174491593
-
(0) [0x000000100000] 0010:00100000 (unk. ctxt): cld ; fc -->第2次的cld才是kernel/head.S的内容
附录2:刚进入arch/i386/kernel/head.S之前与之后gdt的对比
a. 刚进入head.S时的gdt
-
<bochs:4> n
-
Next at t=174491594
-
(0) [0x000000100001] 0010:00100001 (unk. ctxt): mov eax, 0x00000018 ; b818000000
-
<bochs:5> sreg
-
es:0x0018, dh=0x00cf9300, dl=0x0000ffff, valid=31
-
Data segment, base=0x00000000, limit=0xffffffff, Read/Write, Accessed
-
cs:0x0010, dh=0x00cf9b00, dl=0x0000ffff, valid=1
-
Code segment, base=0x00000000, limit=0xffffffff, Execute/Read, Non-Conforming, Accessed, 32-bit
-
ss:0x0018, dh=0x00cf9300, dl=0x0000ffff, valid=31
-
Data segment, base=0x00000000, limit=0xffffffff, Read/Write, Accessed
-
ds:0x0018, dh=0x00cf9300, dl=0x0000ffff, valid=31
-
Data segment, base=0x00000000, limit=0xffffffff, Read/Write, Accessed
-
fs:0x0018, dh=0x00cf9300, dl=0x0000ffff, valid=1
-
Data segment, base=0x00000000, limit=0xffffffff, Read/Write, Accessed
-
gs:0x0018, dh=0x00cf9300, dl=0x0000ffff, valid=1
-
Data segment, base=0x00000000, limit=0xffffffff, Read/Write, Accessed
-
ldtr:0x0000, dh=0x00008200, dl=0x0000ffff, valid=1
-
tr:0x0000, dh=0x00008b00, dl=0x0000ffff, valid=1
-
gdtr:base=0x00090a8b, limit=0x8000
-
idtr:base=0x00000000, limit=0x0
-
<bochs:9> info gdt
-
Global Descriptor Table (base=0x00090a8b, limit=32768):
-
GDT[0x00]=??? descriptor hi=0x00000000, lo=0x00000000
-
GDT[0x01]=??? descriptor hi=0x00000000, lo=0x00000000
-
GDT[0x02]=Code segment, base=0x00000000, limit=0xffffffff, Execute/Read, Non-Conforming, Accessed, 32-bit
-
GDT[0x03]=Data segment, base=0x00000000, limit=0xffffffff, Read/Write, Accessed
b.第二次设置了gdt,在start_kernel之前的gdt
-
info gdt
-
Global Descriptor Table (base=0xc0263000, limit=1119):
-
GDT[0x00]=??? descriptor hi=0x00000000, lo=0x00000000
-
GDT[0x01]=??? descriptor hi=0x00000000, lo=0x00000000
-
GDT[0x02]=Code segment, base=0x00000000, limit=0xffffffff, Execute/Read, Non-Conforming, Accessed, 32-bit
-
GDT[0x03]=Data segment, base=0x00000000, limit=0xffffffff, Read/Write, Accessed
-
GDT[0x04]=Code segment, base=0x00000000, limit=0xffffffff, Execute/Read, Non-Conforming, 32-bit
-
GDT[0x05]=Data segment, base=0x00000000, limit=0xffffffff, Read/Write
-
GDT[0x06]=??? descriptor hi=0x00000000, lo=0x00000000
附录3: 页表的内容
上图出自《Linux内核完全注释》
a. 1个页表项可以映射4K的内存,
0x102000-0x104000=0x2000=8192个字节
8192个字节有8192/4=2K个页表项,所以总共映射了2K*4K=8M的内存
b. 页表[0x102000-0x104000]的内容如下 --> 映射的物理内存范围是[0-8M]
-
<bochs:70> xp /2060wx 0x102000
-
[bochs]:
-
0x00102000 <bogus+ 0>: 0x00000007 0x00001007 0x00002007 0x00003007
-
0x00102010 <bogus+ 16>: 0x00004007 0x00005007 0x00006007 0x00007007
-
....
-
0x00103fe0 <bogus+ 8160>: 0x007f8007 0x007f9007 0x007fa007 0x007fb007
-
0x00103ff0 <bogus+ 8176>: 0x007fc007 0x007fd007 0x007fe007 0x007ff007
-
0x00104000 <bogus+ 8192>: 0x00000000 0x00000000 0x00000000 0x00000000
附录4. 详细分析虚拟地址到物理地址的过程-->以0xc010005e为例
上图出自赵炯的《Linux内核完全注释V3.0.pdf》
c010005e=11000000000100000000000001011110
a.页目录表找到页表的过程
a.1 高10位--> 1100000000=768=0x300 -->页目录表的第768项是0xc010005e的页目录项
a.2 cr3+768*4=0x101000+0xC00=0x101C00
a.3 则0x101C00地址的内容为page_table的地址
-
<bochs:25> xp /1024wx 0x101000
-
0x00101000 <bogus+ 0>: 0x00102007 0x00103007 0x00000000 0x00000000
-
0x00101010 <bogus+ 16>: 0x00000000 0x00000000 0x00000000 0x00000000
-
0x00101020 <bogus+ 32>: 0x00000000 0x00000000 0x00000000 0x00000000
-
....
-
0x00101c00 <bogus+ 3072>: 0x00102007 0x00103007 0x00000000 0x00000000
-
0x00101c10 <bogus+ 3088>: 0x00000000 0x00000000 0x00000000 0x00000000
-
0x00101c20 <bogus+ 3104>: 0x00000000 0x00000000 0x00000000 0x00000000
-
0x00101ff0 <bogus+ 4080>: 0x00000000 0x00000000 0x00000000 0x00000000
0xc010005e对应的页目录项内容是0x00102007-->低12位是属性标志位,不属于地址-->所以页表基地址是0x00102000
b.由页表找到物理地址的过程
b.1 中间10位--> 0100000000=256=0x100 -->页表的第256项是0xc010005e的页表项
b.2 在page_table中的偏移是256*4=1024=0x400
b.3 页表项=页表基地址+页表内的偏移=0x00102000+0x400=0x102400
-
<bochs:27> xp /2060wx 0x102000
-
0x001023f0 <bogus+ 1008>: 0x000fc007 0x000fd007 0x000fe007 0x000ff007
-
0x00102400 <bogus+ 1024>: 0x00100007 0x00101007 0x00102007 0x00103007
-
0x00102410 <bogus+ 1040>: 0x00104007 0x00105007 0x00106007 0x00107007
则页表项的内容为0x00100007-->低12位是属性标志位,不属于地址-->所以最终的页帧地址是0x00100000
c.最终的物理地址
c.1 后12位000001011110=94=0x5E
c.2 物理地址=页帧地址+页内偏移=0x00100000+0x5E=0x0010005E
d.综上
虚拟地址0xc010005e--->物理地址0x0010005E
阅读(1489) | 评论(0) | 转发(0) |