Xen 只能由 GRUB 这类支持 Multiboot 协议的引导程序来引导;x86_32.S 是从 GRUB 进入 Xen 的入口文件。
1 #include
2 #include
3 #include
4 #include
5 #include
6 #include
7 #include
8
/**
Marker value compared against %ebx later in this file: when %ebx holds it, the
boot-CPU-only setup is skipped and control eventually reaches start_secondary.
**/
9 #define SECONDARY_CPU_FLAG 0xA5A5A5A5
10
11 .text
12
13 ENTRY(start)
14 ENTRY(stext)
15 ENTRY(_stext)
16 jmp __start
/**
start, stext and _stext all label the first instruction of the image, which
jumps over the Multiboot header and the error-message code to __start.
(ENTRY() is defined outside this listing - presumably it emits a global label.)

GRUB obtains the entry address from Xen's ELF header, reads in the whole image
and finally hands control to Xen.
Here is what Xen's header looks like:
# readelf -l xen
Elf file type is EXEC (Executable file)
Entry point 0x100000
There are 1 program headers, starting at offset 52
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
LOAD 0x000080 0x00100000 0x00100000 0xb86e4 0x107000 RWE 0x40
Section to Segment mapping:
Segment Sections...
00 .text
By the time Xen runs, the CPU is already in protected mode.
**/
17
18 .align 4
19
20 /*** MULTIBOOT HEADER ****/
/**
The three .long values below (magic, flags, checksum) are data for GRUB to
read, not code: they declare that this image supports Multiboot. Execution
never reaches them because the entry point jumps straight to __start.
See the Multiboot specification for the details of each field.
**/
21 #define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_HEADER_MODS_ALIGNED | \
22 MULTIBOOT_HEADER_WANT_MEMORY)
23 /* Magic number indicating a Multiboot header. */
24 .long MULTIBOOT_HEADER_MAGIC
25 /* Flags to bootloader (see Multiboot spec). */
26 .long MULTIBOOT_HEADER_FLAGS
27 /* Checksum: must be the negated sum of the first two fields. */
28 .long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
29
/**
Error path taken from __start when %eax does not hold the Multiboot magic.
The message is emitted one character at a time to both the VGA text buffer at
0xB8000 and the UART at I/O port 0x3f8; once the terminating NUL is reached
the CPU spins forever. This runs before paging is enabled, hence the
-__PAGE_OFFSET on the message address.
Register use: %esi = next message byte, %edi = next VGA cell, %bl = current
character, %dx = I/O port.
**/
30 not_multiboot_msg:
31 .asciz "ERR: Not a Multiboot bootloader!"
32 not_multiboot:
33 mov $not_multiboot_msg-__PAGE_OFFSET,%esi
34 mov $0xB8000,%edi # VGA framebuffer
35 1: mov (%esi),%bl
36 test %bl,%bl # Terminate on '\0' sentinel
37 2: je 2b # End of string: branch to self, i.e. hang here
38 mov $0x3f8+5,%dx # UART Line Status Register
39 3: in %dx,%al
40 test $0x20,%al # THR Empty flag
41 je 3b # Busy-wait until the transmitter can take a byte
42 mov $0x3f8+0,%dx # UART Transmit Holding Register
43 mov %bl,%al
44 out %al,%dx # Send a character over the serial line
45 movsb # Write a character to the VGA framebuffer
46 mov $7,%al
47 stosb # Write an attribute to the VGA framebuffer
48 jmp 1b
49
/**
Real start of execution (reached from the jmp at the entry labels).
Register state this code relies on, as shown by the checks below:
  %eax - compared with the Multiboot magic 0x2BADB002 on the boot CPU
  %ebx - either SECONDARY_CPU_FLAG, or the address of the Multiboot info
         structure (it gets __PAGE_OFFSET added and is pushed for later use)
Paging is off throughout this part, so every symbol address is used with
-__PAGE_OFFSET.
**/
50 __start:
51 /* Set up a few descriptors: on entry only CS is guaranteed good. */
52 lgdt %cs:nopaging_gdt_descr-__PAGE_OFFSET
/**
Load the GDT. Why subtract __PAGE_OFFSET? As explained earlier, the addresses
the code was linked at differ from the addresses it runs at, so an offset has
to be subtracted. Once page translation is in use the subtraction is no longer
needed: from then on 0xFF100000 and 0x00100000 map to the same physical
address.
Comparing gdt_descr with nopaging_gdt_descr also makes this visible:
gdt_descr:
.word LAST_RESERVED_GDT_BYTE
.long gdt_table - FIRST_RESERVED_GDT_BYTE
.word 0
nopaging_gdt_descr:
.word LAST_RESERVED_GDT_BYTE
.long gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
Both descriptors actually refer to the same gdt_table.
**/
53 mov $(__HYPERVISOR_DS),%ecx
/**
Initialise ds, es, fs and gs.
The value of __HYPERVISOR_DS can be found in ./include/asm/config.h:
#define __HYPERVISOR_CS 0xe008
#define __HYPERVISOR_DS 0xe010
Each GDT entry is 8 bytes, so in decimal cs and ds are entries 7169 and 7170.
Now look at nopaging_gdt_descr again:
nopaging_gdt_descr:
.word LAST_RESERVED_GDT_BYTE
.long gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
The GDT base lies FIRST_RESERVED_GDT_BYTE bytes before gdt_table:
#define FIRST_RESERVED_GDT_PAGE 14
#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096)
In other words 14 pages are reserved in front of it. Doing the arithmetic:
14 * 4096 / 8 = 7168 entries, so entries 7169 and 7170 are entries 1 and 2
relative to gdt_table.
**/
54 mov %ecx,%ds
55 mov %ecx,%es
56 mov %ecx,%fs
57 mov %ecx,%gs
58 ljmp $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
59 1: lss stack_start-__PAGE_OFFSET,%esp
60 add $(STACK_SIZE-CPUINFO_sizeof-__PAGE_OFFSET),%esp
/**
Set up the stack. lss loads %esp and %ss from the two words at stack_start:
ENTRY(stack_start)
.long cpu0_stack
.long __HYPERVISOR_DS
...
ENTRY(cpu0_stack)
.fill STACK_SIZE,1,0
The add then moves %esp to the high end of the stack (+STACK_SIZE), converts
it to a pre-paging address (-__PAGE_OFFSET) and additionally subtracts
CPUINFO_sizeof. Why the extra CPUINFO_sizeof? Xen keeps a small area at the
high end (bottom) of the stack for cpu0's cpu_info structure.
The implementation of get_cpu_info in ./include/asm-x86/current.h makes this
clearer:
23 static inline struct cpu_info *get_cpu_info(void)
24 {
25 struct cpu_info *cpu_info;
26 __asm__ ( "and %%"__OP"sp,%0; or %2,%0"
27 : "=r" (cpu_info)
28 : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-sizeof(struct cpu_info))
29 );
30 return cpu_info;
31 }
Expressed as pseudo-assembly it means:
mov (~(STACK_SIZE-1)), cpu_info
and %esp, cpu_info
or (STACK_SIZE-sizeof(struct cpu_info)), cpu_info
There is a small trick here: %esp & ~(STACK_SIZE-1) is the lowest address of
the current stack area (this presumably relies on the stack being
STACK_SIZE-aligned); OR-ing in (STACK_SIZE-sizeof(struct cpu_info)) then gives
the address sizeof(struct cpu_info) below the high end of the stack, which is
exactly the space reserved above.
**/
61
62 /* Reset EFLAGS (subsumes CLI and CLD). */
63 pushl $0
64 popf
65
66 /* Set up FPU. */
67 fninit
68
69 /* Set up CR4, except global flag which Intel requires should be */
70 /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */
71 mov mmu_cr4_features-__PAGE_OFFSET,%ecx
/**
CR4 layout (bit number: flag), per the diagram in the original note:
31..14: reserved (set to 0)
13: VMXE   12: 0   11: 0   10: OSXMMEXCPT   9: OSFXSR   8: PCE
7: PGE   6: MCE   5: PAE   4: PSE   3: DE   2: TSD   1: PVI   0: VME
mmu_cr4_features is defined in ./arch/x86/setup.c:
#if CONFIG_PAGING_LEVELS > 2
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
#else
unsigned long mmu_cr4_features = X86_CR4_PSE;
#endif
**/
72 and $0x7f,%cl # CR4.PGE (global enable)
/**
Clear the PGE bit (bit 7); it is switched on later, after paging is enabled.
**/
73 mov %ecx,%cr4
74
75 cmp $(SECONDARY_CPU_FLAG),%ebx
76 je start_paging
/**
SECONDARY_CPU_FLAG is defined like this:
#define SECONDARY_CPU_FLAG 0xA5A5A5A5
Xen 3.x.y and later support SMP (for both xen0 and xenU). When %ebx holds
0xA5A5A5A5, this code is being executed by another (secondary) CPU of an SMP
system rather than by the boot CPU, so the one-time setup below is skipped.
**/
77
78 /* Check for Multiboot bootloader */
79 cmp $0x2BADB002,%eax
80 jne not_multiboot
81
82 /* Initialize BSS (no nasty surprises!) */
83 mov $__bss_start-__PAGE_OFFSET,%edi
84 mov $_end-__PAGE_OFFSET,%ecx
85 sub %edi,%ecx
86 xor %eax,%eax
87 rep stosb
88
89 /* Save the Multiboot info structure for later use. */
90 add $__PAGE_OFFSET,%ebx
91 push %ebx
92
/**
Build the boot page tables out of superpage entries, before paging is enabled.
%edi walks the L2 table (physical address), %eax holds the next entry value:
physical base of the superpage plus the flag bits 0xe3.
  Loop 1: for physical addresses up to DIRECTMAP_PHYS_END, write each entry
          twice - at the low (identity) slot and at the slot for the same
          address shifted up by __PAGE_OFFSET (the "high mapping").
  Loop 2: keep writing low-slot entries only, until the mapped address
          reaches HYPERVISOR_VIRT_START.
The byte offset of the high slot follows from the entry size and the area one
entry maps: PAE uses 8-byte entries for 2MB each, (addr >> 21) * 8 ==
addr >> 18; non-PAE uses 4-byte entries for 4MB each, (addr >> 22) * 4 ==
addr >> 20. Under PAE only the low dword of each entry is written and the
extra "add $4,%edi" steps over the high dword.
NOTE(review): in this listing the four "add $(1<..." lines had lost everything
after the '<'. The operand is restored as 1<<L2_PAGETABLE_SHIFT, i.e. one
superpage per iteration, which is the step the >>18 / >>20 indexing above
requires. L2_PAGETABLE_SHIFT is defined outside this listing (presumably 21
under PAE and 22 otherwise) - confirm against the upstream file.
**/
93 #ifdef CONFIG_X86_PAE
94 /* Initialize low and high mappings of all memory with 2MB pages */
95 mov $idle_pg_table_l2-__PAGE_OFFSET,%edi
96 mov $0xe3,%eax /* PRESENT+RW+A+D+2MB */
97 1: mov %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
98 stosl /* low mapping */
99 add $4,%edi
100 add $(1<<L2_PAGETABLE_SHIFT),%eax
101 cmp $DIRECTMAP_PHYS_END+0xe3,%eax
102 jne 1b
103 1: stosl /* low mappings cover as much physmem as possible */
104 add $4,%edi
105 add $(1<<L2_PAGETABLE_SHIFT),%eax
106 cmp $HYPERVISOR_VIRT_START+0xe3,%eax
107 jne 1b
108 #else
109 /* Initialize low and high mappings of all memory with 4MB pages */
110 mov $idle_pg_table-__PAGE_OFFSET,%edi
111 mov $0xe3,%eax /* PRESENT+RW+A+D+4MB */
112 1: mov %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
113 stosl /* low mapping */
114 add $(1<<L2_PAGETABLE_SHIFT),%eax
115 cmp $DIRECTMAP_PHYS_END+0xe3,%eax
116 jne 1b
117 1: stosl /* low mappings cover as much physmem as possible */
118 add $(1<<L2_PAGETABLE_SHIFT),%eax
119 cmp $HYPERVISOR_VIRT_START+0xe3,%eax
120 jne 1b
121 #endif
122
123 /* Initialise IDT with simple error defaults. */
/**
Fill all 256 IDT slots with the same interrupt gate pointing at ignore_int.
Each 8-byte gate is assembled in two registers and stored as two dwords:
  %eax (offset 0) = __HYPERVISOR_CS << 16 | handler address bits 15..0
  %edx (offset 4) = handler address bits 31..16 << 16 | 0x8E00
idt_table is addressed with -__PAGE_OFFSET because paging is still off here;
ignore_int is taken without the offset, and the table is only loaded by
"lidt idt_descr" after paging has been enabled.
**/
124 lea ignore_int,%edx
125 mov $(__HYPERVISOR_CS << 16),%eax
126 mov %dx,%ax /* low word = handler bits 15..0; selector __HYPERVISOR_CS is already in the high word */
127 mov $0x8E00,%dx /* interrupt gate - dpl=0, present */
128 lea idt_table-__PAGE_OFFSET,%edi
129 mov $256,%ecx
130 1: mov %eax,(%edi)
131 mov %edx,4(%edi)
132 add $8,%edi
133 loop 1b
134
/**
Common path for the boot CPU (falling through from the IDT setup) and for
CPUs that arrived with %ebx == SECONDARY_CPU_FLAG: optionally enable NX,
load %cr3, turn on paging, reload the GDT and segment registers through their
linked addresses, restore the full CR4 features, relocate %esp, load the IDT
and finally enter start_secondary or __start_xen.
%ebx is kept intact so that it can be tested again at the end.
**/
135 start_paging:
136 #ifdef CONFIG_X86_PAE
137 /* Enable Execute-Disable (NX/XD) support if it is available. */
138 push %ebx /* cpuid overwrites %ebx; keep the value for the check below */
139 mov $0x80000000,%eax
140 cpuid
141 cmp $0x80000000,%eax /* Any function > 0x80000000? */
142 jbe no_execute_disable
143 mov $0x80000001,%eax
144 cpuid
145 bt $20,%edx /* Execute Disable? */
146 jnc no_execute_disable
147 movl $MSR_EFER,%ecx
148 rdmsr
149 bts $_EFER_NX,%eax
150 wrmsr
151 no_execute_disable:
152 pop %ebx
153 #endif
154 mov $idle_pg_table-__PAGE_OFFSET,%eax /* %cr3 takes the physical address of the top-level table */
155 mov %eax,%cr3
156 mov $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
157 mov %eax,%cr0
158 jmp 1f /* NOTE(review): presumably discards instructions fetched before paging was on - confirm */
159 1: /* Install relocated selectors (FS/GS unused). */
160 lgdt gdt_descr
161 mov $(__HYPERVISOR_DS),%ecx
162 mov %ecx,%ds
163 mov %ecx,%es
164 mov %ecx,%ss
165 ljmp $(__HYPERVISOR_CS),$1f
166 1: /* Paging enabled, so we can now enable GLOBAL mappings in CR4. */
167 mov mmu_cr4_features,%ecx
168 mov %ecx,%cr4
169 /* Relocate ESP */
170 add $__PAGE_OFFSET,%esp
171
172 lidt idt_descr
173
174 cmp $(SECONDARY_CPU_FLAG),%ebx
175 je start_secondary
176
/**
On the boot CPU the Multiboot info pointer pushed in __start is still on top
of the stack at this point - presumably it is __start_xen's argument (the
function is defined outside this listing).
**/
177 /* Call into main C routine. This should never return.*/
178 call __start_xen
179 ud2 /* Force a panic (invalid opcode). */
180
181 /* This is the default interrupt handler. */
/**
ignore_int is the handler written into every IDT slot by the IDT
initialisation code. It clears the direction flag, reloads %ds and %es with
the hypervisor data selector, calls printk with int_msg as its stack argument
and then loops forever; it never returns to the interrupted code.
**/
182 int_msg:
183 .asciz "Unknown interrupt\n"
184 ALIGN
185 ignore_int:
186 cld
187 mov $(__HYPERVISOR_DS),%eax
188 mov %eax,%ds
189 mov %eax,%es
190 pushl $int_msg
191 call printk
192 1: jmp 1b
193
194 /*** STACK LOCATION ***/
/**
Far pointer consumed by the "lss stack_start-__PAGE_OFFSET,%esp" instruction
in __start: the first dword is loaded into %esp, the second supplies %ss.
**/
195
196 ENTRY(stack_start)
197 .long cpu0_stack
198 .long __HYPERVISOR_DS
199
200 /*** DESCRIPTOR TABLES ***/
/**
Operands for lidt/lgdt: each is a 16-bit limit followed by a 32-bit base.
  idt_descr          - 256 gates of 8 bytes; base is idt_table
  gdt_descr          - used once paging is on; base is FIRST_RESERVED_GDT_BYTE
                       below gdt_table
  nopaging_gdt_descr - same table, but with __PAGE_OFFSET subtracted from the
                       base for use before paging is enabled
NOTE(review): the ".word 0" in front of each descriptor presumably pads it so
that the 32-bit base field is 4-byte aligned - ALIGN is defined outside this
listing, confirm.
**/
201
202 ALIGN
203
204 .word 0
205 idt_descr:
206 .word 256*8-1
207 .long idt_table
208
209 .word 0
210 gdt_descr:
211 .word LAST_RESERVED_GDT_BYTE
212 .long gdt_table - FIRST_RESERVED_GDT_BYTE
213
214 .word 0
215 nopaging_gdt_descr:
216 .word LAST_RESERVED_GDT_BYTE
217 .long gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
218
/**
The reserved GDT entries, page aligned. gdt_descr/nopaging_gdt_descr place the
GDT base FIRST_RESERVED_GDT_BYTE before this label, which is why the first
real entries answer to selectors 0xe008, 0xe010, ... (see the per-entry
comments below).
GUEST_DESC(d) emits one 8-byte descriptor as two dwords: the low dword holds
bits 15..0 of the page-granular limit (MACH2PHYS_VIRT_END - 1) >> 12, the high
dword holds limit bits 19..16 OR-ed with the type/attribute bits passed in d.
**/
219 .align PAGE_SIZE, 0
220 /* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */
221 /* the machine->physical mapping table. Ring 0 can access all memory. */
222 #define GUEST_DESC(d) \
223 .long ((MACH2PHYS_VIRT_END - 1) >> 12) & 0xffff, \
224 ((MACH2PHYS_VIRT_END - 1) >> 12) & (0xf << 16) | (d)
225 ENTRY(gdt_table)
226 .quad 0x0000000000000000 /* unused */
227 .quad 0x00cf9a000000ffff /* 0xe008 ring 0 4.00GB code at 0x0 */
228 .quad 0x00cf92000000ffff /* 0xe010 ring 0 4.00GB data at 0x0 */
229 GUEST_DESC(0x00c0ba00) /* 0xe019 ring 1 3.xxGB code at 0x0 */
230 GUEST_DESC(0x00c0b200) /* 0xe021 ring 1 3.xxGB data at 0x0 */
231 GUEST_DESC(0x00c0fa00) /* 0xe02b ring 3 3.xxGB code at 0x0 */
232 GUEST_DESC(0x00c0f200) /* 0xe033 ring 3 3.xxGB data at 0x0 */
233 .quad 0x0000000000000000 /* unused */
234 .fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
235
/**
Storage for the boot ("idle") page tables; idle_pg_table is the table whose
address is loaded into %cr3 in start_paging.
  PAE:     idle_pg_table / idle_pg_table_l3 is a 4-entry top-level table. Each
           8-byte entry holds the address of one page of idle_pg_table_l2 with
           __PAGE_OFFSET subtracted and bit 0 set (0x01), followed by a zero
           high dword. idle_pg_table_l2 itself is four zero-filled pages in
           the ".bss.page_aligned" section.
  non-PAE: idle_pg_table and idle_pg_table_l2 are two names for one
           zero-filled page in ".bss.page_aligned".
The L2 entries are filled in at run time by the page-table setup in __start.
**/
236 .align PAGE_SIZE, 0
237
238 #ifdef CONFIG_X86_PAE
239 ENTRY(idle_pg_table)
240 ENTRY(idle_pg_table_l3)
241 .long idle_pg_table_l2 + 0*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
242 .long idle_pg_table_l2 + 1*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
243 .long idle_pg_table_l2 + 2*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
244 .long idle_pg_table_l2 + 3*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
245 .section ".bss.page_aligned","w"
246 ENTRY(idle_pg_table_l2)
247 .fill 4*PAGE_SIZE,1,0
248 #else
249 .section ".bss.page_aligned","w"
250 ENTRY(idle_pg_table)
251 ENTRY(idle_pg_table_l2)
252 .fill 1*PAGE_SIZE,1,0
253 #endif
254
/**
STACK_SIZE zero bytes reserved for the boot CPU's stack; stack_start points
here. Placed in its own ".bss.stack_aligned" section (NOTE(review): presumably
so the linker script can align it to STACK_SIZE - confirm).
**/
255 .section ".bss.stack_aligned","w"
256 ENTRY(cpu0_stack)
257 .fill STACK_SIZE,1,0
阅读(1808) | 评论(0) | 转发(0) |