Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1866990
  • 博文数量: 473
  • 博客积分: 13997
  • 博客等级: 上将
  • 技术积分: 5953
  • 用 户 组: 普通用户
  • 注册时间: 2010-01-22 11:52
文章分类

全部博文(473)

文章存档

2014年(8)

2013年(38)

2012年(95)

2011年(181)

2010年(151)

分类: LINUX

2013-02-16 23:23:53

Xen 只能由 grub(或其他兼容 Multiboot 协议的引导程序)来引导,x86_32.S 是从 GRUB 进入 Xen 的入口文件

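    1   #include <xen/config.h>
    2   #include <xen/multiboot.h>
    3   #include <public/xen.h>
    4   #include <asm/asm_defns.h>
    5   #include <asm/desc.h>
    6   #include <asm/page.h>
    7   #include <asm/msr.h>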
    8
    9   #define SECONDARY_CPU_FLAG 0xA5A5A5A5
   10
   11           .text
   12
   13   ENTRY(start)
   14   ENTRY(stext)
   15   ENTRY(_stext)
   16           jmp __start
/**
grub 根据xen的头信息获得入口地址,然后读入整个映像,最后把控制权交给xen。
看一下xen的头:
# readelf -l xen

Elf 文件类型为 EXEC (可执行文件)
入口点 0x100000
共有 1 个程序头,开始于偏移量52

程序头:
Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz Flg Align
LOAD           0x000080 0x00100000 0x00100000 0xb86e4 0x107000 RWE 0x40

Section to Segment mapping:
段节...
   00     .text
当 grub 把控制权交给 xen 时,cpu 已经处于保护模式

**/
   17
   18           .align 4
   19
   20   /*** MULTIBOOT HEADER ****/
   21   #define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_HEADER_MODS_ALIGNED | \
   22                                   MULTIBOOT_HEADER_WANT_MEMORY)
   23           /* Magic number indicating a Multiboot header. */
   24           .long MULTIBOOT_HEADER_MAGIC
   25           /* Flags to bootloader (see Multiboot spec). */
   26           .long MULTIBOOT_HEADER_FLAGS
   27           /* Checksum: must be the negated sum of the first two fields. */
/**
上面的定义是给grub看的,表明支持multiboot, 详细内容见multiboot协议
**/
   28           .long -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
   29
   30   not_multiboot_msg:
   31           .asciz "ERR: Not a Multiboot bootloader!"
   32   not_multiboot:
   33           mov     $not_multiboot_msg-__PAGE_OFFSET,%esi
   34           mov     $0xB8000,%edi # VGA framebuffer
   35   1:      mov     (%esi),%bl
   36           test    %bl,%bl        # Terminate on '\0' sentinel
   37   2:      je      2b
   38           mov     $0x3f8+5,%dx   # UART Line Status Register
   39   3:      in      %dx,%al
   40           test    $0x20,%al      # THR Empty flag
   41           je      3b
   42           mov     $0x3f8+0,%dx   # UART Transmit Holding Register
   43           mov     %bl,%al
   44           out     %al,%dx        # Send a character over the serial line
   45           movsb                  # Write a character to the VGA framebuffer
   46           mov     $7,%al
   47           stosb                  # Write an attribute to the VGA framebuffer
   48           jmp     1b
   49
   50   __start:
   51           /* Set up a few descriptors: on entry only CS is guaranteed good. */
   52           lgdt    %cs:nopaging_gdt_descr-__PAGE_OFFSET
/**
初始化gdt。为什么要 -__PAGE_OFFSET?前面已经说过,简单来说,编译时的代码位置和运行时的位置不一样,因此要减去一个偏移量。当使用页转换后就不需要 -__PAGE_OFFSET 了,那时的0xFF100000和0x00100000将映射到同一物理地址。
对比一下gdt_descr和nopaging_gdt_descr的内容也可以看出一点道道:
gdt_descr:
        .word   LAST_RESERVED_GDT_BYTE
        .long   gdt_table - FIRST_RESERVED_GDT_BYTE

        .word   0
nopaging_gdt_descr:
        .word   LAST_RESERVED_GDT_BYTE
        .long   gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
这两个descr实际上指向同一个gdt_table.
**/

   53           mov     $(__HYPERVISOR_DS),%ecx
/**
初始化ds, es, fs, gs
__HYPERVISOR_DS 的值可以在 ./include/asm/config.h 中找到:
#define __HYPERVISOR_CS 0xe008
#define __HYPERVISOR_DS 0xe010
每个表项是8个字节,换成十进制cs,ds分别是第7169和7170项。
再看看nopaging_gdt_descr的内容:
nopaging_gdt_descr:
        .word   LAST_RESERVED_GDT_BYTE
        .long   gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
gdt 的基地址位于gdt_table之前,空出了FIRST_RESERVED_GDT_BYTE个字节,
#define FIRST_RESERVED_GDT_PAGE 14
#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096)
实际上就是保留了14个页,算一下:
14 * 4096 /8 = 7168 项,所以第7169,7170项就是相对于gdt_table的第1,2项。

**/
   54           mov     %ecx,%ds
   55           mov     %ecx,%es
   56           mov     %ecx,%fs
   57           mov     %ecx,%gs
   58           ljmp    $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
   59   1:      lss     stack_start-__PAGE_OFFSET,%esp
   60           add     $(STACK_SIZE-CPUINFO_sizeof-__PAGE_OFFSET),%esp
/**
设置stack:
ENTRY(stack_start)
        .long cpu0_stack
        .long __HYPERVISOR_DS
...
ENTRY(cpu0_stack)
        .fill STACK_SIZE,1,0

这里为什么要多减一个CPUINFO_sizeof呢?原因是xen要在栈底保留出一点空间供cpu0 的 cpu_info 结构使用。
看看./include/asm-x86/current.h 中的 get_cpu_info 是如何实现的就更清楚了:

     23 static inline struct cpu_info *get_cpu_info(void)
     24 {
     25     struct cpu_info *cpu_info;
     26     __asm__ ( "and %%"__OP"sp,%0; or %2,%0"
     27               : "=r" (cpu_info)
     28               : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-sizeof(struct cpu_info))
     29         );
     30     return cpu_info;
     31 }
用伪汇编指令表示,意思是这样:
mov (~(STACK_SIZE-1)), cpu_info
and %esp, cpu_info
or (STACK_SIZE-sizeof(struct cpu_info)), cpu_info

这里面有些小技巧: 栈按 STACK_SIZE 对齐,所以 %esp and (~(STACK_SIZE-1)) 的值实际就是栈所在内存块的起始(最低)地址,也就是栈顶方向的边界; 再 or (STACK_SIZE-sizeof(struct cpu_info))
就得到距栈底(最高地址一端)sizeof(struct cpu_info)的地址,该空间就是上面保留下来的。

**/

   61
   62           /* Reset EFLAGS (subsumes CLI and CLD). */
   63           pushl   $0
   64           popf
   65
   66           /* Set up FPU. */
   67           fninit
   68
   69           /* Set up CR4, except global flag which Intel requires should be     */
   70           /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */
   71           mov     mmu_cr4_features-__PAGE_OFFSET,%ecx
/**
CR4 各位的定义(从高位到低位):
  位 31-14: 保留(置0)
  位 13:    VMXE
  位 12-11: 保留(置0)
  位 10:    OSXMMEXCPT
  位 9:     OSFXSR
  位 8:     PCE
  位 7:     PGE
  位 6:     MCE
  位 5:     PAE
  位 4:     PSE
  位 3:     DE
  位 2:     TSD
  位 1:     PVI
  位 0:     VME
mmu_cr4_features 在 ./arch/x86/setup.c 中有定义:
#if CONFIG_PAGING_LEVELS > 2
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
#else
unsigned long mmu_cr4_features = X86_CR4_PSE;
#endif
                                                  

**/

   72           and     $0x7f,%cl   # CR4.PGE (global enable)
/**
清除PGE位
**/
   73           mov     %ecx,%cr4
   74
   75           cmp     $(SECONDARY_CPU_FLAG),%ebx
   76           je      start_paging
/**
SECONDARY_CPU_FLAG 的值是这样定义的
#define SECONDARY_CPU_FLAG 0xA5A5A5A5
xen 3.x.y 以后支持SMP(包括xen0和xenU)。当 %ebx 的值是 0xA5A5A5A5 时,意味着当前执行这段代码的不是引导CPU,而是SMP系统中被唤醒的其他CPU(secondary CPU),此时直接跳到 start_paging。


**/
   77
   78           /* Check for Multiboot bootloader */
   79           cmp     $0x2BADB002,%eax
   80           jne     not_multiboot
   81
   82           /* Initialize BSS (no nasty surprises!) */
   83           mov     $__bss_start-__PAGE_OFFSET,%edi
   84           mov     $_end-__PAGE_OFFSET,%ecx
   85           sub     %edi,%ecx
   86           xor     %eax,%eax
   87           rep     stosb
   88
   89           /* Save the Multiboot info structure for later use. */
   90           add     $__PAGE_OFFSET,%ebx
   91           push    %ebx
   92
   93   #ifdef CONFIG_X86_PAE
   94           /* Initialize low and high mappings of all memory with 2MB pages */
   95           mov     $idle_pg_table_l2-__PAGE_OFFSET,%edi
   96           mov     $0xe3,%eax                  /* PRESENT+RW+A+D+2MB */
   97   1:      mov     %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
   98           stosl                                /* low mapping */
   99           add     $4,%edi
100           add     $(1<<L2_PAGETABLE_SHIFT),%eax
101           cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
102           jne     1b
103   1:      stosl   /* low mappings cover as much physmem as possible */
104           add     $4,%edi
105           add     $(1<<L2_PAGETABLE_SHIFT),%eax
106           cmp     $HYPERVISOR_VIRT_START+0xe3,%eax
107           jne     1b
108   #else
109           /* Initialize low and high mappings of all memory with 4MB pages */
110           mov     $idle_pg_table-__PAGE_OFFSET,%edi
111           mov     $0xe3,%eax                  /* PRESENT+RW+A+D+4MB */
112   1:      mov     %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
113           stosl                                /* low mapping */
114           add     $(1<<L2_PAGETABLE_SHIFT),%eax
115           cmp     $DIRECTMAP_PHYS_END+0xe3,%eax
116           jne     1b
117   1:      stosl   /* low mappings cover as much physmem as possible */
118           add     $(1<<L2_PAGETABLE_SHIFT),%eax
119           cmp     $HYPERVISOR_VIRT_START+0xe3,%eax
120           jne     1b
121   #endif
122
123           /* Initialise IDT with simple error defaults. */
124           lea     ignore_int,%edx
125           mov     $(__HYPERVISOR_CS << 16),%eax
126           mov     %dx,%ax            /* selector = 0x0010 = cs */
127           mov     $0x8E00,%dx        /* interrupt gate - dpl=0, present */
128           lea     idt_table-__PAGE_OFFSET,%edi
129           mov     $256,%ecx
130   1:      mov     %eax,(%edi)
131           mov     %edx,4(%edi)
132           add     $8,%edi
133           loop    1b
134
135   start_paging:
136   #ifdef CONFIG_X86_PAE
137           /* Enable Execute-Disable (NX/XD) support if it is available. */
138           push    %ebx
139           mov     $0x80000000,%eax
140           cpuid
141           cmp     $0x80000000,%eax    /* Any function > 0x80000000? */
142           jbe     no_execute_disable
143           mov     $0x80000001,%eax
144           cpuid
145           bt      $20,%edx            /* Execute Disable? */
146           jnc     no_execute_disable
147           movl    $MSR_EFER,%ecx
148           rdmsr
149           bts     $_EFER_NX,%eax
150           wrmsr
151   no_execute_disable:
152           pop     %ebx
153   #endif
154           mov     $idle_pg_table-__PAGE_OFFSET,%eax
155           mov     %eax,%cr3
156           mov     $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
157           mov     %eax,%cr0
158           jmp     1f
159   1:      /* Install relocated selectors (FS/GS unused). */
160           lgdt    gdt_descr
161           mov     $(__HYPERVISOR_DS),%ecx
162           mov     %ecx,%ds
163           mov     %ecx,%es
164           mov     %ecx,%ss
165           ljmp    $(__HYPERVISOR_CS),$1f
166   1:      /* Paging enabled, so we can now enable GLOBAL mappings in CR4. */
167           mov     mmu_cr4_features,%ecx
168           mov     %ecx,%cr4
169           /* Relocate ESP */
170           add     $__PAGE_OFFSET,%esp
171
172           lidt    idt_descr
173
174           cmp     $(SECONDARY_CPU_FLAG),%ebx
175           je      start_secondary
176
177           /* Call into main C routine. This should never return.*/
178           call    __start_xen
179           ud2     /* Force a panic (invalid opcode). */
180
181   /* This is the default interrupt handler. */
182   int_msg:
183           .asciz "Unknown interrupt\n"
184           ALIGN
185   ignore_int:
186           cld
187           mov     $(__HYPERVISOR_DS),%eax
188           mov     %eax,%ds
189           mov     %eax,%es
190           pushl   $int_msg
191           call    printk
192   1:      jmp     1b
193
194   /*** STACK LOCATION ***/
195
196   ENTRY(stack_start)
197           .long cpu0_stack
198           .long __HYPERVISOR_DS
199
200   /*** DESCRIPTOR TABLES ***/
201
202           ALIGN
203
204           .word   0
205   idt_descr:
206           .word   256*8-1
207           .long   idt_table
208
209           .word   0
210   gdt_descr:
211           .word   LAST_RESERVED_GDT_BYTE
212           .long   gdt_table - FIRST_RESERVED_GDT_BYTE
213
214           .word   0
215   nopaging_gdt_descr:
216           .word   LAST_RESERVED_GDT_BYTE
217           .long   gdt_table - FIRST_RESERVED_GDT_BYTE - __PAGE_OFFSET
218
219           .align PAGE_SIZE, 0
220   /* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */
221   /*     the machine->physical mapping table. Ring 0 can access all memory.    */
222   #define GUEST_DESC(d)                                                   \
223           .long ((MACH2PHYS_VIRT_END - 1) >> 12) & 0xffff,                \
224                 ((MACH2PHYS_VIRT_END - 1) >> 12) & (0xf << 16) | (d)
225   ENTRY(gdt_table)
226           .quad 0x0000000000000000     /* unused */
227           .quad 0x00cf9a000000ffff     /* 0xe008 ring 0 4.00GB code at 0x0 */
228           .quad 0x00cf92000000ffff     /* 0xe010 ring 0 4.00GB data at 0x0 */
229           GUEST_DESC(0x00c0ba00)       /* 0xe019 ring 1 3.xxGB code at 0x0 */
230           GUEST_DESC(0x00c0b200)       /* 0xe021 ring 1 3.xxGB data at 0x0 */
231           GUEST_DESC(0x00c0fa00)       /* 0xe02b ring 3 3.xxGB code at 0x0 */
232           GUEST_DESC(0x00c0f200)       /* 0xe033 ring 3 3.xxGB data at 0x0 */
233           .quad 0x0000000000000000     /* unused                           */
234           .fill 2*NR_CPUS,8,0          /* space for TSS and LDT per CPU    */
235
236           .align PAGE_SIZE, 0
237
238   #ifdef CONFIG_X86_PAE
239   ENTRY(idle_pg_table)
240   ENTRY(idle_pg_table_l3)
241           .long idle_pg_table_l2 + 0*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
242           .long idle_pg_table_l2 + 1*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
243           .long idle_pg_table_l2 + 2*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
244           .long idle_pg_table_l2 + 3*PAGE_SIZE + 0x01 - __PAGE_OFFSET, 0
245   .section ".bss.page_aligned","w"
246   ENTRY(idle_pg_table_l2)
247           .fill 4*PAGE_SIZE,1,0
248   #else
249   .section ".bss.page_aligned","w"
250   ENTRY(idle_pg_table)
251   ENTRY(idle_pg_table_l2)
252           .fill 1*PAGE_SIZE,1,0
253   #endif
254
255   .section ".bss.stack_aligned","w"
256   ENTRY(cpu0_stack)
257           .fill STACK_SIZE,1,0
阅读(1768) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~