Category: LINUX

2014-12-10 22:17:17

The previous posts analyzed the preparation work for the kernel page tables and the construction of the page tables for kernel low memory. Returning to init_mem_mapping(): once the low-memory page tables have been built, the next function called is early_ioremap_page_table_range_init().

【file:/arch/x86/mm/init_32.c】
/*
 * Build a proper pagetable for the kernel mappings. Up until this
 * point, we've been running on some set of pagetables constructed by
 * the boot process.
 *
 * If we're booting on native hardware, this will be a pagetable
 * constructed in arch/x86/kernel/head_32.S. The root of the
 * pagetable will be swapper_pg_dir.
 *
 * If we're booting paravirtualized under a hypervisor, then there are
 * more options: we may already be running PAE, and the pagetable may
 * or may not be based in swapper_pg_dir. In any case,
 * paravirt_pagetable_init() will set up swapper_pg_dir
 * appropriately for the rest of the initialization to work.
 *
 * In general, pagetable_init() assumes that the pagetable may already
 * be partially populated, and so it avoids stomping on any existing
 * mappings.
 */
void __init early_ioremap_page_table_range_init(void)
{
    pgd_t *pgd_base = swapper_pg_dir;
    unsigned long vaddr, end;

    /*
     * Fixed mappings, only the page table structure has to be
     * created - mappings will be set by set_fixmap():
     */
    vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
    end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
    page_table_range_init(vaddr, end, pgd_base);
    early_ioremap_reset();
}

This function builds the page tables for the fixed-mapping (fixmap) region. The fixmap region is the address space from FIXADDR_START to FIXADDR_TOP; it is divided into sub-regions with different purposes, distinguished by indices, and those indices are defined by the enumeration enum fixed_addresses:

【file:/arch/x86/include/asm/fixmap.h】
/*
 * Here we define all the compile-time 'special' virtual
 * addresses. The point is to have a constant address at
 * compile time, but to set the physical address only
 * in the boot process.
 * for x86_32: We allocate these special addresses
 * from the end of virtual memory (0xfffff000) backwards.
 * Also this lets us do fail-safe vmalloc(), we
 * can guarantee that these special addresses and
 * vmalloc()-ed addresses never overlap.
 *
 * These 'compile-time allocated' memory buffers are
 * fixed-size 4k pages (or larger if used with an increment
 * higher than 1). Use set_fixmap(idx,phys) to associate
 * physical memory with fixmap indices.
 *
 * TLB entries of such buffers will not be flushed across
 * task switches.
 */
enum fixed_addresses {
#ifdef CONFIG_X86_32
    FIX_HOLE,
    FIX_VDSO,
#else
    VSYSCALL_LAST_PAGE,
    VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
                + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
    VVAR_PAGE,
    VSYSCALL_HPET,
#ifdef CONFIG_PARAVIRT_CLOCK
    PVCLOCK_FIXMAP_BEGIN,
    PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
#endif
#endif
    FIX_DBGP_BASE,
    FIX_EARLYCON_MEM_BASE,
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
    FIX_OHCI1394_BASE,
#endif
#ifdef CONFIG_X86_LOCAL_APIC
    FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
#endif
#ifdef CONFIG_X86_IO_APIC
    FIX_IO_APIC_BASE_0,
    FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
#endif
#ifdef CONFIG_X86_VISWS_APIC
    FIX_CO_CPU, /* Cobalt timer */
    FIX_CO_APIC, /* Cobalt APIC Redirection Table */
    FIX_LI_PCIA, /* Lithium PCI Bridge A */
    FIX_LI_PCIB, /* Lithium PCI Bridge B */
#endif
    FIX_RO_IDT, /* Virtual mapping for read-only IDT */
#ifdef CONFIG_X86_32
    FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
    FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#ifdef CONFIG_PCI_MMCONFIG
    FIX_PCIE_MCFG,
#endif
#endif
#ifdef CONFIG_PARAVIRT
    FIX_PARAVIRT_BOOTMAP,
#endif
    FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
    FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
#ifdef CONFIG_X86_INTEL_MID
    FIX_LNW_VRTC,
#endif
    __end_of_permanent_fixed_addresses,

    /*
     * 256 temporary boot-time mappings, used by early_ioremap(),
     * before ioremap() is functional.
     *
     * If necessary we round it up to the next 256 pages boundary so
     * that we can have a single pgd entry and a single pte table:
     */
#define NR_FIX_BTMAPS 64
#define FIX_BTMAPS_SLOTS 4
#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
    FIX_BTMAP_END =
     (__end_of_permanent_fixed_addresses ^
      (__end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - 1)) &
     -PTRS_PER_PTE
     ? __end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS -
       (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1))
     : __end_of_permanent_fixed_addresses,
    FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
#ifdef CONFIG_X86_32
    FIX_WP_TEST,
#endif
#ifdef CONFIG_INTEL_TXT
    FIX_TBOOT_BASE,
#endif
    __end_of_fixed_addresses
};
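
For reference, the start of the fixmap region is derived from these indices. The following is a hedged reconstruction from the same header (exact macro names may differ slightly between kernel versions):

#define FIXADDR_TOP    ((unsigned long)__FIXADDR_TOP)   /* typically 0xfffff000 on x86_32 */
#define FIXADDR_SIZE   (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)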

Note that the sub-regions identified by these enumerators are not laid out from low addresses to high addresses, but from high addresses down to low addresses. The __fix_to_virt macro computes the linear address of a fixmap slot from its index:

#define __fix_to_virt(x)         (FIXADDR_TOP - ((x) << PAGE_SHIFT))

The corresponding macro that converts a virtual address back into an index is:

#define __virt_to_fix(x)         ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
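
As a quick illustration of how these two macros cooperate, here is a hypothetical helper (not kernel code; the concrete numbers assume the typical x86_32 layout where FIXADDR_TOP is 0xfffff000 and PAGE_SHIFT is 12):

/*
 * With FIXADDR_TOP == 0xfffff000: index 0 -> 0xfffff000, index 1 -> 0xffffe000,
 * index 2 -> 0xffffd000, ... -- the larger the index, the lower the address.
 */
static inline unsigned long fixmap_round_trip(unsigned int idx)
{
    unsigned long addr = __fix_to_virt(idx);    /* index -> fixed virtual address */
    BUG_ON(__virt_to_fix(addr) != idx);         /* __virt_to_fix() is its inverse */
    return addr;
}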

Now back to early_ioremap_page_table_range_init(); the first function it calls is page_table_range_init():

【file:/arch/x86/mm/init_32.c】
/*
 * This function initializes a certain range of kernel virtual memory
 * with new bootmem page tables, everywhere page tables are missing in
 * the given range.
 *
 * NOTE: The pagetables are allocated contiguous on the physical space
 * so we can cache the place of the first one and move around without
 * checking the pgd every time.
 */
static void __init
page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
{
    int pgd_idx, pmd_idx;
    unsigned long vaddr;
    pgd_t *pgd;
    pmd_t *pmd;
    pte_t *pte = NULL;
    unsigned long count = page_table_range_init_count(start, end);
    void *adr = NULL;

    if (count)
        adr = alloc_low_pages(count);

    vaddr = start;
    pgd_idx = pgd_index(vaddr);
    pmd_idx = pmd_index(vaddr);
    pgd = pgd_base + pgd_idx;

    for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
        pmd = one_md_table_init(pgd);
        pmd = pmd + pmd_index(vaddr);
        for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
                            pmd++, pmd_idx++) {
            pte = page_table_kmap_check(one_page_table_init(pmd),
                            pmd, vaddr, pte, &adr);

            vaddr += PMD_SIZE;
        }
        pmd_idx = 0;
    }
}

The first call of interest inside this function is page_table_range_init_count():

【file:/arch/x86/mm/init_32.c】
static unsigned long __init
page_table_range_init_count(unsigned long start, unsigned long end)
{
    unsigned long count = 0;
#ifdef CONFIG_HIGHMEM
    int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
    int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
    int pgd_idx, pmd_idx;
    unsigned long vaddr;

    if (pmd_idx_kmap_begin == pmd_idx_kmap_end)
        return 0;

    vaddr = start;
    pgd_idx = pgd_index(vaddr);
    pmd_idx = pmd_index(vaddr);

    for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) {
        for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
                            pmd_idx++) {
            if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin &&
                (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end)
                count++;
            vaddr += PMD_SIZE;
        }
        pmd_idx = 0;
    }
#endif
    return count;
}

page_table_range_init_count() counts how many page tables are needed for the temporary kernel mapping interval. As mentioned earlier, FIXADDR_START to FIXADDR_TOP is the fixed-mapping region, and within it several indices mark sub-regions with different purposes; one of them, from FIX_KMAP_BEGIN to FIX_KMAP_END, is the temporary kernel mapping interval. Their definitions, for convenience:

    FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */

    FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,

Here KM_TYPE_NR is the number of "windows" per CPU: any page frame in high memory can be mapped into the kernel address space through one of these windows, and kmap_atomic() sets up the window-to-highmem relationship, i.e. establishes a temporary kernel mapping. NR_CPUS is the number of CPUs. In short, the temporary kernel mapping interval reserves a dedicated set of window slots for each CPU. Because kmap_atomic() uses this interval, its page tables must be kept physically contiguous.
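
A hedged usage sketch (not taken from the article's kernel sources) of how a high-memory page is accessed through one of these per-CPU window slots:

#include <linux/highmem.h>
#include <linux/string.h>

/* Copy data into a highmem page via a temporary kernel mapping. kmap_atomic()
 * picks a free window (a fixmap slot between FIX_KMAP_BEGIN and FIX_KMAP_END)
 * belonging to the current CPU. */
static void copy_into_highmem_page(struct page *page, const void *src, size_t len)
{
    void *kaddr = kmap_atomic(page);   /* map the page into the per-CPU window */
    memcpy(kaddr, src, len);           /* the page is addressable here; preemption is disabled */
    kunmap_atomic(kaddr);              /* tear the temporary mapping down again */
}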

Back in page_table_range_init(): when the count returned is non-zero, the call that follows page_table_range_init_count() is alloc_low_pages():

【file:/arch/x86/mm/init.c】
/*
 * Pages returned are already directly mapped.
 *
 * Changing that is likely to break Xen, see commit:
 *
 * 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve
 *
 * for detailed information.
 */
__ref void *alloc_low_pages(unsigned int num)
{
    unsigned long pfn;
    int i;

    if (after_bootmem) {
        unsigned int order;

        order = get_order((unsigned long)num << PAGE_SHIFT);
        return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK |
                        __GFP_ZERO, order);
    }

    if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
        unsigned long ret;
        if (min_pfn_mapped >= max_pfn_mapped)
            panic("alloc_low_pages: ran out of memory");
        ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT,
                    max_pfn_mapped << PAGE_SHIFT,
                    PAGE_SIZE * num, PAGE_SIZE);
        if (!ret)
            panic("alloc_low_pages: can not alloc memory");
        memblock_reserve(ret, PAGE_SIZE * num);
        pfn = ret >> PAGE_SHIFT;
    } else {
        pfn = pgt_buf_end;
        pgt_buf_end += num;
        printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n",
            pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1);
    }

    for (i = 0; i < num; i++) {
        void *adr;

        adr = __va((pfn + i) << PAGE_SHIFT);
        clear_page(adr);
    }

    return __va(pfn << PAGE_SHIFT);
}

It decides, based on how much of the page-table buffer reserved earlier by early_alloc_pgt_buf() has already been used, whether to hand out the page-table memory from that buffer or to allocate it through the memblock allocator.
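
As an aside, the page-table construction helpers below (one_md_table_init() and one_page_table_init()) use the single-page variant alloc_low_page(), which is just a thin wrapper; roughly (assumed, from memory, to live in arch/x86/mm/mm_internal.h):

static inline void *alloc_low_page(void)
{
    return alloc_low_pages(1);   /* one zeroed, already direct-mapped page */
}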

Returning to page_table_range_init(): one_md_table_init() allocates a new physical page to serve as a page middle directory when the pgd entry is empty. However, this analysis only covers the non-PAE x86 case, where no separate page middle directory exists (it is folded into the pgd), so in practice the function simply returns its input. The code:

【file:/arch/x86/mm/init_32.c】
/*
 * Creates a middle page table and puts a pointer to it in the
 * given global directory entry. This only returns the gd entry
 * in non-PAE compilation mode, since the middle layer is folded.
 */
static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
    pud_t *pud;
    pmd_t *pmd_table;

#ifdef CONFIG_X86_PAE
    if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
        pmd_table = (pmd_t *)alloc_low_page();
        paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
        set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
        pud = pud_offset(pgd, 0);
        BUG_ON(pmd_table != pmd_offset(pud, 0));

        return pmd_table;
    }
#endif
    pud = pud_offset(pgd, 0);
    pmd_table = pmd_offset(pud, 0);

    return pmd_table;
}

Next comes page_table_kmap_check(). The pte passed to it is produced by one_page_table_init(), which, when the pmd does not yet point to a page table, creates one and makes the pmd point to it; a simplified sketch of that helper is given below.
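
A simplified sketch of one_page_table_init() (reconstructed from memory of arch/x86/mm/init_32.c; debug-pagealloc handling omitted):

static pte_t * __init one_page_table_init(pmd_t *pmd)
{
    if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
        pte_t *page_table = (pte_t *)alloc_low_page();

        paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
        set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
        BUG_ON(page_table != pte_offset_kernel(pmd, 0));
    }

    return pte_offset_kernel(pmd, 0);
}

page_table_kmap_check() itself is implemented as follows: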

【file:/arch/x86/mm/init_32.c】
static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
                       unsigned long vaddr, pte_t *lastpte,
                       void **adr)
{
#ifdef CONFIG_HIGHMEM
    /*
     * Something (early fixmap) may already have put a pte
     * page here, which causes the page table allocation
     * to become nonlinear. Attempt to fix it, and if it
     * is still nonlinear then we have to bug.
     */
    int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
    int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;

    if (pmd_idx_kmap_begin != pmd_idx_kmap_end
        && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
        && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) {
        pte_t *newpte;
        int i;

        BUG_ON(after_bootmem);
        newpte = *adr;
        for (i = 0; i < PTRS_PER_PTE; i++)
            set_pte(newpte + i, pte[i]);
        *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE);

        paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
        set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
        BUG_ON(newpte != pte_offset_kernel(pmd, 0));
        __flush_tlb_all();

        paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
        pte = newpte;
    }
    BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
           && vaddr > fix_to_virt(FIX_KMAP_END)
           && lastpte && lastpte + PTRS_PER_PTE != pte);
#endif
    return pte;
}

Here the markers of the temporary kernel mapping interval (FIX_KMAP_BEGIN to FIX_KMAP_END) appear again. The function checks whether the address whose page table is currently being initialized falls within that interval. If it does, the contents of the existing pte page table are copied into the page-table space allocated by page_table_range_init(), the address of the new page table newpte is written into the pmd (which on a 32-bit non-PAE system is effectively the page global directory entry), and __flush_tlb_all() is called to flush the TLB. If the address is outside that interval, the page table allocated by one_page_table_init() in the argument expression is simply used as-is.

This makes clear what page_table_range_init() is really for. Because kmap_atomic() uses the temporary kernel mapping interval, its page tables must be physically contiguous. Since earlier code may already have installed page-table entries for parts of the fixmap region, breaking that contiguity, a fresh contiguous block of page-table pages is allocated here and the contents of any existing page tables are copied into it. Note that, unlike the low-memory page-table initialization, the page tables here are merely allocated: the corresponding PTEs are not yet initialized. That work is left to the code responsible for each fixmap sub-region, which will later call set_fixmap() to associate the fixmap page tables with physical memory.
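
A hedged illustration of that later step (the slot and the physical address are placeholders chosen for the example, not a specific call site):

/* Hypothetical example: wire one fixmap slot to a physical page and obtain
 * its fixed virtual address. set_fixmap() only has to write the PTE here,
 * because the page table behind the slot was already allocated by
 * page_table_range_init(). */
static void __iomem *map_one_fixmap_page(phys_addr_t phys)
{
    set_fixmap(FIX_EARLYCON_MEM_BASE, phys & PAGE_MASK);        /* slot chosen only for illustration */
    return (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE);  /* compile-time constant address */
}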

Further down in early_ioremap_page_table_range_init(), early_ioremap_reset() does nothing more than set the global variable after_paging_init.
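
Its body is tiny; roughly (a hedged reconstruction from memory):

void __init early_ioremap_reset(void)
{
    after_paging_init = 1;   /* subsequent early fixmap setups go through the regular set_fixmap() path */
}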

Finally, after early_ioremap_page_table_range_init() returns, init_mem_mapping() calls load_cr3() to reload the CR3 register and __flush_tlb_all() to flush the TLB, bringing the new paging mappings into effect.
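
For reference, the tail of init_mem_mapping() on the 32-bit path looks roughly like this (a hedged, simplified reconstruction from a kernel around version 3.13; details vary between versions):

void __init init_mem_mapping(void)
{
    /* ... direct mapping of low memory set up above ... */

    early_ioremap_page_table_range_init();   /* fixmap page tables (32-bit path) */

    load_cr3(swapper_pg_dir);   /* point CR3 at the kernel page global directory */
    __flush_tlb_all();          /* flush stale TLB entries so the new tables take effect */

    early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
}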

At this point, the kernel page tables are fully set up.
