一.为什么引入临时内存映射(temporary kernel mappings)
在永久内存映射中我们看到,如果
pkmap_page_table页表里面没有空的entry,那么就会导致这次映射被阻塞,所以我们说不能在一些原子的上下文情况下调用kmap()函数。而在临时内存映射中,不会去判断该pte是否已经被用掉了,它采用的是覆盖的策略,所以把总是能成功的建立映射。会不会被阻塞就是临时内存映射和永久内存映射一个最明显的区别。
二. 临时内存映射
内核地址空间布局
FIXADDR_TOP = 0xfffff000:因为最后一个页面(0xfffff000--0xffffffff)被内核所保留。
enum fixed_addresses {
#ifdef CONFIG_X86_32
FIX_HOLE,
FIX_VDSO,
#else
VSYSCALL_LAST_PAGE,
VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
+ ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
VSYSCALL_HPET,
#endif
FIX_DBGP_BASE,
FIX_EARLYCON_MEM_BASE,
#ifdef CONFIG_X86_LOCAL_APIC
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
#endif
#ifdef CONFIG_X86_IO_APIC
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
#endif
#ifdef CONFIG_X86_VISWS_APIC
FIX_CO_CPU, /* Cobalt timer */
FIX_CO_APIC, /* Cobalt APIC Redirection Table */
FIX_LI_PCIA, /* Lithium PCI Bridge A */
FIX_LI_PCIB, /* Lithium PCI Bridge B */
#endif
#ifdef CONFIG_X86_F00F_BUG
FIX_F00F_IDT, /* Virtual mapping for IDT */
#endif
#ifdef CONFIG_X86_CYCLONE_TIMER
FIX_CYCLONE_TIMER, /*cyclone timer register*/
#endif
#ifdef CONFIG_X86_32
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#ifdef CONFIG_PCI_MMCONFIG
FIX_PCIE_MCFG,
#endif
#endif
#ifdef CONFIG_PARAVIRT
FIX_PARAVIRT_BOOTMAP,
#endif
FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
__end_of_permanent_fixed_addresses,
/*
* 256 temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
*
* We round it up to the next 256 pages boundary so that we
* can have a single pgd entry and a single pte table:
*/
#define NR_FIX_BTMAPS 64
#define FIX_BTMAPS_SLOTS 4
FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
(__end_of_permanent_fixed_addresses & 255),
FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
FIX_OHCI1394_BASE,
#endif
#ifdef CONFIG_X86_32
FIX_WP_TEST,
#endif
#ifdef CONFIG_INTEL_TXT
FIX_TBOOT_BASE,
#endif
__end_of_fixed_addresses
};
|
#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
|
从FIXADDR_START到FIXADDR_TOP这段地址空间被称为固定映射线性空间,这段地址空间主要用来映射一些设备的内存和寄存器,像FIX_APIC_BASE是用来映射local apic的寄存器的。其中的FIX_KMAP_BEGIN到FIX_KMAP_END是用来供临时内存映射的使用。
这段空间的大小是KM_TYPE_NR*NR_CPUS,也就是说,每个CPU都有独立的KM_TYPE_NR大小的空间,来看看看KM_TYPE_NR是什么
#ifdef __WITH_KM_FENCE
# define KMAP_D(n) __KM_FENCE_##n ,
#else
# define KMAP_D(n)
#endif
enum km_type {
KMAP_D(0) KM_BOUNCE_READ,
KMAP_D(1) KM_SKB_SUNRPC_DATA,
KMAP_D(2) KM_SKB_DATA_SOFTIRQ,
KMAP_D(3) KM_USER0,
KMAP_D(4) KM_USER1,
KMAP_D(5) KM_BIO_SRC_IRQ,
KMAP_D(6) KM_BIO_DST_IRQ,
KMAP_D(7) KM_PTE0,
KMAP_D(8) KM_PTE1,
KMAP_D(9) KM_IRQ0,
KMAP_D(10) KM_IRQ1,
KMAP_D(11) KM_SOFTIRQ0,
KMAP_D(12) KM_SOFTIRQ1,
KMAP_D(13) KM_SYNC_ICACHE,
KMAP_D(14) KM_SYNC_DCACHE,
/* UML specific, for copy_*_user - used in do_op_one_page */
KMAP_D(15) KM_UML_USERCOPY,
KMAP_D(16) KM_IRQ_PTE,
KMAP_D(17) KM_NMI,
KMAP_D(18) KM_NMI_PTE,
KMAP_D(19) KM_TYPE_NR
};
|
每个枚举项都代表了一个pte,每个CPU都有20个这样的pte供临时内核映射使用。我们先来看看内核如何来获得这些对应的枚举项所对应的pte地址:
#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
|
这个宏返回的是固定线性映射里面枚举项所对应的线性地址,FIXADDR_TOP=0xfffff000,以FIX_APIC_BASE为例,__fix_to_virt(
FIX_APIC_BASE
),FIX_APIC_BASE等于4,那么他的线性地址就是0xfffff000-(4<
线性地址和枚举项的位置关系是成反比的,越靠前的枚举项对应的线性地址越靠后。
下面的代码是完整的如何通过枚举项,获得对应的pte项所对应的线性地址。
static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
{
return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), vaddr), vaddr);
}
static void __init kmap_init(void)
{
unsigned long kmap_vstart;
/*
* Cache the first kmap pte:
*/
kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
kmap_prot = PAGE_KERNEL;
}
|
注意:kmap_pte是临时内存映射起始的页表项,接下来的其他页表项根据他们的相对位置差就可以求得。比如idx对应的pte=kmap_pte - idx,为什么是减不是加,看上面的说明(越靠前的枚举项对应的线性地址越靠后
)。
接下来我们看实际的建立映射的函数(kmap_atomic)
/*
* kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
* no global lock is needed and because the kmap code must perform a global TLB
* invalidation when the kmap pool wraps.
*
* However when holding an atomic kmap it is not legal to sleep, so atomic
* kmaps are appropriate for short, tight code paths only.
*/
void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
{
enum fixed_addresses idx;
unsigned long vaddr;
/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
pagefault_disable();
//判断要建立映射的页在高端内存中,否则其线性地址已经存在
if (!PageHighMem(page))
return page_address(page);
debug_kmap_atomic(type);
//求得对应的idx
idx = type + KM_TYPE_NR*smp_processor_id();
//内核可以访问该页的线性地址
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
BUG_ON(!pte_none(*(kmap_pte-idx)));
//根据kmap_pte,建立对应的页表项映射,之后就可以通过vaddr访问该页了
set_pte(kmap_pte-idx, mk_pte(page, prot));
return (void *)vaddr;
}
void *kmap_atomic(struct page *page, enum km_type type)
{
return kmap_atomic_prot(page, type, kmap_prot);
}
|
解除映射也很简单,没有做什么特殊处理:
void kunmap_atomic(void *kvaddr, enum km_type type)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
/*
* Force other mappings to Oops if they'll try to access this pte
* without first remap it. Keeping stale mappings around is a bad idea
* also, in case the page changes cacheability attributes or becomes
* a protected page in a hypervisor.
*/
if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
kpte_clear_flush(kmap_pte-idx, vaddr);
else {
#ifdef CONFIG_DEBUG_HIGHMEM
BUG_ON(vaddr < PAGE_OFFSET);
BUG_ON(vaddr >= (unsigned long)high_memory);
#endif
}
pagefault_enable();
}
|
三. 总结
其实,不管是那样的方式,原理都是一样的,都是在固定映射区外选定一个地址,然后再修改PTE项,使其指向相应的page。特别值得我们注意的是,因为kmap()会引起睡眠,所以它不能用于中断处理。但每一种映射方式都有自己的优点和缺点,这需要我们在写代码的时候仔细考虑了。
阅读(1554) | 评论(0) | 转发(0) |