为了技术,我不会停下学习的脚步,我相信我还能走二十年。
分类:
2012-07-01 20:28:13
原文地址:高端内存分析 作者:embededgood
#ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; if (max_pfn > max_low_pfn) highstart_pfn = max_low_pfn; e820_register_active_regions(0, 0, highend_pfn); sparse_memory_present_with_active_regions(0); printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); num_physpages = highend_pfn; high_memory = (void *) __va(highstart_pfn * PAGE_SIZE-1)+1; #else e820_register_active_regions(0, 0, max_low_pfn); sparse_memory_present_with_active_regions(0); num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1)+1; #endif |
pgd = swapper_pg_dir + pgd_index(vaddr); pud = pud_offset(pgd, vaddr);//pud==pgd pmd = pmd_offset(pud, vaddr);//pmd==pud==pgd pte = pte_offset_kernel(pmd, vaddr); pkmap_page_table = pte; |
void *kmap(struct page *page) { might_sleep(); if (!PageHighMem(page)) return page_address(page); return kmap_high(page); } |
/** * kmap_high - map a highmem page into memory * @page: &struct page to map * * Returns the page's virtual memory address. * * We cannot call this from interrupts, as it may block. */ void *kmap_high(struct page *page) { unsigned long vaddr; /* * For highmem pages, we can't trust "virtual" until * after we have the lock. */ lock_kmap(); vaddr = (unsigned long)page_address(page); if (!vaddr) vaddr = map_new_virtual(page); pkmap_count[PKMAP_NR(vaddr)]++; BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2); unlock_kmap(); return (void*) vaddr; } |
static inline unsigned long map_new_virtual(struct page *page) { unsigned long vaddr; int count; start: count = LAST_PKMAP;//LAST_PKMAP=1024 /* Find an empty entry */ for (;;) { last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK; if (!last_pkmap_nr) { flush_all_zero_pkmaps(); count = LAST_PKMAP; } if (!pkmap_count[last_pkmap_nr]) break; /* Found a usable entry */ if (--count) continue; /* * Sleep for somebody else to unmap their entries */ { DECLARE_WAITQUEUE(wait, current); __set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&pkmap_map_wait, &wait); unlock_kmap(); schedule(); remove_wait_queue(&pkmap_map_wait, &wait); lock_kmap(); /* Somebody else might have mapped it while we slept */ if (page_address(page)) return (unsigned long)page_address(page); /* Re-start */ goto start; } } vaddr = PKMAP_ADDR(last_pkmap_nr); set_pte_at(&init_mm, vaddr, &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); pkmap_count[last_pkmap_nr] = 1; set_page_address(page, (void *)vaddr); return vaddr; } |
last_pkmap_nr:记录上次被分配的页表项在pkmap_page_table里的位置,初始值为0,所以第一次分配的时候last_pkmap_nr等于1。
接下来判断什么时候last_pkmap_nr等于0,等于0就表示1023(LAST_PKMAP(1024)-1)个页表项已经被分配了,这时候就需要调用flush_all_zero_pkmaps()函数,把所有pkmap_count[] 计数为1的页表项在TLB里面的entry给flush掉,并重置为0,这就表示该页表项又可以用了,可能会有疑惑为什么不在把pkmap_count置为1的时候也就是解除映射的同时把TLB也flush呢?个人感觉有可能是为了效率的问题吧,毕竟等到不够的时候再刷新,效率要好点吧。
再判断pkmap_count[last_pkmap_nr]是否为0,0的话就表示这个页表项是可用的,那么就跳出循环了到下面了。
PKMAP_ADDR(last_pkmap_nr)返回这个页表项对应的线性地址vaddr.
#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))
set_pte_at(mm, addr, ptep, pte)函数在NON-PAE i386上的实现其实很简单,其实就等同于下面的代码:
static inline void native_set_pte(pte_t *ptep , pte_t pte)
{
*ptep = pte;
}
我们已经知道页表的线性起始地址存放在pkmap_page_table里面,那么相应的可用的页表项的地址就是&pkmap_page_table[last_pkmap_nr],得到了页表项的地址,只要把相应的pte填写进去,那么整个映射不就完成了吗?
pte由两部分组成:高20位表示物理地址,低12位表示页的描述信息。
怎么通过page查找对应的物理地址呢(参考page_address()一文)?其实很简单,用(page - mem_map) 再移PAGE_SHIFT位就可以了。
低12位的页描述信息是固定的:kmap_prot=(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL).
下面的代码就是做了这些事情:
mk_pte(page, kmap_prot));
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
#define page_to_pfn __page_to_pfn
#define __page_to_pfn(page) ((unsigned long)((page) - mem_map) + \
ARCH_PFN_OFFSET)
static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
massage_pgprot(pgprot));
}
接下来把pkmap_count[last_pkmap_nr]置为1,1不是表示不可用吗,既然映射已经建立好了,应该赋值为2呀,其实这个操作是在他的上层函数kmap_high里面完成的(pkmap_count[PKMAP_NR(vaddr)]++).
到此为止,整个映射就完成了,再把page和对应的线性地址加入到page_address_htable哈希链表里面就可以了(参考page_address一文)。
我们继续看所有的页表项都已经用了的情况下,也就是1024个页表项全已经映射了内存了,如何处理。此时count==0,于是就进入了下面的代码:
/*
* Sleep for somebody else to unmap their entries
*/
{
DECLARE_WAITQUEUE(wait, current);
__set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(&pkmap_map_wait, &wait);
unlock_kmap();
schedule();
remove_wait_queue(&pkmap_map_wait, &wait);
lock_kmap();
/* Somebody else might have mapped it while we slept */
if (page_address(page))
return (unsigned long)page_address(page);
/* Re-start */
goto start;
}
这段代码其实很简单,就是把当前任务加入到等待队列pkmap_map_wait,当有其他任务唤醒这个队列时,再继续goto start,重新整个过程。这里就是上面说的调用kmap函数有可能阻塞的原因。
那么什么时候会唤醒pkmap_map_wait队列呢?当调用kunmap_high函数,来释放掉一个映射的时候。
kunmap_high函数其实页很简单,就是把要释放的页表项的计数减1,如果等于1的时候,表示有可用的页表项了,再唤醒pkmap_map_wait队列
/**
* kunmap_high - map a highmem page into memory
* @page: &struct page to unmap
*
* If ARCH_NEEDS_KMAP_HIGH_GET is not defined then this may be called
* only from user context.
*/
void kunmap_high(struct page *page)
{
unsigned long vaddr;
unsigned long nr;
unsigned long flags;
int need_wakeup;
lock_kmap_any(flags);
vaddr = (unsigned long)page_address(page);
BUG_ON(!vaddr);
nr = PKMAP_NR(vaddr);
/*
* A count must never go down to zero
* without a TLB flush!
*/
need_wakeup = 0;
switch (--pkmap_count[nr]) {//减一
case 0:
BUG();
case 1:
/*
* Avoid an unnecessary wake_up() function call.
* The common case is pkmap_count[] == 1, but
* no waiters.
* The tasks queued in the wait-queue are guarded
* by both the lock in the wait-queue-head and by
* the kmap_lock. As the kmap_lock is held here,
* no need for the wait-queue-head's lock. Simply
* test if the queue is empty.
*/
need_wakeup = waitqueue_active(&pkmap_map_wait);
}
unlock_kmap_any(flags);
/* do wake-up, if needed, race-free outside of the spin lock */
if (need_wakeup)
wake_up(&pkmap_map_wait);
}
From: