一.总体说明
1.1 关于struct page的说明
-
/*
 * Page descriptor as quoted from the Linux 2.4 headers; the same type is
 * also reachable through the typedef name mem_map_t.
 */
typedef struct page {
-
struct list_head list; /* ->mapping has some page lists; rmqueue()/expand() also use this link to keep the page on a free_area free_list. */
-
struct address_space *mapping; /* The inode (or ...) we belong to. */
-
unsigned long index; /* Our offset within mapping. */
-
struct page *next_hash; /* Next page sharing our hash bucket in the pagecache hash table. */
-
atomic_t count; /* Usage count. */
-
unsigned long flags; /* atomic flags, some possibly updated asynchronously */
-
struct list_head lru; /* Pageout list, eg. active_list; protected by pagemap_lru_lock !! */
-
wait_queue_head_t wait; /* Page locked? Stand in line... */
-
struct page **pprev_hash; /* Complement to *next_hash. */
-
struct buffer_head * buffers; /* Buffer maps us to a disk block. */
-
void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. highmem) */
-
struct zone_struct *zone; /* Memory zone we are in. */
-
} mem_map_t;
二.代码分析
start_kernel-->kmem_cache_sizes_init-->kmem_cache_create-->kmem_cache_alloc
-->kmem_cache_grow-->kmem_getpages-->__get_free_pages-->alloc_pages
在linux-2.4.18/include/linux/mm.h:362中
-
static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
-
{
-
if (order >= MAX_ORDER) //默认MAX_ORDER=10,所以这儿最多能分配2^9=512个页面2M内存
-
return NULL;
-
return _alloc_pages(gfp_mask, order);
-
}
mm/page_alloc.c中
-
struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
-
{
-
return __alloc_pages(gfp_mask, order, contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
-
}
在mm/page_alloc.c中
-
/*
 * This is the 'heart' of the zoned buddy allocator:
 *
 * Walks the NULL-terminated zone array of `zonelist` several times, each
 * pass using a different free-page threshold, and returns the first block
 * that rmqueue() hands back for `order`, or NULL when giving up.
 *
 *   1. First pass: threshold is (1 << order) plus the accumulated
 *      pages_low of the zones visited so far.
 *   2. Mark the first zone (classzone) as needing balance, wake any
 *      sleeper on kswapd_wait, then retry with accumulated pages_min
 *      (each zone's share divided by 4 when __GFP_WAIT is not set).
 *   3. rebalance: a task flagged PF_MEMALLOC or PF_MEMDIE tries every
 *      zone with no threshold at all and returns NULL if that fails.
 *   4. Requests without __GFP_WAIT return NULL here.
 *   5. Otherwise call balance_classzone(), retry with accumulated
 *      pages_min, give up for order > 3, or yield the CPU and jump back
 *      to rebalance.
 */
-
struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)
-
{
-
unsigned long min;
-
zone_t **zone, * classzone;
-
struct page * page;
-
/* Filled in by balance_classzone(); not read again in this function. */
int freed;
-
-
zone = zonelist->zones; // when called via _alloc_pages(), zonelist is an entry of contig_page_data.node_zonelists
-
/* The first zone of the list is remembered as the class zone. */
classzone = *zone;
-
min = 1UL << order; // e.g. order=0 gives 1<<0 = 1, so the smallest request is a single page
-
// Suppose the walk is currently at ZONE_NORMAL: when z->free_pages > min does NOT hold,
-
// NORMAL has too few free pages; per the original note the next zonelist entry is ZONE_DMA, so the search continues there.
-
for (;;) {
-
zone_t *z = *(zone++);
-
if (!z) // every zonelist's zone array ends with a NULL entry
-
break;
-
-
min += z->pages_low; // min accumulates across zones; original note: pages_low is 510 (derived from zone_balance_max), so min becomes 511 for the first zone at order 0
-
if (z->free_pages > min) {
-
page = rmqueue(z, order);
-
if (page)
-
return page;
-
}
-
}
-
-
/* First pass failed: flag the class zone and wake a kswapd_wait sleeper, if any. */
classzone->need_balance = 1;
-
/* Barrier between the need_balance store and the wait-queue test (presumably so the woken thread observes the flag). */
mb();
-
if (waitqueue_active(&kswapd_wait))
-
wake_up_interruptible(&kswapd_wait);
-
-
/* Second pass: restart from the first zone with the lower pages_min threshold. */
zone = zonelist->zones;
-
min = 1UL << order;
-
for (;;) {
-
unsigned long local_min;
-
zone_t *z = *(zone++);
-
if (!z)
-
break;
-
-
local_min = z->pages_min;
-
/* Callers that did not pass __GFP_WAIT only have to leave a quarter of pages_min. */
if (!(gfp_mask & __GFP_WAIT))
-
local_min >>= 2;
-
min += local_min;
-
if (z->free_pages > min) {
-
page = rmqueue(z, order);
-
if (page)
-
return page;
-
}
-
}
-
-
/* here we're in the low on memory slow path */
-
-
rebalance:
-
/* Tasks flagged PF_MEMALLOC or PF_MEMDIE ignore all thresholds. */
if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) {
-
zone = zonelist->zones;
-
for (;;) {
-
zone_t *z = *(zone++);
-
if (!z)
-
break;
-
-
page = rmqueue(z, order);
-
if (page)
-
return page;
-
}
-
return NULL;
-
}
-
-
/* Atomic allocations - we can't balance anything */
-
if (!(gfp_mask & __GFP_WAIT))
-
return NULL;
-
-
/* balance_classzone() may itself return a page for this request. */
page = balance_classzone(classzone, gfp_mask, order, &freed);
-
if (page)
-
return page;
-
-
/* Retry every zone against the accumulated pages_min threshold. */
zone = zonelist->zones;
-
min = 1UL << order;
-
for (;;) {
-
zone_t *z = *(zone++);
-
if (!z)
-
break;
-
-
min += z->pages_min;
-
if (z->free_pages > min) {
-
page = rmqueue(z, order);
-
if (page)
-
return page;
-
}
-
}
-
-
/* Don't let big-order allocations loop */
-
if (order > 3)
-
return NULL;
-
-
/* Yield for kswapd, and try again */
-
current->policy |= SCHED_YIELD;
-
__set_current_state(TASK_RUNNING);
-
schedule();
-
goto rebalance;
-
}
为什么
在mm/page_alloc.c中__alloc_pages-->rmqueue
-
/*
 * Take a block of 2^order pages off the free lists of `zone`.
 *
 * With zone->lock held (spin_lock_irqsave), the free lists are scanned from
 * free_area[order] upwards until a non-empty one is found or MAX_ORDER is
 * reached. The first block on that list is unlinked, zone->free_pages is
 * reduced by 2^order, and expand() gives back whatever part of a larger
 * block is not needed. The returned page has its count set to 1.
 *
 * Returns NULL when every list from `order` upwards is empty.
 */
static struct page * rmqueue(zone_t *zone, unsigned int order)
-
{
-
free_area_t * area = zone->free_area + order; // free_area[order]: its free_list links free blocks of 2^order pages each
-
unsigned int curr_order = order;
-
struct list_head *head, *curr;
-
unsigned long flags;
-
struct page *page;
-
-
spin_lock_irqsave(&zone->lock, flags);
-
do {
-
head = &area->free_list;
-
curr = memlist_next(head);
-
-
if (curr != head) { // curr != head means the list for this order is not empty
-
unsigned int index;
-
-
page = memlist_entry(curr, struct page, list); // take the first page on head's list
-
if (BAD_RANGE(zone,page))
-
BUG();
-
memlist_del(curr); // unlink curr from head's list: the list no longer tracks the block being allocated
-
/* Position of the block within this zone's page array. */
index = page - zone->zone_mem_map;
-
/* MARK_USED is skipped for the highest order. */
if (curr_order != MAX_ORDER-1)
-
MARK_USED(index, curr_order, area);
-
zone->free_pages -= 1UL << order; // the zone's free page count drops by 2^order (by 1 only when order is 0)
-
// If the block came from the list of the requested order, order == curr_order and expand() returns at once.
-
// If that list was empty, curr_order has been incremented, so curr_order > order and expand()'s split loop runs.
-
page = expand(zone, page, index, order, curr_order, area);
-
spin_unlock_irqrestore(&zone->lock, flags);
-
-
set_page_count(page, 1);
-
if (BAD_RANGE(zone,page))
-
BUG();
-
/* A page coming off the free lists must not be flagged LRU or active. */
if (PageLRU(page))
-
BUG();
-
if (PageActive(page))
-
BUG();
-
return page;
-
}
-
curr_order++; // curr == head means the list for this order is empty,
-
area++; // so move up and look in the next higher order
-
} while (curr_order < MAX_ORDER);
-
spin_unlock_irqrestore(&zone->lock, flags);
-
-
return NULL;
-
}
在mm/page_alloc.c中L159
__alloc_pages-->rmqueue-->expand
-
/*
 * Split a free block of order `high` down to order `low`.
 *
 * On each step the order drops by one, the lower half of the current block
 * is put on the free list of that smaller order (and MARK_USED is applied
 * for it), and `page`/`index` advance to the upper half. When `high` equals
 * `low` on entry the loop does not run and `page` is returned unchanged.
 *
 * `area` must point at the free_area entry for order `high` on entry.
 * Returns the page that starts the remaining block of order `low`.
 */
static inline struct page * expand (zone_t *zone, struct page *page,
-
unsigned long index, int low, int high, free_area_t * area)
-
{
-
/* Number of pages in the block currently being split. */
unsigned long size = 1 << high;
-
// Worked example (low=0, high=2, block starting at index 8): page 8 is inserted into the order-1 list,
-
// then page 10 is inserted into the order-0 list,
-
// and page 11 is the page handed to the caller.
-
while (high > low) {
-
if (BAD_RANGE(zone,page))
-
BUG();
-
area--;
-
high--;
-
size >>= 1;
-
/* The lower half goes back onto the free list one order down. */
memlist_add_head(&(page)->list, &(area)->free_list);
-
MARK_USED(index, high, area);
-
/* Continue with the upper half. */
index += size;
-
page += size;
-
}
-
if (BAD_RANGE(zone,page))
-
BUG();
-
return page;
-
}
开始时的状态:
第1次alloc_page(order=0)时的状态:
第2次alloc_page(order=0)时的状态:
阅读(1663) | 评论(0) | 转发(0) |