分类: LINUX
2008-11-25 18:56:26
三:alloc_pages()/alloc_page 实现分析
函数原型:
alloc_pages(unsigned int gfp_mask, unsigned int order);
alloc_page(unsigned int gfp_mask);
其中alloc_pages()用来分配多页内存,而alloc_page()只用来分配单页内存.
在内核中,alloc_page就是调用了alloc_pages(gfp_mask,0)来实现的
gfp_mask表示分配的标志.
关于alloc_pages/alloc_page的调用,请参阅相关的资料
此外,alloc_pages返回的是page结构,如果要使用分配到的内存,还需使用void *page_address(struct page *page)将其转换一下.
看一下具体的代码:
/*
 * alloc_pages - validate the requested order, then delegate to
 * alloc_pages_current().
 * @gfp_mask: allocation flags, passed through unchanged.
 * @order:    allocation order; the surrounding text states that order 0 is
 *            the single-page case used by alloc_page().
 *
 * Returns NULL when order >= MAX_ORDER; otherwise returns whatever
 * alloc_pages_current() returns.
 */
static inline struct page *
alloc_pages(unsigned int gfp_mask, unsigned int order)
{
// Argument validation. MAX_ORDER is usually defined as 11, so the largest accepted order is 10, i.e. at most 2^10 pages per request.
if (unlikely(order >= MAX_ORDER))
return NULL;
return alloc_pages_current(gfp_mask, order);
}
/*
 * alloc_pages_current - pick the memory policy to apply and allocate under it.
 * @gfp:   allocation flags, forwarded to the underlying allocator.
 * @order: allocation order, forwarded to the underlying allocator.
 *
 * Returns the result of alloc_page_interleave() for an MPOL_INTERLEAVE
 * policy, otherwise the result of __alloc_pages().
 */
struct page *alloc_pages_current(unsigned gfp, unsigned order)
{
// Memory-allocation policy of the current task; usually NULL.
struct mempolicy *pol = current->mempolicy;
// Use default_policy when the task has no policy of its own or when in_interrupt() is true.
if (!pol || in_interrupt())
pol = &default_policy;
// Interleave policy: allocate on the node chosen by interleave_nodes().
if (pol->policy == MPOL_INTERLEAVE)
return alloc_page_interleave(gfp, order, interleave_nodes(pol));
// Any other policy: allocate from the zonelist that zonelist_policy() selects for these flags and policy.
return __alloc_pages(gfp, order, zonelist_policy(gfp, pol));
}
zonelist_policy():根据分配标志(gfp)和内存分配策略(pol)得到合适的zonelist.
跟踪进__alloc_pages()
struct page * fastcall
__alloc_pages(unsigned int gfp_mask, unsigned int order,
struct zonelist *zonelist)
{
const int wait = gfp_mask & __GFP_WAIT;
unsigned long min;
struct zone **zones, *z;
struct page *page;
struct reclaim_state reclaim_state;
struct task_struct *p = current;
int i;
int alloc_type;
int do_retry;
int can_try_harder;
//自旋锁睡眠调试函数,在没有开启相关DEBUG开关的情况下,此函数即为空
might_sleep_if(wait);
can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
//取得zonelist中的zone数组
zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
//如果管理区为空,退出
if (unlikely(zones[0] == NULL)) {
/* Should this ever happen?? */
return NULL;
}
//所要分配的类型,比如说ZONE_DMA
alloc_type = zone_idx(zones[0]);
/* Go through the zonelist once, looking for a zone with enough free */
for (i = 0; (z = zones[i]) != NULL; i++) {
//找到一个合适大小的zone区
//判断分配之后的空闲内存是否会低于所允许的低水位pages_low
min = z->pages_low + (1<<order) + z->protection[alloc_type];
if (z->free_pages < min)
continue;
//从此zone中分配内存页面
page = buffered_rmqueue(z, order, gfp_mask);
if (page)
goto got_pg;
}
//分配内存失败了,唤醒kswapd进行内存回收
for (i = 0; (z = zones[i]) != NULL; i++)
wakeup_kswapd(z);
/*
* Go through the zonelist again. Let __GFP_HIGH and allocations
* coming from realtime tasks to go deeper into reserves
*/
for (i = 0; (z = zones[i]) != NULL; i++) {
//进行内存回收之后,重新寻找有空闲的zone区,这次降低了低位要求
min = z->pages_min;
if (gfp_mask & __GFP_HIGH)
min /= 2;
if (can_try_harder)
min -= min / 4;
min += (1<<order) + z->protection[alloc_type];
if (z->free_pages < min)
continue;
page = buffered_rmqueue(z, order, gfp_mask);
if (page)
goto got_pg;
}
//如果运行到这里的话,说明页面分配依然是失败的
//如果请求内存分配的进程是"内存分配工作者",比如说kswapd,这类进程是为内存回收而工作的,进程的flags带有标志PF_MEMALLOC;或者进程正处于"out of memory"处理之中(带有标志PF_MEMDIE),那就什么都不管了,只要管理区中有内存就分配给它
if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
/* go through the zonelist yet again, ignoring mins */
for (i = 0; (z = zones[i]) != NULL; i++) {
page = buffered_rmqueue(z, order, gfp_mask);
if (page)
goto got_pg;
}
goto nopage;
}
/* Atomic allocations - we can't balance anything */
if (!wait)
goto nopage;
//如果运行到这里的话,说明zone 区的空闲内存实在太少,调用try_to_free_pages进行内存回收.把磁盘缓存区,slab缓存区中的页面释放,将inactive_list中的页面交换至磁盘,然后再回收该项页面
rebalance:
/* We now go into synchronous reclaim */
p->flags |= PF_MEMALLOC;
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
try_to_free_pages(zones, gfp_mask, order);
p->reclaim_state = NULL;
p->flags &= ~PF_MEMALLOC;
/* go through the zonelist yet one more time */
//回收过后,再请求内存
for (i = 0; (z = zones[i]) != NULL; i++) {
min = z->pages_min;
if (gfp_mask & __GFP_HIGH)
min /= 2;
if (can_try_harder)
min -= min / 4;
min += (1<<order) + z->protection[alloc_type];
if (z->free_pages < min)
&