Chinaunix首页 | 论坛 | 博客
  • 博客访问: 716101
  • 博文数量: 183
  • 博客积分: 2650
  • 博客等级: 少校
  • 技术积分: 1428
  • 用 户 组: 普通用户
  • 注册时间: 2008-11-22 17:02
文章分类
文章存档

2017年(1)

2015年(46)

2014年(4)

2013年(8)

2012年(2)

2011年(27)

2010年(35)

2009年(60)

分类: LINUX

2009-10-09 18:24:20

:alloc_pages()/alloc_page 实现分析

 函数原型:

         alloc_pages(unsigned int gfp_mask, unsigned int order);

         alloc_page(unsigned int  gfp_mask);

其中alloc_pages()用来分配多页内存,alloc_page()只用来分配单页内存.

在内核中,alloc_page就是调用了alloc_pages(gfp_mask,0)来实现的

gfp_mask表示分配的标志.

关于alloc_pages/alloc_page的调用,请参阅相关的资料

此外,alloc_pages返回的是page结构,如果要使用分配到的内存,还需使用void *page_address(struct page *page)将其转换一下.

看一下具体的代码:

/*
 * alloc_pages - allocate a block of 2^order pages.
 *
 * gfp_mask: allocation flags, passed through to alloc_pages_current().
 * order:    log2 of the number of pages requested.
 *
 * Returns NULL without trying when order >= MAX_ORDER; otherwise returns
 * whatever alloc_pages_current() returns.  The article notes MAX_ORDER is
 * usually 11, so the largest request would be 2^10 pages.
 */
static inline struct page *
alloc_pages(unsigned int gfp_mask, unsigned int order)
{
	/* Oversized orders are rejected up front; this is the rare path. */
	return unlikely(order >= MAX_ORDER)
		? NULL
		: alloc_pages_current(gfp_mask, order);
}

/*
 * alloc_pages_current - allocate pages under the current task's memory policy.
 *
 * gfp:   allocation flags.
 * order: log2 of the number of pages requested.
 *
 * Uses default_policy when the task has no policy of its own or when called
 * from interrupt context.  An MPOL_INTERLEAVE policy goes through
 * alloc_page_interleave(); every other policy goes to __alloc_pages() with
 * the zonelist chosen by zonelist_policy().
 */
struct page *alloc_pages_current(unsigned gfp, unsigned order)
{
	/* The task's own policy; the article notes it is usually NULL. */
	struct mempolicy *policy = current->mempolicy;

	if (!policy || in_interrupt())
		policy = &default_policy;

	if (policy->policy != MPOL_INTERLEAVE)
		return __alloc_pages(gfp, order, zonelist_policy(gfp, policy));

	return alloc_page_interleave(gfp, order, interleave_nodes(policy));
}

zonelist_policy():根据参数标志和当前CPU节点得到合适的zone_list.

整体流程如下:


跟踪进__alloc_pages()

struct page * fastcall

__alloc_pages(unsigned int gfp_mask, unsigned int order,

                   struct zonelist *zonelist)

{

         const int wait = gfp_mask & __GFP_WAIT;

         unsigned long min;

         struct zone **zones, *z;

         struct page *page;

         struct reclaim_state reclaim_state;

         struct task_struct *p = current;

         int i;

         int alloc_type;

         int do_retry;

         int can_try_harder;

         //自旋锁睡眠调试函数,在没有开启相关DEBUG开关的情况下,此函数即为空

         might_sleep_if(wait);

        

         can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;

         //取得zonelist中的zone数组

         zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */

 

         //如果管理区为空,退出

if (unlikely(zones[0] == NULL)) {

                   /* Should this ever happen?? */

                   return NULL;

         }

         //所要分配的类型,比如说ZONE_DMA

         alloc_type = zone_idx(zones[0]);

 

         /* Go through the zonelist once, looking for a zone with enough free */

         for (i = 0; (z = zones[i]) != NULL; i++) {

                   //找到一个合适大小的zone

                   //判断分配之后的内存是否超过所允许的低位pages_low

                   min = z->pages_low + (1<<order) + z->protection[alloc_type];

                   if (z->free_pages < min)

                            continue;

                   //从此zone中分配内存页面

                   page = buffered_rmqueue(z, order, gfp_mask);

                   if (page)

                            goto got_pg;

         }

        

//分配内存失败了,唤醒kswapd进行内存回收

         for (i = 0; (z = zones[i]) != NULL; i++)

                   wakeup_kswapd(z);

 

         /*

          * Go through the zonelist again. Let __GFP_HIGH and allocations

          * coming from realtime tasks to go deeper into reserves

          */

         for (i = 0; (z = zones[i]) != NULL; i++) {

                   //进行内存回收之后,重新寻找有空闲的zone,这次降低了低位要求

                   min = z->pages_min;

                   if (gfp_mask & __GFP_HIGH)

                            min /= 2;

                   if (can_try_harder)

                            min -= min / 4;

                   min += (1<<order) + z->protection[alloc_type];

 

                   if (z->free_pages < min)

                            continue;

 

                   page = buffered_rmqueue(z, order, gfp_mask);

                   if (page)

                            goto got_pg;

         }

 

         //如果运行到这里的话,说明页面分配依然是失败的

         //如果请求内存分配的进程是“内存分配工作者”,比如说kswapd,这类进程是为内存分配而工作的,其flags带有标志PF_MEMALLOC;或者进程是在做“out of memory”之类的工作,那就什么都不管了,只要管理区中有内存就分配给它

         if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {

                   /* go through the zonelist yet again, ignoring mins */

                   for (i = 0; (z = zones[i]) != NULL; i++) {

                            page = buffered_rmqueue(z, order, gfp_mask);

                            if (page)

                                     goto got_pg;

                   }

                   goto nopage;

         }

 

         /* Atomic allocations - we can't balance anything */

         if (!wait)

                   goto nopage;

//如果运行到这里的话,说明zone 区的空闲内存实在太少,调用try_to_free_pages进行内存回收.把磁盘缓存区,slab缓存区中的页面释放,inactive_list中的页面交换至磁盘,然后再回收该项页面

rebalance:

         /* We now go into synchronous reclaim */

         p->flags |= PF_MEMALLOC;

         reclaim_state.reclaimed_slab = 0;

         p->reclaim_state = &reclaim_state;

 

         try_to_free_pages(zones, gfp_mask, order);

 

         p->reclaim_state = NULL;

         p->flags &= ~PF_MEMALLOC;

 

         /* go through the zonelist yet one more time */

//回收过后,再请求内存

         for (i = 0; (z = zones[i]) != NULL; i++) {

                   min = z->pages_min;

                   if (gfp_mask & __GFP_HIGH)

                            min /= 2;

                   if (can_try_harder)

                            min -= min / 4;

                   min += (1<<order) + z->protection[alloc_type];

 

                   if (z->free_pages < min)

                       &

阅读(945) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~