Category: LINUX

2010-10-27 14:18:25


static inline struct page *
alloc_pages(gfp_t gfp_mask, unsigned int order)
{
    return alloc_pages_current(gfp_mask, order);
}


/**
 *    alloc_pages_current - Allocate pages.
 *
 *    @gfp:
 *        %GFP_USER    user allocation,
 *        %GFP_KERNEL  kernel allocation,
 *        %GFP_HIGHMEM highmem allocation,
 *        %GFP_FS      don't call back into a file system.
 *        %GFP_ATOMIC  don't sleep.
 *    @order: Power of two of allocation size in pages. 0 is a single page.
 *
 *    Allocate a page from the kernel page pool. When not in
 *    interrupt context, apply the current process's NUMA policy.
 *    Returns NULL when no page can be allocated.
 *
 *    Don't call cpuset_update_task_memory_state() unless
 *    1) it's ok to take cpuset_sem (can WAIT), and
 *    2) allocating for current task (not interrupt).
 */

struct page *alloc_pages_current(gfp_t gfp, unsigned order)
{
    struct mempolicy *pol = current->mempolicy;
    struct page *page;

    if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
        pol = &default_policy;

    get_mems_allowed();
    /*
     * No reference counting needed for current->mempolicy
     * nor system default_policy
     */

    if (pol->mode == MPOL_INTERLEAVE)
        page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
    else
        page = __alloc_pages_nodemask(gfp, order,
            policy_zonelist(gfp, pol), policy_nodemask(gfp, pol));
    put_mems_allowed();
    return page;
}
EXPORT_SYMBOL(alloc_pages_current);
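
As a side note, here is a minimal, hypothetical caller sketch (not from the kernel tree; the demo_* names are made up for illustration) showing how pages obtained through this interface are typically used and released. alloc_pages(), page_address() and __free_pages() are the standard kernel APIs involved.

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>

static struct page *demo_pages;
static void *demo_buf;

static int demo_alloc(void)
{
    /* May sleep; on NUMA builds this lands in alloc_pages_current() above */
    demo_pages = alloc_pages(GFP_KERNEL, 2);    /* 2^2 = 4 contiguous pages */
    if (!demo_pages)
        return -ENOMEM;

    /* Lowmem pages have a permanent kernel mapping */
    demo_buf = page_address(demo_pages);
    return 0;
}

static void demo_free(void)
{
    if (demo_pages)
        __free_pages(demo_pages, 2);    /* order must match the allocation */
}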


/*
 * This is the 'heart' of the zoned buddy allocator.
 */

struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
            struct zonelist *zonelist, nodemask_t *nodemask)
{
    enum zone_type high_zoneidx = gfp_zone(gfp_mask);
    struct zone *preferred_zone;
    struct page *page;
    int migratetype = allocflags_to_migratetype(gfp_mask);

    gfp_mask &= gfp_allowed_mask;

    lockdep_trace_alloc(gfp_mask);

    might_sleep_if(gfp_mask & __GFP_WAIT);

    if (should_fail_alloc_page(gfp_mask, order))
        return NULL;

    /*
     * Check the zones suitable for the gfp_mask contain at least one
     * valid zone. It's possible to have an empty zonelist as a result
     * of GFP_THISNODE and a memoryless node
     */

    if (unlikely(!zonelist->_zonerefs->zone))
        return NULL;

    get_mems_allowed();
    /* The preferred zone is used for statistics later */
    first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
    if (!preferred_zone) {
        put_mems_allowed();
        return NULL;
    }

    /* First allocation attempt */
    page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
            zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
            preferred_zone, migratetype);
    if (unlikely(!page))
        page = __alloc_pages_slowpath(gfp_mask, order,
                zonelist, high_zoneidx, nodemask,
                preferred_zone, migratetype);
    put_mems_allowed();

    trace_mm_page_alloc(page, order, gfp_mask, migratetype);
    return page;
}
EXPORT_SYMBOL(__alloc_pages_nodemask);
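
For context, the zonelist argument normally comes from the per-node zonelists. The two wrappers below are paraphrased from include/linux/gfp.h of this kernel generation (details may differ between versions) and show how alloc_pages_node() reaches __alloc_pages_nodemask() with a NULL nodemask:

/* Paraphrased wrappers; see include/linux/gfp.h for the exact code */
static inline struct page *
__alloc_pages(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist)
{
    return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
}

static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
                        unsigned int order)
{
    /* A negative node id means the current node */
    if (nid < 0)
        nid = numa_node_id();

    return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
}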


/*
 * get_page_from_freelist goes through the zonelist trying to allocate
 * a page.
 */

static struct page *
get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
        struct zonelist *zonelist, int high_zoneidx, int alloc_flags,
        struct zone *preferred_zone, int migratetype)
{
    struct zoneref *z;
    struct page *page = NULL;
    int classzone_idx;
    struct zone *zone;
    nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
    int zlc_active = 0;        /* set if using zonelist_cache */
    int did_zlc_setup = 0;        /* just call zlc_setup() one time */

    classzone_idx = zone_idx(preferred_zone);
zonelist_scan:
    /*
     * Scan zonelist, looking for a zone with enough free.
     * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
     */

    for_each_zone_zonelist_nodemask(zone, z, zonelist,
                        high_zoneidx, nodemask) {
        if (NUMA_BUILD && zlc_active &&
            !zlc_zone_worth_trying(zonelist, z, allowednodes))
                continue;
        if ((alloc_flags & ALLOC_CPUSET) &&
            !cpuset_zone_allowed_softwall(zone, gfp_mask))
                goto try_next_zone;

        BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
        if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
            unsigned long mark;
            int ret;

            mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
            if (zone_watermark_ok(zone, order, mark,
                 classzone_idx, alloc_flags))
                goto try_this_zone;

            if (zone_reclaim_mode == 0)
                goto this_zone_full;

            ret = zone_reclaim(zone, gfp_mask, order);
            switch (ret) {
            case ZONE_RECLAIM_NOSCAN:
                /* did not scan */
                goto try_next_zone;
            case ZONE_RECLAIM_FULL:
                /* scanned but unreclaimable */
                goto this_zone_full;
            default:
                /* did we reclaim enough */
                if (!zone_watermark_ok(zone, order, mark,
                        classzone_idx, alloc_flags))
                    goto this_zone_full;
            }
        }

try_this_zone:
        page = buffered_rmqueue(preferred_zone, zone, order,
                        gfp_mask, migratetype);
        if (page)
            break;
this_zone_full:
        if (NUMA_BUILD)
            zlc_mark_zone_full(zonelist, z);
try_next_zone:
        if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
            /*
             * We do zlc_setup() after the first zone is tried, but only
             * if there are multiple online nodes to make it worthwhile.
             */

            allowednodes = zlc_setup(zonelist, alloc_flags);
            zlc_active = 1;
            did_zlc_setup = 1;
        }
    }

    if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
        /* Disable zlc cache for second zonelist scan */
        zlc_active = 0;
        goto zonelist_scan;
    }
    return page;
}
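
The decision driving this loop is the watermark test. The function below is a simplified sketch of the kind of check zone_watermark_ok() performs, not the exact kernel code (the real version also adds lowmem_reserve[classzone_idx] to the mark and relaxes it for ALLOC_HIGH/ALLOC_HARDER):

/*
 * Simplified, illustrative sketch of the watermark check; names and
 * details differ from the real zone_watermark_ok() in mm/page_alloc.c.
 */
static int watermark_ok_sketch(struct zone *z, unsigned int order,
                unsigned long mark)
{
    long free = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
    unsigned int o;

    if (free <= (long)mark)
        return 0;

    /*
     * For a higher-order request, also make sure enough of the remaining
     * free memory sits in blocks of at least the requested order.
     */
    for (o = 0; o < order; o++) {
        free -= z->free_area[o].nr_free << o;
        mark >>= 1;
        if (free <= (long)mark)
            return 0;
    }
    return 1;
}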


/*
 * Really, prep_compound_page() should be called from __rmqueue_bulk(). But
 * we cheat by calling it from here, in the order > 0 path. Saves a branch
 * or two.
 */

static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
            struct zone *zone, int order, gfp_t gfp_flags,
            int migratetype)
{
    unsigned long flags;
    struct page *page;
    int cold = !!(gfp_flags & __GFP_COLD);

again:
    if (likely(order == 0)) {
        struct per_cpu_pages *pcp;
        struct list_head *list;

        local_irq_save(flags);
        pcp = &this_cpu_ptr(zone->pageset)->pcp;
        list = &pcp->lists[migratetype];
        if (list_empty(list)) {
            pcp->count += rmqueue_bulk(zone, 0,
                    pcp->batch, list,
                    migratetype, cold);
            if (unlikely(list_empty(list)))
                goto failed;
        }

        if (cold)
            page = list_entry(list->prev, struct page, lru);
        else
            page = list_entry(list->next, struct page, lru);

        list_del(&page->lru);
        pcp->count--;
    } else {
        if (unlikely(gfp_flags & __GFP_NOFAIL)) {
            /*
             * __GFP_NOFAIL is not to be used in new code.
             *
             * All __GFP_NOFAIL callers should be fixed so that they
             * properly detect and handle allocation failures.
             *
             * We most definitely don't want callers attempting to
             * allocate greater than order-1 page units with
             * __GFP_NOFAIL.
             */

            WARN_ON_ONCE(order > 1);
        }
        spin_lock_irqsave(&zone->lock, flags);
        page = __rmqueue(zone, order, migratetype);
        spin_unlock(&zone->lock);
        if (!page)
            goto failed;
        __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
    }

    __count_zone_vm_events(PGALLOC, zone, 1 << order);
    zone_statistics(preferred_zone, zone);
    local_irq_restore(flags);

    VM_BUG_ON(bad_range(zone, page));
    if (prep_new_page(page, order, gfp_flags))
        goto again;
    return page;

failed:
    local_irq_restore(flags);
    return NULL;
}
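
The order-0 fast path above pulls pages from a per-CPU cache rather than from the buddy lists directly. Its rough shape, paraphrased from include/linux/mmzone.h of this kernel generation (the real layout may differ in detail), is:

/* Paraphrased; one instance of this exists per CPU per zone */
struct per_cpu_pages {
    int count;    /* number of pages on the lists */
    int high;     /* high watermark: drain back to buddy when exceeded */
    int batch;    /* chunk size for refilling from / draining to buddy */

    /* one free list per migrate type kept on the pcp lists */
    struct list_head lists[MIGRATE_PCPTYPES];
};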


1. rmqueue_bulk()

/*
 * Obtain a specified number of elements from the buddy allocator, all under
 * a single hold of the lock, for efficiency. Add them to the supplied list.
 * Returns the number of new pages which were placed at *list.
 */

static int rmqueue_bulk(struct zone *zone, unsigned int order,
            unsigned long count, struct list_head *list,
            int migratetype, int cold)
{
    int i;
    
    spin_lock(&zone->lock);
    for (i = 0; i < count; ++i) {
        struct page *page = __rmqueue(zone, order, migratetype);
        if (unlikely(page == NULL))
            break;

        /*
         * Split buddy pages returned by expand() are received here
         * in physical page order. The page is added to the caller's
         * list and the list head then moves forward. From the caller's
         * perspective, the linked list is ordered by page number in
         * some conditions. This is useful for IO devices that can
         * merge IO requests if the physical pages are ordered
         * properly.
         */

        if (likely(cold == 0))
            list_add(&page->lru, list);
        else
            list_add_tail(&page->lru, list);
        set_page_private(page, migratetype);
        list = &page->lru;
    }
    __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
    spin_unlock(&zone->lock);
    return i;
}


2. __rmqueue()

/*
 * Do the hard work of removing an element from the buddy allocator.
 * Call me with the zone->lock already held.
 */

static struct page *__rmqueue(struct zone *zone, unsigned int order,
                        int migratetype)
{
    struct page *page;

retry_reserve:
    page = __rmqueue_smallest(zone, order, migratetype);

    if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
        page = __rmqueue_fallback(zone, order, migratetype);

        /*
         * Use MIGRATE_RESERVE rather than fail an allocation. goto
         * is used because __rmqueue_smallest is an inline function
         * and we want just one call site
         */

        if (!page) {
            migratetype = MIGRATE_RESERVE;
            goto retry_reserve;
        }
    }

    trace_mm_page_alloc_zone_locked(page, order, migratetype);
    return page;
}


/*
 * Go through the free lists for the given migratetype and remove
 * the smallest available page from the freelists
 */

static inline
struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
                        int migratetype)
{
    unsigned int current_order;
    struct free_area * area;
    struct page *page;

    /* Find a page of the appropriate size in the preferred list */
    for (current_order = order; current_order < MAX_ORDER; ++current_order) {
        area = &(zone->free_area[current_order]);
        if (list_empty(&area->free_list[migratetype]))
            continue;

        page = list_entry(area->free_list[migratetype].next,
                            struct page, lru);
        list_del(&page->lru);
        rmv_page_order(page);
        area->nr_free--;
        expand(zone, page, order, current_order, area, migratetype);
        return page;
    }

    return NULL;
}
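
The lists walked here live in the zone's free_area[] array, one entry per order. Roughly, paraphrased from include/linux/mmzone.h of this kernel generation:

struct free_area {
    struct list_head    free_list[MIGRATE_TYPES];    /* one list per migrate type */
    unsigned long       nr_free;                     /* free blocks of this order */
};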


/*
 * The order of subdivision here is critical for the IO subsystem.
 * Please do not alter this order without good reasons and regression
 * testing. Specifically, as large blocks of memory are subdivided,
 * the order in which smaller blocks are delivered depends on the order
 * they're subdivided in this function. This is the primary factor
 * influencing the order in which pages are delivered to the IO
 * subsystem according to empirical testing, and this is also justified
 * by considering the behavior of a buddy system containing a single
 * large block of memory acted on by a series of small allocations.
 * This behavior is a critical factor in sglist merging's success.
 *
 * -- wli
 */

static inline void expand(struct zone *zone, struct page *page,
    int low, int high, struct free_area *area,
    int migratetype)
{
    unsigned long size = 1 << high;

    while (high > low) {
        area--;
        high--;
        size >>= 1;
        VM_BUG_ON(bad_range(zone, &page[size]));
        list_add(&page[size].lru, &area->free_list[migratetype]);
        area->nr_free++;
        set_page_order(&page[size], high);
    }
}
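
A worked example: asking for order 1 when the smallest available block is order 3, the loop above returns page[4] to the order-2 free list and page[2] to the order-1 free list, leaving page[0..1] for the caller. The standalone userspace sketch below (not kernel code) reproduces just that split arithmetic:

#include <stdio.h>

/* Print which buddy halves go back onto the free lists */
static void expand_sketch(unsigned int low, unsigned int high)
{
    unsigned long size = 1UL << high;

    while (high > low) {
        high--;
        size >>= 1;
        printf("free buddy at page offset %lu, order %u\n", size, high);
    }
    printf("caller gets pages [0..%lu)\n", 1UL << low);
}

int main(void)
{
    expand_sketch(1, 3);    /* an order-1 request cut from an order-3 block */
    return 0;
}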


/* Remove an element from the buddy allocator from the fallback list */
static inline struct page *
__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
{
    struct free_area * area;
    int current_order;
    struct page *page;
    int migratetype, i;

    /* Find the largest possible block of pages in the other list */
    for (current_order = MAX_ORDER-1; current_order >= order;
                        --current_order) {
        for (i = 0; i < MIGRATE_TYPES - 1; i++) {
            migratetype = fallbacks[start_migratetype][i];

            /* MIGRATE_RESERVE handled later if necessary */
            if (migratetype == MIGRATE_RESERVE)
                continue;

            area = &(zone->free_area[current_order]);
            if (list_empty(&area->free_list[migratetype]))
                continue;

            page = list_entry(area->free_list[migratetype].next,
                    struct page, lru);
            area->nr_free--;

            /*
             * If breaking a large block of pages, move all free
             * pages to the preferred allocation list. If falling
             * back for a reclaimable kernel allocation, be more
             * aggressive about taking ownership of free pages.
             */

            if (unlikely(current_order >= (pageblock_order >> 1)) ||
                    start_migratetype == MIGRATE_RECLAIMABLE ||
                    page_group_by_mobility_disabled) {
                unsigned long pages;
                pages = move_freepages_block(zone, page,
                                start_migratetype);

                /* Claim the whole block if over half of it is free */
                if (pages >= (1 << (pageblock_order-1)) ||
                        page_group_by_mobility_disabled)
                    set_pageblock_migratetype(page,
                                start_migratetype);

                migratetype = start_migratetype;
            }

            /* Remove the page from the freelists */
            list_del(&page->lru);
            rmv_page_order(page);

            /* Take ownership for orders >= pageblock_order */
            if (current_order >= pageblock_order)
                change_pageblock_range(page, current_order,
                            start_migratetype);

            expand(zone, page, order, current_order, area, migratetype);

            trace_mm_page_alloc_extfrag(page, order, current_order,
                start_migratetype, migratetype);

            return page;
        }
    }

    return NULL;
}
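
The fallbacks[] table consulted at the top of the loop encodes the preferred fallback order for each migrate type. Its rough shape in kernels of this era is shown below (paraphrased from mm/page_alloc.c; the exact contents vary between versions):

/*
 * Paraphrased fallback order: each row lists the migrate types tried,
 * in order, when the free lists of the row's own type are empty.
 * MIGRATE_RESERVE is the last resort and is handled separately above.
 */
static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
    [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_RESERVE },
    [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_RESERVE },
    [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
    [MIGRATE_RESERVE]     = { MIGRATE_RESERVE,     MIGRATE_RESERVE,   MIGRATE_RESERVE }, /* never reached */
};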

