Chinaunix首页 | 论坛 | 博客
  • 博客访问: 154456
  • 博文数量: 34
  • 博客积分: 938
  • 博客等级: 准尉
  • 技术积分: 440
  • 用 户 组: 普通用户
  • 注册时间: 2010-04-18 12:07






2012-03-15 15:54:08

 622 typedef struct pglist_data
 623     struct zone node_zones[MAX_NR_ZONES];//pglist_data中zone数组
 624     struct zonelist node_zonelists[MAX_ZONELISTS];//页面分配的策略,在NUMA中为2,在UMA中为1
 625     int nr_zones; //zone的个数
 626 #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
 627     struct page *node_mem_map;
 629     struct page_cgroup *node_page_cgroup;
 630 #endif
 631 #endif
 632 #ifndef CONFIG_NO_BOOTMEM
 633     struct bootmem_data *bdata;
 634 #endif
 636     /*
 637      * Must be held any time you expect node_start_pfn, node_present_pages
 638      * or node_spanned_pages stay constant.  Holding this will also
 639      * guarantee that any pfn_valid() stays that way.
 640      *
 641      * Nests above zone->lock and zone->size_seqlock.
 642      */
 643     spinlock_t node_size_lock;
 644 #endif
 645     unsigned long node_start_pfn;//最开始的pfn
 646     unsigned long node_present_pages; /* total number of physical pages */
 647     unsigned long node_spanned_pages; /* total size of physical page
 648                          range, including holes */
 649     int node_id;
 650     wait_queue_head_t kswapd_wait;
 651     struct task_struct *kswapd;
 652     int kswapd_max_order;
 653 } pg_data_t;

在 pglist_data 下面是 zone,这个结构比较大,但注释很详细。
struct zone_reclaim_stat {
     * The pageout code in vmscan.c keeps track of how many of the
     * mem/swap backed and file backed pages are refeferenced.
     * The higher the rotated/scanned ratio, the more valuable
     * that cache is.
     * The anon LRU stats live in [0], file LRU stats in [1]
    unsigned long        recent_rotated[2];
    unsigned long        recent_scanned[2];

     * accumulated for batching
    unsigned long        nr_saved_scan[NR_LRU_LISTS];

struct zone {
    /* Fields commonly accessed by the page allocator */

    /* zone watermarks, access with *_wmark_pages(zone) macros */
    unsigned long watermark[NR_WMARK];

     * When free pages are below this point, additional steps are taken
     * when reading the number of free pages to avoid per-cpu counter
     * drift allowing watermarks to be breached
    unsigned long percpu_drift_mark;

     * We don't know if the memory that we're going to allocate will be freeable
     * or/and it will be released eventually, so to avoid totally wasting several
     * GB of ram we must reserve some of the lower zone memory (otherwise we risk
     * to run OOM on the lower zones despite there's tons of freeable ram
     * on the higher zones). This array is recalculated at runtime if the
     * sysctl_lowmem_reserve_ratio sysctl changes.
    unsigned long        lowmem_reserve[MAX_NR_ZONES];

    int node;
     * zone reclaim becomes active if more unmapped pages exist.
    unsigned long        min_unmapped_pages;
    unsigned long        min_slab_pages;
    struct per_cpu_pageset __percpu *pageset;
     * free areas of different sizes
    spinlock_t        lock;
    int                     all_unreclaimable; /* All pages pinned */
    /* see spanned/present_pages for more description */
    seqlock_t        span_seqlock;
    struct free_area    free_area[MAX_ORDER];

     * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
     * In SPARSEMEM, this map is stored in struct mem_section
    unsigned long        *pageblock_flags;

     * On compaction failure, 1<     * are skipped before trying again. The number attempted since
     * last failure is tracked with compact_considered.
    unsigned int        compact_considered;
    unsigned int        compact_defer_shift;


    /* Fields commonly accessed by the page reclaim scanner */
    spinlock_t        lru_lock;    
    struct zone_lru {
        struct list_head list;
    } lru[NR_LRU_LISTS];

    struct zone_reclaim_stat reclaim_stat;

    unsigned long        pages_scanned;       /* since last reclaim */
    unsigned long        flags;           /* zone flags, see below */

    /* Zone statistics */
    atomic_long_t        vm_stat[NR_VM_ZONE_STAT_ITEMS];

     * prev_priority holds the scanning priority for this zone.  It is
     * defined as the scanning priority at which we achieved our reclaim
     * target at the previous try_to_free_pages() or balance_pgdat()
     * invocation.
     * We use prev_priority as a measure of how much stress page reclaim is
     * under - it drives the swappiness decision: whether to unmap mapped
     * pages.
     * Access to both this field is quite racy even on uniprocessor.  But
     * it is expected to average out OK.
    int prev_priority;

     * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
     * this zone's LRU.  Maintained by the pageout code.
    unsigned int inactive_ratio;

    /* Rarely used or read-mostly fields */

     * wait_table        -- the array holding the hash table
     * wait_table_hash_nr_entries    -- the size of the hash table array
     * wait_table_bits    -- wait_table_size == (1 << wait_table_bits)
     * The purpose of all these is to keep track of the people
     * waiting for a page to become available and make them
     * runnable again when possible. The trouble is that this
     * consumes a lot of space, especially when so few things
     * wait on pages at a given time. So instead of using
     * per-page waitqueues, we use a waitqueue hash table.
     * The bucket discipline is to sleep on the same queue when
     * colliding and wake all in that wait queue when removing.
     * When something wakes, it must check to be sure its page is
     * truly available, a la thundering herd. The cost of a
     * collision is great, but given the expected load of the
     * table, they should be so rare as to be outweighed by the
     * benefits from the saved space.
     * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
     * primary users of these fields, and in mm/page_alloc.c
     * free_area_init_core() performs the initialization of them.
    wait_queue_head_t    * wait_table;
    unsigned long        wait_table_hash_nr_entries;
    unsigned long        wait_table_bits;

     * Discontig memory support fields.
    struct pglist_data    *zone_pgdat;
    /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
    unsigned long        zone_start_pfn;

     * zone_start_pfn, spanned_pages and present_pages are all
     * protected by span_seqlock.  It is a seqlock because it has
     * to be read outside of zone->lock, and it is done in the main
     * allocator path.  But, it is written quite infrequently.
     * The lock is declared along with zone->lock because it is
     * frequently read in proximity to zone->lock.  It's good to
     * give them a chance of being in the same cacheline.
    unsigned long        spanned_pages;    /* total size, including holes */
    unsigned long        present_pages;    /* amount of memory (excluding holes) */

     * rarely used fields:
    const char        *name;
} ____cacheline_internodealigned_in_smp;
 * Each physical page in the system has a struct page associated with
 * it to keep track of whatever it is we are using the page for at the
 * moment. Note that we have no way to track which tasks are using
 * a page, though if it is a pagecache page, rmap structures can tell us
 * who is mapping it.
struct page {
    unsigned long flags;        /* Atomic flags, some possibly
                     * updated asynchronously */
    atomic_t _count;        /* Usage count, see below. */
    union {
        atomic_t _mapcount;    /* Count of ptes mapped in mms,
                     * to show when page is mapped
                     * & limit reverse map searches.
        struct {        /* SLUB */
            u16 inuse;
            u16 objects;
    union {
        struct {
        unsigned long private;        /* Mapping-private opaque data:
                          * usually used for buffer_heads
                         * if PagePrivate set; used for
                         * swp_entry_t if PageSwapCache;
                         * indicates order in the buddy
                         * system if PG_buddy is set.
        struct address_space *mapping;    /* If low bit clear, points to
                         * inode address_space, or NULL.
                         * If page mapped as anonymous
                         * memory, low bit is set, and
                         * it points to anon_vma object:
                         * see PAGE_MAPPING_ANON below.
        spinlock_t ptl;
        struct kmem_cache *slab;    /* SLUB: Pointer to slab */
        struct page *first_page;    /* Compound tail pages */
    union {
        pgoff_t index;        /* Our offset within mapping. */
        void *freelist;        /* SLUB: freelist req. slab lock */
    struct list_head lru;        /* Pageout list, eg. active_list
                     * protected by zone->lru_lock !
     * On machines where all RAM is mapped into kernel address space,
     * we can simply calculate the virtual address. On machines with
     * highmem some memory is mapped into kernel virtual memory
     * dynamically, so we need a place to store that address.
     * Note that this field could be 16 bits on x86 ... ;)
     * Architectures with slow multiplication can define
     * WANT_PAGE_VIRTUAL in asm/page.h
#if defined(WANT_PAGE_VIRTUAL)
    void *virtual;            /* Kernel virtual address (NULL if
                       not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
    unsigned long debug_flags;    /* Use atomic bitops on this */

     * kmemcheck wants to track the status of each byte in a page; this
     * is a pointer to such a status block. NULL if not tracked.
    void *shadow;

阅读(1745) | 评论(0) | 转发(0) |

登录 注册