Chinaunix首页 | 论坛 | 博客
  • 博客访问: 620263
  • 博文数量: 69
  • 博客积分: 1891
  • 博客等级: 上尉
  • 技术积分: 1359
  • 用 户 组: 普通用户
  • 注册时间: 2010-11-20 23:38
文章分类

全部博文(69)

文章存档

2012年(46)

2011年(23)

分类: LINUX

2012-03-19 23:31:16

   之前介绍了内存模型的node,接着介绍zone——内存域,内存域分不同的类型,内核用以下常量来枚举系统中的所有内存域:

点击(此处)折叠或打开

  1. enum zone_type {
  2. #ifdef CONFIG_ZONE_DMA
  3. /*
  4. * ZONE_DMA is used when there are devices that are not able
  5. * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
  6. * carve out the portion of memory that is needed for these devices.
  7. * The range is arch specific.
  8. *
  9. * Some examples
  10. *
  11. * Architecture Limit
  12. * ---------------------------
  13. * parisc, ia64, sparc <4G
  14. * s390 <2G
  15. * arm Various
  16. * alpha Unlimited or 0-16MB.
  17. *
  18. * i386, x86_64 and multiple other arches
  19. * <16M.
  20. */
  21. ZONE_DMA,
  22. #endif
  23. #ifdef CONFIG_ZONE_DMA32
  24. /*
  25. * x86_64 needs two ZONE_DMAs because it supports devices that are
  26. * only able to do DMA to the lower 16M but also 32 bit devices that
  27. * can only do DMA areas below 4G.
  28. */
  29. ZONE_DMA32,
  30. #endif
  31. /*
  32. * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
  33. * performed on pages in ZONE_NORMAL if the DMA devices support
  34. * transfers to all addressable memory.
  35. */
  36. ZONE_NORMAL,
  37. #ifdef CONFIG_HIGHMEM
  38. /*
  39. * A memory area that is only addressable by the kernel through
  40. * mapping portions into its own address space. This is for example
  41. * used by i386 to allow the kernel to address the memory beyond
  42. * 900MB. The kernel will set up special mappings (page
  43. * table entries on i386) for each page that the kernel needs to
  44. * access.
  45. */
  46. ZONE_HIGHMEM,
  47. #endif
  48. ZONE_MOVABLE,
  49. __MAX_NR_ZONES
  50. };
  • ZONE_DMA:标记适合DMA的内存域
  • ZONE_DMA32:标记了使用32位地址字可寻址、适合DMA的内存域
  • ZONE_NORMAL:标记了可以直接映射到内核段的普通内存
  • ZONE_HIGHMEM:标记了超出内核段的物理内存
  • ZONE_MOVABLE:供防止物理内存碎片的机制使用,是一个伪内存域
  • __MAX_NR_ZONES:表示结束标记,在迭代系统中的所有内存域时,会使用该常量
表示内存域的代码如下:

点击(此处)折叠或打开

  1. struct zone {
  2.         /* Fields commonly accessed by the page allocator */

  3.         /* zone watermarks, access with *_wmark_pages(zone) macros */
  4.         unsigned long watermark[NR_WMARK];

  5.         /*
  6.          * When free pages are below this point, additional steps are taken
  7.          * when reading the number of free pages to avoid per-cpu counter
  8.          * drift allowing watermarks to be breached
  9.          */
  10.         unsigned long percpu_drift_mark;

  11.         /*
  12.          * We don't know if the memory that we're going to allocate will be freeable
  13.          * or/and it will be released eventually, so to avoid totally wasting several
  14.          * GB of ram we must reserve some of the lower zone memory (otherwise we risk
  15.          * to run OOM on the lower zones despite there's tons of freeable ram
  16.          * on the higher zones). This array is recalculated at runtime if the
  17.          * sysctl_lowmem_reserve_ratio sysctl changes.
  18.          */
  19.         unsigned long lowmem_reserve[MAX_NR_ZONES];

  20. #ifdef CONFIG_NUMA
  21.         int node;
  22.         /*
  23.          * zone reclaim becomes active if more unmapped pages exist.
  24.          */
  25.         unsigned long min_unmapped_pages;
  26.         unsigned long min_slab_pages;
  27. #endif
  28.         struct per_cpu_pageset __percpu *pageset;
  29.         /*
  30.          * free areas of different sizes
  31.          */
  32.         spinlock_t lock;
  33.         int all_unreclaimable; /* All pages pinned */

  34. #ifdef CONFIG_MEMORY_HOTPLUG
  35.         /* see spanned/present_pages for more description */
  36.         seqlock_t span_seqlock;
  37. #endif
  38.         struct free_area free_area[MAX_ORDER];

  39. #ifndef CONFIG_SPARSEMEM
  40.         /*
  41.          * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
  42.          * In SPARSEMEM, this map is stored in struct mem_section
  43.          */
  44.         unsigned long *pageblock_flags;
  45. #endif /* CONFIG_SPARSEMEM */

  46. #ifdef CONFIG_COMPACTION
  47.         /*
  48.          * On compaction failure, 1<<compact_defer_shift compactions
  49.          * are skipped before trying again. The number attempted since
  50.          * last failure is tracked with compact_considered.
  51.          */
  52.         unsigned int compact_considered;
  53.         unsigned int compact_defer_shift;
  54. #endif

  55.         ZONE_PADDING(_pad1_)

  56.         /* Fields commonly accessed by the page reclaim scanner */
  57.         spinlock_t lru_lock;
  58.         struct zone_lru {
  59.                 struct list_head list;
  60.         } lru[NR_LRU_LISTS];

  61.         struct zone_reclaim_stat reclaim_stat;

  62.         unsigned long pages_scanned; /* since last reclaim */
  63.         unsigned long flags; /* zone flags, see below */

  64.         /* Zone statistics */
  65.         atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];

  66.  /*
  67.          * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
  68.          * this zone's LRU. Maintained by the pageout code.
  69.          */
  70.         unsigned int inactive_ratio;


  71.         ZONE_PADDING(_pad2_)
  72.         /* Rarely used or read-mostly fields */

  73.         /*
  74.          * wait_table -- the array holding the hash table
  75.          * wait_table_hash_nr_entries -- the size of the hash table array
  76.          * wait_table_bits -- wait_table_size == (1 << wait_table_bits)
  77.          *
  78.          * The purpose of all these is to keep track of the people
  79.          * waiting for a page to become available and make them
  80.          * runnable again when possible. The trouble is that this
  81.          * consumes a lot of space, especially when so few things
  82.          * wait on pages at a given time. So instead of using
  83.          * per-page waitqueues, we use a waitqueue hash table.
  84.          *
  85.          * The bucket discipline is to sleep on the same queue when
  86.          * colliding and wake all in that wait queue when removing.
  87.          * When something wakes, it must check to be sure its page is
  88.          * truly available, a la thundering herd. The cost of a
  89.          * collision is great, but given the expected load of the
  90.          * table, they should be so rare as to be outweighed by the
  91.          * benefits from the saved space.
  92.          *
  93.          * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
  94.          * primary users of these fields, and in mm/page_alloc.c
  95.          * free_area_init_core() performs the initialization of them.
  96.          */
  97.         wait_queue_head_t * wait_table;

  98. unsigned long wait_table_hash_nr_entries;
  99.         unsigned long wait_table_bits;

  100.         /*
  101.          * Discontig memory support fields.
  102.          */
  103.         struct pglist_data *zone_pgdat;
  104.         /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
  105.         unsigned long zone_start_pfn;

  106.         /*
  107.          * zone_start_pfn, spanned_pages and present_pages are all
  108.          * protected by span_seqlock. It is a seqlock because it has
  109.          * to be read outside of zone->lock, and it is done in the main
  110.          * allocator path. But, it is written quite infrequently.
  111.          *
  112.          * The lock is declared along with zone->lock because it is
  113.          * frequently read in proximity to zone->lock. It's good to
  114.          * give them a chance of being in the same cacheline.
  115.          */
  116.         unsigned long spanned_pages; /* total size, including holes */
  117.         unsigned long present_pages; /* amount of memory (excluding holes) */

  118.         /*
  119.          * rarely used fields:
  120.          */
  121.         const char *name;
  122. } ____cacheline_internodealigned_in_smp;
这个结构比较大,简单介绍一下:
以下是被页分配器(page allocator)访问的字段:
  • unsigned long watermark[NR_WMARK]:代表页换出时使用的水印pages_min,pages_high,pages_low,NR_WMARK定义在枚举类型zone_watermarks里

    点击(此处)折叠或打开

    1. enum zone_watermarks {
    2. WMARK_MIN,
    3. WMARK_LOW,
    4. WMARK_HIGH,
    5. NR_WMARK
    6. };
    这三个成员会影响交换守护进程的行为:
    • WMARK_HIGH:如果空闲页数多于watermark[WMARK_HIGH],则内存域的状态是理想的
    • WMARK_LOW:如果空闲页数少于watermark[WMARK_LOW],则内核开始将内存里的页换出到硬盘
    • WMARK_MIN:如果空闲页数少于watermark[WMARK_MIN],则内核中急需空闲页,此时页回收的压力比较大
  • unsigned long percpu_drift_mark:这个字段比较玄乎,注释也看得不是很明白,英语不够好,大概的意思是说,当空闲页的数目低于percpu_drift_mark这个点时,系统在读取空闲页数时会采取额外的步骤,以避免每CPU计数器的漂移导致水印被突破。
  •  unsigned long   lowmem_reserve[MAX_NR_ZONES]:这个是为各种内存域预留的页,用于一些不能失败的关键性内存分配
  •  struct per_cpu_pageset __percpu *pageset:用于实现每个CPU的热/冷页帧的列表
  • struct free_area        free_area[MAX_ORDER]:是用于伙伴系统的,每个数组元素对应一个阶(order),记录该大小的空闲内存区
以下是供页帧回收扫描器(page reclaim scanner)访问的字段,scanner会根据页帧的活动情况对内存域中使用的页进行编目。如果页帧被频繁访问,则是活动的,相反则是不活动的,在需要换出页帧时,这样的信息是很重要的:
  • spinlock_t  lru_lock:自旋锁,用于保护该内存域的LRU链表(lru[NR_LRU_LISTS])
  • unsigned long           pages_scanned:指自上次回收页以来已扫描的页数
  • flags:描述当前内存域的状态

    点击(此处)折叠或打开

    1. typedef enum {
    2. ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */ 防止并发回收
    3. ZONE_OOM_LOCKED, /* zone is in OOM killer zonelist */内存域位于OOM killer的zonelist中
    4. ZONE_CONGESTED, /* zone has many dirty pages backed by
    5. * a congested BDI
    6. */内存域里有很多脏页,且这些脏页所在的后备设备(BDI)处于拥塞状态
    7. } zone_flags_t;

  •   atomic_long_t           vm_stat[NR_VM_ZONE_STAT_ITEMS] :内存域的统计信息
  • unsigned int inactive_ratio :活动匿名页(ACTIVE_ANON)与不活动匿名页(INACTIVE_ANON)的目标比例,由页换出代码维护
接着是一些很少使用或者大部分情况下是只读的字段:
  • wait_table、wait_table_hash_nr_entries、wait_table_bits:构成等待队列(散列表),供进程等待某一页变为可用
  • struct pglist_data      *zone_pgdat 指向节点的指针
  •  unsigned long           zone_start_pfn: zone_start_pfn == zone_start_paddr >> PAGE_SHIFT,指内存域的第一个页帧
  •  unsigned long           spanned_pages;总页数,包含空洞
  • unsigned long           present_pages; 可用页数,不包含空洞
  •  const char              *name:内存域的惯用名词,有三个可选Normal,DMA,HighMem


阅读(4108) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~