物理内存结构分析-oujunli-ChinaUnix博客

oujunli的ChinaUnix博客ojl.blog.chinaunix.net

首页　| 　博文目录　| 　关于我

oujunli

博客访问： 250977
博文数量： 34
博客积分： 938
博客等级：准尉
技术积分： 440
用户组：普通用户
注册时间： 2010-04-18 12:07

文章分类

全部博文（34）

性能（3）
杂文（10）
Linux（8）
Android（13）
未分配的博文（0）

文章存档

2012年（28）

2011年（6）

我的朋友

相关博文

物理内存结构分析

分类： LINUX

2012-03-15 15:54:08

物理内存中，pglist_data是管理物理内存的最高抽象。
/* Per-node descriptor: the node's zones, zonelists, page-frame range and kswapd state. */
622 typedef struct pglist_data {
623     struct zone node_zones[MAX_NR_ZONES];// the array of zones inside pglist_data
624     struct zonelist node_zonelists[MAX_ZONELISTS];// page allocation policy; MAX_ZONELISTS is 2 on NUMA, 1 on UMA
625     int nr_zones; // number of zones
626 #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
627     struct page *node_mem_map;
628 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
629     struct page_cgroup *node_page_cgroup;
630 #endif
631 #endif
632 #ifndef CONFIG_NO_BOOTMEM
633     struct bootmem_data *bdata;
634 #endif
635 #ifdef CONFIG_MEMORY_HOTPLUG
636     /*
637      * Must be held any time you expect node_start_pfn, node_present_pages
638      * or node_spanned_pages stay constant. Holding this will also
639      * guarantee that any pfn_valid() stays that way.
640      *
641      * Nests above zone->lock and zone->size_seqlock.
642      */
643     spinlock_t node_size_lock;
644 #endif
645     unsigned long node_start_pfn;// the first pfn of this node
646     unsigned long node_present_pages; /* total number of physical pages */
647     unsigned long node_spanned_pages; /* total size of physical page
648                          range, including holes */
649     int node_id;
650     wait_queue_head_t kswapd_wait;
651     struct task_struct *kswapd;
652     int kswapd_max_order;
653 } pg_data_t;

在pglist_data下面是zone，这个结构比较大，但注释很详细。
/*
 * Reclaim statistics kept per zone; embedded in struct zone as
 * the reclaim_stat member.
 */
struct zone_reclaim_stat {
   /*
   * The pageout code in vmscan.c keeps track of how many of the
   * mem/swap backed and file backed pages are referenced.
   * The higher the rotated/scanned ratio, the more valuable
   * that cache is.
   *
   * The anon LRU stats live in [0], file LRU stats in [1]
   */
   unsigned long       recent_rotated[2];
   unsigned long       recent_scanned[2];

   /*
   * accumulated for batching; one counter per LRU list
   */
   unsigned long       nr_saved_scan[NR_LRU_LISTS];
};

/*
 * Describes one memory zone of a node (see zone_pgdat for the owning
 * pglist_data). Fields are grouped by their main user -- page allocator,
 * page reclaim scanner, rarely used / read-mostly -- and the groups are
 * separated by ZONE_PADDING() markers.
 */
struct zone {
   /* Fields commonly accessed by the page allocator */

   /* zone watermarks, access with *_wmark_pages(zone) macros */
   unsigned long watermark[NR_WMARK];

   /*
   * When free pages are below this point, additional steps are taken
   * when reading the number of free pages to avoid per-cpu counter
   * drift allowing watermarks to be breached
   */
   unsigned long percpu_drift_mark;

   /*
   * We don't know if the memory that we're going to allocate will be freeable
   * or/and it will be released eventually, so to avoid totally wasting several
   * GB of ram we must reserve some of the lower zone memory (otherwise we risk
   * to run OOM on the lower zones despite there's tons of freeable ram
   * on the higher zones). This array is recalculated at runtime if the
   * sysctl_lowmem_reserve_ratio sysctl changes.
   */
   unsigned long       lowmem_reserve[MAX_NR_ZONES];

#ifdef CONFIG_NUMA
   int node;
   /*
   * zone reclaim becomes active if more unmapped pages exist.
   */
   unsigned long       min_unmapped_pages;
   unsigned long       min_slab_pages;
#endif
   struct per_cpu_pageset __percpu *pageset;
   /*
   * free areas of different sizes
   */
   spinlock_t       lock;
   int                     all_unreclaimable; /* All pages pinned */
#ifdef CONFIG_MEMORY_HOTPLUG
   /* see spanned/present_pages for more description */
   seqlock_t       span_seqlock;
#endif
   struct free_area   free_area[MAX_ORDER];

#ifndef CONFIG_SPARSEMEM
   /*
   * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
   * In SPARSEMEM, this map is stored in struct mem_section
   */
   unsigned long       *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */

#ifdef CONFIG_COMPACTION
   /*
   * On compaction failure, 1<<compact_defer_shift compactions
   * are skipped before trying again. The number attempted since
   * last failure is tracked with compact_considered.
   */
   unsigned int       compact_considered;
   unsigned int       compact_defer_shift;
#endif

   ZONE_PADDING(_pad1_)

   /* Fields commonly accessed by the page reclaim scanner */
   spinlock_t       lru_lock;
   struct zone_lru {
       struct list_head list;
   } lru[NR_LRU_LISTS];

   struct zone_reclaim_stat reclaim_stat;

   unsigned long       pages_scanned;       /* since last reclaim */
   unsigned long       flags;           /* zone flags, see below */

   /* Zone statistics */
   atomic_long_t       vm_stat[NR_VM_ZONE_STAT_ITEMS];

   /*
   * prev_priority holds the scanning priority for this zone. It is
   * defined as the scanning priority at which we achieved our reclaim
   * target at the previous try_to_free_pages() or balance_pgdat()
   * invocation.
   *
   * We use prev_priority as a measure of how much stress page reclaim is
   * under - it drives the swappiness decision: whether to unmap mapped
   * pages.
   *
   * Access to this field is quite racy even on uniprocessor. But
   * it is expected to average out OK.
   */
   int prev_priority;

   /*
   * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
   * this zone's LRU. Maintained by the pageout code.
   */
   unsigned int inactive_ratio;

   ZONE_PADDING(_pad2_)
   /* Rarely used or read-mostly fields */

   /*
   * wait_table       -- the array holding the hash table
   * wait_table_hash_nr_entries   -- the size of the hash table array
   * wait_table_bits   -- wait_table_size == (1 << wait_table_bits)
   *
   * The purpose of all these is to keep track of the people
   * waiting for a page to become available and make them
   * runnable again when possible. The trouble is that this
   * consumes a lot of space, especially when so few things
   * wait on pages at a given time. So instead of using
   * per-page waitqueues, we use a waitqueue hash table.
   *
   * The bucket discipline is to sleep on the same queue when
   * colliding and wake all in that wait queue when removing.
   * When something wakes, it must check to be sure its page is
   * truly available, a la thundering herd. The cost of a
   * collision is great, but given the expected load of the
   * table, they should be so rare as to be outweighed by the
   * benefits from the saved space.
   *
   * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
   * primary users of these fields, and in mm/page_alloc.c
   * free_area_init_core() performs the initialization of them.
   */
   wait_queue_head_t   * wait_table;
   unsigned long       wait_table_hash_nr_entries;
   unsigned long       wait_table_bits;

   /*
   * Discontig memory support fields.
   */
   struct pglist_data   *zone_pgdat;
   /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
   unsigned long       zone_start_pfn;

   /*
   * zone_start_pfn, spanned_pages and present_pages are all
   * protected by span_seqlock. It is a seqlock because it has
   * to be read outside of zone->lock, and it is done in the main
   * allocator path. But, it is written quite infrequently.
   *
   * The lock is declared along with zone->lock because it is
   * frequently read in proximity to zone->lock. It's good to
   * give them a chance of being in the same cacheline.
   */
   unsigned long       spanned_pages;   /* total size, including holes */
   unsigned long       present_pages;   /* amount of memory (excluding holes) */

   /*
   * rarely used fields:
   */
   const char       *name;
} ____cacheline_internodealigned_in_smp;
自己看吧，遇到要用的时候再回过头来看一下。
最后一个结构是page。
/*
* Each physical page in the system has a struct page associated with
* it to keep track of whatever it is we are using the page for at the
* moment. Note that we have no way to track which tasks are using
* a page, though if it is a pagecache page, rmap structures can tell us
* who is mapping it.
*/
struct page {
   unsigned long flags;       /* Atomic flags, some possibly
                   * updated asynchronously */
   atomic_t _count;       /* Usage count, see below. */
   /* _mapcount shares storage with the SLUB inuse/objects pair. */
   union {
       atomic_t _mapcount;   /* Count of ptes mapped in mms,
                   * to show when page is mapped
                   * & limit reverse map searches.
                   */
       struct {       /* SLUB */
           u16 inuse;
           u16 objects;
       };
   };
   /*
   * The private/mapping pair shares storage with ptl (only when
   * USE_SPLIT_PTLOCKS is set), slab and first_page.
   */
   union {
        struct {
       unsigned long private;       /* Mapping-private opaque data:
                      * usually used for buffer_heads
                       * if PagePrivate set; used for
                       * swp_entry_t if PageSwapCache;
                       * indicates order in the buddy
                       * system if PG_buddy is set.
                       */
       struct address_space *mapping;   /* If low bit clear, points to
                       * inode address_space, or NULL.
                       * If page mapped as anonymous
                       * memory, low bit is set, and
                       * it points to anon_vma object:
                       * see PAGE_MAPPING_ANON below.
                       */
        };
#if USE_SPLIT_PTLOCKS
        spinlock_t ptl;
#endif
        struct kmem_cache *slab;   /* SLUB: Pointer to slab */
        struct page *first_page;   /* Compound tail pages */
   };
   /* index shares storage with the SLUB freelist pointer. */
   union {
       pgoff_t index;       /* Our offset within mapping. */
       void *freelist;       /* SLUB: freelist req. slab lock */
   };
   struct list_head lru;       /* Pageout list, eg. active_list
                   * protected by zone->lru_lock !
                   */
   /*
   * On machines where all RAM is mapped into kernel address space,
   * we can simply calculate the virtual address. On machines with
   * highmem some memory is mapped into kernel virtual memory
   * dynamically, so we need a place to store that address.
   * Note that this field could be 16 bits on x86 ... ;)
   *
   * Architectures with slow multiplication can define
   * WANT_PAGE_VIRTUAL in asm/page.h
   */
#if defined(WANT_PAGE_VIRTUAL)
   void *virtual;           /* Kernel virtual address (NULL if
                       not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
   unsigned long debug_flags;   /* Use atomic bitops on this */
#endif

#ifdef CONFIG_KMEMCHECK
   /*
   * kmemcheck wants to track the status of each byte in a page; this
   * is a pointer to such a status block. NULL if not tracked.
   */
   void *shadow;
#endif
};
每个page代表一个物理页面。其中的注释说明了每个变量的作用。
看来学计算机英文很重要，都是英文注释，还好大学的时候好好学了一下英文，一不小心把六级给过了，虽然到现在还是不会说，但认识还是认识的，
面生的可以GOOGLE一下，也就看懂了。
到这你想到了什么？没什么？只是一大堆复制的代码，也没看到什么，但我想到了三层，似乎在计算机这个行业中，三用的挺多的，
比如MVC是三个吧，还有就是Android中的上层用的是java,框架是c++,底层是Linux,当然是C了，自己再去想想吧，
好像别人研究过，层数多于三便不好让人理解了，不知道是不是这个原因。
物理页面相当于仓库，说明系统中拥有什么，即系统中拥有的资源，跟其它的资源是一样的，只是这里表示的是内存。
为了有效地利用这些资源并保证安全，系统并没有直接使用这些物理内存，而是使用了虚拟内存，连接虚拟内存与物理内存的是页目录、中间目录、页表，
即y=f(x)其中x为虚拟地址，y是物理地址，不同的x可以对应相同的y。
物理地址有虚拟地址来对应，而设备却有文件来对应，即一切设备皆文件，有某些相同的东西在里面，说不上来，自己想去吧。

阅读(2971) | 评论(0) | 转发(0) |

上一篇：kmem_cache结构的三个可变参数

下一篇：总结12.03.15

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6