Category: LINUX

2011-12-22 11:34:01

static int __init kswapd_init(void)
{
    printk("Starting kswapd v1.8\n");
    swap_setup();    /* set the swap read-ahead page count */
    kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
    kernel_thread(kreclaimd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
    return 0;
}


/*
 * The background pageout daemon, started as a kernel thread
 * from the init process.
 *
 * This basically trickles out pages so that we have _some_
 * free memory available even if there is no other activity
 * that frees anything up. This is needed for things like routing
 * etc, where we otherwise might have all activity going on in
 * asynchronous contexts that cannot page things out.
 *
 * If there are applications that are active memory-allocators
 * (most normal use), this basically shouldn't matter.
 */
int kswapd(void *unused)
{
    struct task_struct *tsk = current;

    tsk->session = 1;
    tsk->pgrp = 1;
    strcpy(tsk->comm, "kswapd");
    sigfillset(&tsk->blocked);
    kswapd_task = tsk;    /* the lines above initialize the daemon */

    /*
     * Tell the memory management that we're a "memory allocator",
     * and that if we need more memory we should get access to it
     * regardless (see "__alloc_pages()"). "kswapd" should
     * never get caught in the normal page freeing logic.
     *
     * (Kswapd normally doesn't need memory anyway, but sometimes
     * you need a small amount of memory in order to be able to
     * page out something else, and this flag essentially protects
     * us from recursively trying to free more memory as we're
     * trying to free the first piece of memory in the first place).
     */
    tsk->flags |= PF_MEMALLOC;    /* "memory allocator" privilege */

    /*
     * Kswapd main loop.
     */
    for (;;) {
        static int recalc = 0;

        /* If needed, try to free some memory. */
        if (inactive_shortage() || free_shortage()) {
            int wait = 0;
            /* Do we need to do some synchronous flushing? */
            if (waitqueue_active(&kswapd_done))    /* is anyone waiting on the kswapd_done queue? */
                wait = 1;
            do_try_to_free_pages(GFP_KSWAPD, wait);
        }

        /*
         * Do some (very minimal) background scanning. This
         * will scan all pages on the active list once
         * every minute. This clears old referenced bits
         * and moves unused pages to the inactive list.
         */
        refill_inactive_scan(6, 0);

        /* Once a second, recalculate some VM stats. */
        if (time_after(jiffies, recalc + HZ)) {
            recalc = jiffies;
            recalculate_vm_stats();
        }

        /*
         * Wake up everybody waiting for free memory
         * and unplug the disk queue.
         */
        wake_up_all(&kswapd_done);
        run_task_queue(&tq_disk);

        /*
         * We go to sleep if either the free page shortage
         * or the inactive page shortage is gone. We do this
         * because:
         * 1) we need no more free pages or
         * 2) the inactive pages need to be flushed to disk,
         * it wouldn't help to eat CPU time now ...
         *
         * We go to sleep for one second, but if it's needed
         * we'll be woken up earlier...
         */
        if (!free_shortage() || !inactive_shortage()) {
            interruptible_sleep_on_timeout(&kswapd_wait, HZ);    /* sleep; let the kernel schedule other processes */
        /*
         * If we couldn't free enough memory, we see if it was
         * due to the system just not having enough memory.
         * If that is the case, the only solution is to kill
         * a process (the alternative is eternal deadlock).
         *
         * If there still is enough memory around, we just loop
         * and try free some more memory...
         */
        } else if (out_of_memory()) {
            oom_kill();
        }
    }
}
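Stripped of kernel details, the main loop has a simple shape: check for a shortage, do bounded work, recompute statistics once a second, then sleep for at most a second unless woken earlier. Below is a minimal user-space model of that control flow (hypothetical stubs stand in for the kernel's shortage tests, and sleep() stands in for interruptible_sleep_on_timeout()); it is a sketch of the loop's shape, not kernel code.

#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Stubs standing in for the kernel's shortage tests (hypothetical). */
static int free_shortage(void)     { return 0; }
static int inactive_shortage(void) { return 0; }

int main(void)
{
    time_t recalc = 0;
    for (int i = 0; i < 5; i++) {            /* the kernel loops forever */
        if (inactive_shortage() || free_shortage())
            puts("shortage: would call do_try_to_free_pages()");

        puts("background scan: would call refill_inactive_scan(6, 0)");

        time_t now = time(NULL);
        if (now > recalc) {                  /* once a second */
            recalc = now;
            puts("would call recalculate_vm_stats()");
        }

        /* sleep up to one second; a waker would interrupt this early */
        if (!free_shortage() || !inactive_shortage())
            sleep(1);
    }
    return 0;
}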


/*
 * How many inactive pages are we short?
 */
int inactive_shortage(void)
{
    int shortage = 0;

    shortage += freepages.high;             /* target number of free pages */
    shortage += inactive_target;            /* target number of inactive pages; together these are the demand */
    shortage -= nr_free_pages();            /* pages that can be allocated immediately */
    shortage -= nr_inactive_clean_pages();  /* inactive clean pages */
    shortage -= nr_inactive_dirty_pages;    /* inactive dirty pages */

    if (shortage > 0)
        return shortage;

    return 0;
}
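The function is plain supply-and-demand arithmetic: demand is freepages.high plus inactive_target, supply is the free, inactive-clean, and inactive-dirty page counts. A worked example with made-up numbers (all values hypothetical):

#include <stdio.h>

int main(void)
{
    /* hypothetical VM state, in pages */
    int freepages_high = 1024, inactive_target = 3072;   /* demand: 4096 */
    int nr_free = 500, nr_clean = 800, nr_dirty = 1200;  /* supply: 2500 */

    int shortage = freepages_high + inactive_target
                 - nr_free - nr_clean - nr_dirty;
    /* 4096 - 2500 = 1596 pages short */
    printf("inactive shortage: %d pages\n", shortage > 0 ? shortage : 0);
    return 0;
}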


/*
 * Check if there are zones with a severe shortage of free pages,
 * or if all zones have a minor shortage.
 */
int free_shortage(void)
{
    pg_data_t *pgdat = pgdat_list;
    int sum = 0;
    int freeable = nr_free_pages() + nr_inactive_clean_pages();
    int freetarget = freepages.high + inactive_target / 3;

    /* Are we low on free pages globally? */
    if (freeable < freetarget)
        return freetarget - freeable;

    /* If not, are we very low on any particular zone? */
    do {
        int i;
        for (i = 0; i < MAX_NR_ZONES; i++) {
            zone_t *zone = pgdat->node_zones + i;
            if (zone->size && (zone->inactive_clean_pages +
                    zone->free_pages < zone->pages_min + 1)) {
                /* + 1 to have overlap with alloc_pages() !! */
                sum += zone->pages_min + 1;
                sum -= zone->free_pages;
                sum -= zone->inactive_clean_pages;
            }
        }
        pgdat = pgdat->node_next;
    } while (pgdat);

    return sum;
}


static int do_try_to_free_pages(unsigned int gfp_mask, int user)
{
    int ret = 0;

    /*
     * If we're low on free pages, move pages from the
     * inactive_dirty list to the inactive_clean list.
     *
     * Usually bdflush will have pre-cleaned the pages
     * before we get around to moving them to the other
     * list, so this is a relatively cheap operation.
     */
    if (free_shortage() || nr_inactive_dirty_pages > nr_free_pages() +
            nr_inactive_clean_pages())
        ret += page_launder(gfp_mask, user);    /* "launder" inactive dirty pages into immediately allocatable clean ones */

    /*
     * If needed, we move pages from the active list
     * to the inactive list. We also "eat" pages from
     * the inode and dentry cache whenever we do this.
     */
    if (free_shortage() || inactive_shortage()) {    /* still short */
        shrink_dcache_memory(6, gfp_mask);
        shrink_icache_memory(6, gfp_mask);    /* keep the dentry/inode caches in balance with physical pages */
        ret += refill_inactive(gfp_mask, user);
    } else {
        /*
         * Reclaim unused slab cache memory.
         */
        kmem_cache_reap(gfp_mask);
        ret = 1;
    }

    return ret;
}


/*
 * We need to make the locks finer granularity, but right
 * now we need this so that we can do page allocations
 * without holding the kernel lock etc.
 *
 * We want to try to free "count" pages, and we want to
 * cluster them so that we get good swap-out behaviour.
 *
 * OTOH, if we're a user process (and not kswapd), we
 * really care about latency. In that case we don't try
 * to free too many pages.
 */
static int refill_inactive(unsigned int gfp_mask, int user)
{    /* "user" says whether someone is waiting on the kswapd_done queue */
    int priority, count, start_count, made_progress;

    count = inactive_shortage() + free_shortage();
    if (user)
        count = (1 << page_cluster);
    start_count = count;

    /* Always trim SLAB caches when memory gets low. */
    kmem_cache_reap(gfp_mask);    /* reap free pages managed by the slab allocator */

    priority = 6;    /* the loop starts at priority 6 and works down to 0 */
    do {
        made_progress = 0;

        if (current->need_resched) {    /* rescheduling is pending; yield the CPU */
            __set_current_state(TASK_RUNNING);
            schedule();
        }

        while (refill_inactive_scan(priority, 1)) {    /* scan the active list, moving unused pages to the inactive list */
            made_progress = 1;
            if (--count <= 0)
                goto done;
        }

        /*
         * don't be too light against the d/i cache since
         * refill_inactive() almost never fails when there's
         * really plenty of memory free.
         */
        shrink_dcache_memory(priority, gfp_mask);
        shrink_icache_memory(priority, gfp_mask);

        /*
         * Then, try to page stuff out..
         */
        while (swap_out(priority, gfp_mask)) {    /* scan process page tables and unmap inactive pages */
            made_progress = 1;
            if (--count <= 0)
                goto done;
        }

        /*
         * If we either have enough free memory, or if
         * page_launder() will be able to make enough
         * free memory, then stop.
         */
        if (!inactive_shortage() || !free_shortage())
            goto done;

        /*
         * Only switch to a lower "priority" if we
         * didn't make any useful progress in the
         * last loop.
         */
        if (!made_progress)
            priority--;
    } while (priority >= 0);

    /* Always end on a refill_inactive.., may sleep... */
    while (refill_inactive_scan(0, 1)) {
        if (--count <= 0)
            goto done;
    }

done:
    return (count < start_count);
}
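The priority loop widens the scan as pressure persists: refill_inactive_scan(priority, 1) examines nr_active_pages >> priority pages, so priority 6 covers 1/64 of the active list and priority 0 covers all of it. The progression, for a hypothetical active-list size:

#include <stdio.h>

int main(void)
{
    int nr_active_pages = 64000;    /* hypothetical */
    for (int priority = 6; priority >= 0; priority--)
        printf("priority %d: scan up to %d pages\n",
               priority, nr_active_pages >> priority);
    return 0;
}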


/**
 * refill_inactive_scan - scan the active list and find pages to deactivate
 * @priority: the priority at which to scan
 * @oneshot: exit after deactivating one page
 *
 * This function will scan a portion of the active list to find
 * unused pages, those pages will then be moved to the inactive list.
 */
int refill_inactive_scan(unsigned int priority, int oneshot)
{
    struct list_head * page_lru;
    struct page * page;
    int maxscan, page_active = 0;
    int ret = 0;

    /* Take the lock while messing with the list... */
    spin_lock(&pagemap_lru_lock);
    maxscan = nr_active_pages >> priority;
    while (maxscan-- > 0 && (page_lru = active_list.prev) != &active_list) {
        page = list_entry(page_lru, struct page, lru);

        /* Wrong page on list?! (list corruption, should not happen) */
        if (!PageActive(page)) {    /* not an active page */
            printk("VM: refill_inactive, wrong page on list.\n");
            list_del(page_lru);
            nr_active_pages--;
            continue;
        }

        /* Do aging on the pages. */
        if (PageTestandClearReferenced(page)) {
            age_page_up_nolock(page);
            page_active = 1;
        } else {
            age_page_down_ageonly(page);
            /*
             * Since we don't hold a reference on the page
             * ourselves, we have to do our test a bit more
             * strict than deactivate_page(). This is needed
             * since otherwise the system could hang shuffling
             * unfreeable pages from the active list to the
             * inactive_dirty list and back again...
             *
             * SUBTLE: we can have buffer pages with count 1.
             */
            if (page->age == 0 && page_count(page) <=
                        (page->buffers ? 2 : 1)) {    /* age exhausted and no other users hold the page */
                deactivate_page_nolock(page);
                page_active = 0;
            } else {
                page_active = 1;
            }
        }
        /*
         * If the page is still on the active list, move it
         * to the other end of the list. Otherwise it was
         * deactivated by age_page_down and we exit successfully.
         */
        if (page_active || PageActive(page)) {
            list_del(page_lru);
            list_add(page_lru, &active_list);    /* put it back at the head of the active list */
        } else {
            ret = 1;
            if (oneshot)
                break;
        }
    }
    spin_unlock(&pagemap_lru_lock);

    return ret;
}
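The aging policy in the branches above is: a referenced page gains PAGE_AGE_ADV (capped at PAGE_AGE_MAX), an unreferenced page has its age halved, and a page whose age reaches zero becomes a deactivation candidate. A standalone sketch of that arithmetic; the constants are assumed 2.4-era-looking values, not taken from this code:

#include <stdio.h>

#define PAGE_AGE_ADV 3    /* assumed value */
#define PAGE_AGE_MAX 64   /* assumed value */

static int age_up(int age)
{
    age += PAGE_AGE_ADV;
    return age > PAGE_AGE_MAX ? PAGE_AGE_MAX : age;
}

static int age_down(int age) { return age / 2; }

int main(void)
{
    int age = age_up(0);    /* freshly referenced page */
    /* miss several scans in a row: 3 -> 1 -> 0, then deactivation */
    while (age > 0) {
        printf("age=%d\n", age);
        age = age_down(age);
    }
    puts("age=0: deactivate_page() candidate");
    return 0;
}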



static int swap_out(unsigned int priority, int gfp_mask)
{
    int counter;
    int __ret = 0;

    /*
     * We make one or two passes through the task list, indexed by
     * assign = {0, 1}:
     * Pass 1: select the swappable task with maximal RSS that has
     * not yet been swapped out.
     * Pass 2: re-assign rss swap_cnt values, then select as above.
     *
     * With this approach, there's no need to remember the last task
     * swapped out. If the swap-out fails, we clear swap_cnt so the
     * task won't be selected again until all others have been tried.
     *
     * Think of swap_cnt as a "shadow rss" - it tells us which process
     * we want to page out (always try largest first).
     */
    counter = (nr_threads << SWAP_SHIFT) >> priority;
    if (counter < 1)
        counter = 1;

    for (; counter >= 0; counter--) {    /* each pass picks the best mm, then scans its page tables and unmaps eligible pages */
        struct list_head *p;
        unsigned long max_cnt = 0;
        struct mm_struct *best = NULL;
        int assign = 0;
        int found_task = 0;
    select:
        spin_lock(&mmlist_lock);
        p = init_mm.mmlist.next;    /* start with the mm after init_mm */
        for (; p != &init_mm.mmlist; p = p->next) {
            struct mm_struct *mm = list_entry(p, struct mm_struct, mmlist);
            if (mm->rss <= 0)    /* rss is the number of resident pages of this mm */
                continue;
            found_task++;
            /* Refresh swap_cnt? */
            if (assign == 1) {
                mm->swap_cnt = (mm->rss >> SWAP_SHIFT);
                if (mm->swap_cnt < SWAP_MIN)
                    mm->swap_cnt = SWAP_MIN;
            }
            if (mm->swap_cnt > max_cnt) {    /* pick the mm with the largest swap_cnt, i.e. the most pages not yet examined in this round of swapping */
                max_cnt = mm->swap_cnt;
                best = mm;
            }
        }

        /* Make sure it doesn't disappear */
        if (best)
            atomic_inc(&best->mm_users);
        spin_unlock(&mmlist_lock);

        /*
         * We have dropped the tasklist_lock, but we
         * know that "mm" still exists: we are running
         * with the big kernel lock, and exit_mm()
         * cannot race with us.
         */
        if (!best) {
            if (!assign && found_task > 0) {
                assign = 1;
                goto select;
            }
            break;
        } else {
            __ret = swap_out_mm(best, gfp_mask);
            mmput(best);    /* drop the mm_users count taken above */
            break;
        }
    }
    return __ret;
}
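The victim selection amounts to a two-pass scan: pass 1 picks the mm with the largest swap_cnt; if none qualifies, pass 2 refreshes every swap_cnt from rss (the "shadow rss") and picks again. A compact sketch over an array of mock mm structs (struct, values, and the SWAP_SHIFT/SWAP_MIN constants are hypothetical):

#include <stdio.h>

#define SWAP_SHIFT 5    /* assumed value */
#define SWAP_MIN   8    /* assumed value */

struct mock_mm { int rss; int swap_cnt; };

static struct mock_mm *select_victim(struct mock_mm *mms, int n)
{
    for (int assign = 0; assign < 2; assign++) {
        struct mock_mm *best = NULL;
        int max_cnt = 0;
        for (int i = 0; i < n; i++) {
            if (mms[i].rss <= 0)
                continue;
            if (assign == 1) {    /* pass 2: refresh the "shadow rss" */
                mms[i].swap_cnt = mms[i].rss >> SWAP_SHIFT;
                if (mms[i].swap_cnt < SWAP_MIN)
                    mms[i].swap_cnt = SWAP_MIN;
            }
            if (mms[i].swap_cnt > max_cnt) {
                max_cnt = mms[i].swap_cnt;
                best = &mms[i];
            }
        }
        if (best)
            return best;
    }
    return NULL;
}

int main(void)
{
    struct mock_mm mms[] = { { 4000, 0 }, { 9000, 0 }, { 0, 0 } };
    struct mock_mm *victim = select_victim(mms, 3);
    if (victim)    /* pass 1 finds nothing; pass 2 picks rss=9000 */
        printf("victim rss=%d swap_cnt=%d\n", victim->rss, victim->swap_cnt);
    return 0;
}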



static int swap_out_mm(struct mm_struct * mm, int gfp_mask)
{
    int result = 0;
    unsigned long address;
    struct vm_area_struct* vma;

    /*
     * Go through process' page directory.
     */

    /*
     * Find the proper vm-area after freezing the vma chain
     * and ptes.
     */
    spin_lock(&mm->page_table_lock);
    address = mm->swap_address;
    vma = find_vma(mm, address);
    if (vma) {
        if (address < vma->vm_start)
            address = vma->vm_start;

        for (;;) {
            result = swap_out_vma(mm, vma, address, gfp_mask);
            if (result)
                goto out_unlock;
            vma = vma->vm_next;
            if (!vma)
                break;
            address = vma->vm_start;
        }
    }
    /* Reset to 0 when we reach the end of address space */
    mm->swap_address = 0;
    mm->swap_cnt = 0;

out_unlock:
    spin_unlock(&mm->page_table_lock);
    return result;
}

The call chain descends swap_out_vma() -> swap_out_pgd() -> swap_out_pmd() -> try_to_swap_out(), which swaps out the page pointed to by the pte.


/*
 * The swap-out functions return 1 if they successfully
 * threw something out, and we got a free page. It returns
 * zero if it couldn't do anything, and any other value
 * indicates it decreased rss, but the page was shared.
 *
 * If it sleeps, it *must* return 1 to make sure we
 * don't continue with the swap-out. Otherwise we may be
 * using a process that no longer actually exists (it might
 * have died while we slept).
 */
static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
{
    pte_t pte;
    swp_entry_t entry;
    struct page * page;
    int onlist;

    pte = *page_table;
    if (!pte_present(pte))
        goto out_failed;    /* the page is not in memory */
    page = pte_page(pte);    /* pointer to the physical page */
    if ((!VALID_PAGE(page)) || PageReserved(page))    /* VALID_PAGE(page) is ((page - mem_map) < max_mapnr); (page - mem_map) is the page frame index */
        goto out_failed;

    if (!mm->swap_cnt)
        return 1;

    mm->swap_cnt--;

    onlist = PageActive(page);
    /* Don't look at this pte if it's been accessed recently. */
    if (ptep_test_and_clear_young(page_table)) {    /* has the page been accessed recently? */
        age_page_up(page);
        goto out_failed;
    }
    if (!onlist)    /* not on the active list */
        /* The page is still mapped, so it can't be freeable... */
        age_page_down_ageonly(page);    /* page->age /= 2; the page cannot be swapped out while page->age is non-zero */

    /*
     * If the page is in active use by us, or if the page
     * is in active use by others, don't unmap it or
     * (worse) start unneeded IO.
     */
    if (page->age > 0)
        goto out_failed;

    if (TryLockPage(page))
        goto out_failed;

    /* From this point on, the odds are that we're going to
     * nuke this pte, so read and clear the pte. This hook
     * is needed on CPUs which update the accessed and dirty
     * bits in hardware.
     */
    pte = ptep_get_and_clear(page_table);    /* clear the pte, revoking the mapping */
    flush_tlb_page(vma, address);

    /*
     * Is the page already in the swap cache? If so, then
     * we can just drop our reference to it without doing
     * any IO - it's already up-to-date on disk.
     *
     * Return 0, as we didn't actually free any real
     * memory, and we should just continue our scan.
     */
    if (PageSwapCache(page)) {    /* is the page in the swapper_space cache? */
        entry.val = page->index;
        if (pte_dirty(pte))
            set_page_dirty(page);
set_swap_pte:
        swap_duplicate(entry);
        set_pte(page_table, swp_entry_to_pte(entry));    /* store the on-disk page's swap entry in the pte, turning it into a mapping of the on-disk page */
drop_pte:
        UnlockPage(page);
        mm->rss--;
        deactivate_page(page);    /* conditionally mark the page inactive */
        page_cache_release(page);
out_failed:
        return 0;
    }

    /*
     * Is it a clean page? Then it must be recoverable
     * by just paging it in again, and we can just drop
     * it..
     *
     * However, this won't actually free any real
     * memory, as the page will just be in the page cache
     * somewhere, and as such we should just continue
     * our scan.
     *
     * Basically, this just makes it possible for us to do
     * some real work in the future in "refill_inactive()".
     */
    flush_cache_page(vma, address);
    if (!pte_dirty(pte))
        goto drop_pte;

    /*
     * Ok, it's really dirty. That means that
     * we should either create a new swap cache
     * entry for it, or we should write it back
     * to its own backing store.
     */
    if (page->mapping) {
        set_page_dirty(page);
        goto drop_pte;
    }

    /*
     * This is a dirty, swappable page. First of all,
     * get a suitable swap entry for it, and make sure
     * we have the swap cache set up to associate the
     * page with that swap entry.
     */
    entry = get_swap_page();
    if (!entry.val)
        goto out_unlock_restore; /* No swap space left */

    /* Add it to the swap cache and mark it dirty */
    add_to_swap_cache(page, entry);
    set_page_dirty(page);
    goto set_swap_pte;

out_unlock_restore:
    set_pte(page_table, pte);
    UnlockPage(page);
    return 0;
}
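Depending on the page's state, try_to_swap_out() ends in one of four ways; condensed as a decision function (a sketch of the flow above, not kernel code):

#include <stdio.h>

/* condensed outcomes of the four exits of try_to_swap_out() */
static const char *pageout_path(int in_swap_cache, int dirty, int file_backed)
{
    if (in_swap_cache)
        return "reuse existing swap entry, set swap pte, drop the page";
    if (!dirty)
        return "clean page: just drop the pte, page stays in the page cache";
    if (file_backed)
        return "dirty file page: set_page_dirty(), drop pte, let writeback flush it";
    return "dirty anonymous page: allocate swap entry, add to swap cache, set swap pte";
}

int main(void)
{
    printf("%s\n", pageout_path(0, 1, 0));    /* dirty anonymous page */
    return 0;
}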


/*
 * Verify that a swap entry is valid and increment its swap map count.
 * Kernel_lock is held, which guarantees existence of swap device.
 *
 * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
 * "permanent", but will be reclaimed by the next swapoff.
 */
int swap_duplicate(swp_entry_t entry)    /* validate the swap entry and increment the on-disk page's share count */
{
    struct swap_info_struct * p;
    unsigned long offset, type;
    int result = 0;

    /* Swap entry 0 is illegal */
    if (!entry.val)
        goto out;
    type = SWP_TYPE(entry);
    if (type >= nr_swapfiles)
        goto bad_file;
    p = type + swap_info;
    offset = SWP_OFFSET(entry);
    if (offset >= p->max)
        goto bad_offset;
    if (!p->swap_map[offset])
        goto bad_unused;
    /*
     * Entry is valid, so increment the map count.
     */
    swap_device_lock(p);
    if (p->swap_map[offset] < SWAP_MAP_MAX)
        p->swap_map[offset]++;
    else {
        static int overflow = 0;
        if (overflow++ < 5)
            printk("VM: swap entry overflow\n");
        p->swap_map[offset] = SWAP_MAP_MAX;
    }
    swap_device_unlock(p);
    result = 1;
out:
    return result;

bad_file:
    printk("Bad swap file entry %08lx\n", entry.val);
    goto out;
bad_offset:
    printk("Bad swap offset entry %08lx\n", entry.val);
    goto out;
bad_unused:
    printk("Unused swap offset entry in swap_dup %08lx\n", entry.val);
    goto out;
}
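SWP_TYPE() and SWP_OFFSET() unpack a swp_entry_t, which packs a swap device number (the "type") and a page offset on that device into a single word. A sketch of the packing, assuming the i386 2.4-style layout (bit 0 is the pte present bit and must stay clear; real layouts differ per architecture):

#include <stdio.h>

typedef struct { unsigned long val; } swp_entry_t;

/* assumed i386 2.4-style layout: type in bits 1..6, offset from bit 8 */
#define SWP_TYPE(x)          (((x).val >> 1) & 0x3f)
#define SWP_OFFSET(x)        ((x).val >> 8)
#define SWP_ENTRY(type, off) ((swp_entry_t){ ((unsigned long)(type) << 1) | \
                                             ((unsigned long)(off) << 8) })

int main(void)
{
    swp_entry_t e = SWP_ENTRY(2, 12345);    /* device 2, page offset 12345 */
    printf("val=%#lx type=%lu offset=%lu\n",
           e.val, SWP_TYPE(e), SWP_OFFSET(e));
    return 0;
}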

void age_page_up(struct page * page)
{
    /*
     * We're dealing with an inactive page, move the page
     * to the active list.
     */
    if (!page->age)
        activate_page(page);

    /* The actual page aging bit */
    page->age += PAGE_AGE_ADV;
    if (page->age > PAGE_AGE_MAX)
        page->age = PAGE_AGE_MAX;
}



int page_launder(int gfp_mask, int sync)
{
    int launder_loop, maxscan, cleaned_pages, maxlaunder;
    /* launder_loop counts scan passes; cleaned_pages accumulates the number of laundered pages */
    int can_get_io_locks;
    struct list_head * page_lru;
    struct page * page;

    /*
     * We can only grab the IO locks (eg. for flushing dirty
     * buffers to disk) if __GFP_IO is set.
     */
    can_get_io_locks = gfp_mask & __GFP_IO;

    launder_loop = 0;
    maxlaunder = 0;
    cleaned_pages = 0;

dirty_page_rescan:
    spin_lock(&pagemap_lru_lock);
    maxscan = nr_inactive_dirty_pages;
    while ((page_lru = inactive_dirty_list.prev) != &inactive_dirty_list &&
                maxscan-- > 0) {
        page = list_entry(page_lru, struct page, lru);

        /* Wrong page on list?! (list corruption, should not happen) */
        if (!PageInactiveDirty(page)) {
            printk("VM: page_launder, wrong page on list.\n");
            list_del(page_lru);
            nr_inactive_dirty_pages--;
            page->zone->inactive_dirty_pages--;
            continue;
        }

        /* Page is or was in use? Move it to the active list. */
        if (PageTestandClearReferenced(page) || page->age > 0 ||
                (!page->buffers && page_count(page) > 1) ||
                page_ramdisk(page)) {
            del_page_from_inactive_dirty_list(page);
            add_page_to_active_list(page);
            continue;
        }

        /*
         * The page is locked. IO in progress?
         * Move it to the back of the list.
         */
        if (TryLockPage(page)) {
            list_del(page_lru);
            list_add(page_lru, &inactive_dirty_list);
            continue;
        }

        /*
         * Dirty swap-cache page? Write it out if
         * last copy..
         */
        if (PageDirty(page)) {
            int (*writepage)(struct page *) = page->mapping->a_ops->writepage;
            int result;

            if (!writepage)
                goto page_active;

            /* First time through? Move it to the back of the list */
            if (!launder_loop) {
                list_del(page_lru);
                list_add(page_lru, &inactive_dirty_list);
                UnlockPage(page);
                continue;
            }

            /* OK, do a physical asynchronous write to swap. */
            ClearPageDirty(page);
            page_cache_get(page);
            spin_unlock(&pagemap_lru_lock);

            result = writepage(page);
            page_cache_release(page);

            /* And re-start the thing.. */
            spin_lock(&pagemap_lru_lock);
            if (result != 1)
                continue;
            /* writepage refused to do anything */
            set_page_dirty(page);
            goto page_active;
        }

        /*
         * If the page has buffers, try to free the buffer mappings
         * associated with this page. If we succeed we either free
         * the page (in case it was a buffercache only page) or we
         * move the page to the inactive_clean list.
         *
         * On the first round, we should free all previously cleaned
         * buffer pages
         */
        if (page->buffers) {
            int wait, clearedbuf;
            int freed_page = 0;
            /*
             * Since we might be doing disk IO, we have to
             * drop the spinlock and take an extra reference
             * on the page so it doesn't go away from under us.
             */
            del_page_from_inactive_dirty_list(page);
            page_cache_get(page);
            spin_unlock(&pagemap_lru_lock);

            /* Will we do (asynchronous) IO? */
            if (launder_loop && maxlaunder == 0 && sync)
                wait = 2;    /* Synchronous IO */
            else if (launder_loop && maxlaunder-- > 0)
                wait = 1;    /* Async IO */
            else
                wait = 0;    /* No IO */

            /* Try to free the page buffers. */
            clearedbuf = try_to_free_buffers(page, wait);

            /*
             * Re-take the spinlock. Note that we cannot
             * unlock the page yet since we're still
             * accessing the page_struct here...
             */
            spin_lock(&pagemap_lru_lock);

            /* The buffers were not freed. */
            if (!clearedbuf) {
                add_page_to_inactive_dirty_list(page);

            /* The page was only in the buffer cache. */
            } else if (!page->mapping) {
                atomic_dec(&buffermem_pages);
                freed_page = 1;
                cleaned_pages++;

            /* The page has more users besides the cache and us. */
            } else if (page_count(page) > 2) {
                add_page_to_active_list(page);

            /* OK, we "created" a freeable page. */
            } else /* page->mapping && page_count(page) == 2 */ {
                add_page_to_inactive_clean_list(page);
                cleaned_pages++;
            }

            /*
             * Unlock the page and drop the extra reference.
             * We can only do it here because we are accessing
             * the page struct above.
             */
            UnlockPage(page);
            page_cache_release(page);

            /*
             * If we're freeing buffer cache pages, stop when
             * we've got enough free memory.
             */
            if (freed_page && !free_shortage())
                break;
            continue;
        } else if (page->mapping && !PageDirty(page)) {
            /*
             * If a page had an extra reference in
             * deactivate_page(), we will find it here.
             * Now the page is really freeable, so we
             * move it to the inactive_clean list.
             */
            del_page_from_inactive_dirty_list(page);
            add_page_to_inactive_clean_list(page);
            UnlockPage(page);
            cleaned_pages++;
        } else {
page_active:
            /*
             * OK, we don't know what to do with the page.
             * It's no use keeping it here, so we move it to
             * the active list.
             */
            del_page_from_inactive_dirty_list(page);
            add_page_to_active_list(page);
            UnlockPage(page);
        }
    }
    spin_unlock(&pagemap_lru_lock);

    /*
     * If we don't have enough free pages, we loop back once
     * to queue the dirty pages for writeout. When we were called
     * by a user process (that /needs/ a free page) and we didn't
     * free anything yet, we wait synchronously on the writeout of
     * MAX_SYNC_LAUNDER pages.
     *
     * We also wake up bdflush, since bdflush should, under most
     * loads, flush out the dirty pages before we have to wait on
     * IO.
     */
    if (can_get_io_locks && !launder_loop && free_shortage()) {
        launder_loop = 1;
        /* If we cleaned pages, never do synchronous IO. */
        if (cleaned_pages)
            sync = 0;
        /* We only do a few "out of order" flushes. */
        maxlaunder = MAX_LAUNDER;
        /* Kflushd takes care of the rest. */
        wakeup_bdflush(0);
        goto dirty_page_rescan;
    }

    /* Return the number of pages moved to the inactive_clean list. */
    return cleaned_pages;
}
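The wait = 0/1/2 decision in the buffer-freeing branch is a small table: synchronous IO only on the laundering pass once the out-of-order budget (maxlaunder) is spent and the caller asked for sync; async IO while budget remains; otherwise no IO. As a sketch (the real code also decrements maxlaunder as it tests it):

#include <stdio.h>

/* 0 = no IO, 1 = async IO, 2 = synchronous IO */
static int io_mode(int launder_loop, int maxlaunder, int sync)
{
    if (launder_loop && maxlaunder == 0 && sync)
        return 2;
    if (launder_loop && maxlaunder > 0)
        return 1;
    return 0;
}

int main(void)
{
    printf("%d %d %d\n",
           io_mode(0, 0, 1),    /* first pass: no IO          */
           io_mode(1, 5, 0),    /* budget left: async IO      */
           io_mode(1, 0, 1));   /* budget spent + sync: sync  */
    return 0;
}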