Chinaunix首页 | 论坛 | 博客
  • 博客访问: 14778
  • 博文数量: 6
  • 博客积分: 1440
  • 博客等级: 上尉
  • 技术积分: 60
  • 用 户 组: 普通用户
  • 注册时间: 2009-09-29 15:35
文章分类
文章存档

2013年(1)

2011年(1)

2009年(4)

我的朋友
最近访客

分类: LINUX

2009-11-01 09:17:39

交換頁分配方法


交換頁分配方法
-
1) 交換空間由若干個塊設備文件物理空間或普通文件的物理空間構成, 每一交換文件用交換結構(swap_info_struct)描述, 最多允許8個交換文件(MAX_SWAPFILES), 交換文件按其優先級(swap_info->prio)大小順序排列在交換鏈表(swap_list->head)中, 當前使用的交換文件為(swap_list->next). 每一交換結構具有頁分配表(swap_info->swap_map).
-
2) 在分配交換頁時, 系統先在自由區內掃瞄一簇(256頁)連續頁塊, 後繼的分配頁被約束在此分配簇中. 當沒有分配簇可用時, 系統將在整個自由區內掃瞄. 每分配一頁, 系統會切換到下一個相同優先級的交換文件進行分配. 當同一優先級的交換文件頁面用完後, 系統切換到更低一級的交換文件進行分配. 在添加交換文件時, 如果沒有指定優先級, 系統使用遞減的least_priority分配優先級, 首先添加的交換文件具有較高的優先級.
-
3) 交換文件的起始頁為交換頭標(swap_header), 首頁的最後10個字節為交換文件的特徵串. 特徵串"SWAP-SPACE"標識舊版的交換頭標格式, 特徵串的前部為可用的交換頁位圖. 新版的交換文件使用"SWAPSPACE2"標識的頭標, 它使用"壞頁表"來標識壞頁.
-
4) 交換空間用交換頁目錄項(swp_entry_t)尋址, 交換頁目錄項的最低位為零, 最低字節的第1至第6位(共6位, 對應SWP_TYPE中的掩碼0x3f)索引不同的交換文件, 第8位及以上的各位(對應SWP_OFFSET中的右移8位)代表交換文件內的頁號. 當進程的某個物理頁面被寫入交換空間時, 該物理頁面所在的頁目錄項被置換成交換頁目錄項, 交換頁目錄項的存在位為零, 當進程再次存取該頁面時, 產生頁不存在故障, 系統通過該交換頁目錄項從交換空間中讀取頁面內容, 並將新的物理頁映射到故障區域.


-
/* Number of slots in the swap_info[] table; sys_swapon() refuses to use more. */
#define MAX_SWAPFILES 8

/* Layout of the swap_flags argument of sys_swapon(). */
#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
#define SWAP_FLAG_PRIO_MASK 0x7fff /* bits that carry the requested priority */
#define SWAP_FLAG_PRIO_SHIFT 0 /* right shift applied after masking */

union swap_header { 交換文件頭結構
struct
{
char reserved[PAGE_SIZE - 10]; 交換文件的可用頁位圖
char magic[10]; 交換文件的特徵串
} magic;
struct
{
char bootbits[1024]; /* Space for disklabel etc. */
unsigned int version; 子版本號
unsigned int last_page; 交換文件終止頁號
unsigned int nr_badpages; 壞頁數量
unsigned int padding[125];
unsigned int badpages[1]; 壞頁表
} info;
};

/* Byte offset of member x inside union swap_header. */
#define __swapoffset(x) ((unsigned long)&((union swap_header *)0)->x)
/*
 * Capacity of the bad-page table: the number of ints that fit between the
 * start of info.badpages and the signature at the end of the page.
 */
#define MAX_SWAP_BADPAGES \
	((__swapoffset(magic.magic) - __swapoffset(info.badpages)) / sizeof(int))

/* Values of swap_info_struct.flags. */
#define SWP_USED 1	/* the swap_info[] slot is taken */
#define SWP_WRITEOK 3	/* slot is taken and pages may be allocated from it */

#define SWAPFILE_CLUSTER 256	/* pages per allocation cluster in scan_swap_map() */
#define SWAP_CLUSTER_MAX 32

/* Values of swap_map[] entries. */
#define SWAP_MAP_MAX 0x7fff	/* counts at or above this are not decremented by __swap_free() */
#define SWAP_MAP_BAD 0x8000	/* page is unusable and must never be allocated */

struct swap_info_struct { 交換文件分配結構
unsigned int flags; 交換文件標誌
kdev_t swap_device; 交換設備, 當普通文件用作交換文件時, 其值為零
spinlock_t sdev_lock;
struct dentry * swap_file; 交換文件所在的目錄項
struct vfsmount *swap_vfsmnt; 交換文件所在的文件系統
unsigned short * swap_map; 交換頁分配表
unsigned int lowest_bit; 分配表的起始自由頁索引
unsigned int highest_bit; 分配表的終止自由頁索引
unsigned int cluster_next; 下一簇內分配點
unsigned int cluster_nr; 簇內自由頁面數
int prio; 交換文件優先級 /* swap priority */
int pages; 交換文件有效頁面數
unsigned long max; 頁面分配表尺寸
int next; /* next entry on swap list */
};

/*
* A swap entry has to fit into a "unsigned long", as
* the entry is hidden in the "index" field of the
* swapper address space.
*
* We have to move it here, since not every user of fs.h is including
* mm.h, but mm.h is including fs.h via sched.h :-/
*/
typedef struct {
unsigned long val; /* encoded entry; decoded with SWP_TYPE() and SWP_OFFSET() */
} swp_entry_t;

/*
 * Entry points into the list of active swap areas.  Both fields are
 * indices into swap_info[]; a negative value means "none".
 */
struct swap_list_t {
	int head;	/* head of priority-ordered swapfile list (highest priority first) */
	int next;	/* swapfile to be used next */
};

/* Encode and de-code a swap entry */
/* Index of the swap area: bits 1-6 of the entry. */
#define SWP_TYPE(x) (((x).val >> 1) & 0x3f)
/* Page number inside the swap area: bits 8 and up of the entry. */
#define SWP_OFFSET(x) ((x).val >> 8)
/* Build an entry from an area index and a page number; bit 0 is left clear. */
#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
/* Convert between a swap entry and the page-table entry value that stores it. */
#define pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
#define swp_entry_to_pte(x) ((pte_t) { (x).val })


/* List of active swap areas; {-1, -1} means no area is installed. */
struct swap_list_t swap_list = {-1, -1};

/* Descriptor table, one slot per swap area. */
struct swap_info_struct swap_info[MAX_SWAPFILES];

/* Number of swap_info[] slots handed out so far; only raised by sys_swapon(). */
unsigned int nr_swapfiles;

/* Number of swap pages currently available for allocation. */
int nr_swap_pages;

#define get_swap_page() __get_swap_page(1) 分配一個可用的交換頁目錄項

swp_entry_t __get_swap_page(unsigned short count)
{
struct swap_info_struct * p;
unsigned long offset;
swp_entry_t entry;
int type, wrapped = 0;

entry.val = 0; /* Out of memory */
if (count >= SWAP_MAP_MAX)
goto bad_count;
swap_list_lock();
type = swap_list.next; 取當前交換文件索引
if (type < 0)
goto out;
if (nr_swap_pages == 0) 如果可用的交換頁數為零
goto out;

while (1) {
p = &swap_info[type]; 取交換文件結構
if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) { 如果交換文件可寫
swap_device_lock(p);
offset = scan_swap_map(p, count); 在分配表中尋找可分配的交換頁索引
swap_device_unlock(p);
if (offset) {
entry = SWP_ENTRY(type,offset); 建立交換頁目錄項
type = swap_info[type].next; 取下一交換文件
if (type < 0 || 如果是最末的交換文件 或者 當前優先級不等於下一優先級
p->prio != swap_info[type].prio) {
swap_list.next = swap_list.head;
} else {
swap_list.next = type;
}
goto out;
}
}
type = p->next; 取下一交換文件結構
if (!wrapped) {
if (type < 0 || p->prio != swap_info[type].prio) {
type = swap_list.head;
wrapped = 1;
}
} else
if (type < 0)
goto out; /* out of swap space */
}
out:
swap_list_unlock();
return entry;

bad_count:
printk(KERN_ERR "get_swap_page: bad count %hd from %p\n",
count, __builtin_return_address(0));
goto out;
}
static inline int scan_swap_map(struct swap_info_struct *si, unsigned short count)
{
unsigned long offset;
/*
* We try to cluster swap pages by allocating them
* sequentially in swap. Once we've allocated
* SWAPFILE_CLUSTER pages this way, however, we resort to
* first-free allocation, starting a new cluster. This
* prevents us from scattering swap pages all over the entire
* swap partition, so that we reduce overall disk seek times
* between swap pages. -- sct */
if (si->cluster_nr) { 如果分配簇未分配完
while (si->cluster_next <= si->highest_bit) {
offset = si->cluster_next++;
if (si->swap_map[offset])
continue;
si->cluster_nr--;
goto got_page;
}
}
si->cluster_nr = SWAPFILE_CLUSTER;

/* try to find an empty (even not aligned) cluster. */
offset = si->lowest_bit; 取分配表起始索引
check_next_cluster:
if (offset+SWAPFILE_CLUSTER-1 <= si->highest_bit) 如果起始分配簇終止邊界小於分配表終止邊界
{
int nr;
for (nr = offset; nr < offset+SWAPFILE_CLUSTER; nr++) 掃瞄交換簇內頁面
if (si->swap_map[nr]) 如果交換簇內有非空頁面
{
offset = nr+1; 從下一頁面開始
goto check_next_cluster; 測試分配簇
}
/* We found a completly empty cluster, so start
* using it.
*/
goto got_page; 在分配表內找到128頁的分配簇
}
/* No luck, so now go finegrined as usual. -Andrea */
for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
if (si->swap_map[offset]) 線性查找一頁
continue;
got_page:
if (offset == si->lowest_bit) 如果起始自由頁被分配
si->lowest_bit++;
if (offset == si->highest_bit) 如果終止自由頁被分配
si->highest_bit--;
si->swap_map[offset] = count; 設置交換頁的引用計數
nr_swap_pages--;
si->cluster_next = offset+1; 下一分配頁
return offset;
}
return 0;
}

/* Drop one reference from a swap entry. */
#define swap_free(entry) __swap_free((entry), 1)

/*
 * Drop count references from the swap page addressed by entry.
 *
 * A zero entry is ignored.  Entries that name a nonexistent or unused
 * swap area, an offset beyond the area's map, a page whose count is
 * already zero, or a count larger than the current one are reported with
 * printk() and otherwise ignored.  When the count reaches zero the page
 * becomes free again: lowest_bit/highest_bit are widened to include it and
 * nr_swap_pages is incremented.  Slots whose count is SWAP_MAP_MAX or
 * above are left untouched.
 *
 * Caller has made sure that the swapdevice corresponding to entry
 * is still around or has not been recycled.
 */
void __swap_free(swp_entry_t entry, unsigned short count)
{
	struct swap_info_struct * p;
	unsigned long offset, type;

	if (!entry.val)
		goto out;

	type = SWP_TYPE(entry);		/* index of the swap area */
	if (type >= nr_swapfiles)
		goto bad_nofile;
	p = & swap_info[type];
	if (!(p->flags & SWP_USED))
		goto bad_device;
	offset = SWP_OFFSET(entry);	/* page number inside the area */
	if (offset >= p->max)		/* beyond the end of the map */
		goto bad_offset;
	if (!p->swap_map[offset])	/* reference count is already zero */
		goto bad_free;
	swap_list_lock();
	/* prefer this area for the next allocation if it outranks the current choice */
	if (p->prio > swap_info[swap_list.next].prio)
		swap_list.next = type;
	swap_device_lock(p);
	if (p->swap_map[offset] < SWAP_MAP_MAX) {
		if (p->swap_map[offset] < count)
			goto bad_count;
		if (!(p->swap_map[offset] -= count)) {
			/* last reference gone: the page is free again */
			if (offset < p->lowest_bit)
				p->lowest_bit = offset;
			if (offset > p->highest_bit)
				p->highest_bit = offset;
			nr_swap_pages++;
		}
	}
	swap_device_unlock(p);
	swap_list_unlock();
out:
	return;

bad_nofile:
	printk("swap_free: Trying to free nonexistent swap-page\n");
	goto out;
bad_device:
	printk("swap_free: Trying to free swap from unused swap-device\n");
	goto out;
bad_offset:
	printk("swap_free: offset exceeds max\n");
	goto out;
bad_free:
	printk("VM: Bad swap entry %08lx\n", entry.val);
	goto out;
bad_count:
	/* reached with both locks held, so release them before reporting */
	swap_device_unlock(p);
	swap_list_unlock();
	printk(KERN_ERR "VM: Bad count %hd current count %hd\n", count, p->swap_map[offset]);
	goto out;
}

/*
* Written 01/25/92 by Simmule Turner, heavily changed by Linus.
*
* The swapon system call
*/
asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
{
struct swap_info_struct * p;
struct nameidata nd;
struct inode * swap_inode;
unsigned int type;
int i, j, prev;
int error;
static int least_priority = 0;
union swap_header *swap_header = 0;
int swap_header_version;
int nr_good_pages = 0;
unsigned long maxpages;
int swapfilesize;
struct block_device *bdev = NULL;

if (!capable(CAP_SYS_ADMIN))
return -EPERM;
lock_kernel();
p = swap_info;
for (type = 0 ; type < nr_swapfiles ; type++,p++)
if (!(p->flags & SWP_USED))
break; 尋找未用的交換文件結構
error = -EPERM;
if (type >= MAX_SWAPFILES)
goto out;
if (type >= nr_swapfiles)
nr_swapfiles = type+1;
p->flags = SWP_USED;
p->swap_file = NULL;
p->swap_vfsmnt = NULL;
p->swap_device = 0;
p->swap_map = NULL;
p->lowest_bit = 0;
p->highest_bit = 0;
p->cluster_nr = 0;
p->sdev_lock = SPIN_LOCK_UNLOCKED;
p->max = 1;
p->next = -1;
if (swap_flags & SWAP_FLAG_PREFER) {
p->prio =
(swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT; 從系統調用標誌中取交換級別
} else {
p->prio = --least_priority; 自動分配優先級
}
error = user_path_walk(specialfile, &nd);
if (error)
goto bad_swap_2;

p->swap_file = nd.dentry; 取交換文件的目錄項
p->swap_vfsmnt = nd.mnt; 取交換文件所在的文件系統
swap_inode = nd.dentry->d_inode; 取交換文件的文件節點
error = -EINVAL;

if (S_ISBLK(swap_inode->i_mode)) { 如果交換文件是塊設備文件
kdev_t dev = swap_inode->i_rdev; 取塊設備號
struct block_device_operations *bdops;

p->swap_device = dev;
set_blocksize(dev, PAGE_SIZE); 將交換塊設備的塊長設為頁尺寸

bdev = swap_inode->i_bdev; 取塊設備結構
bdops = devfs_get_ops(devfs_get_handle_from_inode(swap_inode)); 取塊設備操作表
if (bdops) bdev->bd_op = bdops;

error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_SWAP); 打開塊設備
if (error)
goto bad_swap_2;
set_blocksize(dev, PAGE_SIZE);
error = -ENODEV;
if (!dev || (blk_size[MAJOR(dev)] &&
!blk_size[MAJOR(dev)][MINOR(dev)]))
goto bad_swap;
error = -EBUSY;
for (i = 0 ; i < nr_swapfiles ; i++) {
if (i == type)
continue;
if (dev == swap_info[ i ].swap_device)
goto bad_swap; 設備是否已經安裝
}
swapfilesize = 0;
if (blk_size[MAJOR(dev)])
swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
>> (PAGE_SHIFT - 10); 取交換設備的頁長
} else if (S_ISREG(swap_inode->i_mode)) { 如果交換文件是普通文件
error = -EBUSY;
for (i = 0 ; i < nr_swapfiles ; i++) {
if (i == type || !swap_info[ i ].swap_file)
continue;
if (swap_inode == swap_info[ i ].swap_file->d_inode)
goto bad_swap; 如果交換文件已被安裝
}
swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
} else
goto bad_swap;

swap_header = (void *) __get_free_page(GFP_USER);
if (!swap_header) {
printk("Unable to start swapping: out of memory :-)\n");
error = -ENOMEM;
goto bad_swap;
}

lock_page(virt_to_page(swap_header));
rw_swap_page_nolock(READ, SWP_ENTRY(type,0), (char *) swap_header, 1); 讀取交換文件的首頁

if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10)) 舊版本標識串
swap_header_version = 1;
else if (!memcmp("SWAPSPACE2",swap_header->magic.magic,10)) 新版本標識串
swap_header_version = 2;
else {
printk("Unable to find swap-space signature\n");
error = -EINVAL;
goto bad_swap;
}

switch (swap_header_version) {
case 1:
memset(((char *) swap_header)+PAGE_SIZE-10,0,10); 清除標識串內存
j = 0;
p->lowest_bit = 0;
p->highest_bit = 0;
for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
if (test_bit(i,(char *) swap_header)) { 逐位檢測交換頁位圖
if (!p->lowest_bit)
p->lowest_bit = i; 記錄最低非零位
p->highest_bit = i; 記錄最高非零位
p->max = i+1; 非零位外邊界
j++; 非零位計數
}
}
nr_good_pages = j;
p->swap_map = vmalloc(p->max * sizeof(short)); 分配交換分配位圖
if (!p->swap_map) {
error = -ENOMEM;
goto bad_swap;
}
for (i = 1 ; i < p->max ; i++) {
if (test_bit(i,(char *) swap_header))
p->swap_map[ i ] = 0; 建立交換分配位圖
else
p->swap_map[ i ] = SWAP_MAP_BAD;
}
break;

case 2:
/* Check the swap header's sub-version and the size of
the swap file and bad block lists */
if (swap_header->info.version != 1) { 檢查子版本
printk(KERN_WARNING
"Unable to handle swap header version %d\n",
swap_header->info.version);
error = -EINVAL;
goto bad_swap;
}

p->lowest_bit = 1;
p->highest_bit = swap_header->info.last_page - 1;
p->max = swap_header->info.last_page;

maxpages = SWP_OFFSET(SWP_ENTRY(0,~0UL)); 取交換頁目錄項所能表示的最大頁號
if (p->max >= maxpages)
p->max = maxpages-1;

error = -EINVAL;
if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
goto bad_swap;

/* OK, set up the swap map and apply the bad block list */
if (!(p->swap_map = vmalloc (p->max * sizeof(short)))) {
error = -ENOMEM;
goto bad_swap;
}

error = 0;
memset(p->swap_map, 0, p->max * sizeof(short));
for (i=0; iinfo.nr_badpages; i++) {
int page = swap_header->info.badpages[ i ];
if (page <= 0 || page >= swap_header->info.last_page)
error = -EINVAL;
else
p->swap_map[page] = SWAP_MAP_BAD;
}
nr_good_pages = swap_header->info.last_page -
swap_header->info.nr_badpages -
1 /* header page */;
if (error)
goto bad_swap;
}

if (swapfilesize && p->max > swapfilesize) {
printk(KERN_WARNING
"Swap area shorter than signature indicates\n");
error = -EINVAL;
goto bad_swap;
}
if (!nr_good_pages) {
printk(KERN_WARNING "Empty swap-file\n");
error = -EINVAL;
goto bad_swap;
}
p->swap_map[0] = SWAP_MAP_BAD;
p->flags = SWP_WRITEOK;
p->pages = nr_good_pages;
swap_list_lock();
nr_swap_pages += nr_good_pages;
printk(KERN_INFO "Adding Swap: %dk swap-space (priority %d)\n",
nr_good_pages<<(PAGE_SHIFT-10), p->prio);

/* insert swap space into swap_list: */
prev = -1;
for (i = swap_list.head; i >= 0; i = swap_info[ i ].next) {
if (p->prio >= swap_info[ i ].prio) { 交換鏈按交換優先值的降序排序
break;
}
prev = i;
}
p->next = i;
if (prev < 0) {
swap_list.head = swap_list.next = p - swap_info;
} else {
swap_info[prev].next = p - swap_info;
}
swap_list_unlock();
error = 0;
goto out;
bad_swap:
if (bdev)
blkdev_put(bdev, BDEV_SWAP);
bad_swap_2:
if (p->swap_map)
vfree(p->swap_map);
nd.mnt = p->swap_vfsmnt;
nd.dentry = p->swap_file;
p->swap_device = 0;
p->swap_file = NULL;
p->swap_vfsmnt = NULL;
p->swap_map = NULL;
p->flags = 0;
if (!(swap_flags & SWAP_FLAG_PREFER))
++least_priority;
path_release(&nd);
out:
if (swap_header)
free_page((long) swap_header);
unlock_kernel();
return error;
}
阅读(829) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~