Coder
分类: LINUX
2010-07-28 18:32:46
卸载文件系统
umount系统调用用来卸载一个文件系统。相应的sys_umount()服务例程定义如下:
---------------------------------------------------------------------
fs/namespace.c
/*
 * umount(2) system call.
 *
 * @name may designate a mount point as well as a block device, which is
 * what makes filesystems on unnamed block devices unmountable.  @flags
 * carries the forced-unmount option found on the other "big iron"
 * unixes; the API is kept identical to OSF/1 so that AMD does not break.
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL  @flags has bits outside MNT_FORCE | MNT_DETACH |
 *            MNT_EXPIRE | UMOUNT_NOFOLLOW, the looked-up dentry is not
 *            the root dentry of its mount, or check_mnt() fails
 *   -EPERM   the caller lacks CAP_SYS_ADMIN
 *   other    whatever user_path_at() or do_umount() returned
 */
SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	struct path path;
	int lookup_flags = 0;
	int retval;

	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
		return -EINVAL;

	/* Follow a trailing symlink unless the caller opted out. */
	if (!(flags & UMOUNT_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;

	retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
	if (retval)
		return retval;	/* nothing was looked up, nothing to drop */

	/*
	 * The checks run in the same order as before: mount root first,
	 * then check_mnt(), then the capability test.
	 */
	if (path.dentry != path.mnt->mnt_root || !check_mnt(path.mnt))
		retval = -EINVAL;
	else if (!capable(CAP_SYS_ADMIN))
		retval = -EPERM;
	else
		retval = do_umount(path.mnt, flags);

	/*
	 * Drop the two references by hand: path_put() must not be used
	 * here because it would clear mnt_expiry_mark.
	 */
	dput(path.dentry);
	mntput_no_expire(path.mnt);

	return retval;
}
---------------------------------------------------------------------
sys_umount()服务例程作用于两个参数:路径名(可以为挂载点或者是设备文件名)和一组标志。该函数执行如下操作:
1、检查标志字段,如果设置了MNT_FORCE、MNT_DETACH、MNT_EXPIRE或UMOUNT_NOFOLLOW之外的标志,则直接返回-EINVAL错误码。如果没有设置UMOUNT_NOFOLLOW标志,则在查找标志lookup_flags中设置LOOKUP_FOLLOW。UMOUNT_NOFOLLOW标志表示当目标是符号链接时不对其解引用,用来避免允许非特权用户卸载文件系统的set-user-ID-root程序所带来的安全问题。MNT_EXPIRE标志不能与MNT_FORCE或MNT_DETACH同时设置(这一点在do_umount()中检查)。
2、调用user_path_at()查找挂载点路径;该函数把查找的结果存放在path类型的局部变量path中。
3、如果查找的最终目录不是文件系统的挂载点,则设置retval返回码为-EINVAL并跳转到第7步。这步检查是通过验证path.dentry(路径的目录项) 和 path.mnt->mnt_root(路径的vfsmount对象的根目录项)是否相等来实现的。
4、如果要卸载的文件系统还没有安装在命名空间中,则设置retval返回码为-EINVAL并跳转到第7步(某些特殊文件系统没有安装点)。这种检查是通过在path.mnt上调用check_mnt()来实现的。
5、如果用户不具有卸载文件系统的特权,则设置retval返回码为-EPERM并跳转到第7步。
6、调用do_umount(path.mnt,
flags),传递给它的参数为path.mnt(vfsmount对象)和flags(一组标志)。该函数定义如下:
---------------------------------------------------------------------
fs/namespace.c
/*
 * do_umount - carry out an unmount request on @mnt
 * @mnt:   the mount to take down
 * @flags: MNT_FORCE / MNT_DETACH / MNT_EXPIRE bits from umount(2)
 *
 * Returns 0 on success, otherwise a negative errno: the result of
 * security_sb_umount() or do_remount_sb(), -EINVAL / -EBUSY / -EAGAIN
 * from the MNT_EXPIRE handling, or -EBUSY when the mount is still busy.
 */
static int do_umount(struct vfsmount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt_sb;
	LIST_HEAD(umount_list);
	int err;

	err = security_sb_umount(mnt, flags);
	if (err)
		return err;

	/*
	 * Userspace may ask for a mountpoint to be expired instead of
	 * being unmounted unconditionally.  The unmount only goes ahead if
	 *  (1) the mark is already set (mntput() clears the mark), and
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
	if (flags & MNT_EXPIRE) {
		if (mnt == current->fs->root.mnt)
			return -EINVAL;
		if (flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		if (atomic_read(&mnt->mnt_count) != 2)
			return -EBUSY;

		/* First request only sets the mark; a later one proceeds. */
		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

	/*
	 * Getting out of this mount may require aborting operations that
	 * themselves hold resources, so the filesystem must be given a
	 * chance to act.  In the Unix tradition of "that's tricky, let's
	 * do it in userspace", umount_begin might not complete on the
	 * first run because other tasks must return first; for now that
	 * is the mount program's problem.
	 */
	if ((flags & MNT_FORCE) && sb->s_op->umount_begin)
		sb->s_op->umount_begin(sb);

	/*
	 * There is no point in taking the lock for this test, although the
	 * test itself looks somewhat bogus.  In principle this could be
	 * treated as umount + switch to rootfs, with GC eventually taking
	 * care of the old vfsmount.  That would make sense if rootfs held
	 * a static /reboot binary that closes all descriptors and calls
	 * reboot(9): init(8) could then umount root and exec /reboot.
	 */
	if (mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root: just try to remount
		 * it read-only.  If it already is read-only, err is still
		 * the 0 left by security_sb_umount().
		 */
		down_write(&sb->s_umount);
		if (!(sb->s_flags & MS_RDONLY))
			err = do_remount_sb(sb, MS_RDONLY, NULL, 0);
		up_write(&sb->s_umount);
		return err;
	}

	down_write(&namespace_sem);
	spin_lock(&vfsmount_lock);
	event++;

	if (!(flags & MNT_DETACH))
		shrink_submounts(mnt, &umount_list);

	if ((flags & MNT_DETACH) || !propagate_mount_busy(mnt, 2)) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, 1, &umount_list);
		err = 0;
	} else {
		err = -EBUSY;
	}
	spin_unlock(&vfsmount_lock);

	if (err)
		security_sb_umount_busy(mnt);
	up_write(&namespace_sem);

	/* Final reference drops happen after both locks are released. */
	release_mounts(&umount_list);
	return err;
}
---------------------------------------------------------------------
该函数执行如下操作:
a.从vfsmount对象的mnt_sb字段检索超级块对象sb的地址。
b.如果设置了MNT_EXPIRE标志,即要标记挂载点“到期”,则若要卸载的文件系统是当前进程的根文件系统,或者同时设置了MNT_FORCE或MNT_DETACH,则返回-EINVAL;接着检查vfsmount的引用计数,若不为2,则返回-EBUSY——要卸载的文件系统在卸载的时候不能有其他引用者,这个2代表父vfsmount和sys_umount()对本对象的引用;最后用xchg()把vfsmount对象的mnt_expiry_mark字段置为1,如果该字段原来的值为0(即此前尚未被标记为到期),则返回-EAGAIN,只有在标记已经被设置的情况下才继续执行卸载。
c.如果用户要求强制卸载操作,则调用umount_begin超级块操作中断任何正在进行的安装操作sb->s_op->umount_begin(sb)。
d.如果要卸载的文件系统是当前进程的根文件系统,且用户并不要求真正地把它分离下来(即没有设置MNT_DETACH标志;这个标志仅仅标记挂载点为不能再访问,直到挂载不busy时才真正卸载),则在超级块不是只读的情况下调用do_remount_sb()把根文件系统重新安装为只读,然后终止并返回retval(即do_remount_sb()的返回值;若超级块本来就是只读的则返回0)。
e.为进行写操作而获取当前进程的namespace_sem读/写信号量和vfsmount_lock自旋锁。
f.如果用户设置了MNT_DETACH标志,或者propagate_mount_busy(mnt, 2)返回0(即该已安装文件系统不忙),则在mnt->mnt_list非空时调用umount_tree()卸载文件系统(及其所有子文件系统),并把retval置为0;否则retval为-EBUSY。umount_tree()函数定义如下:
---------------------------------------------------------------------
fs/namespace.c
/*
 * umount_tree - collect @mnt and the mounts below it on @kill and
 *               detach each of them
 * @mnt:       top of the tree being taken down
 * @propagate: when non-zero, propagate_umount() is run on @kill
 * @kill:      list (linked through mnt_hash) that receives the victims
 *
 * do_umount() calls this with vfsmount_lock held.  The references held
 * by the collected mounts are not dropped here; the caller hands @kill
 * to release_mounts() for that.
 */
void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
{
	struct vfsmount *victim;

	/* Walk the tree starting at @mnt itself and queue every entry. */
	victim = mnt;
	while (victim) {
		list_move(&victim->mnt_hash, kill);
		victim = next_mnt(victim, mnt);
	}

	if (propagate)
		propagate_umount(kill);

	list_for_each_entry(victim, kill, mnt_hash) {
		list_del_init(&victim->mnt_expire);
		list_del_init(&victim->mnt_list);

		/* Notify the namespace before the back-pointer is cleared. */
		__touch_mnt_namespace(victim->mnt_ns);
		victim->mnt_ns = NULL;

		list_del_init(&victim->mnt_child);

		/* A mount that is its own parent has no mountpoint to adjust. */
		if (victim->mnt_parent != victim) {
			victim->mnt_parent->mnt_ghosts++;
			victim->mnt_mountpoint->d_mounted--;
		}

		change_mnt_propagation(victim, MS_PRIVATE);
	}
}
---------------------------------------------------------------------
还是这个函数给人的感觉实在啊。它完成实际的底层的卸载文件系统的任务。首先它将mnt及其所有子挂载移动至kill链表中,也就是传递进去的umount_list;如果propagate非0,再调用propagate_umount(kill);然后将kill链表中所有vfsmount对象的一些字段设为无效状态。
g.释放vfsmount_lock自旋锁和当前进程的namespace_sem读/写信号量。
h.调用release_mounts(&umount_list),release_mounts()函数定义如下:
---------------------------------------------------------------------
fs/namespace.c
/*
 * release_mounts - drop the references held by every mount on @head
 * @head: list of vfsmounts linked through mnt_hash
 *
 * Each entry is unlinked from @head.  If it still points at a parent,
 * it is re-parented onto itself under vfsmount_lock, the parent's
 * mnt_ghosts count is decremented, and the mountpoint dentry and the
 * parent mount are released once the lock has been dropped.  Finally
 * the entry's own reference is put.
 */
void release_mounts(struct list_head *head)
{
	while (!list_empty(head)) {
		struct vfsmount *mnt =
			list_first_entry(head, struct vfsmount, mnt_hash);
		struct vfsmount *parent;
		struct dentry *mountpoint;

		list_del_init(&mnt->mnt_hash);

		/* Already its own parent: only the mount itself to put. */
		if (mnt->mnt_parent == mnt) {
			mntput(mnt);
			continue;
		}

		spin_lock(&vfsmount_lock);
		mountpoint = mnt->mnt_mountpoint;
		parent = mnt->mnt_parent;
		mnt->mnt_mountpoint = mnt->mnt_root;
		mnt->mnt_parent = mnt;
		parent->mnt_ghosts--;
		spin_unlock(&vfsmount_lock);

		/* The puts are done outside the spinlock, as in the original. */
		dput(mountpoint);
		mntput(parent);
		mntput(mnt);
	}
}
---------------------------------------------------------------------
vfsmount对象所占的内存空间最终在mntput()函数中释放。
7、减少相应文件系统根目录的目录项对象和已安装文件系统描述符的引用计数器值;这些计数器值由user_path_at()增加。
8、调用mntput_no_expire(path.mnt)
9、返回retval的值。