Chinaunix首页 | 论坛 | 博客
  • 博客访问: 441418
  • 博文数量: 99
  • 博客积分: 65
  • 博客等级: 民兵
  • 技术积分: 1012
  • 用 户 组: 普通用户
  • 注册时间: 2012-04-20 16:30
个人简介

linux kernel 工程师

文章分类

全部博文(99)

文章存档

2018年(5)

2017年(12)

2016年(27)

2015年(10)

2014年(43)

2012年(2)

我的朋友

分类: LINUX

2016-12-26 18:17:02

SYSCALL_DEFINE5(mount, ...) 经过宏展开后,最终得到如下函数原型:
asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name, char __user * type, unsigned long flags, void __user * data)

以命令 mount -t ext3 /dev/sdc1 /mnt/tmp1 为例,各参数的对应关系如下:
dev_name: /dev/sdc1
dir_name: /mnt/tmp1
type: ext3

对于该情景,sys_mount最终要调用到mnt_set_mountpoint和commit_tree,将child mnt与parent mnt建立起联系。


点击(此处)折叠或打开

  /*
   * Innermost step: pastes "sys" onto name to form the function name and
   * expands the argument list through __SC_DECL##x.
   */
  1. #define __SYSCALL_DEFINEx(x, name, ...) \
  2.         asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))

  /* Forwarding wrapper: hands its arguments unchanged to __SYSCALL_DEFINEx. */
  3.     #define SYSCALL_DEFINEx(x, sname, ...) \
  4.         __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

  /*
   * Five-argument entry point: prefixes name with '_' so that
   * SYSCALL_DEFINE5(mount, ...) ends up declaring sys_mount.
   */
  5.  #define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)

点击(此处)折叠或打开

  /*
   * mount system call entry point.
   * Copies type, dir_name, dev_name and data from user space into kernel
   * memory, calls do_mount() with the kernel copies, and returns its result.
   * The out_* labels fall through one another, so a failure at any step
   * releases only what was acquired before that step.
   */
  1. SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
  2.             char __user *, type, unsigned long, flags, void __user *, data)
  3.     {
  4.         int ret;
  5.         char *kernel_type;
  6.         char *kernel_dir;
  7.         char *kernel_dev;
  8.         unsigned long data_page;
  9.        
  10.         /* copy the user-space type string into kernel space */
  11.         ret = copy_mount_string(type, &kernel_type);
  12.         if (ret < 0)
  13.             goto out_type;

  14.         /* copy the user-space dir_name into kernel space */
  15.         kernel_dir = getname(dir_name);
  16.         if (IS_ERR(kernel_dir)) {
  17.             ret = PTR_ERR(kernel_dir);
  18.             goto out_dir;
  19.         }

  20.      /* copy the user-space dev_name into kernel space */

  21.         ret = copy_mount_string(dev_name, &kernel_dev);
  22.         if (ret < 0)
  23.             goto out_dev;

  24.         /* copy the user-space data (mount options) into kernel space */
  25.         ret = copy_mount_options(data, &data_page);
  26.         if (ret < 0)
  27.             goto out_data;

  28.         ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
  29.             (void *) data_page);

          /* release the kernel copies in reverse order of acquisition */
  30.         free_page(data_page);
  31.     out_data:
  32.         kfree(kernel_dev);
  33.     out_dev:
  34.         putname(kernel_dir);
  35.     out_dir:
  36.         kfree(kernel_type);
  37.     out_type:
  38.         return ret;
  39.     }

点击(此处)折叠或打开

  /*
   * Resolve the mount point, translate the MS_* request flags into
   * per-mount MNT_* flags, and dispatch to the handler selected by the
   * remaining flags (remount, bind, propagation-type change, move, or a
   * new mount). Returns 0 or a negative errno from the step that failed.
   */
  1. long do_mount(char *dev_name, char *dir_name, char *type_page,
  2.              unsigned long flags, void *data_page)
  3.     {
  4.         struct path path;
  5.         int retval = 0;
  6.         int mnt_flags = 0;

  7.         /* Discard magic */
  8.         if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
  9.             flags &= ~MS_MGC_MSK;

  10.         /* Basic sanity checks */

  11.         if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
  12.             return -EINVAL;

          /* force NUL termination of the options page */
  13.         if (data_page)
  14.             ((char *)data_page)[PAGE_SIZE - 1] = 0;

  15.         /* ... and get the mountpoint */
  16.         /* dir_name is the mount point; kern_path looks up the dentry and vfsmount that correspond to dir_name.
  17.             According to the article author, kern_path essentially just calls do_path_lookup.
  18.          */
  19.         retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
  20.         if (retval)
  21.             return retval;

  22.         retval = security_sb_mount(dev_name, &path,
  23.                      type_page, flags, data_page);
  24.         if (retval)
  25.             goto dput_out;

  26.         /* Default to relatime unless overridden */
  27.         if (!(flags & MS_NOATIME))
  28.             mnt_flags |= MNT_RELATIME;

  29.         /* Separate the per-mountpoint flags */
  30.         if (flags & MS_NOSUID)
  31.             mnt_flags |= MNT_NOSUID;
  32.         if (flags & MS_NODEV)
  33.             mnt_flags |= MNT_NODEV;
  34.         if (flags & MS_NOEXEC)
  35.             mnt_flags |= MNT_NOEXEC;
  36.         if (flags & MS_NOATIME)
  37.             mnt_flags |= MNT_NOATIME;
  38.         if (flags & MS_NODIRATIME)
  39.             mnt_flags |= MNT_NODIRATIME;
  40.         if (flags & MS_STRICTATIME)
  41.             mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
  42.         if (flags & MS_RDONLY)
  43.             mnt_flags |= MNT_READONLY;

          /* clear the bits listed here from flags before dispatching */
  44.         flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
  45.              MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
  46.              MS_STRICTATIME);
  47.         /* this walkthrough only follows the ordinary mount case, i.e. do_new_mount */
  48.         if (flags & MS_REMOUNT)
  49.             retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
  50.                      data_page);
  51.         else if (flags & MS_BIND)
  52.             retval = do_loopback(&path, dev_name, flags & MS_REC);
  53.         else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
  54.             retval = do_change_type(&path, flags);
  55.         else if (flags & MS_MOVE)
  56.             retval = do_move_mount(&path, dev_name);
  57.         else
  58.             retval = do_new_mount(&path, type_page, flags, mnt_flags,
  59.                      dev_name, data_page);
  60.     dput_out:
  61.         path_put(&path);
  62.         return retval;
  63.     }

点击(此处)折叠或打开

  1. /*
  2.  * create a new mount for userspace and request it to be added into the
  3.  * namespace's tree
  4.  */
  /*
   * Returns -EINVAL when type is NULL, -EPERM without CAP_SYS_ADMIN, the
   * error encoded in do_kern_mount()'s result, or otherwise whatever
   * do_add_mount() returns.
   */
  5. static int do_new_mount(struct path *path, char *type, int flags,
  6.             int mnt_flags, char *name, void *data)
  7. {
  8.     struct vfsmount *mnt;

  9.     if (!type)
  10.         return -EINVAL;

  11.     /* we need capabilities... */
  12.     if (!capable(CAP_SYS_ADMIN))
  13.         return -EPERM;

  14.     lock_kernel();
  15.     /* per the article author: do_kern_mount reads the device's super block, creates the device's root dentry, and creates a vfsmount structure */
  16.     mnt = do_kern_mount(type, flags, name, data);
  17.     unlock_kernel();
  18.     if (IS_ERR(mnt))
  19.         return PTR_ERR(mnt);
  20.     /* do_add_mount links the mnt just created for the device to the mount point; fslist is passed as NULL */
  21.     return do_add_mount(mnt, path, mnt_flags, NULL);
  22. }

点击(此处)折叠或打开

  1. /*
  2.  * add a mount into a namespace's mount tree
  3.  * - provide the option of adding the new mount to an expiration list
  4.  */
  5. int do_add_mount(struct vfsmount *newmnt, struct path *path,
  6.          int mnt_flags, struct list_head *fslist)
  7. {
  8.     int err;

      /* strip flag bits that callers are not allowed to request */
  9.     mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);

  10.     down_write(&namespace_sem);

    /* Guard against someone else mounting on this mount point while we
       waited for the semaphore. If another filesystem got mounted here
       first, its root dentry becomes our new mount point, so follow_down
       is called repeatedly until the root dentry of the most recently
       mounted filesystem is reached. */

  1.     /* Something was mounted here while we slept */
  2.     while (d_mountpoint(path->dentry) &&
  3.      follow_down(path))
  4.         ;
  5.     err = -EINVAL;
  6.     if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
  7.         goto unlock;

     /* prevent the same device from being mounted twice on the same mount point */

  1.     /* Refuse the same filesystem on the same mount point */
  2.     err = -EBUSY;
  3.     if (path->mnt->mnt_sb == newmnt->mnt_sb &&
  4.      path->mnt->mnt_root == path->dentry)
  5.         goto unlock;

    /* the root of the new filesystem must not be a symbolic link */

  1.     err = -EINVAL;
  2.     if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
  3.         goto unlock;
  4.     /* graft_tree links newmnt (the child mount) to its parent mount */
  5.     newmnt->mnt_flags = mnt_flags;
  6.     if ((err = graft_tree(newmnt, path)))
  7.         goto unlock;

      /* when called from do_new_mount, fslist is NULL */

  1.     if (fslist) /* add to the specified expiration list */
  2.         list_add_tail(&newmnt->mnt_expire, fslist);

  3.     up_write(&namespace_sem);
  4.     return 0;

      /* error path: release the semaphore and call mntput() on newmnt */
  5. unlock:
  6.     up_write(&namespace_sem);
  7.     mntput(newmnt);
  8.     return err;
  9. }

点击(此处)折叠或打开

  /*
   * Validate that mnt may be attached at path and, if so, attach it via
   * attach_recursive_mnt(). NOTE: the article elides part of this function
   * (the "..." line below); the code that takes i_mutex and the use of the
   * out_unlock label are not shown in this listing.
   */
  1. static int graft_tree(struct vfsmount *mnt, struct path *path)
  2. {
  3.     int err;
  4.     /* check whether the child filesystem may be mounted; per the article author, some filesystems such as pipefs and the block-device filesystem cannot be */
  5.     if (mnt->mnt_sb->s_flags & MS_NOUSER)
  6.         return -EINVAL;
  7.     /* the mount point and the filesystem's root must be of the same kind: both directories or both non-directories */
  8.     if (S_ISDIR(path->dentry->d_inode->i_mode) !=
  9.      S_ISDIR(mnt->mnt_root->d_inode->i_mode))
  10.         return -ENOTDIR;

  11. ...
  12.     err = -ENOENT;
  13.     /* call attach_recursive_mnt to perform the actual mount; skipped (err stays -ENOENT) when the mount-point dentry is unlinked */
  14.     if (!d_unlinked(path->dentry))
  15.         err = attach_recursive_mnt(mnt, path, NULL);
  16. out_unlock:
  17.     mutex_unlock(&path->dentry->d_inode->i_mutex);
  18.     if (!err)
  19.         security_sb_post_addmount(mnt, path);
  20.     return err;
  21. }

点击(此处)折叠或打开

  /*
   * Attach source_mnt at path. When parent_path is non-NULL the mount is
   * detached from parent_path and re-attached at path; when it is NULL the
   * mount point is set and the tree committed. Mounts that propagate_mnt()
   * placed on tree_list are committed afterwards. Returns 0 on success or
   * the error from invent_group_ids()/propagate_mnt().
   */
  1. static int attach_recursive_mnt(struct vfsmount *source_mnt,
  2.             struct path *path, struct path *parent_path)
  3. {
  4.     LIST_HEAD(tree_list);
  5.     struct vfsmount *dest_mnt = path->mnt;
  6.     struct dentry *dest_dentry = path->dentry;
  7.     struct vfsmount *child, *p;
  8.     int err;

  9.     if (IS_MNT_SHARED(dest_mnt)) {
  10.         err = invent_group_ids(source_mnt, true);
  11.         if (err)
  12.             goto out;
  13.     }
  14.     err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
  15.     if (err)
  16.         goto out_cleanup_ids;

  17.     spin_lock(&vfsmount_lock);

      /* a shared destination makes every mount in the source tree shared */
  18.     if (IS_MNT_SHARED(dest_mnt)) {
  19.         for (p = source_mnt; p; p = next_mnt(p, source_mnt))
  20.             set_mnt_shared(p);
  21.     }
  22.     /* graft_tree passes parent_path == NULL */
  23.     if (parent_path) {
  24.         detach_mnt(source_mnt, parent_path);
  25.         attach_mnt(source_mnt, path);
  26.         touch_mnt_namespace(parent_path->mnt->mnt_ns);
  27.     } else {
  28.         /* this is the branch taken for a new mount */
  29.         mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
  30.         commit_tree(source_mnt);
  31.     }

      /* commit each mount that propagate_mnt() queued on tree_list */
  32.     list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
  33.         list_del_init(&child->mnt_hash);
  34.         commit_tree(child);
  35.     }
  36.     spin_unlock(&vfsmount_lock);
  37.     return 0;

  38.  out_cleanup_ids:
  39.     if (IS_MNT_SHARED(dest_mnt))
  40.         cleanup_group_ids(source_mnt, NULL);
  41.  out:
  42.     return err;
  43. }
mnt_set_mountpoint是关联parent mnt与child_mnt的关键函数,建立如下图1的关系

                                                            图1

点击(此处)折叠或打开

  /*
   * Record mnt as the parent of child_mnt and dentry as child_mnt's mount
   * point, then bump dentry->d_mounted.
   * NOTE(review): mntget()/dget() presumably take a reference on their
   * argument and return it - confirm against their definitions.
   */
  1. void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
  2.             struct vfsmount *child_mnt)
  3. {
  4.     child_mnt->mnt_parent = mntget(mnt);
  5.     child_mnt->mnt_mountpoint = dget(dentry);
  6.     dentry->d_mounted++;
  7. }
commit_tree函数首先设置child mnt的命名空间,并将child mnt通过mnt_list链入命名空间的list.



点击(此处)折叠或打开

  1. /*
  2.  * the caller must hold vfsmount_lock
  3.  */
  /*
   * Make mnt visible: assign the parent's namespace to every mount on
   * mnt's mnt_list, splice that list into the namespace's list, add mnt to
   * mount_hashtable, and link it under its parent's mnt_mounts.
   */
  4. static void commit_tree(struct vfsmount *mnt)
  5. {
  6.     struct vfsmount *parent = mnt->mnt_parent;
  7.     struct vfsmount *m;
  8.     LIST_HEAD(head);
  9.     struct mnt_namespace *n = parent->mnt_ns;

  10.     BUG_ON(parent == mnt);

  11.     list_add_tail(&head, &mnt->mnt_list);
  12.     list_for_each_entry(m, &head, mnt_list)
  13.         m->mnt_ns = n;    /* the child mount belongs to the parent mount's namespace */
  14.    
  15.     /* list_splice joins the two lists; in effect the child mnt is linked through mnt_list into the list of the namespace the parent mnt belongs to */
  16.     list_splice(&head, n->list.prev);

    /* add the child mount to mount_hashtable; the chain is selected by hash(parent, mnt->mnt_mountpoint), i.e. from the parent mnt and the mount-point dentry */

  1.     list_add_tail(&mnt->mnt_hash, mount_hashtable +
  2.                 hash(parent, mnt->mnt_mountpoint));
  3. /* the child mount is appended, through its mnt_child link, to the tail of the list headed by the parent's mnt_mounts */
  4.     list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
  5.     touch_mnt_namespace(n);
  6. }






阅读(1080) | 评论(0) | 转发(0) |
0

上一篇:link_path_walk

下一篇:vfs_kern_mount

给主人留下些什么吧!~~