分类: LINUX
2013-12-11 11:05:11
每个文件系统都是一个独立的世界,统管着属于自己的文件们。如果你想进入这世界瞧一瞧,要准备一扇门(文件夹),然后施展魔法(mount命令),门的另一边便通向了新文件系统。
/*
 * mount(2) system call entry point.
 *
 * Copies the user-space arguments (filesystem type, mount point, device
 * name and option data) into kernel memory, passes them to do_mount(),
 * then releases them in reverse order of acquisition.
 *
 * Returns the result of do_mount(), or the negative error of the first
 * copy step that failed.
 */
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	int ret;
	char *kernel_type;
	char *kernel_dir;
	char *kernel_dev;
	unsigned long data_page;

	ret = copy_mount_string(type, &kernel_type);
	if (ret < 0)
		goto out_type;

	/* Mount-point path handed in by the application. */
	kernel_dir = getname(dir_name);
	if (IS_ERR(kernel_dir)) {
		ret = PTR_ERR(kernel_dir);
		goto out_dir;
	}

	/* Device path. */
	ret = copy_mount_string(dev_name, &kernel_dev);
	/**
	 * "copy_mount_string --> strndup_user --> memdup_user"
	 *
	 * Every user-to-kernel copy involves the same two steps:
	 * void *memdup_user(const void __user *src, size_t len)
	 * {
	 *	void *p;
	 *
	 *	p = kmalloc_track_caller(len, GFP_KERNEL); // allocate kernel memory
	 *	if (!p)
	 *		return ERR_PTR(-ENOMEM);
	 *
	 *	if (copy_from_user(p, src, len)) { // copy over from user space
	 *		kfree(p);
	 *		return ERR_PTR(-EFAULT);
	 *	}
	 *
	 *	return p;
	 * }
	 */
	if (ret < 0)
		goto out_dev;

	/* Obtain data_page (the mount options). */
	ret = copy_mount_options(data, &data_page);
	if (ret < 0)
		goto out_data;

	/* Everything above exists only to build the arguments of do_mount(). */
	ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
		(void *)data_page);

	free_page(data_page);
out_data:
	kfree(kernel_dev);
out_dev:
	putname(kernel_dir);
out_dir:
	kfree(kernel_type);
out_type:
	return ret;
}
mount正式开始:
long do_mount(char *dev_name, char *dir_name, char *type_page, unsigned long flags, void *data_page) { struct path path; int retval = 0; int mnt_flags = 0; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; /* Basic sanity checks */ if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) //字符串的长度不能超过一个页面大小 return -EINVAL; /** * void *memchr(const void *s, int c, size_t n) * { * const unsigned char *p = s; * while (n-- != 0) { * if ((unsigned char)c == *p++) { //找字符串的结尾 * return (void *)(p - 1); * } * } * return NULL; * } */ if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; /* ... and get the mountpoint */ retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); if (retval) return retval; retval = security_sb_mount(dev_name, &path, type_page, flags, data_page); if (retval) goto dput_out; /* Default to relatime unless overriden */ if (!(flags & MS_NOATIME)) mnt_flags |= MNT_RELATIME; /* Separate the per-mountpoint flags 安全模式,这个细抠起来比较复杂诶*/ if (flags & MS_NOSUID) mnt_flags |= MNT_NOSUID; if (flags & MS_NODEV) mnt_flags |= MNT_NODEV; if (flags & MS_NOEXEC) mnt_flags |= MNT_NOEXEC; if (flags & MS_NOATIME) mnt_flags |= MNT_NOATIME; if (flags & MS_NODIRATIME) mnt_flags |= MNT_NODIRATIME; if (flags & MS_STRICTATIME) mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | MS_STRICTATIME); if (flags & MS_REMOUNT) retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, data_page); //改变一个原已安装设备的安装方式 else if (flags & MS_BIND) retval = do_loopback(&path, dev_name, flags & MS_REC); //回接设备的处理 --> else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) retval = do_change_type(&path, flags); else if (flags & MS_MOVE) retval = do_move_mount(&path, dev_name); else retval = do_new_mount(&path, type_page, flags, mnt_flags, //增加新的mount点 dev_name, data_page); dput_out: 
path_put(&path); return retval; }
这里出现了个loopback,回接设备。何为回接设备,一种机制的体现,什么机制?将一个普通文件当作块设备用。(注:严格来说,do_loopback处理的是MS_BIND,即bind mount,与/dev/loop回接设备是两种不同的机制;下面演示的是后者。)
[jesse@localhost linux-3.0]$ ls /dev/loop
loop0 loop1 loop2 loop3 loop4 loop5 loop6 loop7
新建文件blkfile
[root@localhost test]# dd if=/dev/zero of=./blkfile bs=1k count=100 100+0 records in 100+0 records out 102400 bytes (102 kB) copied, 0.00113843 s, 89.9 MB/s
[root@localhost test]# ll total 100 -rw-rw-r--. 1 jesse jesse 102400 Nov 13 11:35 blkfile
选loop1回接
[root@localhost test]# losetup /dev/loop1 ./blkfile
直接格式化blkfile当然不行,not a block special device...
[root@localhost test]# mkfs -t ext2 ./blkfile 100 mke2fs 1.41.12 (17-May-2010) ./blkfile is not a block special device.
格式化loop1,其实操作的是blkfile
[root@localhost test]# mkfs -t ext2 /dev/loop1 100 mke2fs 1.41.12 (17-May-2010) Filesystem label= OS type: Linux Block size=1024 (log=0) Fragment size=1024 (log=0) Stride=0 blocks, Stripe width=0 blocks 16 inodes, 100 blocks 5 blocks (5.00%) reserved for the super user First data block=1 1 block group 8192 blocks per group, 8192 fragments per group 16 inodes per group Writing inode tables: done Writing superblocks and filesystem accounting information: done This filesystem will be automatically checked every 25 mounts or 180 days, whichever comes first. Use tune2fs -c or -i to override.
结论:看来回接至少能将一个普通文件当块设备来用。然后挂载到/mnt,哇!就这么有了一个自定义的文件系统。
然后,恍然大悟:
/backup/iso/rhel6.1.iso /rhel6 iso9660 loop 0 0
呵呵~
----------------------------------------------------------------------------------------------------------------------------------
了解了回接设备,我们返回正题。
//其实,我们更关心这个 retval = do_new_mount(&path, type_page, flags, mnt_flags, dev_name, data_page);
/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 *
 * Returns -EINVAL when no filesystem type is given, -EPERM without
 * CAP_SYS_ADMIN, the error encoded in the pointer returned by
 * do_kern_mount(), or the result of do_add_mount().
 */
static int do_new_mount(struct path *path, char *type, int flags,
			int mnt_flags, char *name, void *data)
{
	struct vfsmount *mnt;
	int err;

	if (!type)
		return -EINVAL;

	/* we need capabilities... */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Step 1: build the vfsmount for the requested filesystem. */
	mnt = do_kern_mount(type, flags, name, data);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	/* Step 2: attach it at the mount point; drop it again on failure. */
	err = do_add_mount(mnt, path, mnt_flags);
	if (err)
		mntput(mnt);
	return err;
}
-->
struct vfsmount * do_kern_mount(const char *fstype, int flags, const char *name, void *data) { struct file_system_type *type = get_fs_type(fstype); struct vfsmount *mnt; //把一个设备安装到一个目录结点 if (!type) return ERR_PTR(-ENODEV); mnt = vfs_kern_mount(type, flags, name, data); //返回特定fs的file_system_type-->
if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && !mnt->mnt_sb->s_subtype) mnt = fs_set_subtype(mnt, fstype); put_filesystem(type); return mnt; }
-->
struct vfsmount * vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { struct vfsmount *mnt; struct dentry *root; if (!type) return ERR_PTR(-ENODEV); mnt = alloc_vfsmnt(name); //申请 struct vfsmount 空间,然后便是填充 if (!mnt) return ERR_PTR(-ENOMEM); if (flags & MS_KERNMOUNT) mnt->mnt_flags = MNT_INTERNAL; root = mount_fs(type, flags, name, data); //--> if (IS_ERR(root)) { free_vfsmnt(mnt); return ERR_CAST(root); } mnt->mnt_root = root; mnt->mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; return mnt; }
-->
struct dentry * mount_fs(struct file_system_type *type, int flags, const char *name, void *data) { ... root = type->mount(type, flags, name, data); //回调具体文件系统的mount if (IS_ERR(root)) { error = PTR_ERR(root); goto out_free_secdata; } ... }
看到一个回调函数:
root = type->mount(type, flags, name, data);
内核支持的文件系统不止一种,每一种文件系统都该有个结构体来描述,即struct file_system_type:
回调函数调的是哪个文件系统的mount,这就取决于type->mount中的type。而type怎么来的,当然是find出来的咯。
struct vfsmount * do_kern_mount(const char *fstype, int flags, const char *name, void *data) { struct file_system_type *type = get_fs_type(fstype); | fs = __get_fs_type(name, len); | read_lock(&file_systems_lock); fs = *(find_filesystem(name, len)); //--> if (fs && !try_module_get(fs->owner)) //增加该fs引用计数 fs = NULL; read_unlock(&file_systems_lock); ... ... }
-->
/*
 * Walk the file_systems list looking for an entry whose name equals the
 * first len bytes of name.
 *
 * Returns the address of the link that points at the matching entry; when
 * nothing matches, the address of the terminating NULL link (so *result is
 * NULL).
 */
static struct file_system_type **find_filesystem(const char *name, unsigned len)
{
	struct file_system_type **p;

	/* Just a simple singly linked list chained through ->next. */
	for (p = &file_systems; *p; p = &(*p)->next)
		if (strlen((*p)->name) == len &&
		    strncmp((*p)->name, name, len) == 0)
			break;
	return p;
}
-->
新添加个文件系统
-- fs/ext2/super.c -- static int __init init_ext2_fs(void) { int err = init_ext2_xattr(); if (err) return err; err = init_inodecache(); if (err) goto out1; err = register_filesystem(&ext2_fs_type); //将自己挂载上队列 if (err) goto out; return 0; out: destroy_inodecache(); out1: exit_ext2_xattr(); return err; }
说到底,要看的其实是具体fs的mount函数。获得struct vfsmount之后,开始正式挂载上去。
mnt = do_kern_mount(type, flags, name, data); if (IS_ERR(mnt)) return PTR_ERR(mnt); err = do_add_mount(mnt, path, mnt_flags); //--> if (err) mntput(mnt);
-->
/* * add a mount into a namespace's mount tree */ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) { int err; mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); err = lock_mount(path); if (err) return err; err = -EINVAL; if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) goto unlock; /* Refuse the same filesystem on the same mount point */ err = -EBUSY; if (path->mnt->mnt_sb == newmnt->mnt_sb && path->mnt->mnt_root == path->dentry) goto unlock; err = -EINVAL; if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode)) goto unlock; newmnt->mnt_flags = mnt_flags; err = graft_tree(newmnt, path); //又接到了什么树 -->!!! unlock: unlock_mount(path); return err; }
一个新的文件系统就这么挂了上去,只是大概的流程,要具体到文件系统,就要从回调mount入手。
-- fs/ext2/super.c --
/*
 * Module init for ext2: calls init_ext2_xattr() and init_inodecache(),
 * then registers ext2_fs_type. On failure the labels below undo, in
 * reverse order, whatever had already succeeded.
 *
 * Returns 0 on success or the error of the step that failed.
 */
static int __init init_ext2_fs(void)
{
	int err = init_ext2_xattr();
	if (err)
		return err;
	err = init_inodecache();
	if (err)
		goto out1;
	/* The registered type is what carries the filesystem's mount callback. */
	err = register_filesystem(&ext2_fs_type);
	if (err)
		goto out;
	return 0;
out:
	/* Registration failed: tear down the inode cache, then fall through. */
	destroy_inodecache();
out1:
	exit_ext2_xattr();
	return err;
}
ext2的挂载:
/*
 * Mount entry point for ext2 (presumably installed as the ->mount callback
 * of ext2_fs_type -- confirm in fs/ext2/super.c). It only forwards its
 * arguments to mount_bdev(), adding ext2_fill_super as the callback
 * argument, and returns whatever mount_bdev() returns.
 */
static struct dentry *ext2_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
}
mount_bdev函数首先
/* open a block device by name */
struct block_device *bdev = blkdev_get_by_path();
然后
/* find or create a superblock */
struct super_block *s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
获得了超级块指针,如果s->s_root(也就是该文件系统的根目录dentry)为空的话,说明这个超级块还没有被填充过,那就要调用ext2_fill_super来填充ext2的超级块,这涉及到ext2文件系统格式的理解。
挂上文件系统后意味着什么?意味着struct file_operations,struct inode_operations挂上了具体的操作函数。
读取一个文件由vfs层的read调到ext2的read,又调到具体的磁盘驱动;又或者是嵌入式,先调到mtd的read,最后调到nand驱动的read,根据nand datasheet的时序控制gpio引脚,发送cmd,接收数据。:-)
最后,来个山寨小实验:
增加个自己的文件系统,ext2_jes。其实就是个ext2的副本。
第一步:
[jesse@localhost linux-2.6.39]$ vim fs/ext2/ acl.c ialloc.c super.c xattr_user.c acl.h inode.c symlink.c xip.c balloc.c ioctl.c xattr.c xip.h dir.c Kconfig xattr.h ext2.h Makefile xattr_security.c file.c namei.c xattr_trusted.c [jesse@localhost linux-2.6.39]$ vim include/linux/ ext2_fs.h ext2_fs_sb.h
以上部分果断山寨一份。
[jesse@localhost linux-2.6.39]$ vim fs/ext2_jes/ acl.c ialloc.c super.c xattr_user.c acl.h inode.c symlink.c xip.c balloc.c ioctl.c xattr.c xip.h dir.c Kconfig xattr.h ext2.h Makefile xattr_security.c file.c namei.c xattr_trusted.c [jesse@localhost linux-2.6.39]$ vim include/linux/ ext2_fs_jes.h ext2_fs_sb_jes.h
当然,好的山寨必将是彻底的,将fs/ext2_jes/里内容里的所有ext2改为ext2_jes。看来你需要一个脚本。
# Rename every lowercase "ext2" to "ext2_jes" in the copied sources.
# Run from the top of the kernel tree, and only once: a second pass would
# turn "ext2_jes" into "ext2_jes_jes".
for f in fs/ext2_jes/*.c
do
	# Write to a temporary file first; replace the original only if sed succeeded.
	sed 's/ext2/ext2_jes/g' "$f" > "${f}_tmp" && mv "${f}_tmp" "$f"
done
*.h文件同理。
要记得大写的EXT2也要改哦。
第二步:
修改Makefile和Kconfig,照猫画虎即可。
第三步:
编译过程中,可能会不断报错,原因大多是缺少函数,复制对应的然后粘贴即可。
最后还要改一下magic,起个吉利的8888作为新的magic值。
/* include/linux/magic.h */

/*
 * Each #define needs its own line; with both on one line the first macro
 * would expand to everything that follows it.
 */
#define EXT2_SUPER_MAGIC	0xEF53	/* magic of the stock ext2 */
#define EXT2_JES_SUPER_MAGIC	0x8888	/* magic chosen for the ext2_jes copy */
编译好后,app作个测试,用到之前说到的回接:
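#dd if=/dev/zero of=jesfs bs=1M count=1
#mkfs.ext2 jesfs
/**
 * 注意,这里的mkfs.ext2写入的仍然是ext2的magic(0xEF53),需要把镜像里的magic改掉:
 * 用vim以二进制方式打开jesfs(例如 vim -b jesfs 后执行 :%!xxd),
 * magic位于文件偏移0x438处(超级块起始于1024字节,s_magic在其中偏移56字节),
 * 按小端存放为字节 53 EF,将其改为 88 88(即0x8888),
 * 再执行 :%!xxd -r 还原为二进制后保存即可
 */
#mount -t ext2_jes -o loop ./jesfs /mnt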