Linux内核源码阅读之打开文件篇

Linux中打开文件是通过open系统调用实现的，该系统调用内部调用了do_sys_open()函数来完成打开功能，所以下面主要分析do_sys_open()函数。首先看下open系统调用的入口函数，再具体看do_sys_open()函数：

[cpp]view plaincopy
				
				/*
				 * open(2) system-call entry point.
				 *
				 * ORs O_LARGEFILE into flags when force_o_largefile() is true, then
				 * delegates to do_sys_open() with AT_FDCWD as the directory fd and
				 * returns whatever do_sys_open() returned.
				 */
				SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
				{
				    long result;

				    if (force_o_largefile()) {
				        flags |= O_LARGEFILE;
				    }

				    result = do_sys_open(AT_FDCWD, filename, flags, mode);

				    /* avoid REGPARM breakage on x86: */
				    asmlinkage_protect(3, result, filename, flags, mode);

				    return result;
				}
			
				/*
				 * Open `filename` relative to directory fd `dfd`.
				 *
				 * Returns the new file descriptor on success; otherwise the negative
				 * error taken from getname(), get_unused_fd_flags() or do_filp_open().
				 */
				long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
				{
				    struct file *filp;
				    /* Kernel-side copy of the name (per the article, getname() allocates
				     * the buffer and copies the string in from user space). */
				    char *name = getname(filename);
				    int fd = PTR_ERR(name);

				    if (IS_ERR(name))
				        return fd;

				    /* Reserve a free descriptor before doing the actual open. */
				    fd = get_unused_fd_flags(flags);
				    if (fd < 0)
				        goto out_putname;

				    /* The real work of opening the file happens in do_filp_open(). */
				    filp = do_filp_open(dfd, name, flags, mode, 0);
				    if (IS_ERR(filp)) {
				        /* Open failed: give the descriptor back and report the error. */
				        put_unused_fd(fd);
				        fd = PTR_ERR(filp);
				        goto out_putname;
				    }

				    /* Open succeeded: call fsnotify_open() on the dentry, then install
				     * the file pointer under the reserved descriptor. */
				    fsnotify_open(filp->f_path.dentry);
				    fd_install(fd, filp);

				out_putname:
				    /* Release the kernel copy of the file name on every path past getname(). */
				    putname(name);
				    return fd;
				}

接下来进入真正实现打开功能的函数do_filp_open()：

[cpp]view plaincopy
				
				/*
				 * Open the file named by `pathname` (relative to `dfd`), creating it
				 * when O_CREAT is set and it does not exist yet. Returns the resulting
				 * struct file, or an ERR_PTR() carrying the error code on failure.
				 *
				 * NOTE(review): this listing is an abridged excerpt. The locals
				 * (filp, nd, error, path, dir, will_truncate, count) are used without a
				 * visible declaration, and the code that jumps to `do_link` is omitted.
				 */
			
				struct file *do_filp_open(int dfd, const char *pathname,int open_flag, int mode, int acc_mode)  
			
				{  
			
				    /* 
			
				    * ... several variable declarations omitted by the article 
			
				    */  
			
				    /* Derive the namei-style flag word from the open(2) flags. The article describes this as "flag + 1"; presumably only the access-mode bits are adjusted -- confirm against open_to_namei_flags(). */  
			
				    int flag = open_to_namei_flags(open_flag);  
			
				    int force_reval = 0;  
			
				    /* Whenever __O_SYNC is set, force O_DSYNC on as well. The article says this guards against malicious programs (presumably ones setting only __O_SYNC) -- confirm. */  
			
				    if (open_flag & __O_SYNC)  
			
				        open_flag |= O_DSYNC;  
			
				    /* No access mode supplied by the caller: derive it from the open flags. */  
			
				    if (!acc_mode)  
			
				        acc_mode = MAY_OPEN | ACC_MODE(open_flag);  
			
				    /* O_TRUNC requires write permission. */  
			
				    if (flag & O_TRUNC)  
			
				        acc_mode |= MAY_WRITE;  
			
				    /* O_APPEND adds MAY_APPEND to the requested access. */  
			
				    if (flag & O_APPEND)  
			
				        acc_mode |= MAY_APPEND;  
			
				    /* Simple case: O_CREAT is not set, so nothing has to be created. */  
			
				    if (!(flag & O_CREAT)) {  
			
				        /* Obtain an empty struct file; NULL is reported as -ENFILE. */  
			
				        filp = get_empty_filp();  
			
				        if (filp == NULL)  
			
				            return ERR_PTR(-ENFILE);  
			
				        /* Fill in the open intent carried inside the nameidata. */  
			
				        nd.intent.open.file = filp;  
			
				        filp->f_flags = open_flag;  
			
				        nd.intent.open.flags = flag;  
			
				        nd.intent.open.create_mode = 0;  
			
				        /* Resolve the path. Per the article: before the kernel can access a file it must find it, and in the VFS this is done by path_lookup or path_lookup_open, which turn the user-supplied path string into a dentry and set up the matching inode and file structures; user space gets back a descriptor referring to the file and uses it to reach them. Here the lookup goes through do_path_lookup() with LOOKUP_OPEN added to the flags. */  
			
				        error = do_path_lookup(dfd, pathname,lookup_flags(flag)|LOOKUP_OPEN, &nd);  
			
				        if (IS_ERR(nd.intent.open.file)) {  
			
				                if (error == 0) {  
			
				                         error = PTR_ERR(nd.intent.open.file);  
			
				                         /* Drop the reference held by nd.path (the article: dentry and vfsmount counts). */  
			
				                         path_put(&nd.path);  
			
				                }  
			
				        } else if (error)  
			
				            /* Lookup failed while the intent file is still valid: release the open intent. */  
			
				            release_open_intent(&nd);  
			
				        if (error)  
			
				            return ERR_PTR(error);  
			
				        goto ok;  
			
				    }  
			
				    /* From here on O_CREAT is set: the file may have to be created. */  
			
				reval:  
			
				    /* Per the article: path_init() prepares the lookup and path_walk() performs it; together they resolve a path string to its dentry. LOOKUP_PARENT is passed, so presumably the walk stops at the parent of the last component -- confirm. */  
			
				    error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);  
			
				    if (error)  
			
				        return ERR_PTR(error);  
			
				    if (force_reval)  
			
				        nd.flags |= LOOKUP_REVAL;  
			
				    /* The article describes path_walk() as the central name-resolution routine (it says "NFS"; presumably VFS is meant): a loop that advances through the path one directory level at a time until the end or a failure, building dentries and inodes along the way. */  
			
				    error = path_walk(pathname, &nd);  
			
				    if (error) {  
			
				        if (nd.root.mnt)  
			
				            path_put(&nd.root);  
			
				        return ERR_PTR(error);  
			
				    }  
			
				    if (unlikely(!audit_dummy_context()))  
			
				        /* Pass the looked-up dentry to the audit code (the article: save the inode information). */  
			
				        audit_inode(pathname, nd.path.dentry);  
			
				    /* Check the last component: it must be a normal name with no non-NUL byte following it, otherwise fail with -EISDIR. */  
			
				    error = -EISDIR;  
			
				    if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])  
			
				        goto exit_parent;  
			
				    error = -ENFILE;  
			
				    /* Obtain an empty struct file; NULL is reported as -ENFILE. */  
			
				    filp = get_empty_filp();  
			
				    if (filp == NULL)  
			
				        goto exit_parent;  
			
				    /* Fill in the open intent, this time with the caller's creation mode. */  
			
				    nd.intent.open.file = filp;  
			
				    filp->f_flags = open_flag;  
			
				    nd.intent.open.flags = flag;  
			
				    nd.intent.open.create_mode = mode;  
			
				    dir = nd.path.dentry;  
			
				    nd.flags &= ~LOOKUP_PARENT;  
			
				    nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;  
			
				    if (flag & O_EXCL)  
			
				        nd.flags |= LOOKUP_EXCL;  
			
				    mutex_lock(&dir->d_inode->i_mutex);  
			
				    /* With dir's i_mutex held, look up the dentry for nd's last component (the article: in the hash table). */  
			
				    path.dentry = lookup_hash(&nd);  
			
				    path.mnt = nd.path.mnt;  
			
				do_last:  
			
				    error = PTR_ERR(path.dentry);  
			
				    if (IS_ERR(path.dentry)) {  
			
				        mutex_unlock(&dir->d_inode->i_mutex);  
			
				        goto exit;  
			
				    }  
			
				    if (IS_ERR(nd.intent.open.file)) {  
			
				        error = PTR_ERR(nd.intent.open.file);  
			
				        goto exit_mutex_unlock;  
			
				    }  
			
				    /* A dentry without an inode means the file does not exist yet, so create it. */  
			
				    if (!path.dentry->d_inode) {  
			
				        /* Creating requires write access to the mount. */  
			
				        error = mnt_want_write(nd.path.mnt);  
			
				        if (error)  
			
				            goto exit_mutex_unlock;  
			
				        /* Create using the namei-style flags. NOTE(review): i_mutex is not unlocked in this function on this path -- presumably __open_namei_create() releases it; confirm. */  
			
				        error = __open_namei_create(&nd, &path, flag, mode);  
			
				        if (error) {  
			
				            mnt_drop_write(nd.path.mnt);  
			
				            goto exit;  
			
				        }  
			
				        /* Obtain the struct file that corresponds to the nameidata. */  
			
				        filp = nameidata_to_filp(&nd);  
			
				        /* Give up the write access taken above. */  
			
				        mnt_drop_write(nd.path.mnt);  
			
				        if (nd.root.mnt)  
			
				            /* Drop the reference on nd.root. */  
			
				            path_put(&nd.root);  
			
				        if (!IS_ERR(filp)) {  
			
				            error = ima_file_check(filp, acc_mode);  
			
				            if (error) {  
			
				                fput(filp);  
			
				                filp = ERR_PTR(error);  
			
				            }  
			
				        }  
			
				        return filp;  
			
				    }  
			
				    /* The file to open already exists. */  
			
				    mutex_unlock(&dir->d_inode->i_mutex);  
			
				    /* Pass the existing dentry to the audit code. */  
			
				    audit_inode(pathname, path.dentry);  
			
				    /* Several flag checks are omitted here by the article; the `do_link` label below is only reachable from that omitted code. */  
			
				    /* Transfer the resolved path into the nameidata. */  
			
				    path_to_nameidata(&path, &nd);  
			
				    error = -EISDIR;  
			
				    /* Directories are rejected with -EISDIR on this (O_CREAT) path. */  
			
				    if (S_ISDIR(path.dentry->d_inode->i_mode))  
			
				        goto exit;  
			
				ok:  
			
				    /* Determine whether this open will truncate the file. */  
			
				    will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode);  
			
				    if (will_truncate) {  
			
				    /* Truncation needs write access to the mount. */  
			
				        error = mnt_want_write(nd.path.mnt);  
			
				        if (error)  
			
				            goto exit;  
			
				    }  
			
				    // may_open: permission checks (the article also attributes open/truncate handling to it -- confirm)  
			
				    error = may_open(&nd.path, acc_mode, flag);  
			
				    if (error) {  
			
				        if (will_truncate)  
			
				            mnt_drop_write(nd.path.mnt);  
			
				        goto exit;  
			
				    }  
			
				    filp = nameidata_to_filp(&nd);  
			
				    if (!IS_ERR(filp)) {  
			
				        error = ima_file_check(filp, acc_mode);  
			
				        if (error) {  
			
				            fput(filp);  
			
				            filp = ERR_PTR(error);  
			
				        }  
			
				    }  
			
				    if (!IS_ERR(filp)) {  
			
				        if (acc_mode & MAY_WRITE)  
			
				            vfs_dq_init(nd.path.dentry->d_inode);  
			
				        if (will_truncate) {  
			
				            // perform the truncation; on failure drop the file and return the error  
			
				            error = handle_truncate(&nd.path);  
			
				            if (error) {  
			
				            fput(filp);  
			
				            filp = ERR_PTR(error);  
			
				            }  
			
				        }  
			
				    }  
			
				    // release the write access that was taken only for truncation  
			
				    if (will_truncate)  
			
				        mnt_drop_write(nd.path.mnt);  
			
				    if (nd.root.mnt)  
			
				        path_put(&nd.root);  
			
				    return filp;  
			
				exit_mutex_unlock:  
			
				    mutex_unlock(&dir->d_inode->i_mutex);  
			
				exit_dput:  
			
				    path_put_conditional(&path, &nd);  
			
				exit:  
			
				    if (!IS_ERR(nd.intent.open.file))  
			
				        release_open_intent(&nd);  
			
				exit_parent:  
			
				    if (nd.root.mnt)  
			
				        path_put(&nd.root);  
			
				    path_put(&nd.path);  
			
				    return ERR_PTR(error);  
			
				// symlink handling: when following links is allowed, resolve the link target by hand  
			
				do_link:  
			
				    error = -ELOOP;  
			
				    if (flag & O_NOFOLLOW)  
			
				        // following links is not allowed: fail with -ELOOP  
			
				        goto exit_dput;  
			
				    /* The code below manually finds the dentry of the file the link points to. */  
			
				    // set LOOKUP_PARENT for the link walk  
			
				    nd.flags |= LOOKUP_PARENT;  
			
				    // security hook: check whether following this link is permitted  
			
				    error = security_inode_follow_link(path.dentry, &nd);  
			
				    if (error)  
			
				        goto exit_dput;  
			
				    // follow the symbolic link  
			
				    error = __do_follow_link(&path, &nd);  
			
				    path_put(&path);  
			
				    if (error) {  
			
				        release_open_intent(&nd);  
			
				        if (nd.root.mnt)  
			
				            path_put(&nd.root);  
			
				        if (error == -ESTALE && !force_reval) {  
			
				            force_reval = 1;  
			
				            goto reval;  
			
				        }  
			
				        return ERR_PTR(error);  
			
				    }  
			
				    nd.flags &= ~LOOKUP_PARENT;  
			
				    // inspect the type of the last path component after following the link  
			
				    if (nd.last_type == LAST_BIND)  
			
				        goto ok;  
			
				    error = -EISDIR;  
			
				    if (nd.last_type != LAST_NORM)  
			
				        goto exit;  
			
				    if (nd.last.name[nd.last.len]) {  
			
				        __putname(nd.last.name);  
			
				        goto exit;  
			
				    }  
			
				    error = -ELOOP;  
			
				    // link loop guard: give up with -ELOOP once count has reached 32  
			
				    if (count++==32) {  
			
				        __putname(nd.last.name);  
			
				        goto exit;  
			
				    }  
			
				    dir = nd.path.dentry;  
			
				    mutex_lock(&dir->d_inode->i_mutex);  
			
				    // refresh path's dentry and mount from the new nameidata state  
			
				    path.dentry = lookup_hash(&nd);  
			
				    path.mnt = nd.path.mnt;  
			
				    __putname(nd.last.name);  
			
				    goto do_last;  
			
				}

分析完上述主要函数以后，我们来看一下整个打开流程是如何做到的：
在内核中要打开一个文件，首先应该找到这个文件，而查找文件的过程在vfs里面是由do_path_lookup或者path_lookup_open函数来完成的。这两个函数将用户传进来的字符串表示的文件路径转换成一个dentry结构，并建立好相应的inode和file结构，将指向file的描述符返回用户。用户随后通过文件描述符，来访问这些数据结构。
基本函数流程及调用方式如下所示：
打开过程首先是open系统调用访问SYSCALL_DEFINE3函数，然后调用do_sys_open 函数完成主要功能，再调用函数do_filp_open完成主要的打开功能，下面详细看下do_filp_open中调用的do_path_lookup主要过程：

[cpp]view plaincopy
				
				/*
				 * Resolve `name` (relative to `dfd`) into `nd`.
				 *
				 * Calls path_init() first and, only if that returned 0, path_walk().
				 * Returns path_init()'s result when it is non-zero, otherwise the
				 * result of path_walk().
				 *
				 * Fixes over the original listing: the `staic` typo is corrected to
				 * `static`, and the function now actually returns `retval` (the
				 * original fell off the end of a non-void function).
				 */
				static int do_path_lookup(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
				{
				    /*
				     * Per the article: path_init() points the walk at the root directory
				     * for absolute paths or at the current working directory for relative
				     * ones, setting up the dentry/inode state the lookup starts from.
				     */
				    int retval = path_init(dfd, name, flags, nd);

				    /*
				     * Per the article: path_walk() visits every component of the path,
				     * i.e. each part separated by "/", until it reaches the file that
				     * `name` refers to.
				     */
				    if (!retval)
				        retval = path_walk(name, nd);

				    return retval;
				}

我们进一步看看path_walk

[cpp]view plaincopy
				
				/*
				 * Walk `name` using the lookup state in `nd`.
				 *
				 * Thin wrapper: all of the work is done by link_path_walk(), whose
				 * return value is passed through unchanged.
				 */
				int path_walk(const char *name, struct nameidata *nd)
				{
				    int status = link_path_walk(name, nd);

				    return status;
				}

link_path_walk的主要工作是由其内部函数__link_path_walk来完成的：
result = __link_path_walk(name,nd)

至此我们转向最重要的代码__link_path_walk，该函数把传进来的字符串name，也就是用户指定的路径，按路径分隔符分解成一系列小的分量（component）。比如用户说，我要找/path/to/dest这个文件，那么文件系统就会按path、to、dest一个一个来找，直到最后一个分量是文件或者查找完成。它查找的时候，会先从path_init初始化过的根路径出发去找第一个分量，也就是path；然后用path的dentry->d_inode去找to，这样循环到最后一个。注意，内核会缓存找到的路径分量，所以往往只有第一次访问一个路径的时候才会去访问磁盘，后面的访问会直接从缓存里找。下面会看到很多与页高速缓存打交道的代码。但不管怎样，第一遍查找总是会访问磁盘的。
/*
 * NOTE(review): the body is left empty in this article, so as written this
 * non-void function returns no value. The surrounding text says this is the
 * routine that splits `name` into components and looks them up one by one.
 */
static int __link_path_walk(const char *name,struct nameidata *nd)
{
}
至此，按照每一个component查找完成之后，就会找到相应的文件，然后相应的打开工作就基本完成了。

阅读(538) | 评论(0) | 转发(0) |

上一篇：内核网络设备的注册与初始化

下一篇： Linux内核态下的文件操作

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6