Chinaunix首页 | 论坛 | 博客
  • 博客访问: 349580
  • 博文数量: 102
  • 博客积分: 3140
  • 博客等级: 中校
  • 技术积分: 680
  • 用 户 组: 普通用户
  • 注册时间: 2008-12-28 11:44
个人简介

开阔未来

文章分类

全部博文(102)

文章存档

2015年(10)

2014年(1)

2013年(1)

2012年(4)

2011年(8)

2010年(24)

2009年(51)

2008年(3)

我的朋友

分类: LINUX

2010-08-28 14:26:30

一:前言
文件的操作主要包括了文件的打开、关闭和读写.在这节中主要分析linux内核中文件操作的实现.还是同前两节一样,涉及到块设备与页面缓存的部分先放一边.后续会有专题分析与此相关的内容.
二:文件的打开
在用户空间的,打开文件常用的api是open().它的系统调用入口是sys_open():
. asmlinkage long sys_open(const char __user * filename, int flags, int mode)
{
     char * tmp;
     int fd, error;
 
#if BITS_PER_LONG != 32
     flags |= O_LARGEFILE;
#endif
     //从用户空间copy值
     tmp = getname(filename);
     fd = PTR_ERR(tmp);
     if (!IS_ERR(tmp)) {
         //分配一个没有被使用的fd
         fd = get_unused_fd();
         if (fd >= 0) {
              //取得与文件相关的file结构
              struct file *f = filp_open(tmp, flags, mode);
              error = PTR_ERR(f);
              if (IS_ERR(f))
                   goto out_error;
              //将file 添加file_struct中的fd数组的相应项
              fd_install(fd, f);
         }
out:
         //释放分配的内存空间
         putname(tmp);
     }
     return fd;
 
out_error:
     put_unused_fd(fd);
     fd = error;
     goto out;
}
与进程相关的文件系统结构在之前的文章中已经分析过了(原文此处引用的文章标题已丢失).如有不太清楚的可以自行参阅该文章.
首先在进程中取得一个没有被使用的文件描述符.这是在get_unused_fd()中完成的.它的代码如下:
/*
 * get_unused_fd - reserve a free file descriptor number for the current task.
 *
 * Searches the open_fds bitmap starting at files->next_fd, growing the
 * bitmaps or the fd array when the candidate does not fit, then marks the
 * descriptor as in use.  The whole search runs under files->file_lock.
 *
 * Returns the reserved descriptor, -EMFILE when the RLIMIT_NOFILE soft limit
 * is reached, or the error returned by expand_fdset()/expand_fd_array().
 */
int get_unused_fd(void)
{
     struct files_struct * files = current->files;
     int fd, error;
 
     error = -EMFILE;
     spin_lock(&files->file_lock);
 
repeat:
     /* Find the next clear bit in the open-descriptor bitmap, starting at next_fd. */
     fd = find_next_zero_bit(files->open_fds->fds_bits,
                   files->max_fdset,
                   files->next_fd);
 
     /*
      * N.B. For clone tasks sharing a files structure, this test
      * will limit the total number of files that can be opened.
      */
      /* Candidate is at or beyond the soft descriptor limit: fail with -EMFILE. */
     if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
         goto out;
 
     /* Do we need to expand the fdset array? */
     /* max_fdset is the total number of bits in the bitmaps. */
 
     /* Candidate does not fit in the bitmaps: grow them and search again. */
     if (fd >= files->max_fdset) {
         error = expand_fdset(files, fd);
         if (!error) {
              /* Restore the default error before retrying the search. */
              error = -EMFILE;
              goto repeat;
         }
         goto out;
     }
    
     /*
      * Check whether we need to expand the fd array.
      */
      /* Candidate is beyond the number of slots in the struct file pointer array. */
     if (fd >= files->max_fds) {
         /* Grow the fd array, then search again. */
         error = expand_fd_array(files, fd);
         if (!error) {
              error = -EMFILE;
              goto repeat;
         }
         goto out;
     }
 
     /* Mark the descriptor as allocated in open_fds. */
     FD_SET(fd, files->open_fds);
     /*
      * Clear its close_on_exec bit, i.e. the descriptor is not closed
      * when the task exec()s a new program.
      */
     FD_CLR(fd, files->close_on_exec);
     /* The next search starts just past the descriptor handed out now. */
     files->next_fd = fd + 1;
#if 1
     /* Sanity check */
     /* A free descriptor must have an empty slot; warn and reset it if not. */
     if (files->fd[fd] != NULL) {
         printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
         files->fd[fd] = NULL;
     }
#endif
     error = fd;
 
out:
     spin_unlock(&files->file_lock);
     return error;
}
如果文件描述符位图空间不够或者文件对象描述符数组空间不够.就会调用expand_fdset()和expand_fd_array()进行空间的扩展.代码分别如下所示:
/*
 * expand_fdset - grow the open_fds and close_on_exec bitmaps so that
 * descriptor @nr fits.
 *
 * Entered and left with files->file_lock held; the lock is dropped around
 * the allocation and around every free_fdset() call.
 *
 * Returns 0 on success (including the case where the bitmaps were already
 * grown while the lock was dropped), -EMFILE when NR_OPEN would be exceeded,
 * or -ENOMEM when allocation fails.
 */
int expand_fdset(struct files_struct *files, int nr)
{
     fd_set *new_openset = NULL, *new_execset = NULL;
     int error, nfds = 0;
 
     error = -EMFILE;
     /* Already at the hard limit, or the request itself is beyond it. */
     if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN)
         goto out;
 
     /* Start from the current bitmap size (in bits). */
     nfds = files->max_fdset;
     spin_unlock(&files->file_lock);
 
     /* Expand to the max in easy steps */
     /*
      * First step is PAGE_SIZE * 8 entries; after that the size doubles,
      * capped at NR_OPEN, until it is larger than nr.
      */
     do {
         if (nfds < (PAGE_SIZE * 8))
              nfds = PAGE_SIZE * 8;
         else {
              nfds = nfds * 2;
              if (nfds > NR_OPEN)
                   nfds = NR_OPEN;
         }
     } while (nfds <= nr);
 
     /* Allocate both bitmaps at the new size, then retake the lock. */
     error = -ENOMEM;
     new_openset = alloc_fdset(nfds);
     new_execset = alloc_fdset(nfds);
     spin_lock(&files->file_lock);
     if (!new_openset || !new_execset)
         goto out;
 
     error = 0;
    
     /* Copy the existing tables and install the new pointers */
     /*
      * Copy the old contents into the new bitmaps, zero the remainder,
      * install the new bitmaps in files and free the old ones.
      */
     if (nfds > files->max_fdset) {
         int i = files->max_fdset / (sizeof(unsigned long) * 8);
         int count = (nfds - files->max_fdset) / 8;
        
         /*
          * Don't copy the entire array if the current fdset is
          * not yet initialised. 
          */
          /* Copy the old bits, then clear the newly added tail. */
         if (i) {
              memcpy (new_openset, files->open_fds, files->max_fdset/8);
              memcpy (new_execset, files->close_on_exec, files->max_fdset/8);
              memset (&new_openset->fds_bits[i], 0, count);
              memset (&new_execset->fds_bits[i], 0, count);
         }
 
         /* Swap old and new: after the xchg() calls the locals hold the OLD size and bitmaps. */
         nfds = xchg(&files->max_fdset, nfds);
         new_openset = xchg(&files->open_fds, new_openset);
         new_execset = xchg(&files->close_on_exec, new_execset);
         spin_unlock(&files->file_lock);
         /* Free the old bitmaps with the lock dropped. */
         free_fdset (new_openset, nfds);
         free_fdset (new_execset, nfds);
         spin_lock(&files->file_lock);
         return 0;
     }
     /* Somebody expanded the array while we slept ... */
 
out:
     /* Release whatever was allocated but not installed, with the lock dropped. */
     spin_unlock(&files->file_lock);
     if (new_openset)
         free_fdset(new_openset, nfds);
     if (new_execset)
         free_fdset(new_execset, nfds);
     spin_lock(&files->file_lock);
     return error;
}
expand_fd_array()的代码如下:
/*
 * expand_fd_array - grow the array of struct file pointers (files->fd) so
 * that descriptor @nr fits.
 *
 * Entered and left with files->file_lock held; the lock is dropped around
 * the allocation and around free_fd_array().
 *
 * Returns 0 on success (including the case where the array was already
 * grown while the lock was dropped), -EMFILE when NR_OPEN would be exceeded,
 * or -ENOMEM when allocation fails.
 */
int expand_fd_array(struct files_struct *files, int nr)
{
     struct file **new_fds;
     int error, nfds;
 
    
     error = -EMFILE;
     if (files->max_fds >= NR_OPEN || nr >= NR_OPEN)
         goto out;
 
     /* Start from the current number of slots. */
     nfds = files->max_fds;
     spin_unlock(&files->file_lock);
 
     /*
      * Expand to the max in easy steps, and keep expanding it until
      * we have enough for the requested fd array size.
      */
 
     /* Compute the new slot count: 256, then one page of pointers, then doubling up to NR_OPEN. */
     do {
#if NR_OPEN_DEFAULT < 256
         if (nfds < 256)
              nfds = 256;
         else
#endif
         if (nfds < (PAGE_SIZE / sizeof(struct file *)))
              nfds = PAGE_SIZE / sizeof(struct file *);
         else {
              nfds = nfds * 2;
              if (nfds > NR_OPEN)
                   nfds = NR_OPEN;
         }
     } while (nfds <= nr);
 
     error = -ENOMEM;
     new_fds = alloc_fd_array(nfds);
     spin_lock(&files->file_lock);
     if (!new_fds)
         goto out;
 
     /* Copy the existing array and install the new pointer */
     /*
      * Install the new array and size first, then copy the old entries,
      * zero the remainder and free the old array.
      */
     if (nfds > files->max_fds) {
         struct file **old_fds;
         int i;
        
         /* xchg() returns the previous values: old array and old slot count. */
         old_fds = xchg(&files->fd, new_fds);
         i = xchg(&files->max_fds, nfds);
 
         /* Don't copy/clear the array if we are creating a new
            fd array for fork() */
         if (i) {
              memcpy(new_fds, old_fds, i * sizeof(struct file *));
              /* clear the remainder of the array */
              memset(&new_fds[i], 0,
                     (nfds-i) * sizeof(struct file *));
 
              spin_unlock(&files->file_lock);
              free_fd_array(old_fds, i);
              spin_lock(&files->file_lock);
         }
     } else {
         /* Somebody expanded the array while we slept ... */
         spin_unlock(&files->file_lock);
         free_fd_array(new_fds, nfds);
         spin_lock(&files->file_lock);
     }
     error = 0;
out:
     return error;
}
取得空闲文件描述符之后,将取得与文件对应的file.将file与文件描述符关联起来的操作是在fd_install()中完成的.它的代码如下:
/*
 * fd_install - bind @file to descriptor @fd in the current task's fd array.
 *
 * The slot must still be empty; finding it occupied is treated as a kernel
 * bug.  The update is done under files->file_lock.
 */
void fastcall fd_install(unsigned int fd, struct file * file)
{
     struct files_struct *table = current->files;
     struct file **slot;

     spin_lock(&table->file_lock);
     /* Look the slot up under the lock. */
     slot = &table->fd[fd];
     /* An occupied slot means the descriptor was handed out twice. */
     if (unlikely(*slot != NULL))
         BUG();
     *slot = file;
     spin_unlock(&table->file_lock);
}
Sys_open()核心的操作是取得与文件相对应的file.这个操作是在filp_open()中完成的.它的代码如下:
/*
 * Note that while the flag value (low two bits) for sys_open means:
 *   00 - read-only
 *   01 - write-only
 *   10 - read-write
 *   11 - special
 * it is changed into
 *   00 - no permissions needed
 *   01 - read-permission
 *   10 - write-permission
 *   11 - read-write
 * for the internal routines (ie open_namei()/follow_link() etc). 00 is
 * used by symlinks.
 */
/*
 * filp_open - resolve @filename and return an opened struct file for it.
 *
 * @flags are the sys_open-style flags and @mode the creation mode.  The low
 * two access bits are converted to the internal encoding before the lookup;
 * the unconverted @flags are what dentry_open() receives.
 *
 * Returns the struct file from dentry_open(), or ERR_PTR(error) when
 * open_namei() fails.
 */
struct file *filp_open(const char * filename, int flags, int mode)
{
     struct nameidata nd;
     int lookup_flags = flags;
     int err;

     /* Convert the low two access-mode bits to the internal encoding. */
     if ((lookup_flags + 1) & O_ACCMODE)
         lookup_flags++;

     /* Truncation modifies the file, so O_TRUNC implies write permission. */
     if (lookup_flags & O_TRUNC)
         lookup_flags |= 2;

     /* Look the path up; the node is created when it is missing and O_CREAT is set. */
     err = open_namei(filename, lookup_flags, mode, &nd);
     if (err)
         return ERR_PTR(err);

     /* Turn the resolved dentry/vfsmount pair into a struct file. */
     return dentry_open(nd.dentry, nd.mnt, flags);
}
这段代码要注意作者附加的注释.sys_open()的标志位与内部例程(如open_namei())所使用的标志位定义有些不相同.所以必须对标志进行相应的转换.
转进去看一下open_namei()的操作.代码如下:
{
     int acc_mode, error = 0;
     struct dentry *dentry;
     struct dentry *dir;
     int count = 0;
 
     acc_mode = ACC_MODE(flag);
 
     /* Allow the LSM permission hook to distinguish append
        access from general write access. */
        //附加模式
     if (flag & O_APPEND)
         acc_mode |= MAY_APPEND;
 
     /* Fill in the open() intent data */
     nd->intent.open.flags = flag;
     nd->intent.open.create_mode = mode;
 
     /*
      * The simplest case - just a plain lookup.
      */
      //O_CREAT:如果文件不存在.则新建之
 
     //如果没有定义O_CREAT标志.只要查找文件系统中结点是否存在就可以了
     if (!(flag & O_CREAT)) {
         error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);
         if (error)
              return error;
         goto ok;
     }
 
     /*
      * Create - we need to know the parent.
      */
      //如果定义了O_CREAT标志.则先查找父结点
     error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
     if (error)
         return error;
 
     /*
      * We have the parent and last component. First of all, check
      * that we are not asked to creat(2) an obvious directory - that
      * will not do.
      */
     error = -EISDIR;
     //判断查找是否成功
     if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
         goto exit;
 
     dir = nd->dentry;
     nd->flags &= ~LOOKUP_PARENT;
     down(&dir->d_inode->i_sem);
 
     //到父目录中查找是否有该结点.如果没有该结点就会创建相应的dentry但dentry->d_inode为空
     dentry = __lookup_hash(&nd->last, nd->dentry, nd);
 
do_last:
     error = PTR_ERR(dentry);
     //查找错误,出错返回
     if (IS_ERR(dentry)) {
         up(&dir->d_inode->i_sem);
         goto exit;
     }
 
     /* Negative dentry, just create the file */
    
     //dentry->d_inode为空.说明这个结点是新建的
     if (!dentry->d_inode) {
         if (!IS_POSIXACL(dir->d_inode))
              mode &= ~current->fs->umask;
         error = vfs_create(dir->d_inode, dentry, mode, nd);
         up(&dir->d_inode->i_sem);
         dput(nd->dentry);
         nd->dentry = dentry;
         if (error)
              goto exit;
         /* Don't check for write permission, don't truncate */
         acc_mode = 0;
         flag &= ~O_TRUNC;
         goto ok;
     }
 
     /*
      * It already exists.
      */
      //结点原本就存在的情况
     up(&dir->d_inode->i_sem);
 
     error = -EEXIST;
     if (flag & O_EXCL)
         goto exit_dput;
 
     //如果是挂载目录.则跳转到挂载文件系统的根目录
     if (d_mountpoint(dentry)) {
         error = -ELOOP;
         if (flag & O_NOFOLLOW)
              goto exit_dput;
         while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
     }
     error = -ENOENT;
     //对异常情况的排除
     if (!dentry->d_inode)
         goto exit_dput;
     //如果结点是一个符号链接
     if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
         goto do_link;
 
     dput(nd->dentry);
     nd->dentry = dentry;
     error = -EISDIR;
 
     //如果结点是一个目录,出错退出
     if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
         goto exit;
ok:
     //对打开文件进行的各项统一处理
     error = may_open(nd, acc_mode, flag);
     if (error)
         goto exit;
     return 0;
 
exit_dput:
     dput(dentry);
exit:
     path_release(nd);
     return error;
 
do_link:
     error = -ELOOP;
     if (flag & O_NOFOLLOW)
         goto exit_dput;
     /*
      * This is subtle. Instead of calling do_follow_link() we do the
      * thing by hands. The reason is that this way we have zero link_count
      * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
      * After that we have the parent and last component, i.e.
      * we are in the same situation as after the first path_walk().
      * Well, almost - if the last component is normal we get its copy
      * stored in nd->last.name and we will have to putname() it when we
      * are done. Procfs-like symlinks just set LAST_BIND.
      */
     nd->flags |= LOOKUP_PARENT;
     error = security_inode_follow_link(dentry, nd);
     if (error)
         goto exit_dput;
     touch_atime(nd->mnt, dentry);
     nd_set_link(nd, NULL);
     error = dentry->d_inode->i_op->follow_link(dentry, nd);
     if (!error) {
         char *s = nd_get_link(nd);
         if (s)
              error = __vfs_follow_link(nd, s);
         if (dentry->d_inode->i_op->put_link)
              dentry->d_inode->i_op->put_link(dentry, nd);
     }
     dput(dentry);
     if (error)
         return error;
     nd->flags &= ~LOOKUP_PARENT;
     if (nd->last_type == LAST_BIND) {
         dentry = nd->dentry;
         goto ok;
     }
     error = -EISDIR;
     if (nd->last_type != LAST_NORM)
         goto exit;
     if (nd->last.name[nd->last.len]) {
         putname(nd->last.name);
         goto exit;
     }
     error = -ELOOP;
     if (count++==32) {
         putname(nd->last.name);
         goto exit;
     }
     dir = nd->dentry;
     down(&dir->d_inode->i_sem);
     dentry = __lookup_hash(&nd->last, nd->dentry, nd);
     putname(nd->last.name);
     goto do_last;
}
在这里忽略了结点为符号链接的情况,这种情况下就是找到符号链接的路径,然后重新进行一次相同的操作而已.我们把注意力主要放在一般的文件操作上.
在这里,对于已存在文件和要新建的文件有着不同的处理,只要是新创建文件会调用vfs_create()处理.其代码如下:
int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
         struct nameidata *nd)
{
     //创建文件之前的检查.(在sys_mkdir()的时候已经分析过个函数)
     int error = may_create(dir, dentry, nd);
 
     if (error)
         return error;
 
     //如果文件系统不允许creat
     if (!dir->i_op || !dir->i_op->create)
         return -EACCES;    /* shouldn't it be ENOSYS? */
     mode &= S_IALLUGO;
     mode |= S_IFREG;
     error = security_inode_create(dir, dentry, mode);
     if (error)
         return error;
     DQUOT_INIT(dir);
     //调用父结点对应的create操作
     error = dir->i_op->create(dir, dentry, mode, nd);
     if (!error) {
         //如果创建成功,则发出通知
         inode_dir_notify(dir, DN_CREATE);
         security_inode_post_create(dir, dentry, mode);
     }
     return error;
}
在这里,我们可以看到,它会调用父目录结点的create操作来创建结点.等分析完sys_open()操作之后,再转入具体的文件系统进行分析.
不管是新建的结点还是已经建立的结点,都会进入到may_open()中进行处理.其代码如下所示:
/*
 * may_open - checks applied to every open, for new and existing files alike.
 *
 * @nd:       lookup result naming the file
 * @acc_mode: access mask passed to permission()
 * @flag:     open flags in the internal encoding
 *
 * Besides the checks, this also performs the O_TRUNC truncation.
 * Returns 0 when the open may proceed, otherwise a negative error code.
 */
int may_open(struct nameidata *nd, int acc_mode, int flag)
{
     struct dentry *dentry = nd->dentry;
     struct inode *inode = dentry->d_inode;
     int error;
 
     /* No inode behind the dentry. */
     if (!inode)
         return -ENOENT;
 
     /* Symbolic links cannot be opened here; directories cannot be opened for writing. */
     if (S_ISLNK(inode->i_mode))
         return -ELOOP;
    
     if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
         return -EISDIR;
 
     /* Check the requested access against the inode's permissions. */
     error = permission(inode, acc_mode, nd);
     if (error)
         return error;
 
     /*
      * FIFO's, sockets and device files are special: they don't
      * actually live on the filesystem itself, and as such you
      * can write to them even if the filesystem is read-only.
      */
      /* FIFOs and sockets: drop O_TRUNC. */
     if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
         flag &= ~O_TRUNC;
     } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
         /*
          * Block or character device node on a mount that carries
          * MNT_NODEV: refuse the open.
          */
         if (nd->mnt->mnt_flags & MNT_NODEV)
              return -EACCES;
 
         flag &= ~O_TRUNC;
     } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))
         /* Write access requested on a read-only inode. */
         return -EROFS;
     /*
      * An append-only file must be opened in append mode for writing.
      */
      /* Append-only inode: writing requires O_APPEND and truncation is refused. */
     if (IS_APPEND(inode)) {
         if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
              return -EPERM;
         if (flag & O_TRUNC)
              return -EPERM;
     }
 
     /* O_NOATIME can only be set by the owner or superuser */
     /* Caller must own the file (fsuid match) or hold CAP_FOWNER. */
     if (flag & O_NOATIME)
         if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
              return -EPERM;
 
     /*
      * Ensure there are no outstanding leases on the file.
      */
     error = break_lease(inode, flag);
     if (error)
         return error;
 
     if (flag & O_TRUNC) {
         /* Hold write access on the inode for the duration of the truncation. */
         error = get_write_access(inode);
         if (error)
              return error;
 
         /*
          * Refuse to truncate files with mandatory locks held on them.
          */
          /* Check whether mandatory locking applies and a conflicting lock is held. */
         error = locks_verify_locked(inode);
         if (!error) {
              DQUOT_INIT(inode);
              /* Truncate the file to length 0. */
              error = do_truncate(dentry, 0);
         }
         put_write_access(inode);
         if (error)
              return error;
     } else
         if (flag & FMODE_WRITE)
              DQUOT_INIT(inode);
 
     return 0;
}
在这里,涉及到了两种锁.文件租借锁与强制锁.简单介绍如下:
文件租借锁:
当一个进程试图打开被租借锁保护的文件时,它会阻塞.同时,拥有这个租借锁的所有进程都会收到一个相应的信号.拥有租借锁的进程会更新文件的内容,使文件保持一致.如果拥有租借锁的进程没有在规定时间内完成,则内核将租借锁删除,因租借锁而阻塞的进程继续执行.
强制锁:
系统默认是劝告锁,当挂载文件系统时指定MS_MANDLOCK安装标志时,强制锁被打开.设置组ID位(setgid)为1且组执行位为0的文件都是强制锁的候选者.
break_lease()用来判断文件是否有租借锁,并对租借锁进行相应的处理.代码如下:
/*
 * break_lease - fast-path wrapper around __break_lease().
 *
 * Returns 0 straight away when the inode has no locks on its i_flock list;
 * otherwise returns the result of __break_lease().
 */
static inline int break_lease(struct inode *inode, unsigned int mode)
{
     /* Only an inode with at least one lock needs the slow path. */
     return inode->i_flock ? __break_lease(inode, mode) : 0;
}
/*
 * __break_lease - revoke or downgrade leases that conflict with an open.
 *
 * @inode: inode being opened
 * @mode:  open flags; FMODE_WRITE and O_NONBLOCK are the bits examined here
 *
 * Marks conflicting leases as being broken, signals them via kill_fasync(),
 * and, unless the caller owns a lease or asked for O_NONBLOCK, blocks until
 * no lease is left in the F_INPROGRESS state.  Runs under the big kernel lock.
 *
 * Returns 0 on success, -EWOULDBLOCK when a break was started but the caller
 * may not or need not wait, the lease_alloc() error when blocking would be
 * required but the allocation failed, or a negative value from
 * locks_block_on_timeout().
 */
int __break_lease(struct inode *inode, unsigned int mode)
{
     int error = 0, future;
     struct file_lock *new_fl, *flock;
     struct file_lock *fl;
     int alloc_err;
     unsigned long break_time;
     int i_have_this_lease = 0;
 
     /* Allocate a lock of the type matching the open; failure is only recorded here. */
     alloc_err = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK,
              &new_fl);
 
     lock_kernel();
 
     /* Process timeouts of the leases already on the inode. */
     time_out_leases(inode);
 
     flock = inode->i_flock;
 
     /*
      * Nothing to do when there is no lock or the first lock is not a lease
      * (the loops below treat leases as sitting at the head of i_flock).
      */
     if ((flock == NULL) || !IS_LEASE(flock))
         goto out;
 
     /* Note whether the current task itself owns one of the leases. */
     for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next)
         if (fl->fl_owner == current->files)
              i_have_this_lease = 1;
 
     if (mode & FMODE_WRITE) {
         /* If we want write access, we have to revoke any lease. */
         future = F_UNLCK | F_INPROGRESS;
     } else if (flock->fl_type & F_INPROGRESS) {
         /* If the lease is already being broken, we just leave it */
         future = flock->fl_type;
     } else if (flock->fl_type & F_WRLCK) {
         /* Downgrade the exclusive lease to a read-only lease. */
         future = F_RDLCK | F_INPROGRESS;
     } else {
         /* the existing lease was read-only, so we can read too. */
         goto out;
     }
 
     /*
      * We would have to block (not our own lease, O_NONBLOCK not set) but the
      * lock needed for waiting could not be allocated: report that error.
      */
     if (alloc_err && !i_have_this_lease && ((mode & O_NONBLOCK) == 0)) {
         error = alloc_err;
         goto out;
     }
 
     /* Compute the break deadline; 0 means no deadline. */
     break_time = 0;
     if (lease_break_time > 0) {
         break_time = jiffies + lease_break_time * HZ;
         if (break_time == 0)
              break_time++; /* so that 0 means no break time */
     }
 
     /*
      * Move every lease that is not yet in the target state to it, record
      * the deadline and notify the holder with SIGIO.
      */
     for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
         if (fl->fl_type != future) {
              fl->fl_type = future;
              fl->fl_break_time = break_time;
              kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
         }
     }
 
     /* A lease owner, or a non-blocking open, does not wait for the break. */
 
     if (i_have_this_lease || (mode & O_NONBLOCK)) {
         error = -EWOULDBLOCK;
         goto out;
     }
 
restart:
     /* Remaining time until the deadline; 0 keeps its "no deadline" meaning. */
     break_time = flock->fl_break_time;
     if (break_time != 0) {
         break_time -= jiffies;
         if (break_time == 0)
              break_time++;
     }
 
     /*
      * Wait on the lease using the lock allocated above.
      * NOTE(review): presumably this sleeps until the timeout expires or a
      * lease holder wakes us - confirm against locks_block_on_timeout().
      */
     error = locks_block_on_timeout(flock, new_fl, break_time);
     if (error >= 0) {
         /* A return of 0 triggers another pass over lease timeouts. */
         if (error == 0)
              time_out_leases(inode);
         /* Wait for the next lease that has not been broken yet */
         for (flock = inode->i_flock; flock && IS_LEASE(flock);
                   flock = flock->fl_next) {
              if (flock->fl_type & F_INPROGRESS)
                   goto restart;
         }
         error = 0;
     }
 
out:
     unlock_kernel();
     /* Free the lock allocated at the top, if that allocation succeeded. */
     if (!alloc_err)
         locks_free_lock(new_fl);
     return error;
}
对强制锁的检查是在locks_verify_locked()中完成的.代码如下:
/*
 * locks_verify_locked - check for conflicting locks when mandatory locking
 * applies to @inode.
 *
 * Returns 0 when MANDATORY_LOCK() is false for the inode, otherwise the
 * result of locks_mandatory_locked().
 */
static inline int locks_verify_locked(struct inode *inode)
{
     /* Mandatory locking is not in effect for this inode. */
     if (!MANDATORY_LOCK(inode))
         return 0;

     /* Mandatory locking applies: look for a lock held by someone else. */
     return locks_mandatory_locked(inode);
}
int locks_mandatory_locked(struct inode *inode)
{
     fl_owner_t owner = current->files;
     struct file_lock *fl;
 
     /*
      * Search the lock list for this inode for any POSIX locks.
      */
     lock_kernel();
     for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
         //判断是否为强制锁
         if (!IS_POSIX(fl))
              continue;
         //不是进程的强制锁.说明被其它的进程置了强制锁了
         if (fl->fl_owner != owner)
              break;
     }
     unlock_kernel();
     return fl ? -EAGAIN : 0;
}
另外,还有一个很重要的过程,即对文件截短的操作.因为这个过程涉及到i_mapping的东东.以后再专题分析.
回到filp_open().找到文件对应的结点之后,要将inode结构与file结构关联起来.这是在dentry_open()中处理的.它的代码如下:
/*
 * dentry_open - build an opened struct file for a resolved dentry.
 *
 * @dentry: dentry of the file
 * @mnt:    vfsmount the dentry was found on
 * @flags:  sys_open-style open flags
 *
 * On every failure path that reaches the cleanup labels, the references on
 * @dentry and @mnt are dropped with dput()/mntput().
 *
 * Returns the new struct file, or an ERR_PTR() on failure.
 */
struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
{
     struct file * f;
     struct inode *inode;
     int error;
 
     error = -ENFILE;
     f = get_empty_filp();
     if (!f)
         goto cleanup_dentry;
     f->f_flags = flags;
     /* Same "+1" conversion of the access-mode bits as in filp_open(). */
     f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
     inode = dentry->d_inode;
     /* Opening for write takes write access on the inode. */
     if (f->f_mode & FMODE_WRITE) {
         error = get_write_access(inode);
         if (error)
              goto cleanup_file;
     }
 
     f->f_mapping = inode->i_mapping;
     /* Record the dentry and vfsmount this file refers to. */
     f->f_dentry = dentry;
     f->f_vfsmnt = mnt;
     f->f_pos = 0;
     /* The file's operations come from the inode's i_fop. */
     f->f_op = fops_get(inode->i_fop);
     file_move(f, &inode->i_sb->s_files);
 
     /* Call the file's own open method when it provides one. */
     if (f->f_op && f->f_op->open) {
         error = f->f_op->open(inode,f);
         if (error)
              goto cleanup_all;
     }
     /* These flags only matter while opening; do not keep them in f_flags. */
     f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
     file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
 
     /* NB: we're sure to have correct a_ops only after f_op->open */
     /* O_DIRECT needs a direct_IO method on the mapping; fail with -EINVAL otherwise. */
     if (f->f_flags & O_DIRECT) {
         if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) {
              fput(f);
              f = ERR_PTR(-EINVAL);
         }
     }
 
     return f;
 
cleanup_all:
     /* Undo, in reverse order, everything set up before ->open failed. */
     fops_put(f->f_op);
     if (f->f_mode & FMODE_WRITE)
         put_write_access(inode);
     file_kill(f);
     f->f_dentry = NULL;
     f->f_vfsmnt = NULL;
cleanup_file:
     put_filp(f);
cleanup_dentry:
     dput(dentry);
     mntput(mnt);
     return ERR_PTR(error);
}
从上面的代码中可以看出.file的f_op取自inode的i_fop,因此对file的各种操作,都会对应到inode的i_fop中.
在上面的代码曾分析到,对不存在的文件会调用vfs_create().继续会调用目录结点的create()方法.下面分析一下rootfs和ext2中的create实现.
 
2.1: rootfs中的文件创建
经过以前的分析,可得知rootfs中inode对应的操作如下:
static struct inode_operations ramfs_dir_inode_operations = {
     .create       = ramfs_create,
     .lookup       = simple_lookup,
     .link         = simple_link,
     .unlink       = simple_unlink,
     .symlink = ramfs_symlink,
     .mkdir        = ramfs_mkdir,
     .rmdir        = simple_rmdir,
     .mknod        = ramfs_mknod,
     .rename       = simple_rename,
}
对应的create为ramfs_create.代码如下:
/*
 * ramfs_create - ->create method of ramfs directories.
 *
 * Creates the node through ramfs_mknod() with the regular-file type bit
 * set and a device number of 0.  @nd is not used.
 */
static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
{
     /* Force the regular-file type on top of the requested mode. */
     int file_mode = mode | S_IFREG;

     return ramfs_mknod(dir, dentry, file_mode, 0);
}
从上面可以看到.上面的过程与rootfs中目录的建立大体相同.只是文件的模式改为了S_IFREG.即一般的文件.
 
2.2:ext2中的文件创建
经过前面的分析我们可以得知,ext2中目录对应的操作为:
struct inode_operations ext2_dir_inode_operations = {
     .create       = ext2_create,
     .lookup       = ext2_lookup,
     .link         = ext2_link,
     .unlink       = ext2_unlink,
     .symlink = ext2_symlink,
     .mkdir        = ext2_mkdir,
     .rmdir        = ext2_rmdir,
     .mknod        = ext2_mknod,
     .rename       = ext2_rename,
#ifdef CONFIG_EXT2_FS_XATTR
     .setxattr = generic_setxattr,
     .getxattr = generic_getxattr,
     .listxattr    = ext2_listxattr,
     .removexattr  = generic_removexattr,
#endif
     .setattr = ext2_setattr,
     .permission   = ext2_permission,
}
其create函数的入口为ext2_create().代码如下:
static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd)
{
     //分配一个新的结点
     struct inode * inode = ext2_new_inode (dir, mode);
     int err = PTR_ERR(inode);
     //指定i_op和i_fop.页面缓存的操作方式
     if (!IS_ERR(inode)) {
         inode->i_op = &ext2_file_inode_operations;
         inode->i_fop = &ext2_file_operations;
         if (test_opt(inode->i_sb, NOBH))
              inode->i_mapping->a_ops = &ext2_nobh_aops;
         else
              inode->i_mapping->a_ops = &ext2_aops;
         //将inode置脏
         mark_inode_dirty(inode);
         err = ext2_add_nondir(dentry, inode);
     }
     return err;
}
ext2_new_inode()的代码在前面的分析中已经讨论过.这里不再赘述.
 
三:文件的关闭
关闭文件在用户空间的api接口为close().它在内核中的系统调用入口是sys_close().代码如下:
/*
 * sys_close - system call entry point behind the user-space close() API.
 *
 * Detaches descriptor @fd from the current task's descriptor table under
 * files->file_lock, then closes the file with filp_close() outside the lock.
 *
 * Returns the result of filp_close(), or -EBADF when @fd is out of range or
 * not open.
 */
asmlinkage long sys_close(unsigned int fd)
{
     struct file * filp;
     struct files_struct *files = current->files;
 
     spin_lock(&files->file_lock);
     /* Validate the argument: fd must index into the fd array. */
     if (fd >= files->max_fds)
         goto out_unlock;
     /* Fetch the struct file bound to the descriptor. */
     filp = files->fd[fd];
     if (!filp)
         goto out_unlock;
     /* Empty the slot. */
     files->fd[fd] = NULL;
     /* Clear the descriptor's close_on_exec bit (the flag that governs closing on exec()). */
     FD_CLR(fd, files->close_on_exec);
     /* Release the descriptor number in the allocation bitmap. */
     __put_unused_fd(files, fd);
     spin_unlock(&files->file_lock);
     return filp_close(filp, files);
 
out_unlock:
     spin_unlock(&files->file_lock);
     return -EBADF;
}
转到filp_close():
/*
 * filp_close - close one reference to @filp on behalf of owner @id.
 *
 * Returns the pending error stored in filp->f_error if there was one,
 * otherwise the result of the file's ->flush method (0 when it has none).
 */
int filp_close(struct file *filp, fl_owner_t id)
{
     int retval;
 
     /* Report and clear outstanding errors */
     retval = filp->f_error;
     if (retval)
         filp->f_error = 0;
 
     /* A reference count of zero means the file is already gone. */
     if (!file_count(filp)) {
         printk(KERN_ERR "VFS: Close: file count is 0\n");
         return retval;
     }
 
     /* Call ->flush when provided; its result is kept only if no earlier error is pending. */
     if (filp->f_op && filp->f_op->flush) {
         int err = filp->f_op->flush(filp);
         if (!retval)
              retval = err;
     }
 
     /* Tell dnotify about the flush. */
     dnotify_flush(filp, id);
     /* Remove the POSIX locks held on this file by owner id. */
     locks_remove_posix(filp, id);
     /* Drop this reference to the struct file. */
     fput(filp);
     return retval;
}
下面以具体的文件为例,讨论file的flush过程.
 
3.1 rootfs的flush()
Rootfs格式的一般文件的i_fop对应为:
struct file_operations ramfs_file_operations = {
     .read         = generic_file_read,
     .write        = generic_file_write,
     .mmap         = generic_file_mmap,
     .fsync        = simple_sync_file,
     .sendfile = generic_file_sendfile,
     .llseek       = generic_file_llseek,
}
可以看到里面并没有flush()操作,对文件的关闭无需进行特殊的操作.
 
3.2:ext2的flush()
Ext2类型的文件系统对应的普通文件的i_fop为:
struct file_operations ext2_file_operations = {
     .llseek       = generic_file_llseek,
     .read         = generic_file_read,
     .write        = generic_file_write,
     .aio_read = generic_file_aio_read,
     .aio_write    = generic_file_aio_write,
     .ioctl        = ext2_ioctl,
     .mmap         = generic_file_mmap,
     .open         = generic_file_open,
     .release = ext2_release_file,
     .fsync        = ext2_sync_file,
     .readv        = generic_file_readv,
     .writev       = generic_file_writev,
     .sendfile = generic_file_sendfile,
}
可以看到,里面也没有定义flush操作.
 
四:小结
在本节里,主要概述了文件的打开与关闭操作.其中文件的关闭操作对大部分文件系统来说,只要处理好进程本身的文件描述符映射就可以了.无需进行其它特殊的操作.
阅读(1651) | 评论(1) | 转发(0) |
给主人留下些什么吧!~~

chinaunix网友2010-08-31 10:39:48

Download More than 1000 free IT eBooks: http://free-ebooks.appspot.com