linux文件系统之文件的打开与关闭-p2pt-ChinaUnix博客

p2ptp2pt.blog.chinaunix.net

首页　| 　博文目录　| 　关于我

p2pt

博客访问： 519400
博文数量： 80
博客积分： 1475
博客等级：上尉
技术积分： 1047
用户组：普通用户
注册时间： 2010-04-01 22:58

文章分类

全部博文（80）

音视频（1）
AIX（9）
Linux（36）

linux进程管理（4）

linux设备驱动之p（4）

Linux文件系统（11）
实用评测（1）
杂谈（1）
集群技术（1）
数据库（2）
多路径技术（6）
LVM（2）
VMware数据中心（10）
存储技术（11）
未分配的博文（0）

文章存档

2012年（3）

2010年（77）

我的朋友

相关博文

linux文件系统之文件的打开与关闭

分类： LINUX

2010-05-09 15:08:17

一：前言

文件的操作主要包括文件的打开、关闭和读写.这一节主要分析linux内核中文件操作的实现.同前两节一样，涉及到块设备与页面缓存的部分先放一边.后续会有专题分析与此相关的内容.

二：文件的打开

在用户空间的，打开文件常用的api是open().它的系统调用入口是sys_open()：

. asmlinkage long sys_open(const char __user * filename, int flags, int mode)

{

char * tmp;

int fd, error;

#if BITS_PER_LONG != 32

flags |= O_LARGEFILE;

#endif

//从用户空间copy值

tmp = getname(filename);

fd = PTR_ERR(tmp);

if (!IS_ERR(tmp)) {

//分配一个没有被使用的fd

fd = get_unused_fd();

if (fd >= 0) {

//取得与文件相关的file结构

struct file *f = filp_open(tmp, flags, mode);

error = PTR_ERR(f);

if (IS_ERR(f))

goto out_error;

//将file 添加file_struct中的fd数组的相应项

fd_install(fd, f);

}

out:

//释放分配的内存空间

putname(tmp);

}

return fd;

out_error:

put_unused_fd(fd);

fd = error;

goto out;

}

与进程相关的文件系统结构在之前的文章中已经分析过了.如有不太清楚的地方可以自行参阅.

首先在进程中取得一个没有被使用的文件描述符.这是在get_unused_fd（）中完成的.它的代码如下：

int get_unused_fd(void)

{

struct files_struct * files = current->files;

int fd, error;

error = -EMFILE;

spin_lock(&files->file_lock);

repeat:

//取得files->open_fds->fds_bits中下一个没有使用的位

fd = find_next_zero_bit(files->open_fds->fds_bits,

files->max_fdset,

files->next_fd);

* N.B. For clone tasks sharing a files structure, this test

* will limit the total number of files that can be opened.

//超过了文件描述符的最大值限制

if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)

goto out;

/* Do we need to expand the fdset array? */

//max_fdset: 位图位的总数

//如果超过了位图的总数

if (fd >= files->max_fdset) {

error = expand_fdset(files, fd);

if (!error) {

error = -EMFILE;

goto repeat;

}

goto out;

}

* Check whether we need to expand the fd array.

//如果超过了所描述对象的总数

if (fd >= files->max_fds) {

//扩充文件描述对象数组

error = expand_fd_array(files, fd);

if (!error) {

error = -EMFILE;

goto repeat;

}

goto out;

}

//在open_fds置该位

FD_SET(fd, files->open_fds);

//在close_on_exec中清除该位.表示如果调用exec()执行一个新程序的时候不需要关闭这个

//文件描述符

FD_CLR(fd, files->close_on_exec);

files->next_fd = fd + 1;

#if 1

/* Sanity check */

//如果在fd中的相应项不为NULL 将其置NULL

if (files->fd[fd] != NULL) {

printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);

files->fd[fd] = NULL;

}

#endif

error = fd;

out:

spin_unlock(&files->file_lock);

return error;

}

如果文件描述符位图空间不够或者文件对象描述符数组空间不够.就会调用expand_fdset()和expand_fd_array()进行空间的扩展.代码分别如下所示：

int expand_fdset(struct files_struct *files, int nr)

{

fd_set *new_openset = NULL, *new_execset = NULL;

int error, nfds = 0;

error = -EMFILE;

//超过了总限制

if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN)

goto out;

//现在文件描述符的最大值

nfds = files->max_fdset;

spin_unlock(&files->file_lock);

/* Expand to the max in easy steps */

//如果现在的文件描述符数目少于8个page大小，则扩展到8个page

//否则将其扩大两倍.其值不能超过规定的最大值

do {

if (nfds < (PAGE_SIZE * 8))

nfds = PAGE_SIZE * 8;

else {

nfds = nfds * 2;

if (nfds > NR_OPEN)

nfds = NR_OPEN;

}

} while (nfds <= nr);

//分新配大小分配存储空间

error = -ENOMEM;

new_openset = alloc_fdset(nfds);

new_execset = alloc_fdset(nfds);

spin_lock(&files->file_lock);

if (!new_openset || !new_execset)

goto out;

error = 0;

/* Copy the existing tables and install the new pointers */

//将旧值copy到新分配的空间内.并将剩余空间置为0

//新新空间挂载到进程的file中.并释放旧空间

if (nfds > files->max_fdset) {

int i = files->max_fdset / (sizeof(unsigned long) * 8);

int count = (nfds - files->max_fdset) / 8;

* Don't copy the entire array if the current fdset is

* not yet initialised.

//copy和剩余段置零的过程

if (i) {

memcpy (new_openset, files->open_fds, files->max_fdset/8);

memcpy (new_execset, files->close_on_exec, files->max_fdset/8);

memset (&new_openset->fds_bits[i], 0, count);

memset (&new_execset->fds_bits[i], 0, count);

}

//交换新旧空晨

nfds = xchg(&files->max_fdset, nfds);

new_openset = xchg(&files->open_fds, new_openset);

new_execset = xchg(&files->close_on_exec, new_execset);

spin_unlock(&files->file_lock);

//将旧空间释放掉

free_fdset (new_openset, nfds);

free_fdset (new_execset, nfds);

spin_lock(&files->file_lock);

return 0;

}

/* Somebody expanded the array while we slept ... */

out:

spin_unlock(&files->file_lock);

if (new_openset)

free_fdset(new_openset, nfds);

if (new_execset)

free_fdset(new_execset, nfds);

spin_lock(&files->file_lock);

return error;

}

expand_fd_array（）的代码如下：

int expand_fd_array(struct files_struct *files, int nr)

{

struct file **new_fds;

int error, nfds;

error = -EMFILE;

if (files->max_fds >= NR_OPEN || nr >= NR_OPEN)

goto out;

//取得现在的文件描述对象数

nfds = files->max_fds;

spin_unlock(&files->file_lock);

* Expand to the max in easy steps, and keep expanding it until

* we have enough for the requested fd array size.

//设置新的描述对象数值

do {

#if NR_OPEN_DEFAULT < 256

if (nfds < 256)

nfds = 256;

else

#endif

if (nfds < (PAGE_SIZE / sizeof(struct file *)))

nfds = PAGE_SIZE / sizeof(struct file *);

else {

nfds = nfds * 2;

if (nfds > NR_OPEN)

nfds = NR_OPEN;

}

} while (nfds <= nr);

error = -ENOMEM;

new_fds = alloc_fd_array(nfds);

spin_lock(&files->file_lock);

if (!new_fds)

goto out;

/* Copy the existing array and install the new pointer */

//copy和设置剩余空间的过程，并将新旧空间交换.操作完成过后，释放旧空间

if (nfds > files->max_fds) {

struct file **old_fds;

int i;

old_fds = xchg(&files->fd, new_fds);

i = xchg(&files->max_fds, nfds);

/* Don't copy/clear the array if we are creating a new

fd array for fork() */

if (i) {

memcpy(new_fds, old_fds, i * sizeof(struct file *));

/* clear the remainder of the array */

memset(&new_fds[i], 0,

(nfds-i) * sizeof(struct file *));

spin_unlock(&files->file_lock);

free_fd_array(old_fds, i);

spin_lock(&files->file_lock);

}

} else {

/* Somebody expanded the array while we slept ... */

spin_unlock(&files->file_lock);

free_fd_array(new_fds, nfds);

spin_lock(&files->file_lock);

}

error = 0;

out:

return error;

}

取得空闲文件描述符之后，将取得与文件对应的file.将file与文件描述符关联起来的操作是在fd_install()中完成的.它的代码如下：

/*
 * fd_install - store @file in slot @fd of the current task's fd array.
 * @fd:   descriptor whose slot is filled
 * @file: struct file to install
 *
 * Runs under files->file_lock.  The slot is expected to be empty;
 * finding it occupied triggers BUG().
 */
void fastcall fd_install(unsigned int fd, struct file * file)
{
	struct files_struct *table = current->files;
	struct file **slot;

	spin_lock(&table->file_lock);
	slot = &table->fd[fd];
	/* An already-occupied slot is treated as a kernel bug. */
	if (unlikely(*slot != NULL))
		BUG();
	*slot = file;
	spin_unlock(&table->file_lock);
}

Sys_open()核心的操作是取得与文件相对应的file.这个操作是在filp_open()中完成的.它的代码如下：

* Note that while the flag value (low two bits) for sys_open means:

* 00 - read-only

* 01 - write-only

* 10 - read-write

* 11 - special

* it is changed into

* 00 - no permissions needed

* 01 - read-permission

* 10 - write-permission

* 11 - read-write

* for the internal routines (ie open_namei()/follow_link() etc). 00 is

* used by symlinks.

struct file *filp_open(const char * filename, int flags, int mode)

{

int namei_flags, error;

struct nameidata nd;

//因为在sys_open对flag的定义如filp_open的定义不相同。因此要把两者的flag进行转换

namei_flags = flags;

//转换低两位

if ((namei_flags+1) & O_ACCMODE)

namei_flags++;

//O_TRUNC:表示需要截尾，因此如果O_TRUNC被置是需要写权限的

if (namei_flags & O_TRUNC)

namei_flags |= 2;

//取得文件结点对应的nameidata.如果节点不存在,则新建之

error = open_namei(filename, namei_flags, mode, &nd);

if (!error)

//将文件节点对应的nameidata转换为file

return dentry_open(nd.dentry, nd.mnt, flags);

return ERR_PTR(error);

}

这段代码要注意作者附加的注释.在sys_open()与filp_open()中标志位的定义有些不相同.所以必须对标志进行相应的转换.

转进去看一下open_namei()的操作.代码如下：

{

int acc_mode, error = 0;

struct dentry *dentry;

struct dentry *dir;

int count = 0;

acc_mode = ACC_MODE(flag);

/* Allow the LSM permission hook to distinguish append

access from general write access. */

//附加模式

if (flag & O_APPEND)

acc_mode |= MAY_APPEND;

/* Fill in the open() intent data */

nd->intent.open.flags = flag;

nd->intent.open.create_mode = mode;

* The simplest case - just a plain lookup.

//O_CREAT：如果文件不存在.则新建之

//如果没有定义O_CREAT标志.只要查找文件系统中结点是否存在就可以了

if (!(flag & O_CREAT)) {

error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);

if (error)

return error;

goto ok;

}

* Create - we need to know the parent.

//如果定义了O_CREAT标志.则先查找父结点

error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);

if (error)

return error;

* We have the parent and last component. First of all, check

* that we are not asked to creat(2) an obvious directory - that

* will not do.

error = -EISDIR;

//判断查找是否成功

if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])

goto exit;

dir = nd->dentry;

nd->flags &= ~LOOKUP_PARENT;

down(&dir->d_inode->i_sem);

//到父目录中查找是否有该结点.如果没有该结点就会创建相应的dentry但dentry->d_inode为空

dentry = __lookup_hash(&nd->last, nd->dentry, nd);

do_last:

error = PTR_ERR(dentry);

//查找错误，出错返回

if (IS_ERR(dentry)) {

up(&dir->d_inode->i_sem);

goto exit;

}

/* Negative dentry, just create the file */

//dentry->d_inode为空.说明这个结点是新建的

if (!dentry->d_inode) {

if (!IS_POSIXACL(dir->d_inode))

mode &= ~current->fs->umask;

error = vfs_create(dir->d_inode, dentry, mode, nd);

up(&dir->d_inode->i_sem);

dput(nd->dentry);

nd->dentry = dentry;

if (error)

goto exit;

/* Don't check for write permission, don't truncate */

acc_mode = 0;

flag &= ~O_TRUNC;

goto ok;

}

* It already exists.

//结点原本就存在的情况

up(&dir->d_inode->i_sem);

error = -EEXIST;

if (flag & O_EXCL)

goto exit_dput;

//如果是挂载目录.则跳转到挂载文件系统的根目录

if (d_mountpoint(dentry)) {

error = -ELOOP;

if (flag & O_NOFOLLOW)

goto exit_dput;

while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));

}

error = -ENOENT;

//对异常情况的排除

if (!dentry->d_inode)

goto exit_dput;

//如果结点是一个符号链接

if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)

goto do_link;

dput(nd->dentry);

nd->dentry = dentry;

error = -EISDIR;

//如果结点是一个目录，出错退出

if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))

goto exit;

ok:

//对打开文件进行的各项统一处理

error = may_open(nd, acc_mode, flag);

if (error)

goto exit;

return 0;

exit_dput:

dput(dentry);

exit:

path_release(nd);

return error;

do_link:

error = -ELOOP;

if (flag & O_NOFOLLOW)

goto exit_dput;

* This is subtle. Instead of calling do_follow_link() we do the

* thing by hands. The reason is that this way we have zero link_count

* and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.

* After that we have the parent and last component, i.e.

* we are in the same situation as after the first path_walk().

* Well, almost - if the last component is normal we get its copy

* stored in nd->last.name and we will have to putname() it when we

* are done. Procfs-like symlinks just set LAST_BIND.

nd->flags |= LOOKUP_PARENT;

error = security_inode_follow_link(dentry, nd);

if (error)

goto exit_dput;

touch_atime(nd->mnt, dentry);

nd_set_link(nd, NULL);

error = dentry->d_inode->i_op->follow_link(dentry, nd);

if (!error) {

char *s = nd_get_link(nd);

if (s)

error = __vfs_follow_link(nd, s);

if (dentry->d_inode->i_op->put_link)

dentry->d_inode->i_op->put_link(dentry, nd);

}

dput(dentry);

if (error)

return error;

nd->flags &= ~LOOKUP_PARENT;

if (nd->last_type == LAST_BIND) {

dentry = nd->dentry;

goto ok;

}

error = -EISDIR;

if (nd->last_type != LAST_NORM)

goto exit;

if (nd->last.name[nd->last.len]) {

putname(nd->last.name);

goto exit;

}

error = -ELOOP;

if (count++==32) {

putname(nd->last.name);

goto exit;

}

dir = nd->dentry;

down(&dir->d_inode->i_sem);

dentry = __lookup_hash(&nd->last, nd->dentry, nd);

putname(nd->last.name);

goto do_last;

}

在这里忽略了结点为符号链接的情况,这种情况下就是找到符号链接指向的路径，然后重新进行一次相同的操作而已.我们把注意力主要放在一般的文件操作上.

在这里，对于已存在文件和要新建的文件有着不同的处理，只要是新创建文件会调用vfs_create()处理.其代码如下：

int vfs_create(struct inode *dir, struct dentry *dentry, int mode,

struct nameidata *nd)

{

//创建文件之前的检查.(在sys_mkdir()的时候已经分析过个函数)

int error = may_create(dir, dentry, nd);

if (error)

return error;

//如果文件系统不允许creat

if (!dir->i_op || !dir->i_op->create)

return -EACCES; /* shouldn't it be ENOSYS? */

mode &= S_IALLUGO;

mode |= S_IFREG;

error = security_inode_create(dir, dentry, mode);

if (error)

return error;

DQUOT_INIT(dir);

//调用父结点对应的create操作

error = dir->i_op->create(dir, dentry, mode, nd);

if (!error) {

//如果创建成功，则发出通知

inode_dir_notify(dir, DN_CREATE);

security_inode_post_create(dir, dentry, mode);

}

return error;

}

在这里，我们可以看到，它会调用父目录结点的create操作来创建结点.等分析完sys_open()操作之后，再转入具体的文件系统进行分析.

不管是新建的结点还是已经建立的结点，都会进入到may_open()中进行处理.其代码如下所示:

int may_open(struct nameidata *nd, int acc_mode, int flag)

{

struct dentry *dentry = nd->dentry;

struct inode *inode = dentry->d_inode;

int error;

//结点所对应的inode不存在

if (!inode)

return -ENOENT;

//是一个链接或者是目录的情况

if (S_ISLNK(inode->i_mode))

return -ELOOP;

if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))

return -EISDIR;

//检查是否有相应的权限

error = permission(inode, acc_mode, nd);

if (error)

return error;

* FIFO's, sockets and device files are special: they don't

* actually live on the filesystem itself, and as such you

* can write to them even if the filesystem is read-only.

//如果是FIFO和SOCK文件，则将O_TRUNC标志去掉

if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {

flag &= ~O_TRUNC;

} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {

//如果是一个块设备文件或者是一个字符设备文件，却挂载选项带有MNT_NODEV

//标志.出错退出

if (nd->mnt->mnt_flags & MNT_NODEV)

return -EACCES;

flag &= ~O_TRUNC;

} else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))

//操作是可写出，但结点又是只读的.出错退出

return -EROFS;

* An append-only file must be opened in append mode for writing.

//如果节点是append模式的，则必须要以append模式打开

if (IS_APPEND(inode)) {

if ((flag & FMODE_WRITE) && !(flag & O_APPEND))

return -EPERM;

if (flag & O_TRUNC)

return -EPERM;

}

/* O_NOATIME can only be set by the owner or superuser */

//如果操作带有O_NOATIME标志，则只允许文件的所有者或者是root用户操作

if (flag & O_NOATIME)

if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))

return -EPERM;

* Ensure there are no outstanding leases on the file.

error = break_lease(inode, flag);

if (error)

return error;

if (flag & O_TRUNC) {

error = get_write_access(inode);

if (error)

return error;

* Refuse to truncate files with mandatory locks held on them.

//检查文件系统是否使用了强制锁且已经加上了强制锁

error = locks_verify_locked(inode);

if (!error) {

DQUOT_INIT(inode);

//对文件进行截尾

error = do_truncate(dentry, 0);

}

put_write_access(inode);

if (error)

return error;

} else

if (flag & FMODE_WRITE)

DQUOT_INIT(inode);

return 0;

}

在这里，涉及到了两种锁.文件租借锁与强制锁.简单介绍如下:

文件租借锁:

当一个进程试图打开被租借锁保护的文件时，它会阻塞.同时，拥有这个租借锁的所有进程都会收到一个相应的信号.拥有租借锁的进程会更新文件的内容，使文件保持一致.如果拥有租借锁的进程没有在规定时间内完成，则内核将租借锁删除，因租借锁而阻塞的进程继续执行.

强制锁：

系统默认是劝告锁，当挂载文件系统时指定了MS_MANDLOCK安装标志时，强制锁被打开.组设置位(setgid)为1且组执行位为0的文件都是强制锁的候选者.

break_lease（）用来判断文件是否有租借锁，并对租借锁进行相应的处理.代码如下：

/*
 * break_lease - fast-path wrapper around __break_lease().
 * @inode: inode being opened
 * @mode:  open flags, forwarded to __break_lease()
 *
 * Returns 0 immediately when the inode has no entries on its i_flock
 * list; otherwise returns whatever __break_lease() returns.
 */
static inline int break_lease(struct inode *inode, unsigned int mode)
{
	/* Nothing on the lock list: nothing to break. */
	if (!inode->i_flock)
		return 0;

	return __break_lease(inode, mode);
}

/*
 * __break_lease - revoke or downgrade the leases held on @inode.
 * @inode: inode whose leases are to be broken
 * @mode:  open flags; FMODE_WRITE selects full revocation, O_NONBLOCK
 *         forbids waiting
 *
 * Runs under the big kernel lock.  Returns 0 when no conflicting lease
 * remains, -EWOULDBLOCK when the caller holds one of the leases or asked
 * for O_NONBLOCK, the lease_alloc() error when a waiter could not be
 * allocated, or a negative value from locks_block_on_timeout().
 */
int __break_lease(struct inode *inode, unsigned int mode)
{
	int error = 0, future;
	struct file_lock *new_fl, *flock;
	struct file_lock *fl;
	int alloc_err;
	unsigned long break_time;
	int i_have_this_lease = 0;

	/* Allocate the lock used to wait on the lease, before taking the BKL. */
	alloc_err = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK,
			&new_fl);

	lock_kernel();

	/* Process leases whose break time has expired. */
	time_out_leases(inode);

	flock = inode->i_flock;
	/* Only the head is tested: no lock, or head not a lease -> done. */
	if ((flock == NULL) || !IS_LEASE(flock))
		goto out;

	/* Note whether the current task itself owns one of the leases. */
	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next)
		if (fl->fl_owner == current->files)
			i_have_this_lease = 1;

	if (mode & FMODE_WRITE) {
		/* If we want write access, we have to revoke any lease. */
		future = F_UNLCK | F_INPROGRESS;
	} else if (flock->fl_type & F_INPROGRESS) {
		/* If the lease is already being broken, we just leave it */
		future = flock->fl_type;
	} else if (flock->fl_type & F_WRLCK) {
		/* Downgrade the exclusive lease to a read-only lease. */
		future = F_RDLCK | F_INPROGRESS;
	} else {
		/* the existing lease was read-only, so we can read too. */
		goto out;
	}

	/*
	 * The allocation failed, we do not own the lease and the open may
	 * block: waiting would need new_fl, so report the allocation error.
	 */
	if (alloc_err && !i_have_this_lease && ((mode & O_NONBLOCK) == 0)) {
		error = alloc_err;
		goto out;
	}

	/* Compute the deadline given to the lease holders. */
	break_time = 0;
	if (lease_break_time > 0) {
		break_time = jiffies + lease_break_time * HZ;
		if (break_time == 0)
			break_time++;	/* so that 0 means no break time */
	}

	/* Update every lease that is not yet in the target state and signal its holder. */
	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
		if (fl->fl_type != future) {
			fl->fl_type = future;
			fl->fl_break_time = break_time;
			kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
		}
	}

	/* A lease owner, or a non-blocking open, never waits. */
	if (i_have_this_lease || (mode & O_NONBLOCK)) {
		error = -EWOULDBLOCK;
		goto out;
	}

restart:
	/* Remaining time until the break deadline of this lease. */
	break_time = flock->fl_break_time;
	if (break_time != 0) {
		break_time -= jiffies;
		if (break_time == 0)
			break_time++;
	}
	/* Block on the lease using new_fl, for at most break_time. */
	error = locks_block_on_timeout(flock, new_fl, break_time);
	if (error >= 0) {
		/* A return of 0 triggers another pass over the expired leases. */
		if (error == 0)
			time_out_leases(inode);
		/* Wait for the next lease that has not been broken yet */
		for (flock = inode->i_flock; flock && IS_LEASE(flock);
				flock = flock->fl_next) {
			if (flock->fl_type & F_INPROGRESS)
				goto restart;
		}
		error = 0;
	}

out:
	unlock_kernel();
	/* new_fl exists only when lease_alloc() succeeded. */
	if (!alloc_err)
		locks_free_lock(new_fl);
	return error;
}

对强制锁的检查是在locks_verify_locked()中完成的.代码如下：

/*
 * locks_verify_locked - check @inode for mandatory locks before truncation.
 * @inode: inode to check
 *
 * Returns 0 when MANDATORY_LOCK() is false for the inode; otherwise
 * returns the result of locks_mandatory_locked().
 */
static inline int locks_verify_locked(struct inode *inode)
{
	/* Mandatory locking does not apply to this inode. */
	if (!MANDATORY_LOCK(inode))
		return 0;

	/* It applies: see whether a conflicting lock is actually held. */
	return locks_mandatory_locked(inode);
}

/*
 * locks_mandatory_locked - test whether another owner holds a POSIX lock
 * on @inode.
 * @inode: inode whose lock list is scanned
 *
 * Walks inode->i_flock under the big kernel lock.  Returns -EAGAIN when a
 * POSIX lock whose owner differs from current->files is found, else 0.
 */
int locks_mandatory_locked(struct inode *inode)
{
	fl_owner_t owner = current->files;
	struct file_lock *fl;

	/*
	 * Search the lock list for this inode for any POSIX locks.
	 */
	lock_kernel();
	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
		/* Only POSIX locks are considered. */
		if (!IS_POSIX(fl))
			continue;
		/* Held by a different owner: stop with fl pointing at it. */
		if (fl->fl_owner != owner)
			break;
	}
	unlock_kernel();
	/* fl is non-NULL only when the loop stopped on a foreign lock. */
	return fl ? -EAGAIN : 0;
}

另外，还有一个很重要的过程，即对文件截短的操作.因为这个过程涉及到i_mapping的东东.以后再专题分析.

回到filp_open().找到文件对应的结点之后，要将inode结构与file结构关联起来.这是在dentry_open()中处理的.它的代码如下：

/*
 * dentry_open - build a struct file for an already resolved dentry.
 * @dentry: dentry of the file to open
 * @mnt:    vfsmount the dentry was found on
 * @flags:  open flags as passed to sys_open (user-visible encoding)
 *
 * Returns the new struct file, or an ERR_PTR(): -ENFILE when no file
 * structure is available, the error from get_write_access() or
 * ->open(), or -EINVAL when O_DIRECT is requested but the mapping has
 * no direct_IO operation.  The cleanup labels drop the @dentry and @mnt
 * references with dput()/mntput().
 */
struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
{
	struct file * f;
	struct inode *inode;
	int error;

	error = -ENFILE;
	f = get_empty_filp();
	if (!f)
		goto cleanup_dentry;
	f->f_flags = flags;
	/* Same +1 conversion of the low two access bits as in filp_open(). */
	f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
	inode = dentry->d_inode;
	if (f->f_mode & FMODE_WRITE) {
		error = get_write_access(inode);
		if (error)
			goto cleanup_file;
	}

	f->f_mapping = inode->i_mapping;
	/* Record the dentry and vfsmount the file refers to. */
	f->f_dentry = dentry;
	f->f_vfsmnt = mnt;
	f->f_pos = 0;
	/* File operations come from the inode's i_fop. */
	f->f_op = fops_get(inode->i_fop);
	file_move(f, &inode->i_sb->s_files);

	/* Call the ->open() method when the file operations provide one. */
	if (f->f_op && f->f_op->open) {
		error = f->f_op->open(inode,f);
		if (error)
			goto cleanup_all;
	}
	/* These flags are cleared from f_flags once the open has been done. */
	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

	/* NB: we're sure to have correct a_ops only after f_op->open */
	if (f->f_flags & O_DIRECT) {
		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) {
			fput(f);
			f = ERR_PTR(-EINVAL);
		}
	}

	return f;

cleanup_all:
	fops_put(f->f_op);
	if (f->f_mode & FMODE_WRITE)
		put_write_access(inode);
	file_kill(f);
	f->f_dentry = NULL;
	f->f_vfsmnt = NULL;
cleanup_file:
	put_filp(f);
cleanup_dentry:
	dput(dentry);
	mntput(mnt);
	return ERR_PTR(error);
}

从上面的代码中可以看出.对file的各种操作(f_op)，都会对应到inode的i_fop中.

在上面的代码曾分析到，对不存在的文件会调用vfs_create().继续会调用目录结点的create()方法.下面分析一下rootfs和ext2中的create实现.

2.1: rootfs中的文件创建

经过以前的分析，可得知rootfs中inode对应的操作如下：

/*
 * Inode operations for ramfs directories.  vfs_create() reaches
 * ramfs_create() through the .create member.
 */
static struct inode_operations ramfs_dir_inode_operations = {
	.create		= ramfs_create,
	.lookup		= simple_lookup,
	.link		= simple_link,
	.unlink		= simple_unlink,
	.symlink	= ramfs_symlink,
	.mkdir		= ramfs_mkdir,
	.rmdir		= simple_rmdir,
	.mknod		= ramfs_mknod,
	.rename		= simple_rename,
};

对应的create为ramfs_create.代码如下：

/*
 * ramfs_create - ->create() method of ramfs directories.
 *
 * Adds S_IFREG to @mode and delegates to ramfs_mknod() with a final
 * argument of 0; @nd is not used.  Returns ramfs_mknod()'s result.
 */
static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
{
	/* Mark the new node as a regular file. */
	int file_mode = mode | S_IFREG;

	return ramfs_mknod(dir, dentry, file_mode, 0);
}

从上面可以看到.上面的过程与rootfs中目录的建立大体相同.只是文件的模式改为了S_IFREG.即一般的文件.

2.2:ext2中的文件创建

经过前面的分析我们可以得知，ext2中目录对应的操作为：

/*
 * Inode operations for ext2 directories.  vfs_create() reaches
 * ext2_create() through the .create member.  The extended-attribute
 * methods are present only when CONFIG_EXT2_FS_XATTR is defined.
 */
struct inode_operations ext2_dir_inode_operations = {
	.create		= ext2_create,
	.lookup		= ext2_lookup,
	.link		= ext2_link,
	.unlink		= ext2_unlink,
	.symlink	= ext2_symlink,
	.mkdir		= ext2_mkdir,
	.rmdir		= ext2_rmdir,
	.mknod		= ext2_mknod,
	.rename		= ext2_rename,
#ifdef CONFIG_EXT2_FS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext2_listxattr,
	.removexattr	= generic_removexattr,
#endif
	.setattr	= ext2_setattr,
	.permission	= ext2_permission,
};

其create函数的入口为ext2_create（）.代码如下：

/*
 * ext2_create - ->create() method of ext2 directories.
 * @dir:    parent directory inode
 * @dentry: dentry of the file being created
 * @mode:   mode for the new inode
 * @nd:     unused here
 *
 * Allocates an inode with ext2_new_inode(), installs the regular-file
 * inode, file and address-space operations, marks the inode dirty and
 * links it in with ext2_add_nondir().  Returns the PTR_ERR() of a failed
 * allocation or the result of ext2_add_nondir().
 */
static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd)
{
	struct inode *new_inode = ext2_new_inode(dir, mode);

	/* Allocation failed: hand the encoded error back. */
	if (IS_ERR(new_inode))
		return PTR_ERR(new_inode);

	new_inode->i_op = &ext2_file_inode_operations;
	new_inode->i_fop = &ext2_file_operations;
	/* The NOBH mount option selects the nobh address-space operations. */
	new_inode->i_mapping->a_ops = test_opt(new_inode->i_sb, NOBH)
					? &ext2_nobh_aops
					: &ext2_aops;
	mark_inode_dirty(new_inode);

	return ext2_add_nondir(dentry, new_inode);
}

ext2_new_inode（）的代码在前面的分析中已经讨论过.这里不再赘述.

三：文件的关闭

关闭文件在用户空间的api接口为close().它在内核中的系统调用入口是sys_close().代码如下:

/*
 * sys_close - system-call entry point behind the user-space close().
 * @fd: descriptor to close
 *
 * Under files->file_lock the descriptor's slot is emptied, its
 * close-on-exec bit is cleared and the descriptor is released with
 * __put_unused_fd(); the file itself is then closed with filp_close()
 * after the lock is dropped.
 *
 * Returns filp_close()'s result, or -EBADF when @fd is out of range or
 * its slot is empty.
 */
asmlinkage long sys_close(unsigned int fd)
{
	struct files_struct *files = current->files;
	struct file *victim = NULL;

	spin_lock(&files->file_lock);
	/* Only look at the slot when fd lies inside the array. */
	if (fd < files->max_fds)
		victim = files->fd[fd];
	if (victim) {
		files->fd[fd] = NULL;
		/* The descriptor no longer exists, so drop its close-on-exec bit. */
		FD_CLR(fd, files->close_on_exec);
		/* Release the descriptor number. */
		__put_unused_fd(files, fd);
	}
	spin_unlock(&files->file_lock);

	if (!victim)
		return -EBADF;
	return filp_close(victim, files);
}

转到filp_close()：

/*
 * filp_close - close one reference to an open file.
 * @filp: file being closed
 * @id:   owner passed to dnotify_flush() and locks_remove_posix()
 *
 * Returns the pending filp->f_error (which is then cleared); when that
 * is 0, returns the result of the file's ->flush() method if it has one,
 * otherwise 0.
 */
int filp_close(struct file *filp, fl_owner_t id)
{
	/* Report and clear outstanding errors */
	int status = filp->f_error;

	if (status != 0)
		filp->f_error = 0;

	/* A zero reference count means the file is not valid any more. */
	if (file_count(filp) == 0) {
		printk(KERN_ERR "VFS: Close: file count is 0\n");
		return status;
	}

	/* Call ->flush() when provided; its result counts only if no error is pending. */
	if (filp->f_op != NULL && filp->f_op->flush != NULL) {
		int flush_rc = filp->f_op->flush(filp);

		if (status == 0)
			status = flush_rc;
	}

	dnotify_flush(filp, id);
	/* Remove the POSIX locks on the file for owner @id. */
	locks_remove_posix(filp, id);
	/* Drop this reference to the file. */
	fput(filp);
	return status;
}

下面以具体的文件为例，讨论file的flush过程.

3.1 rootfs的flush()

Rootfs格式的一般文件的i_fop对应为:

/*
 * File operations for regular ramfs files.  No .flush member is set,
 * so filp_close() skips its flush step for these files.
 */
struct file_operations ramfs_file_operations = {
	.read		= generic_file_read,
	.write		= generic_file_write,
	.mmap		= generic_file_mmap,
	.fsync		= simple_sync_file,
	.sendfile	= generic_file_sendfile,
	.llseek		= generic_file_llseek,
};

可以看到里面并没有flush()操作,对文件的关闭无需进行特殊的操作.

3.2:ext2的flush()

Ext2类型的文件系统对应的普通文件的i_fop为：

/*
 * File operations for regular ext2 files.  No .flush member is set,
 * so filp_close() skips its flush step for these files.
 */
struct file_operations ext2_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_file_read,
	.write		= generic_file_write,
	.aio_read	= generic_file_aio_read,
	.aio_write	= generic_file_aio_write,
	.ioctl		= ext2_ioctl,
	.mmap		= generic_file_mmap,
	.open		= generic_file_open,
	.release	= ext2_release_file,
	.fsync		= ext2_sync_file,
	.readv		= generic_file_readv,
	.writev		= generic_file_writev,
	.sendfile	= generic_file_sendfile,
};

可以看到，里面也没有定义flush操作.

四：小结

在本节里，主要概述了文件的打开与关闭操作.其中文件的关闭操作对大部分文件系统来说，只要处理好进程本身的文件描述符映射就可以了.无需进行其它特殊的操作.

阅读(4947) | 评论(0) | 转发(2) |

上一篇：Linux文件系统之sysfs

下一篇：Linux文件系统之文件的读写

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6