一:前言
文件的操作主要包括了文件的打开、关闭和读写.在这节中主要分析了linux内核中文件操作的实现.还是同前两节一样,涉及到块设备与页面缓存的部分先放一边.后续会有专题分析与此相关的内容.
二:文件的打开
在用户空间,打开文件常用的API是open().它的系统调用入口是sys_open():
. asmlinkage long sys_open(const char __user * filename, int flags, int mode)
{
char * tmp;
int fd, error;
#if BITS_PER_LONG != 32
flags |= O_LARGEFILE;
#endif
//从用户空间copy值
tmp = getname(filename);
fd = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
//分配一个没有被使用的fd
fd = get_unused_fd();
if (fd >= 0) {
//取得与文件相关的file结构
struct file *f = filp_open(tmp, flags, mode);
error = PTR_ERR(f);
if (IS_ERR(f))
goto out_error;
//将file 添加file_struct中的fd数组的相应项
fd_install(fd, f);
}
out:
//释放分配的内存空间
putname(tmp);
}
return fd;
out_error:
put_unused_fd(fd);
fd = error;
goto out;
}
与进程相关的文件系统结构在之前的文章中已经分析过了.如有不太清楚的地方,可以自行参阅.
首先在进程中取得一个没有被使用的文件描述符.这是在get_unused_fd()中完成的.它的代码如下:
/*
 * Reserve an unused file descriptor number in the current task's table.
 *
 * Returns the descriptor (>= 0) with its bit set in open_fds and cleared
 * in close_on_exec, or a negative errno: -EMFILE when the RLIMIT_NOFILE
 * rlim_cur limit is reached, otherwise whatever expand_fdset() or
 * expand_fd_array() reported. The whole search runs under
 * files->file_lock.
 */
int get_unused_fd(void)
{
struct files_struct * files = current->files;
int fd, error;
error = -EMFILE;
spin_lock(&files->file_lock);
repeat:
// Find the next clear bit in files->open_fds->fds_bits, starting the
// search at files->next_fd and bounded by files->max_fdset.
fd = find_next_zero_bit(files->open_fds->fds_bits,
files->max_fdset,
files->next_fd);
/*
* N.B. For clone tasks sharing a files structure, this test
* will limit the total number of files that can be opened.
*/
// The candidate is at or above the per-process descriptor limit: fail
// with the -EMFILE already stored in error.
if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
/* Do we need to expand the fdset array? */
// max_fdset is the number of bits in the bitmaps. If the candidate lies
// beyond it, grow the bitmaps and search again on success.
if (fd >= files->max_fdset) {
error = expand_fdset(files, fd);
if (!error) {
error = -EMFILE;
goto repeat;
}
goto out;
}
/*
* Check whether we need to expand the fd array.
*/
// Same idea for the struct file pointer array, whose size is max_fds.
if (fd >= files->max_fds) {
// Grow the array of struct file pointers, then retry the search.
error = expand_fd_array(files, fd);
if (!error) {
error = -EMFILE;
goto repeat;
}
goto out;
}
// Mark the descriptor as allocated in open_fds.
FD_SET(fd, files->open_fds);
// Clear its close_on_exec bit, so by default the descriptor is not
// closed when exec() runs a new program.
FD_CLR(fd, files->close_on_exec);
files->next_fd = fd + 1;
#if 1
/* Sanity check */
// A free descriptor must have an empty slot; warn and reset it if not.
if (files->fd[fd] != NULL) {
printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
files->fd[fd] = NULL;
}
#endif
error = fd;
out:
spin_unlock(&files->file_lock);
return error;
}
如果文件描述符位图空间不够或者文件对象描述符数组空间不够.就会调用expand_fdset()和expand_fd_array()进行空间的扩展.代码分别如下所示:
/*
 * Grow the open_fds and close_on_exec bitmaps of @files so that bit @nr
 * fits.
 *
 * Entered and left with files->file_lock held. The lock is dropped while
 * the new sets are allocated and while old or unused sets are freed, so
 * the table may change underneath the caller.
 *
 * Returns 0 on success (also when the bitmaps were already grown by
 * someone else in the meantime), -EMFILE if max_fdset or @nr has reached
 * NR_OPEN, or -ENOMEM if an allocation failed.
 */
int expand_fdset(struct files_struct *files, int nr)
{
fd_set *new_openset = NULL, *new_execset = NULL;
int error, nfds = 0;
error = -EMFILE;
// Already at the global limit, or the request is beyond it.
if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN)
goto out;
// Start from the current bitmap size in bits.
nfds = files->max_fdset;
spin_unlock(&files->file_lock);
/* Expand to the max in easy steps */
// First step up to PAGE_SIZE * 8 bits; after that double each round,
// capped at NR_OPEN. Repeat until nfds is larger than nr.
do {
if (nfds < (PAGE_SIZE * 8))
nfds = PAGE_SIZE * 8;
else {
nfds = nfds * 2;
if (nfds > NR_OPEN)
nfds = NR_OPEN;
}
} while (nfds <= nr);
// Allocate both bitmaps at the new size; the result is checked only
// after the lock has been retaken.
error = -ENOMEM;
new_openset = alloc_fdset(nfds);
new_execset = alloc_fdset(nfds);
spin_lock(&files->file_lock);
if (!new_openset || !new_execset)
goto out;
error = 0;
/* Copy the existing tables and install the new pointers */
// Copy the old contents into the new sets, zero the tail, install the
// new sets in files and free the old ones. Skipped when max_fdset
// is no longer smaller than nfds.
if (nfds > files->max_fdset) {
int i = files->max_fdset / (sizeof(unsigned long) * 8);
int count = (nfds - files->max_fdset) / 8;
/*
* Don't copy the entire array if the current fdset is
* not yet initialised.
*/
// i is the old size in longs and count the added size in bytes.
if (i) {
memcpy (new_openset, files->open_fds, files->max_fdset/8);
memcpy (new_execset, files->close_on_exec, files->max_fdset/8);
memset (&new_openset->fds_bits[i], 0, count);
memset (&new_execset->fds_bits[i], 0, count);
}
// Swap new and old: after these xchg calls nfds and the new_* pointers
// hold the OLD size and the OLD sets.
nfds = xchg(&files->max_fdset, nfds);
new_openset = xchg(&files->open_fds, new_openset);
new_execset = xchg(&files->close_on_exec, new_execset);
spin_unlock(&files->file_lock);
// Free the old sets with the lock dropped.
free_fdset (new_openset, nfds);
free_fdset (new_execset, nfds);
spin_lock(&files->file_lock);
return 0;
}
/* Somebody expanded the array while we slept ... */
out:
// Common exit: free whatever was allocated but not installed, dropping
// the lock around the frees and retaking it before returning.
spin_unlock(&files->file_lock);
if (new_openset)
free_fdset(new_openset, nfds);
if (new_execset)
free_fdset(new_execset, nfds);
spin_lock(&files->file_lock);
return error;
}
expand_fd_array()的代码如下:
/*
 * Grow the files->fd array of struct file pointers so that index @nr
 * fits.
 *
 * Entered and left with files->file_lock held. The lock is dropped
 * around the allocation and around freeing the old or unused array.
 *
 * Returns 0 on success (also when the array was already grown by someone
 * else in the meantime), -EMFILE if max_fds or @nr has reached NR_OPEN,
 * or -ENOMEM if the allocation failed.
 */
int expand_fd_array(struct files_struct *files, int nr)
{
struct file **new_fds;
int error, nfds;
error = -EMFILE;
if (files->max_fds >= NR_OPEN || nr >= NR_OPEN)
goto out;
// Start from the current number of slots.
nfds = files->max_fds;
spin_unlock(&files->file_lock);
/*
* Expand to the max in easy steps, and keep expanding it until
* we have enough for the requested fd array size.
*/
// Pick the new slot count: 256 first (only when NR_OPEN_DEFAULT < 256),
// then one page worth of pointers, then doubling, capped at NR_OPEN.
do {
#if NR_OPEN_DEFAULT < 256
if (nfds < 256)
nfds = 256;
else
#endif
if (nfds < (PAGE_SIZE / sizeof(struct file *)))
nfds = PAGE_SIZE / sizeof(struct file *);
else {
nfds = nfds * 2;
if (nfds > NR_OPEN)
nfds = NR_OPEN;
}
} while (nfds <= nr);
error = -ENOMEM;
new_fds = alloc_fd_array(nfds);
spin_lock(&files->file_lock);
if (!new_fds)
goto out;
/* Copy the existing array and install the new pointer */
// Install the new array first, then copy the old entries and zero the
// remainder; the old array is freed with the lock dropped.
if (nfds > files->max_fds) {
struct file **old_fds;
int i;
old_fds = xchg(&files->fd, new_fds);
i = xchg(&files->max_fds, nfds);
/* Don't copy/clear the array if we are creating a new
fd array for fork() */
if (i) {
memcpy(new_fds, old_fds, i * sizeof(struct file *));
/* clear the remainder of the array */
memset(&new_fds[i], 0,
(nfds-i) * sizeof(struct file *));
spin_unlock(&files->file_lock);
free_fd_array(old_fds, i);
spin_lock(&files->file_lock);
}
} else {
/* Somebody expanded the array while we slept ... */
spin_unlock(&files->file_lock);
free_fd_array(new_fds, nfds);
spin_lock(&files->file_lock);
}
error = 0;
out:
return error;
}
取得空闲文件描述符之后,接着取得与文件对应的file结构.将file与文件描述符关联起来的操作是在fd_install()中完成的.它的代码如下:
/*
 * Bind @file to descriptor @fd in the current task's descriptor table.
 *
 * The slot must be empty; a non-NULL slot is treated as a kernel bug.
 * The store is done under files->file_lock.
 */
void fastcall fd_install(unsigned int fd, struct file * file)
{
	struct files_struct *table = current->files;
	struct file **slot;

	spin_lock(&table->file_lock);
	slot = &table->fd[fd];
	/* Installing over a live entry means the table is corrupt. */
	if (unlikely(*slot != NULL))
		BUG();
	*slot = file;
	spin_unlock(&table->file_lock);
}
sys_open()核心的操作是取得与文件相对应的file.这个操作是在filp_open()中完成的.它的代码如下:
/*
 * Open @filename and return its struct file, or ERR_PTR(errno).
 *
 * The access mode in the low two bits of the sys_open() flags
 *	00 - read-only, 01 - write-only, 10 - read-write, 11 - special
 * is re-encoded for the internal routines (open_namei()/follow_link()
 * etc.) as permission bits
 *	00 - no permissions needed (used by symlinks)
 *	01 - read-permission, 10 - write-permission, 11 - read-write
 *
 * open_namei() gets the converted flags; dentry_open() gets the
 * caller's original @flags.
 */
struct file *filp_open(const char * filename, int flags, int mode)
{
	struct nameidata nd;
	int namei_flags = flags;
	int error;

	/* Add one to the access mode unless that would wrap it (11 stays 11). */
	if ((namei_flags + 1) & O_ACCMODE)
		namei_flags += 1;

	/* Truncation modifies the file, so it also needs write permission. */
	if (namei_flags & O_TRUNC)
		namei_flags |= 2;

	/* Resolve the path into nd; open_namei() handles creation as well. */
	error = open_namei(filename, namei_flags, mode, &nd);
	if (error)
		return ERR_PTR(error);

	/* Turn the resolved dentry/mount pair into a struct file. */
	return dentry_open(nd.dentry, nd.mnt, flags);
}
这段代码要注意作者附加的注释.在sys_open()与filp_open()中标志位的定义有些不相同,所以必须对标志进行相应的转换.
转进去看一下open_namei()的操作.代码如下:
{
int acc_mode, error = 0;
struct dentry *dentry;
struct dentry *dir;
int count = 0;
acc_mode = ACC_MODE(flag);
/* Allow the LSM permission hook to distinguish append
access from general write access. */
//附加模式
if (flag & O_APPEND)
acc_mode |= MAY_APPEND;
/* Fill in the open() intent data */
nd->intent.open.flags = flag;
nd->intent.open.create_mode = mode;
/*
* The simplest case - just a plain lookup.
*/
//O_CREAT:如果文件不存在.则新建之
//如果没有定义O_CREAT标志.只要查找文件系统中结点是否存在就可以了
if (!(flag & O_CREAT)) {
error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);
if (error)
return error;
goto ok;
}
/*
* Create - we need to know the parent.
*/
//如果定义了O_CREAT标志.则先查找父结点
error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
if (error)
return error;
/*
* We have the parent and last component. First of all, check
* that we are not asked to creat(2) an obvious directory - that
* will not do.
*/
error = -EISDIR;
//判断查找是否成功
if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
goto exit;
dir = nd->dentry;
nd->flags &= ~LOOKUP_PARENT;
down(&dir->d_inode->i_sem);
//到父目录中查找是否有该结点.如果没有该结点就会创建相应的dentry但dentry->d_inode为空
dentry = __lookup_hash(&nd->last, nd->dentry, nd);
do_last:
error = PTR_ERR(dentry);
//查找错误,出错返回
if (IS_ERR(dentry)) {
up(&dir->d_inode->i_sem);
goto exit;
}
/* Negative dentry, just create the file */
//dentry->d_inode为空.说明这个结点是新建的
if (!dentry->d_inode) {
if (!IS_POSIXACL(dir->d_inode))
mode &= ~current->fs->umask;
error = vfs_create(dir->d_inode, dentry, mode, nd);
up(&dir->d_inode->i_sem);
dput(nd->dentry);
nd->dentry = dentry;
if (error)
goto exit;
/* Don't check for write permission, don't truncate */
acc_mode = 0;
flag &= ~O_TRUNC;
goto ok;
}
/*
* It already exists.
*/
//结点原本就存在的情况
up(&dir->d_inode->i_sem);
error = -EEXIST;
if (flag & O_EXCL)
goto exit_dput;
//如果是挂载目录.则跳转到挂载文件系统的根目录
if (d_mountpoint(dentry)) {
error = -ELOOP;
if (flag & O_NOFOLLOW)
goto exit_dput;
while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
}
error = -ENOENT;
//对异常情况的排除
if (!dentry->d_inode)
goto exit_dput;
//如果结点是一个符号链接
if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
goto do_link;
dput(nd->dentry);
nd->dentry = dentry;
error = -EISDIR;
//如果结点是一个目录,出错退出
if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
goto exit;
ok:
//对打开文件进行的各项统一处理
error = may_open(nd, acc_mode, flag);
if (error)
goto exit;
return 0;
exit_dput:
dput(dentry);
exit:
path_release(nd);
return error;
do_link:
error = -ELOOP;
if (flag & O_NOFOLLOW)
goto exit_dput;
/*
* This is subtle. Instead of calling do_follow_link() we do the
* thing by hands. The reason is that this way we have zero link_count
* and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
* After that we have the parent and last component, i.e.
* we are in the same situation as after the first path_walk().
* Well, almost - if the last component is normal we get its copy
* stored in nd->last.name and we will have to putname() it when we
* are done. Procfs-like symlinks just set LAST_BIND.
*/
nd->flags |= LOOKUP_PARENT;
error = security_inode_follow_link(dentry, nd);
if (error)
goto exit_dput;
touch_atime(nd->mnt, dentry);
nd_set_link(nd, NULL);
error = dentry->d_inode->i_op->follow_link(dentry, nd);
if (!error) {
char *s = nd_get_link(nd);
if (s)
error = __vfs_follow_link(nd, s);
if (dentry->d_inode->i_op->put_link)
dentry->d_inode->i_op->put_link(dentry, nd);
}
dput(dentry);
if (error)
return error;
nd->flags &= ~LOOKUP_PARENT;
if (nd->last_type == LAST_BIND) {
dentry = nd->dentry;
goto ok;
}
error = -EISDIR;
if (nd->last_type != LAST_NORM)
goto exit;
if (nd->last.name[nd->last.len]) {
putname(nd->last.name);
goto exit;
}
error = -ELOOP;
if (count++==32) {
putname(nd->last.name);
goto exit;
}
dir = nd->dentry;
down(&dir->d_inode->i_sem);
dentry = __lookup_hash(&nd->last, nd->dentry, nd);
putname(nd->last.name);
goto do_last;
}
在这里忽略了结点为符号链接的情况,这种情况下就是找到符号链接的路径,然后重新进行一次相同的操作而已.我们把注意力主要放在一般的文件操作上.
在这里,对于已存在文件和要新建的文件有着不同的处理,只要是新创建文件会调用vfs_create()处理.其代码如下:
/*
 * Create a regular file for @dentry inside directory @dir.
 *
 * @mode is reduced to the S_IALLUGO bits and tagged S_IFREG before it is
 * handed to the security hook and to the directory's ->create method.
 *
 * Returns 0 on success or a negative errno from may_create(), the
 * security hook or ->create; -EACCES if the directory has no ->create.
 */
int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
		struct nameidata *nd)
{
	int rc;

	/* Pre-creation checks on the parent directory and the new name. */
	rc = may_create(dir, dentry, nd);
	if (rc)
		return rc;

	/* The filesystem offers no way to create files here. */
	if (!(dir->i_op && dir->i_op->create))
		return -EACCES;	/* shouldn't it be ENOSYS? */

	mode = (mode & S_IALLUGO) | S_IFREG;

	rc = security_inode_create(dir, dentry, mode);
	if (rc)
		return rc;

	DQUOT_INIT(dir);

	/* Delegate the actual creation to the parent directory's method. */
	rc = dir->i_op->create(dir, dentry, mode, nd);
	if (rc)
		return rc;

	/* Created: send the directory notification and post-create hook. */
	inode_dir_notify(dir, DN_CREATE);
	security_inode_post_create(dir, dentry, mode);
	return 0;
}
在这里,我们可以看到,它会调用父目录结点的create操作来创建结点.等分析完sys_open()操作之后,再转入具体的文件系统进行分析.
不管是新建的结点还是已经建立的结点,都会进入到may_open()中进行处理.其代码如下所示:
/*
 * Final checks, and optional truncation, before the inode in @nd is
 * opened.
 *
 * @acc_mode is the permission mask passed to permission(); @flag is the
 * open flag word in the internal encoding.
 *
 * Returns 0 if the open may proceed, otherwise a negative errno.
 */
int may_open(struct nameidata *nd, int acc_mode, int flag)
{
struct dentry *dentry = nd->dentry;
struct inode *inode = dentry->d_inode;
int error;
// No inode behind the dentry.
if (!inode)
return -ENOENT;
// A symlink at this point is an error, as is write access to a directory.
if (S_ISLNK(inode->i_mode))
return -ELOOP;
if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
return -EISDIR;
// Check the requested access against the inode's permissions.
error = permission(inode, acc_mode, nd);
if (error)
return error;
/*
* FIFO's, sockets and device files are special: they don't
* actually live on the filesystem itself, and as such you
* can write to them even if the filesystem is read-only.
*/
// FIFOs and sockets: just drop O_TRUNC.
if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
flag &= ~O_TRUNC;
} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
// Block and character device nodes are refused when the mount carries
// MNT_NODEV; otherwise O_TRUNC is dropped for them as well.
if (nd->mnt->mnt_flags & MNT_NODEV)
return -EACCES;
flag &= ~O_TRUNC;
} else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))
// Write access requested but the inode is read-only.
return -EROFS;
/*
* An append-only file must be opened in append mode for writing.
*/
// On an append-only inode, writes need O_APPEND and O_TRUNC is refused.
if (IS_APPEND(inode)) {
if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
return -EPERM;
if (flag & O_TRUNC)
return -EPERM;
}
/* O_NOATIME can only be set by the owner or superuser */
// Strictly: the caller's fsuid must match the owner, or it must hold
// CAP_FOWNER.
if (flag & O_NOATIME)
if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
return -EPERM;
/*
* Ensure there are no outstanding leases on the file.
*/
error = break_lease(inode, flag);
if (error)
return error;
if (flag & O_TRUNC) {
// Write access is held only for the duration of the truncate.
error = get_write_access(inode);
if (error)
return error;
/*
* Refuse to truncate files with mandatory locks held on them.
*/
// Non-zero if mandatory locking applies and a conflicting lock exists.
error = locks_verify_locked(inode);
if (!error) {
DQUOT_INIT(inode);
// Truncate the file to length 0.
error = do_truncate(dentry, 0);
}
put_write_access(inode);
if (error)
return error;
} else
if (flag & FMODE_WRITE)
DQUOT_INIT(inode);
return 0;
}
在这里,涉及到了两种锁.文件租借锁与强制锁.简单介绍如下:
文件租借锁:
当一个进程试图打开被租借锁保护的文件时,它会阻塞.同时,拥有这个租借锁的所有进程都会收到一个相应的信号.拥有租借锁的进程会更新文件的内容,使文件保持一致.如果拥有租借锁的进程没有在规定时间内完成,则内核将租借锁删除,因租借锁而阻塞的进程继续执行.
强制锁:
系统默认是劝告锁,当挂载文件系统时指定MS_MANDLOCK安装标志时,强制锁被打开.组设置位(setgid)为1且组执行位为0的文件都是强制锁的候选者.
break_lease()用来判断文件是否有租借锁,并对租借锁进行相应的处理.代码如下:
/*
 * Fast-path wrapper around __break_lease().
 *
 * If the inode has no entries on its i_flock list there is nothing to
 * break and 0 is returned; otherwise the result of __break_lease() is
 * passed through.
 */
static inline int break_lease(struct inode *inode, unsigned int mode)
{
	return inode->i_flock ? __break_lease(inode, mode) : 0;
}
/*
 * Break (or downgrade) the leases on @inode so it can be opened with
 * @mode.
 *
 * Returns 0 when the open may go ahead, -EWOULDBLOCK when the caller
 * holds one of the leases itself or passed O_NONBLOCK, the lease_alloc()
 * error in the case noted below, or a negative value from
 * locks_block_on_timeout(). Runs under the big kernel lock.
 */
int __break_lease(struct inode *inode, unsigned int mode)
{
int error = 0, future;
struct file_lock *new_fl, *flock;
struct file_lock *fl;
int alloc_err;
unsigned long break_time;
int i_have_this_lease = 0;
// Allocate a lease lock up front: write type if FMODE_WRITE is requested,
// read type otherwise. A failure is only recorded here and acted on later.
alloc_err = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK,
&new_fl);
lock_kernel();
// Presumably expires leases whose break time has passed - TODO confirm
// against time_out_leases().
time_out_leases(inode);
flock = inode->i_flock;
// Nothing to do if the list is empty or its head is not a lease. The
// loops below assume leases sit at the head of inode->i_flock.
if ((flock == NULL) || !IS_LEASE(flock))
goto out;
// Note whether the current task (identified by current->files) owns one
// of the leases.
for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next)
if (fl->fl_owner == current->files)
i_have_this_lease = 1;
if (mode & FMODE_WRITE) {
/* If we want write access, we have to revoke any lease. */
// Opening for write: every lease must be given up entirely.
future = F_UNLCK | F_INPROGRESS;
} else if (flock->fl_type & F_INPROGRESS) {
/* If the lease is already being broken, we just leave it */
// A break is already in progress; keep its target type.
future = flock->fl_type;
} else if (flock->fl_type & F_WRLCK) {
/* Downgrade the exclusive lease to a read-only lease. */
future = F_RDLCK | F_INPROGRESS;
} else {
/* the existing lease was read-only, so we can read too. */
goto out;
}
// The pre-allocated lock is only needed for the blocking wait below, so
// the allocation failure matters only if we are not an owner and the
// caller did not pass O_NONBLOCK.
if (alloc_err && !i_have_this_lease && ((mode & O_NONBLOCK) == 0)) {
error = alloc_err;
goto out;
}
// Compute the break deadline in jiffies; 0 means no deadline.
break_time = 0;
if (lease_break_time > 0) {
break_time = jiffies + lease_break_time * HZ;
if (break_time == 0)
break_time++; /* so that 0 means no break time */
}
// Move every lease not already at the target type to it, record the
// deadline, and signal the holder with SIGIO through kill_fasync().
for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) {
if (fl->fl_type != future) {
fl->fl_type = future;
fl->fl_break_time = break_time;
kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
}
}
// A lease owner, or a caller that passed O_NONBLOCK, does not wait.
if (i_have_this_lease || (mode & O_NONBLOCK)) {
error = -EWOULDBLOCK;
goto out;
}
restart:
// Convert the absolute deadline into a remaining timeout; a result of
// 0 is bumped to 1 so it is not mistaken for "no deadline".
break_time = flock->fl_break_time;
if (break_time != 0) {
break_time -= jiffies;
if (break_time == 0)
break_time++;
}
// Presumably queues new_fl behind flock and sleeps until the timeout
// elapses or a holder wakes us - TODO confirm against
// locks_block_on_timeout().
error = locks_block_on_timeout(flock, new_fl, break_time);
if (error >= 0) {
// A return of exactly 0 triggers another pass over expired leases.
if (error == 0)
time_out_leases(inode);
/* Wait for the next lease that has not been broken yet */
// If any lease is still marked F_INPROGRESS, wait again on that one.
for (flock = inode->i_flock; flock && IS_LEASE(flock);
flock = flock->fl_next) {
if (flock->fl_type & F_INPROGRESS)
goto restart;
}
error = 0;
}
out:
unlock_kernel();
// Free the pre-allocated lock only if its allocation succeeded.
if (!alloc_err)
locks_free_lock(new_fl);
return error;
}
对强制锁的检查是在locks_verify_locked()中完成的.代码如下:
/*
 * Check @inode for locks that would conflict with the caller, but only
 * when mandatory locking applies to it (MANDATORY_LOCK()).
 *
 * Returns 0 when mandatory locking does not apply; otherwise the result
 * of locks_mandatory_locked().
 */
static inline int locks_verify_locked(struct inode *inode)
{
	/* Not a mandatory-locking candidate: nothing to verify. */
	if (!MANDATORY_LOCK(inode))
		return 0;

	return locks_mandatory_locked(inode);
}
/*
 * Report whether any POSIX lock on @inode belongs to someone other than
 * the current task (owner identity is current->files).
 *
 * Returns -EAGAIN if such a lock exists, 0 otherwise. The lock list is
 * walked under the big kernel lock.
 */
int locks_mandatory_locked(struct inode *inode)
{
	fl_owner_t self = current->files;
	struct file_lock *lk;
	int ret = 0;

	/*
	 * Search the lock list for this inode for any POSIX locks.
	 */
	lock_kernel();
	for (lk = inode->i_flock; lk != NULL; lk = lk->fl_next) {
		/* Only a POSIX lock held by a different owner counts. */
		if (IS_POSIX(lk) && lk->fl_owner != self) {
			ret = -EAGAIN;
			break;
		}
	}
	unlock_kernel();
	return ret;
}
另外,还有一个很重要的过程,即对文件截短的操作.因为这个过程涉及到i_mapping的东东.以后再专题分析.
回到filp_open().找到文件对应的结点之后,要将inode结构与file结构关联起来.这是在dentry_open()中处理的.它的代码如下:
/*
 * Build a struct file for the object named by @dentry on mount @mnt.
 *
 * Returns the new file, or ERR_PTR(errno). On the cleanup paths the
 * references on @dentry and @mnt are dropped here with dput()/mntput().
 * The O_DIRECT failure path releases the file with fput() instead.
 */
struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
{
struct file * f;
struct inode *inode;
int error;
error = -ENFILE;
f = get_empty_filp();
if (!f)
goto cleanup_dentry;
f->f_flags = flags;
// (flags+1) & O_ACCMODE re-encodes the access mode into the FMODE_* bits
// tested below.
f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = get_write_access(inode);
if (error)
goto cleanup_file;
}
f->f_mapping = inode->i_mapping;
// Record the dentry and vfsmount this file refers to.
f->f_dentry = dentry;
f->f_vfsmnt = mnt;
f->f_pos = 0;
// The file's operations come from the inode's i_fop.
f->f_op = fops_get(inode->i_fop);
file_move(f, &inode->i_sb->s_files);
// Give the file's own open method, if any, a chance to run.
if (f->f_op && f->f_op->open) {
error = f->f_op->open(inode,f);
if (error)
goto cleanup_all;
}
// These flags only matter at open time; drop them from the stored flags.
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
/* NB: we're sure to have correct a_ops only after f_op->open */
if (f->f_flags & O_DIRECT) {
if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) {
fput(f);
f = ERR_PTR(-EINVAL);
}
}
return f;
cleanup_all:
// Undo, in reverse order, what was set up before ->open failed.
fops_put(f->f_op);
if (f->f_mode & FMODE_WRITE)
put_write_access(inode);
file_kill(f);
f->f_dentry = NULL;
f->f_vfsmnt = NULL;
cleanup_file:
put_filp(f);
cleanup_dentry:
dput(dentry);
mntput(mnt);
return ERR_PTR(error);
}
从上面的代码中可以看出.对file的各种操作,都会对应到inode的i_fop中.
在上面的代码曾分析到,对不存在的文件会调用vfs_create().继续会调用目录结点的create()方法.下面分析一下rootfs和ext2中的create实现.
2.1: rootfs中的文件创建
经过以前的分析,可得知rootfs中inode对应的操作如下:
/*
 * Inode operations for ramfs (rootfs) directories. File creation goes to
 * ramfs_create(); lookup, link, unlink, rmdir and rename use the generic
 * simple_* helpers.
 */
static struct inode_operations ramfs_dir_inode_operations = {
	.create		= ramfs_create,
	.lookup		= simple_lookup,
	.link		= simple_link,
	.unlink		= simple_unlink,
	.symlink	= ramfs_symlink,
	.mkdir		= ramfs_mkdir,
	.rmdir		= simple_rmdir,
	.mknod		= ramfs_mknod,
	.rename		= simple_rename,
};
对应的create为ramfs_create.代码如下:
/*
 * ->create method for ramfs directories: make a regular file by calling
 * ramfs_mknod() with S_IFREG added to @mode and a device number of 0.
 * @nd is not used.
 */
static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
{
	int file_mode = mode | S_IFREG;

	return ramfs_mknod(dir, dentry, file_mode, 0);
}
从上面可以看到.上面的过程与rootfs中目录的建立大体相同.只是文件的模式改为了S_IFREG.即一般的文件.
2.2:ext2中的文件创建
经过前面的分析我们可以得知,ext2中目录对应的操作为:
/*
 * Inode operations for ext2 directories. File creation goes to
 * ext2_create(); the extended-attribute methods are compiled in only
 * when CONFIG_EXT2_FS_XATTR is set.
 */
struct inode_operations ext2_dir_inode_operations = {
	.create		= ext2_create,
	.lookup		= ext2_lookup,
	.link		= ext2_link,
	.unlink		= ext2_unlink,
	.symlink	= ext2_symlink,
	.mkdir		= ext2_mkdir,
	.rmdir		= ext2_rmdir,
	.mknod		= ext2_mknod,
	.rename		= ext2_rename,
#ifdef CONFIG_EXT2_FS_XATTR
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext2_listxattr,
	.removexattr	= generic_removexattr,
#endif
	.setattr	= ext2_setattr,
	.permission	= ext2_permission,
};
其create函数的入口为ext2_create().代码如下:
/*
 * ->create method for ext2 directories.
 *
 * Allocates a new inode with ext2_new_inode(), wires up the regular-file
 * inode, file and address-space operations, marks the inode dirty and
 * links it to @dentry with ext2_add_nondir().
 *
 * Returns the ext2_new_inode() error if allocation failed, otherwise the
 * result of ext2_add_nondir(). @nd is not used.
 */
static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd)
{
	struct inode *inode = ext2_new_inode(dir, mode);

	if (IS_ERR(inode))
		return PTR_ERR(inode);

	inode->i_op = &ext2_file_inode_operations;
	inode->i_fop = &ext2_file_operations;

	/* The NOBH mount option selects the nobh address-space operations. */
	inode->i_mapping->a_ops = test_opt(inode->i_sb, NOBH)
					? &ext2_nobh_aops
					: &ext2_aops;

	mark_inode_dirty(inode);
	return ext2_add_nondir(dentry, inode);
}
ext2_new_inode()的代码在前面的分析中已经讨论过.这里不再赘述.
三:文件的关闭
关闭文件在用户空间的api接口为close().它在内核中的系统调用入口是sys_close().代码如下:
/*
 * close(2) system call entry point.
 *
 * Detaches descriptor @fd from the current task's table under
 * files->file_lock, then hands the file to filp_close().
 *
 * Returns -EBADF if @fd is out of range or not open, otherwise the
 * result of filp_close().
 */
asmlinkage long sys_close(unsigned int fd)
{
	struct files_struct *files = current->files;
	struct file *filp = NULL;

	spin_lock(&files->file_lock);

	/* Only index the array when fd is within its bounds. */
	if (fd < files->max_fds)
		filp = files->fd[fd];

	if (!filp) {
		spin_unlock(&files->file_lock);
		return -EBADF;
	}

	/* Empty the slot and clear the descriptor's close-on-exec bit. */
	files->fd[fd] = NULL;
	FD_CLR(fd, files->close_on_exec);
	/* Return the descriptor number to the allocation bitmap. */
	__put_unused_fd(files, fd);

	spin_unlock(&files->file_lock);

	return filp_close(filp, files);
}
转到filp_close():
/*
 * Close one reference to @filp on behalf of lock owner @id.
 *
 * Returns the pending f_error (which is cleared), or else the result of
 * the file's ->flush method, or 0.
 */
int filp_close(struct file *filp, fl_owner_t id)
{
	/* Report and clear outstanding errors */
	int retval = filp->f_error;

	if (retval != 0)
		filp->f_error = 0;

	/* A zero reference count means the file is already dead. */
	if (file_count(filp) == 0) {
		printk(KERN_ERR "VFS: Close: file count is 0\n");
		return retval;
	}

	/* Call ->flush if present; its result counts only if no error is pending. */
	if (filp->f_op != NULL && filp->f_op->flush != NULL) {
		int flush_err = filp->f_op->flush(filp);

		if (retval == 0)
			retval = flush_err;
	}

	/* Flush directory notifications registered through this file. */
	dnotify_flush(filp, id);
	/* Drop the POSIX locks this owner holds on the file. */
	locks_remove_posix(filp, id);
	/* Release our reference to the file. */
	fput(filp);
	return retval;
}
下面以具体的文件为例,讨论file的flush过程.
3.1 rootfs的flush()
Rootfs格式的一般文件的i_fop对应为:
/*
 * File operations for regular files on ramfs (rootfs). There is no
 * .flush entry, so filp_close() has no filesystem-specific flush to
 * call for these files.
 */
struct file_operations ramfs_file_operations = {
	.read		= generic_file_read,
	.write		= generic_file_write,
	.mmap		= generic_file_mmap,
	.fsync		= simple_sync_file,
	.sendfile	= generic_file_sendfile,
	.llseek		= generic_file_llseek,
};
可以看到里面并没有flush()操作,对文件的关闭无需进行特殊的操作.
3.2:ext2的flush()
Ext2类型的文件系统对应的普通文件的i_fop为:
/*
 * File operations for regular files on ext2. There is no .flush entry;
 * .release points to ext2_release_file().
 */
struct file_operations ext2_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_file_read,
	.write		= generic_file_write,
	.aio_read	= generic_file_aio_read,
	.aio_write	= generic_file_aio_write,
	.ioctl		= ext2_ioctl,
	.mmap		= generic_file_mmap,
	.open		= generic_file_open,
	.release	= ext2_release_file,
	.fsync		= ext2_sync_file,
	.readv		= generic_file_readv,
	.writev		= generic_file_writev,
	.sendfile	= generic_file_sendfile,
};
可以看到,里面也没有定义flush操作.
四:小结
在本节里,主要概述了文件的打开与关闭操作.其中文件的关闭操作对大部分文件系统来说,只要处理好进程本身的文件描述符映射就可以了.无需进行其它特殊的操作.