open()系统调用用来打开一个文件,本文就VFS层,对open系统调用的过程进行一个简单的分析。
-
/*
 * open(2) system call entry point.
 *
 * @filename: user-space pointer to the pathname to open
 * @flags:    open flags supplied by the caller
 * @mode:     creation mode, passed through unchanged to do_sys_open()
 *
 * Returns the value produced by do_sys_open(): a file descriptor on
 * success or a negative errno on failure.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
{
	long ret;

	/* When force_o_largefile() is true, O_LARGEFILE is added implicitly. */
	if (force_o_largefile())
		flags |= O_LARGEFILE;

	/* AT_FDCWD is passed as the directory fd for the lookup. */
	ret = do_sys_open(AT_FDCWD, filename, flags, mode);

	/*
	 * NOTE(review): asmlinkage_protect() is opaque from here; presumably it
	 * keeps the compiler from reusing the syscall argument slots -- confirm.
	 */
	asmlinkage_protect(3, ret, filename, flags, mode);
	return ret;
}
force_o_largefile()用来判断系统是否为非32位(也就是64位)系统,如果是,则自动将O_LARGEFILE置位;打开文件的主体工作由do_sys_open()来完成
-
/*
 * Common implementation behind the open system call.
 *
 * @dfd:      directory file descriptor the lookup is relative to
 *            (the open syscall passes AT_FDCWD)
 * @filename: user-space pointer to the pathname
 * @flags:    open flags
 * @mode:     creation mode, forwarded to do_filp_open()
 *
 * Reserves a file descriptor, opens the file through do_filp_open() and,
 * on success, binds the resulting struct file to the descriptor.
 *
 * Returns the new file descriptor, or a negative errno taken from
 * getname(), get_unused_fd_flags() or do_filp_open().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
	/* NOTE(review): getname() presumably copies the path into kernel memory. */
	char *tmp = getname(filename);
	/* Pre-load the error code; it is only returned when tmp is an ERR_PTR. */
	int fd = PTR_ERR(tmp);

	if (!IS_ERR(tmp)) {
		fd = get_unused_fd_flags(flags);
		if (fd >= 0) {
			struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);

			if (IS_ERR(f)) {
				/* Open failed: give the reserved descriptor back. */
				put_unused_fd(fd);
				fd = PTR_ERR(f);
			} else {
				fsnotify_open(f->f_path.dentry);
				/* Bind the opened file to the reserved descriptor. */
				fd_install(fd, f);
			}
		}
		/* Release the name obtained from getname() on every path. */
		putname(tmp);
	}
	return fd;
}
open操作是特定于某个进程进行的,因此涉及到了VFS中特定于进程的结构,这里简单的介绍下
-
"font-size:12px;">struct files_struct {
-
-
-
-
atomic_t count;
-
struct fdtable *fdt;
-
struct fdtable fdtab;
-
-
-
-
spinlock_t file_lock ____cacheline_aligned_in_smp;
-
int next_fd;
-
struct embedded_fd_set close_on_exec_init;
-
struct embedded_fd_set open_fds_init;
-
struct file * fd_array[NR_OPEN_DEFAULT];
-
};
count表示共享该结构的进程数
fdtab是内嵌在files_struct中的一个struct fdtable实例,即该进程的文件描述符表
fdt指向进程当前使用的fdtable(文件描述符表扩充后,expand_fdtable()会让它指向新分配的表)
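next_fd表示下一次分配文件描述符时开始搜索的起点:alloc_fd()从start和next_fd中较大者开始查找空闲位,当start <= next_fd时,分配后会把next_fd更新为刚分配的fd+1(它并不一定等于最大文件描述符号+1)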
embedded_fd_set是一个位图结构,用来标记文件描述符,close_on_exec_init用来标记那些执行exec时要关闭的文件的文件描述符,open_fds_init用来标记已经分配出去了的文件描述符
fd_array用来存储进程打开的文件的struct file指针
do_sys_open()的一个重要任务就是调用get_unused_fd_flags()为即将打开的文件分配一个文件描述符
-
/*
 * Allocate an unused file descriptor for the current process, searching
 * from descriptor 0; @flags is forwarded to alloc_fd() unchanged.
 */
#define get_unused_fd_flags(flags) alloc_fd(0, (flags))
-
"font-size:12px;">int alloc_fd(unsigned start, unsigned flags)
-
{
-
struct files_struct *files = current->files;
-
unsigned int fd;
-
int error;
-
struct fdtable *fdt;
-
-
spin_lock(&files->file_lock);
-
repeat:
-
fdt = files_fdtable(files);
-
fd = start;
-
if (fd < files->next_fd)
-
fd = files->next_fd;
-
-
if (fd < fdt->max_fds)
-
fd = find_next_zero_bit(fdt->open_fds->fds_bits,
-
fdt->max_fds, fd);
-
-
error = expand_files(files, fd);
-
if (error < 0)
-
goto out;
-
-
-
-
-
-
if (error)
-
goto repeat;
-
-
if (start <= files->next_fd)
-
files->next_fd = fd + 1;
-
-
FD_SET(fd, fdt->open_fds);
-
if (flags & O_CLOEXEC)
-
FD_SET(fd, fdt->close_on_exec);
-
else
-
FD_CLR(fd, fdt->close_on_exec);
-
error = fd;
-
#if 1
-
-
if (rcu_dereference(fdt->fd[fd]) != NULL) {
-
printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
-
rcu_assign_pointer(fdt->fd[fd], NULL);
-
}
-
#endif
-
-
out:
-
spin_unlock(&files->file_lock);
-
return error;
-
}
-
-
int expand_files(struct files_struct *files, int nr)
-
{
-
struct fdtable *fdt;
-
-
fdt = files_fdtable(files);
-
-
-
-
-
-
-
if (nr >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
-
return -EMFILE;
-
-
-
if (nr < fdt->max_fds)
-
return 0;
-
-
-
if (nr >= sysctl_nr_open)
-
return -EMFILE;
-
-
-
return expand_fdtable(files, nr);
-
}
实际的扩充操作:
-
"font-size:12px;">static int expand_fdtable(struct files_struct *files, int nr)
-
__releases(files->file_lock)
-
__acquires(files->file_lock)
-
{
-
struct fdtable *new_fdt, *cur_fdt;
-
-
spin_unlock(&files->file_lock);
-
new_fdt = alloc_fdtable(nr);
-
spin_lock(&files->file_lock);
-
if (!new_fdt)
-
return -ENOMEM;
-
-
-
-
-
-
if (unlikely(new_fdt->max_fds <= nr)) {
-
free_fdarr(new_fdt);
-
free_fdset(new_fdt);
-
kfree(new_fdt);
-
return -EMFILE;
-
}
-
-
-
-
-
cur_fdt = files_fdtable(files);
-
if (nr >= cur_fdt->max_fds) {
-
-
copy_fdtable(new_fdt, cur_fdt);
-
rcu_assign_pointer(files->fdt, new_fdt);
-
if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
-
free_fdtable(cur_fdt);
-
} else {
-
-
free_fdarr(new_fdt);
-
free_fdset(new_fdt);
-
kfree(new_fdt);
-
}
-
return 1;
-
}
到此为止,分配新的fd的工作完成,如果分配fd成功,接下来do_sys_open()就要通过do_filp_open()函数查找文件并执行相应的打开操作
do_filp_open的工作针对两种情况进行:
1.flag中未标识O_CREAT,也就是只进行单纯的搜索打开,如果没有搜索到目标文件的话,不会进行创建,这种情况处理起来比较简单,主要工作就是通过路径解析来查找文件,查找到了的话再根据文件系统定义的open方式进行打开
2.flag中标识了O_CREAT,也就是说如果没找到目标文件要进行创建。这种情况要先查找目标文件的父目录(通过将LOOKUP_PARENT标识置位然后进行路径解析来实现),因为假如没查找到目标文件的话,创建工作需要在父目录下完成;然后再查找最后一个文件分量,也就是目标文件,并进行打开操作,其中涉及到的许多部分在前面几篇文章中也都已经分析过了
-
"font-size:12px;">struct file *do_filp_open(int dfd, const char *pathname,
-
int open_flag, int mode, int acc_mode)
-
{
-
struct file *filp;
-
struct nameidata nd;
-
int error;
-
struct path path;
-
struct dentry *dir;
-
int count = 0;
-
int will_write;
-
int flag = open_to_namei_flags(open_flag);
-
-
if (!acc_mode)
-
acc_mode = MAY_OPEN | ACC_MODE(flag);
-
-
-
if (flag & O_TRUNC)
-
acc_mode |= MAY_WRITE;
-
-
-
-
if (flag & O_APPEND)
-
acc_mode |= MAY_APPEND;
-
-
-
-
-
-
-
if (!(flag & O_CREAT)) {
-
error = path_lookup_open(dfd, pathname, lookup_flags(flag),
-
&nd, flag);
-
if (error)
-
return ERR_PTR(error);
-
goto ok;
-
}
-
-
-
-
-
-
error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
-
if (error)
-
return ERR_PTR(error);
-
-
error = path_walk(pathname, &nd);
-
if (error) {
-
if (nd.root.mnt)
-
path_put(&nd.root);
-
return ERR_PTR(error);
-
}
-
if (unlikely(!audit_dummy_context()))
-
audit_inode(pathname, nd.path.dentry);
-
-
-
-
-
-
-
error = -EISDIR;
-
-
-
if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
-
goto exit_parent;
-
-
error = -ENFILE;
-
filp = get_empty_filp();
-
if (filp == NULL)
-
goto exit_parent;
-
-
nd.intent.open.file = filp;
-
nd.intent.open.flags = flag;
-
nd.intent.open.create_mode = mode;
-
dir = nd.path.dentry;
-
nd.flags &= ~LOOKUP_PARENT;
-
nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
-
if (flag & O_EXCL)
-
nd.flags |= LOOKUP_EXCL;
-
mutex_lock(&dir->d_inode->i_mutex);
-
-
-
path.dentry = lookup_hash(&nd);
-
path.mnt = nd.path.mnt;
-
-
do_last:
-
error = PTR_ERR(path.dentry);
-
if (IS_ERR(path.dentry)) {
-
mutex_unlock(&dir->d_inode->i_mutex);
-
goto exit;
-
}
-
-
if (IS_ERR(nd.intent.open.file)) {
-
error = PTR_ERR(nd.intent.open.file);
-
goto exit_mutex_unlock;
-
}
-
-
-
if (!path.dentry->d_inode) {
-
-
-
-
-
-
-
-
error = mnt_want_write(nd.path.mnt);
-
if (error)
-
goto exit_mutex_unlock;
-
-
error = __open_namei_create(&nd, &path, flag, mode);
-
if (error) {
-
mnt_drop_write(nd.path.mnt);
-
goto exit;
-
}
-
-
filp = nameidata_to_filp(&nd, open_flag);
-
if (IS_ERR(filp))
-
ima_counts_put(&nd.path,
-
acc_mode & (MAY_READ | MAY_WRITE |
-
MAY_EXEC));
-
mnt_drop_write(nd.path.mnt);
-
if (nd.root.mnt)
-
path_put(&nd.root);
-
return filp;
-
}
-
-
-
-
-
mutex_unlock(&dir->d_inode->i_mutex);
-
audit_inode(pathname, path.dentry);
-
-
error = -EEXIST;
-
if (flag & O_EXCL)
-
goto exit_dput;
-
-
-
if (__follow_mount(&path)) {
-
error = -ELOOP;
-
if (flag & O_NOFOLLOW)
-
goto exit_dput;
-
}
-
-
error = -ENOENT;
-
if (!path.dentry->d_inode)
-
goto exit_dput;
-
if (path.dentry->d_inode->i_op->follow_link)
-
goto do_link;
-
-
-
path_to_nameidata(&path, &nd);
-
error = -EISDIR;
-
if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
-
goto exit;
-
ok:
-
-
-
-
-
-
-
-
-
-
-
will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
-
if (will_write) {
-
error = mnt_want_write(nd.path.mnt);
-
if (error)
-
goto exit;
-
}
-
-
error = may_open(&nd.path, acc_mode, flag);
-
if (error) {
-
if (will_write)
-
mnt_drop_write(nd.path.mnt);
-
goto exit;
-
}
-
-
filp = nameidata_to_filp(&nd, open_flag);
-
if (IS_ERR(filp))
-
ima_counts_put(&nd.path,
-
acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
-
-
-
-
-
-
if (will_write)
-
mnt_drop_write(nd.path.mnt);
-
if (nd.root.mnt)
-
path_put(&nd.root);
-
return filp;
-
-
exit_mutex_unlock:
-
mutex_unlock(&dir->d_inode->i_mutex);
-
exit_dput:
-
path_put_conditional(&path, &nd);
-
exit:
-
if (!IS_ERR(nd.intent.open.file))
-
release_open_intent(&nd);
-
exit_parent:
-
if (nd.root.mnt)
-
path_put(&nd.root);
-
path_put(&nd.path);
-
return ERR_PTR(error);
-
-
do_link:
-
error = -ELOOP;
-
if (flag & O_NOFOLLOW)
-
goto exit_dput;
-
-
-
-
-
-
-
-
-
-
-
nd.flags |= LOOKUP_PARENT;
-
error = security_inode_follow_link(path.dentry, &nd);
-
if (error)
-
goto exit_dput;
-
error = __do_follow_link(&path, &nd);
-
if (error) {
-
-
-
-
-
release_open_intent(&nd);
-
if (nd.root.mnt)
-
path_put(&nd.root);
-
return ERR_PTR(error);
-
}
-
nd.flags &= ~LOOKUP_PARENT;
-
if (nd.last_type == LAST_BIND)
-
goto ok;
-
error = -EISDIR;
-
if (nd.last_type != LAST_NORM)
-
goto exit;
-
if (nd.last.name[nd.last.len]) {
-
__putname(nd.last.name);
-
goto exit;
-
}
-
error = -ELOOP;
-
if (count++==32) {
-
__putname(nd.last.name);
-
goto exit;
-
}
-
dir = nd.path.dentry;
-
mutex_lock(&dir->d_inode->i_mutex);
-
path.dentry = lookup_hash(&nd);
-
path.mnt = nd.path.mnt;
-
__putname(nd.last.name);
-
goto do_last;
-
}
-