// Process descriptor, reduced here to the two members that tie a process to
// its filesystem state.
// NOTE(review): abbreviated for these notes - code further down also reads
// other members through `current` (signal, total_link_count, audit_context).
struct task_struct {
// Filesystem context of the process (see struct fs_struct).
struct fs_struct *fs;
// Open-file table of the process (see struct files_struct).
struct files_struct *files;
};
// Per-process, file-related global information: root, working directory,
// alternate root and umask.
struct fs_struct {
// presumably a reference count for sharing between tasks - TODO confirm
atomic_t count;
// Read/write lock; path_lookup() holds it for reading while it samples the
// dentry/vfsmount pairs below.
rwlock_t lock;
// Permission bits that open_namei() clears from the mode of a newly created
// file (skipped when the directory inode is IS_POSIXACL).
int umask;
// Root directory, current working directory and alternate root of the
// process, in that order.
struct dentry * root, * pwd, * altroot;
// The vfsmount paired with each dentry above; path_lookup() always takes
// them together (root/rootmnt, pwd/pwdmnt, altroot/altrootmnt).
struct vfsmount * rootmnt, * pwdmnt, * altrootmnt;
};
// Records the files a process has open.
struct files_struct
{
// presumably a reference count for sharing between tasks - TODO confirm
atomic_t count;
// Taken by get_unused_fd() around the whole descriptor search/allocation.
spinlock_t file_lock; /* Protects all the below members. Nests inside tsk->alloc_lock */
// Descriptor table currently in use.
// NOTE(review): get_unused_fd() obtains the table via files_fdtable(files),
// which presumably returns this pointer - confirm.
struct fdtable *fdt;
// Embedded table; presumably what fdt points at until the table has to
// grow - TODO confirm.
struct fdtable fdtab;
// presumably the initial backing storage for fdtable.close_on_exec,
// fdtable.open_fds and fdtable.fd respectively - TODO confirm.
fd_set close_on_exec_init;
fd_set open_fds_init;
struct file * fd_array[NR_OPEN_DEFAULT];
};
// File-descriptor table: the array mapping descriptor numbers to struct file
// pointers, plus the bitmaps recording which descriptors are in use.
struct fdtable
{
// presumably the capacity of fd[] - TODO confirm
unsigned int max_fds;
// Size of the open_fds bitmap in bits; get_unused_fd() passes it to
// find_next_zero_bit() as the search limit.
int max_fdset;
// Bit position at which get_unused_fd() starts looking for a free
// descriptor; it is set to the last descriptor handed out plus one.
int next_fd;
struct file ** fd; /* current fd array */
// Bitmap of descriptors marked close-on-exec (cleared for a descriptor when
// get_unused_fd() hands it out).
fd_set *close_on_exec;
// Bitmap of descriptors that are currently allocated.
fd_set *open_fds;
struct rcu_head rcu;
struct files_struct *free_files;
struct fdtable *next;
};
// Represents one open file: everything the kernel tracks for a single open
// of a dentry.
struct file {
// List linkage; __dentry_open() moves the file onto its superblock's
// s_files list with file_move().
struct list_head f_list;
// Dentry the file was opened through (stored by __dentry_open()).
struct dentry * f_dentry;
// Mount (vfsmount) that dentry was reached on.
struct vfsmount * f_vfsmnt;
// File operations, taken from the inode's i_fop in __dentry_open().
struct file_operations * f_op;
atomic_t f_count;
// Open flags; __dentry_open() strips O_CREAT/O_EXCL/O_NOCTTY/O_TRUNC once
// the open has succeeded.
unsigned int f_flags;
// Access mode derived from the open flags plus FMODE_LSEEK/PREAD/PWRITE.
mode_t f_mode;
// Current file position (initialised to 0 in __dentry_open()).
loff_t f_pos;
struct fown_struct f_owner;
unsigned int f_uid, f_gid;
struct file_ra_state f_ra;
unsigned long f_version;
void * f_security;
/* needed for tty driver, and maybe others */
void * private_data;
#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
spinlock_t f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
// Address space of the file, copied from the inode's i_mapping at open time.
struct address_space * f_mapping;
struct rcu_head f_rcuhead;
};
// Directory entry: an in-memory object that associates a name (d_name) within
// a parent directory (d_parent) with an inode (d_inode).
struct dentry {
// Reference count.
atomic_t d_count;
unsigned int d_flags; /* protected by d_lock */
spinlock_t d_lock; /* per dentry lock */
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative */
/*
* The next three fields are touched by __d_lookup. Place them here
* so they all fit in a cache line.
*/
struct hlist_node d_hash; /* lookup hash list */
// Parent directory.
struct dentry *d_parent; /* parent directory */
// File name.
struct qstr d_name;
// Linkage for the list of unused dentries.
struct list_head d_lru; /* LRU list */
// Linkage in the parent's list of children.
struct list_head d_child; /* child of parent list */
// Head of the list of this dentry's own children.
struct list_head d_subdirs; /* our children */
// Linkage among the dentries (aliases) that refer to the same inode.
struct list_head d_alias; /* inode alias list */
unsigned long d_time; /* used by d_revalidate */
struct dentry_operations *d_op;
struct super_block *d_sb; /* The root of the dentry tree */
void *d_fsdata; /* fs-specific data */
struct rcu_head d_rcu;
struct dcookie_struct *d_cookie; /* cookie, if any */
// For a directory: number of filesystems mounted on it.
int d_mounted;
// Inline storage for short names.
unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
};
// Inode: represents the file as it physically exists on the filesystem.
struct inode {
struct hlist_node i_hash;
// Linkage in the list that matches the inode's current state.
struct list_head i_list;
// Linkage in the owning superblock's list of inodes.
struct list_head i_sb_list;
// Head of the list of dentries that reference this inode.
struct list_head i_dentry;
// Inode number.
unsigned long i_ino;
atomic_t i_count;
// File type and permission bits (open_namei()/__dentry_open() test it with
// S_ISDIR).
umode_t i_mode;
// Number of hard links.
unsigned int i_nlink;
uid_t i_uid;
gid_t i_gid;
// Device identifier.
dev_t i_rdev;
// File size in bytes.
loff_t i_size;
struct timespec i_atime;
struct timespec i_mtime;
struct timespec i_ctime;
// Block size expressed as a number of bits.
unsigned int i_blkbits;
// Block size in bytes.
unsigned long i_blksize;
unsigned long i_version;
// Number of blocks used by the file.
unsigned long i_blocks;
// Number of bytes used in the file's last block.
unsigned short i_bytes;
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
// Semaphore that open_namei() holds on the parent directory's inode while it
// looks up and creates the last path component.
struct semaphore i_sem;
struct rw_semaphore i_alloc_sem;
// Inode operations; vfs_create() calls ->create, open_namei() tests
// ->follow_link to recognise symbolic links.
struct inode_operations* i_op;
struct file_operations* i_fop; /* former ->i_op->default_file_ops */
struct super_block * i_sb;
struct file_lock * i_flock;
struct address_space* i_mapping;
struct address_space i_data;
#ifdef CONFIG_QUOTA
struct dquot * i_dquot[MAXQUOTAS];
#endif
/* These three should probably be a union */
// Device list linkage for the device this inode corresponds to.
struct list_head i_devices;
struct pipe_inode_info* i_pipe;
// Pointer to the block device structure.
struct block_device * i_bdev;
// Pointer to the character device driver object.
struct cdev * i_cdev;
// Character-device index: which minor device this inode refers to.
int i_cindex;
__u32 i_generation;
#ifdef CONFIG_DNOTIFY
unsigned long i_dnotify_mask; /* Directory notify events */
struct dnotify_struct* i_dnotify; /* for directory notifications */
#endif
#ifdef CONFIG_INOTIFY
struct list_head inotify_watches; /* watches on this inode */
struct semaphore inotify_sem; /* protects the watches list */
#endif
unsigned long i_state;
unsigned long dirtied_when; /* jiffies of first dirtying */
// Filesystem mount flags.
unsigned int i_flags;
atomic_t i_writecount;
void * i_security;
union {
void * generic_ip;
} u;
#ifdef __NEED_I_SIZE_ORDERED
seqcount_t i_size_seqcount;
#endif
};
// In-memory superblock: one per mounted filesystem instance.
struct super_block {
// Linkage in the list of superblocks.
struct list_head s_list; /* Keep this first */
// Identifier of the block device.
dev_t s_dev; /* search index; _not_ kdev_t */
// Block size in bytes.
unsigned long s_blocksize;
unsigned long s_old_blocksize;
unsigned char s_blocksize_bits;
unsigned char s_dirt;
// Maximum length of a file.
unsigned long long s_maxbytes; /* Max file size */
// Filesystem type.
struct file_system_type *s_type;
struct super_operations *s_op;
struct dquot_operations *dq_op;
struct quotactl_ops *s_qcop;
struct export_operations *s_export_op;
// Mount flags.
unsigned long s_flags;
// Magic number of the filesystem.
unsigned long s_magic;
// Dentry of the root directory.
struct dentry *s_root;
struct rw_semaphore s_umount;
struct semaphore s_lock;
int s_count;
int s_syncing;
int s_need_sync_fs;
atomic_t s_active;
void *s_security;
struct xattr_handler **s_xattr;
// List of all inodes.
struct list_head s_inodes; /* all inodes */
struct list_head s_dirty; /* dirty inodes */
struct list_head s_io; /* parked for writeback */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
// List of file objects; __dentry_open() moves each opened file here.
struct list_head s_files;
// Block device the filesystem lives on.
struct block_device *s_bdev;
// Linkage as one instance of its filesystem type.
// NOTE(review): presumably chained from file_system_type.fs_supers - confirm.
struct list_head s_instances;
struct quota_info s_dquot; /* Diskquota specific options */
int s_frozen;
wait_queue_head_t s_wait_unfrozen;
char s_id[32]; /* Informational name */
// Points to the filesystem-specific superblock information.
void * s_fs_info; /* Filesystem private info */
/*
* The next field is for VFS *only*. No filesystems have any business
* even looking at it. You had been warned.
*/
struct semaphore s_vfs_rename_sem; /* Kludge */
/* Granuality of c/m/atime in ns.
Cannot be worse than a second */
u32 s_time_gran;
};
// One mounted filesystem: ties a superblock to the place in the directory
// tree where it is attached.
struct vfsmount
{
struct list_head mnt_hash;
// Parent vfsmount of this vfsmount.
struct vfsmount *mnt_parent; /* fs we are mounted on */
// Dentry inside the parent vfsmount on top of which this one is mounted.
struct dentry *mnt_mountpoint; /* dentry of mountpoint */
// Root dentry of this filesystem.
struct dentry *mnt_root; /* root of the mounted tree */
struct super_block *mnt_sb; /* pointer to superblock */
// Head of the list of child mounts attached on top of this one.
struct list_head mnt_mounts; /* list of children, anchored here */
// Linkage as a child in the parent vfsmount's list.
struct list_head mnt_child; /* and going through their mnt_child */
// Reference count.
atomic_t mnt_count;
int mnt_flags;
int mnt_expiry_mark; /* true if marked for expiry */
// File name of the mounted device, e.g. /dev/mtdblock/3
char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
struct list_head mnt_list;
struct list_head mnt_expire; /* link in fs-specific expiry list */
struct namespace *mnt_namespace; /* containing namespace */
};
// Describes one filesystem type.
struct file_system_type {
const char *name;
int fs_flags;
// Callback returning a super_block for this type.
// NOTE(review): the int / const char * / void * arguments are presumably
// mount flags, device name and mount data - confirm against callers.
struct super_block *(*get_sb) (struct file_system_type *, int,
const char *, void *);
// Callback taking the super_block to tear down.
void (*kill_sb) (struct super_block *);
struct module *owner;
// Next filesystem type (singly linked).
struct file_system_type * next;
// Head of the list of all superblocks of this type.
struct list_head fs_supers;
};
1、文件系统的布局:
一个文件系统要建立在一个块设备上,第一个块存放的是超级块(struct super_block),其后存放I节点和具体的文件内容。
每个文件都有一个I节点,I节点记录了它所代表的文件在块设备上的位置。目录也是文件。
目录项(struct dentry)不是块设备上的信息,而是为了操作文件才引入的、只存在于内存中的结构,它是文件名与I节点之间的桥梁。
2、文件系统加载过程:
核心就是把特定的文件系统组织起来,安装到一个目录项上。过程是:从块设备上读出超级块,组织成struct super_block,
再把该文件系统的超级块操作函数集赋给s_op成员。
在讲mount之前,先要说明path_lookup:
/*
 * path_lookup - resolve a pathname and leave the result in @nd.
 * @name:  pathname to resolve; a leading '/' makes the walk start at the
 *         process root, anything else starts at the working directory.
 * @flags: LOOKUP_* flags, stored into nd->flags.
 * @nd:    lookup state; nd->mnt and nd->dentry receive the starting point
 *         here and are then advanced by link_path_walk().
 *
 * Returns the result of link_path_walk(), or 0 when the name was already
 * resolved under the alternate root by __emul_lookup_dentry().
 */
int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
{
	int retval = 0;

	nd->last_type = LAST_ROOT; /* if there are only slashes... */
	nd->flags = flags;
	nd->depth = 0;

	/*
	 * Choose the vfsmount/dentry pair the walk starts from and take a
	 * reference on each; current->fs->lock is held for reading while the
	 * fs_struct members are sampled.
	 */
	read_lock(&current->fs->lock);
	if (*name=='/') {
		/*
		 * Absolute path.  If the process has an alternate root and the
		 * caller did not pass LOOKUP_NOALT, try there first.  The lock
		 * is dropped around the emulated lookup and re-taken if the
		 * name was not found.
		 */
		if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
			nd->mnt = mntget(current->fs->altrootmnt);
			nd->dentry = dget(current->fs->altroot);
			read_unlock(&current->fs->lock);
			if (__emul_lookup_dentry(name,nd))
				goto out; /* found in altroot */
			read_lock(&current->fs->lock);
		}
		nd->mnt = mntget(current->fs->rootmnt);
		nd->dentry = dget(current->fs->root);
	} else {
		/* Relative path: start from the current working directory. */
		nd->mnt = mntget(current->fs->pwdmnt);
		nd->dentry = dget(current->fs->pwd);
	}
	read_unlock(&current->fs->lock);

	current->total_link_count = 0;
	/* Generic component-by-component walk. */
	retval = link_path_walk(name, nd);
out:
	if (unlikely(current->audit_context
		     && nd && nd->dentry && nd->dentry->d_inode))
		audit_inode(name, nd->dentry->d_inode, flags);
	return retval;
}
3、文件打开过程:
/*
 * sys_open - entry point of the open system call.
 * @filename: user-space pointer to the pathname.
 * @flags:    open flags supplied by the caller.
 * @mode:     creation mode, passed through unchanged.
 *
 * Adds O_LARGEFILE to the flags when force_o_largefile() says so and hands
 * everything to do_sys_open(), whose result is returned.
 */
asmlinkage long sys_open(const char __user *filename, int flags, int mode)
{
	int open_flags = flags;

	if (force_o_largefile())
		open_flags = open_flags | O_LARGEFILE;

	return do_sys_open(filename, open_flags, mode);
}
/*
 * do_sys_open - open @filename and bind the resulting file to a descriptor.
 * @filename: user-space pointer to the pathname.
 * @flags:    open flags.
 * @mode:     creation mode.
 *
 * Returns the new descriptor number, or a negative error taken from
 * getname(), get_unused_fd() or filp_open().
 */
long do_sys_open(const char __user *filename, int flags, int mode)
{
	struct file *filp;
	char *kname = getname(filename);
	int fd = PTR_ERR(kname);

	if (IS_ERR(kname))
		return fd;

	/* Reserve a free slot in the descriptor table. */
	fd = get_unused_fd();
	if (fd < 0)
		goto out_putname;

	filp = filp_open(kname, flags, mode);
	if (IS_ERR(filp)) {
		/* Open failed: give the reserved descriptor back. */
		put_unused_fd(fd);
		fd = PTR_ERR(filp);
		goto out_putname;
	}

	fsnotify_open(filp->f_dentry);
	/* Associate the descriptor with the opened file. */
	fd_install(fd, filp);

out_putname:
	putname(kname);
	return fd;
}
/*
 * get_unused_fd - reserve the lowest free descriptor at or above next_fd.
 *
 * Runs entirely under files->file_lock.  The descriptor is marked in
 * open_fds, cleared in close_on_exec, and next_fd is advanced past it.
 *
 * Returns the descriptor number, -EMFILE when the RLIMIT_NOFILE soft limit
 * is reached, or the negative error reported by expand_files().
 */
int get_unused_fd(void)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	int ret = -EMFILE;
	int fd;

	spin_lock(&files->file_lock);

	for (;;) {
		int grown;

		/* Search the open-descriptor bitmap for a clear bit. */
		fdt = files_fdtable(files);
		fd = find_next_zero_bit(fdt->open_fds->fds_bits,
					fdt->max_fdset,
					fdt->next_fd);

		/*
		 * N.B. For clone tasks sharing a files structure, this test
		 * will limit the total number of files that can be opened.
		 */
		if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
			goto out;

		/* Do we need to expand the fd array or fd set? */
		grown = expand_files(files, fd);
		if (grown < 0) {
			ret = grown;
			goto out;
		}
		if (grown == 0)
			break;
		/*
		 * If we needed to expand the fs array we
		 * might have blocked - try again.
		 */
	}

	/* Record the allocation in the bitmaps. */
	FD_SET(fd, fdt->open_fds);
	FD_CLR(fd, fdt->close_on_exec);
	fdt->next_fd = fd + 1;
#if 1
	/* Sanity check */
	if (fdt->fd[fd] != NULL) {
		printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
		fdt->fd[fd] = NULL;
	}
#endif
	ret = fd;

out:
	spin_unlock(&files->file_lock);
	return ret;
}
/*
 * filp_open - open @filename and return a filled-in struct file.
 * @filename: kernel-space pathname.
 * @flags:    open flags as passed by the caller.
 * @mode:     creation mode, forwarded to open_namei().
 *
 * Returns the struct file produced by __dentry_open(), or an ERR_PTR():
 * -ENFILE when no empty struct file is available, otherwise the error from
 * open_namei().
 */
struct file *filp_open(const char * filename, int flags, int mode)
{
	struct nameidata nd;
	struct file *filp;
	int walk_flags = flags;
	int err;

	/*
	 * Derive the flag word handed to open_namei(): bump the access-mode
	 * field by one unless that would clear it, and set bit value 2 when
	 * truncation is requested.
	 * NOTE(review): bit 2 is presumably the write-access bit of the
	 * bumped access mode - confirm.
	 */
	if ((walk_flags + 1) & O_ACCMODE)
		walk_flags += 1;
	if (walk_flags & O_TRUNC)
		walk_flags |= 2;

	/* Allocate the struct file up front. */
	filp = get_empty_filp();
	if (!filp)
		return ERR_PTR(-ENFILE);

	/* Resolve the path; on success nd carries the dentry and vfsmount. */
	err = open_namei(filename, walk_flags, mode, &nd);
	if (err) {
		put_filp(filp);
		return ERR_PTR(err);
	}

	/* Fill the struct file from the dentry/vfsmount found above. */
	return __dentry_open(nd.dentry, nd.mnt, flags, filp);
}
/*
 * open_namei - resolve (and, with O_CREAT, possibly create) the file to open.
 * @pathname: path to open.
 * @flag:     open flags in the form prepared by filp_open().
 * @mode:     creation mode; the process umask is removed from it before
 *            vfs_create() unless the directory inode is IS_POSIXACL.
 * @nd:       lookup state; on success nd->dentry / nd->mnt describe the file.
 *
 * Returns 0 on success.  On the error paths that reach the exit labels, @nd
 * is released with path_release() and a negative errno is returned.
 */
int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
{
int acc_mode, error = 0;
struct path path;
struct dentry *dir;
// Number of symbolic links followed by hand in the do_link section below.
int count = 0;
acc_mode = ACC_MODE(flag);
/* Allow the LSM permission hook to distinguish append
access from general write access. */
if (flag & O_APPEND)
acc_mode |= MAY_APPEND;
/* Fill in the open() intent data */
nd->intent.open.flags = flag;
nd->intent.open.create_mode = mode;
// Not creating: a plain lookup is all that is needed.
/*
* The simplest case - just a plain lookup.
*/
if (!(flag & O_CREAT)) {
error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);
if (error)
return error;
goto ok;
}
// Look up the directory that contains the last path component.
/*
* Create - we need to know the parent.
*/
error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
if (error)
return error;
// Fail if the last component is a directory rather than a file name.
// nd->last holds the name of the last component.
/*
* We have the parent and last component. First of all, check
* that we are not asked to creat(2) an obvious directory - that
* will not do.
*/
error = -EISDIR;
if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
goto exit;
dir = nd->dentry;
nd->flags &= ~LOOKUP_PARENT;
// The parent directory's i_sem is held across the lookup and the create.
down(&dir->d_inode->i_sem);
// Find the dentry of the last component inside the directory.  Per the
// original note, one is allocated when none exists; its inode is then NULL,
// which is what the "negative dentry" test below checks.
path.dentry = __lookup_hash(&nd->last, nd->dentry, nd);
path.mnt = nd->mnt;
do_last:
error = PTR_ERR(path.dentry);
if (IS_ERR(path.dentry)) {
up(&dir->d_inode->i_sem);
goto exit;
}
// No inode: the file does not exist yet.
/* Negative dentry, just create the file */
if (!path.dentry->d_inode) {
if (!IS_POSIXACL(dir->d_inode))
mode &= ~current->fs->umask;
// Create the file.
error = vfs_create(dir->d_inode, path.dentry, mode, nd);
up(&dir->d_inode->i_sem);
// Swap nd->dentry from the parent directory to the new file's dentry.
dput(nd->dentry);
nd->dentry = path.dentry;
if (error)
goto exit;
/* Don't check for write permission, don't truncate */
acc_mode = 0;
flag &= ~O_TRUNC;
goto ok;
}
// Inode present: the file already exists.
/*
* It already exists.
*/
up(&dir->d_inode->i_sem);
// O_EXCL: the caller demanded that the file must not exist.
error = -EEXIST;
if (flag & O_EXCL)
goto exit_dput;
if (__follow_mount(&path)) {
error = -ELOOP;
if (flag & O_NOFOLLOW)
goto exit_dput;
}
error = -ENOENT;
if (!path.dentry->d_inode)
goto exit_dput;
// It is a symbolic link (its inode operations provide follow_link).
if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
goto do_link;
path_to_nameidata(&path, nd);
error = -EISDIR;
if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
goto exit;
ok:
// Final checks on the resolved file via may_open().
error = may_open(nd, acc_mode, flag);
if (error)
goto exit;
return 0;
// Error exit that also drops the reference held through `path`.
exit_dput:
dput_path(&path, nd);
// Error exit: release the references held by nd.
exit:
path_release(nd);
return error;
do_link:
error = -ELOOP;
if (flag & O_NOFOLLOW)
goto exit_dput;
/*
* This is subtle. Instead of calling do_follow_link() we do the
* thing by hands. The reason is that this way we have zero link_count
* and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
* After that we have the parent and last component, i.e.
* we are in the same situation as after the first path_walk().
* Well, almost - if the last component is normal we get its copy
* stored in nd->last.name and we will have to putname() it when we
* are done. Procfs-like symlinks just set LAST_BIND.
*/
nd->flags |= LOOKUP_PARENT;
error = security_inode_follow_link(path.dentry, nd);
if (error)
goto exit_dput;
error = __do_follow_link(&path, nd);
// NOTE(review): returns without path_release(); presumably
// __do_follow_link() has already released nd on failure - confirm.
if (error)
return error;
nd->flags &= ~LOOKUP_PARENT;
if (nd->last_type == LAST_BIND)
goto ok;
error = -EISDIR;
if (nd->last_type != LAST_NORM)
goto exit;
if (nd->last.name[nd->last.len]) {
__putname(nd->last.name);
goto exit;
}
// Give up with -ELOOP once 32 links have been followed here.
error = -ELOOP;
if (count++==32) {
__putname(nd->last.name);
goto exit;
}
// Same situation as after the first lookup: take the new parent's i_sem,
// look up the last component and re-enter at do_last.
dir = nd->dentry;
down(&dir->d_inode->i_sem);
path.dentry = __lookup_hash(&nd->last, nd->dentry, nd);
path.mnt = nd->mnt;
__putname(nd->last.name);
goto do_last;
}
/*
 * vfs_create - create the regular file @dentry inside directory @dir.
 * @dir:    inode of the parent directory.
 * @dentry: dentry of the file to create.
 * @mode:   requested mode; reduced to the S_IALLUGO bits and tagged S_IFREG.
 * @nd:     lookup state, forwarded to may_create() and to ->create().
 *
 * Returns 0 on success, -EACCES when the directory has no ->create method,
 * or the error from may_create(), security_inode_create() or ->create().
 */
int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
		struct nameidata *nd)
{
	int rc;

	rc = may_create(dir, dentry, nd);
	if (rc)
		return rc;

	if (!(dir->i_op && dir->i_op->create))
		return -EACCES;	/* shouldn't it be ENOSYS? */

	mode = (mode & S_IALLUGO) | S_IFREG;

	rc = security_inode_create(dir, dentry, mode);
	if (rc)
		return rc;

	DQUOT_INIT(dir);

	/* Filesystem-specific create method (ext2_create in the ext2 case). */
	rc = dir->i_op->create(dir, dentry, mode, nd);
	if (rc)
		return rc;

	fsnotify_create(dir, dentry->d_name.name);
	return 0;
}
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
int flags, struct file *f)
{
struct inode *inode;
int error;
f->f_flags = flags;
f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
FMODE_PREAD | FMODE_PWRITE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = get_write_access(inode);
if (error)
goto cleanup_file;
}
f->f_mapping = inode->i_mapping;
f->f_dentry = dentry;
f->f_vfsmnt = mnt;
f->f_pos = 0;
f->f_op = fops_get(inode->i_fop);
file_move(f, &inode->i_sb->s_files);
//调用具体的OPEN函数。
if (f->f_op && f->f_op->open) {
error = f->f_op->open(inode,f);
if (error)
goto cleanup_all;
}
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
/* NB: we're sure to have correct a_ops only after f_op->open */
if (f->f_flags & O_DIRECT) {
if (!f->f_mapping->a_ops ||
((!f->f_mapping->a_ops->direct_IO) &&
(!f->f_mapping->a_ops->get_xip_page))) {
fput(f);
f = ERR_PTR(-EINVAL);
}
}
return f;
cleanup_all:
fops_put(f->f_op);
if (f->f_mode & FMODE_WRITE)
put_write_access(inode);
file_kill(f);
f->f_dentry = NULL;
f->f_vfsmnt = NULL;
cleanup_file:
put_filp(f);
dput(dentry);
mntput(mnt);
return ERR_PTR(error);
}