/* 文件系统学习笔记 */
注意 __s32 与 s32的区别和使用。
按照参考书的说法, 当kernel要把数据传递给user space的时候, 可以用__s32 ,而不要用s32,防止污染用户空间的名字空间。
而且在types.h 中, s32 是在定义了 __KERNEL__ 的情况下,才定义的,因此 s32才可以用在kernel和module中。
详细参考一下 semaphore.h (asm-i386)非常好, 好好研究一下:
再写一个module ,
关于什么时候用信号量, 什么时候用 lock(自旋锁/读写锁):
需要独占某个具体操作、且时效性要求不是特别强(等待者可以睡眠)的时候, 用信号量;
需要保护对数据的修改、临界区很短且很迫切的时候, 用 lock。
对比下面两个例子:
/*
 * Example 1 (semaphore): the whole flush sequence below runs between
 * down(&inode->i_sem) and up(&inode->i_sem).
 */
/* We need to protect against concurrent writers.. */
down(&inode->i_sem); /* uninterruptible acquire; down_interruptible() is the signal-interruptible counterpart */
ret = filemap_fdatasync(inode->i_mapping);
err = file->f_op->fsync(file, dentry, 0);
/* Keep the first error seen: a later failure never overwrites an earlier one. */
if (err && !ret)
ret = err;
err = filemap_fdatawait(inode->i_mapping);
if (err && !ret)
ret = err;
up(&inode->i_sem);
/*
 * fcheck - look up the struct file installed at descriptor @fd in the
 * current task's file table.
 *
 * Returns the entry stored in files->fd[fd], or NULL when @fd is not below
 * files->max_fds (the bound this function applies to the fd[] array).
 * Takes no lock and no reference itself; see fget() for a caller that
 * wraps it in files->file_lock.
 */
static inline struct file * fcheck(unsigned int fd)
{
	struct files_struct *table = current->files;

	/* Out-of-range descriptor: nothing can be open there. */
	if (fd >= table->max_fds)
		return NULL;

	return table->fd[fd];
}
struct file * fget(unsigned int fd)
{
struct file * file;
struct files_struct *files = current->files;
read_lock(&files->file_lock); //用读写锁的实例
file = fcheck(fd);
if (file)
get_file(file); // 文件对象引用计数增1
read_unlock(&files->file_lock);
return file;
}
问题: 当我们 close 一个文件的时候, 为什么会调用 release() 呢? 是怎么实现的呢?
答案在 fput() 里:
/*
 * fput - drop one reference to @file; when that was the last reference,
 * tear the file object down.
 *
 * Teardown (only inside the atomic_dec_and_test() branch): remove flock
 * locks, free the kiovec if one is attached, call the driver's ->release()
 * if present, drop the f_op reference, drop write access for files opened
 * with FMODE_WRITE, move the struct file onto free_list under the file
 * list lock, and finally release the dentry and vfsmount captured on entry.
 */
void fput(struct file * file)
{
/* Snapshot these first: f_dentry and f_vfsmnt are cleared below. */
struct dentry * dentry = file->f_dentry;
struct vfsmount * mnt = file->f_vfsmnt;
struct inode * inode = dentry->d_inode;
/*
 * The original note asked whether an explicit "== 1" test is missing
 * (what if the count is 2?). atomic_dec_and_test() is documented to
 * return true only when the decremented value reaches zero, so with a
 * count of 2 this branch is skipped and nothing is released.
 */
if (atomic_dec_and_test(&file->f_count)) {
locks_remove_flock(file);
if (file->f_iobuf)
free_kiovec(1, &file->f_iobuf);
if (file->f_op && file->f_op->release) /* test f_op before f_op->release so a NULL f_op is never dereferenced */
file->f_op->release(inode, file);
fops_put(file->f_op);
if (file->f_mode & FMODE_WRITE)
put_write_access(inode);
/* Recycle the struct file: unlink it and park it on free_list. */
file_list_lock();
file->f_dentry = NULL;
file->f_vfsmnt = NULL;
list_del(&file->f_list);
list_add(&file->f_list, &free_list);
files_stat.nr_free_files++;
file_list_unlock();
dput(dentry);
mntput(mnt);
}
}
无论在kernel中还是app中,这样的判断总是最好的:
/* Test the container pointer first: && short-circuits, so f_op->release is only read when f_op is non-NULL. */
if (file->f_op && file->f_op->release)
file->f_op->release(inode, file);
注意在文件系统数据结构方面 ,lock的重要性, 比如读写锁:
/*
 * Open-file table, reached through current->files (see fcheck()/fget()).
 * NOTE(review): the original note said this is "shared by all processes in
 * the system"; that does not match the per-task current->files access.
 * Presumably `count` tracks how many tasks share one table — confirm.
 */
struct files_struct {
atomic_t count;
rwlock_t file_lock; /* Protects all the below members. Nests inside tsk->alloc_lock */
/* rwlock_t permits multiple readers, but only one writer */
int max_fds; /* fcheck() only indexes fd[] with values below this */
int max_fdset;
/*
 * Original note (placement uncertain, presumably about next_fd): this
 * lets the highest already-allocated descriptor be found, so the next
 * allocation is just +1. TODO confirm against the fd allocator.
 */
int next_fd;
struct file ** fd; /* current fd array */
fd_set *close_on_exec;
fd_set *open_fds;
fd_set close_on_exec_init;
fd_set open_fds_init;
struct file * fd_array[NR_OPEN_DEFAULT];
};
应用的时候, 如此:
read_lock(&files->file_lock);
/* Filesystem type descriptor. */
/*
 * Instances are chained through the `next` pointer into a singly linked
 * list (walked by find_filesystem()). The original note called it an
 * array; the `next` field shows it is a list.
 */
struct file_system_type {
const char *name; /* compared with strcmp() in find_filesystem() */
int fs_flags;
struct super_block *(*read_super) (struct super_block *, void *, int); /* fills in the in-memory superblock */
struct module *owner;
struct file_system_type * next; /* next registered type; NULL terminates the list */
struct list_head fs_supers;
};
/*
 * DECLARE_FSTYPE - define a struct file_system_type named @var.
 *
 * @type is stored in ->name, @read in ->read_super, @flags in ->fs_flags,
 * and ->owner is set to THIS_MODULE; fields not listed are left
 * zero-initialized. The "field: value" form is the old GCC extension for
 * designated initializers.
 */
#define DECLARE_FSTYPE(var,type,read,flags) \
struct file_system_type var = { \
name: type, \
read_super: read, \
fs_flags: flags, \
owner: THIS_MODULE, \
}
super.c 含有大量的有用的跟文件系统有关的函数
kernel里面也用到zlib 函数(在cramfs中用到的)
/*
 * mount_root - mount the root filesystem.
 *
 * With CONFIG_ROOT_NFS: if ROOT_DEV names the NFS pseudo-device, try
 * mount_nfs_root(); on success chdir to /root, record the mounted device in
 * ROOT_DEV and return. On failure fall back to the first floppy.
 * Otherwise: create /dev/root for ROOT_DEV, handle the floppy case under
 * CONFIG_BLK_DEV_FD, then mount /dev/root as a block device.
 */
static void __init mount_root(void)
{
#ifdef CONFIG_ROOT_NFS
if (MAJOR(ROOT_DEV) == NFS_MAJOR
&& MINOR(ROOT_DEV) == NFS_MINOR) {
if (mount_nfs_root()) {
sys_chdir("/root");
ROOT_DEV = current->fs->pwdmnt->mnt_sb->s_dev;
printk("VFS: Mounted root (nfs filesystem).\n");
return;
}
/* NFS root failed: retarget ROOT_DEV at floppy 0 and continue below. */
printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
}
#endif
devfs_make_root(root_device_name);
create_dev("/dev/root", ROOT_DEV, root_device_name);
#ifdef CONFIG_BLK_DEV_FD
if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
/* rd_doload is 2 for a dual initrd/ramload setup */
if (rd_doload==2) {
/* On a successful ramdisk load, root moves to ramdisk minor 1. */
if (rd_load_disk(1)) {
ROOT_DEV = MKDEV(RAMDISK_MAJOR, 1);
create_dev("/dev/root", ROOT_DEV, NULL);
}
} else
change_floppy("root floppy");
}
#endif
mount_block_root("/dev/root", root_mountflags);
}
/*
 * find_filesystem - locate the list slot for the filesystem type @name.
 *
 * Walks the file_systems list and returns the address of the link that
 * either points at the entry whose ->name equals @name, or is the NULL
 * tail link when no entry matches. Returning a pointer-to-pointer is what
 * makes this usable for insertion: the caller can store through the
 * result to append at the tail.
 */
static struct file_system_type **find_filesystem(const char *name)
{
	struct file_system_type **link = &file_systems;

	while (*link != NULL && strcmp((*link)->name, name) != 0)
		link = &(*link)->next;

	return link;
}
/* Head of the singly linked list of filesystem types that find_filesystem() walks via ->next. */
static struct file_system_type *file_systems;
/* Reader/writer lock (rwlock_t, not a semaphore), initialized unlocked. */
static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
这里用到读写锁(rwlock_t, 注意它不是信号量):
在操纵链表的时候, 加上写锁的保护:
/* Lookup and insertion happen under the write side of the lock. */
write_lock(&file_systems_lock);
p = find_filesystem(fs->name); /* slot of the matching entry, or the NULL tail link */
if (*p)
res = -EBUSY; /* a type with this name is already registered */
else
*p = fs; /* append at the tail of the singly linked list */
write_unlock(&file_systems_lock);
/*
 * A trick the kernel uses often: the struct file_system_type objects
 * ramfs_fs_type and rootfs_fs_type are defined through the DECLARE_FSTYPE
 * macro, so their names never appear in a plain declaration.
 */
static DECLARE_FSTYPE(ramfs_fs_type, "ramfs", ramfs_read_super, FS_LITTER);
static DECLARE_FSTYPE(rootfs_fs_type, "rootfs", ramfs_read_super, FS_NOMOUNT|FS_LITTER);
/* Module init hook: registers ramfs_fs_type and returns register_filesystem()'s result unchanged. */
static int __init init_ramfs_fs(void)
{
return register_filesystem(&ramfs_fs_type);
}
在这种情况下, 在SourceInsight中, 无法找到ramfs_fs_type的定义, ^_^
/* Data structure describing one VFS mount point (one mounted filesystem instance). */
struct vfsmount
{
struct list_head mnt_hash;
struct vfsmount *mnt_parent; /* fs we are mounted on */
struct dentry *mnt_mountpoint; /* dentry of mountpoint */
struct dentry *mnt_root; /* root of the mounted tree */
struct super_block *mnt_sb; /* pointer to superblock */
struct list_head mnt_mounts; /* list of children, anchored here */
struct list_head mnt_child; /* and going through their mnt_child */
atomic_t mnt_count; /* NOTE(review): presumably the reference count dropped by mntput() — confirm */
int mnt_flags;
char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
struct list_head mnt_list;
};
众所周知,文件系统是Unix系统最基本的资源。最初的Unix系统一般都只支持一种单一类型的文件系统,在这种情况下,文件系统的结构深入到整个系统内核中。而现在的系统大多都在系统内核和文件系统之间提供一个标准的接口,这样不同文件结构之间的数据可以十分方便地交换。Linux也在系统内核和文件系统之间提供了一种叫做VFS(virtual file system)的标准接口。
其它VFS对象
系统上的每一进程都有自己的打开文件、根文件系统、当前工作目录、安装点等等。另外还有几个数据结构体将VFS层和进程紧密联系起来,它们分别是:files_struct 和 fs_struct。
files_struct结构体由进程描述符中的files项指向。进程打开的文件及其文件描述符的相关信息都包含在其中。第二个和进程相关的结构体是fs_struct。该结构由进程描述符的fs项指向。它包含文件系统和进程相关的信息。每种结构体的详细信息不在这里说明了。
---
这样,文件系统的代码就分成了两部分:上层用于处理系统内核的各种表格和数据结构;而下层用来实现文件系统本身的函数,并通过VFS来调用。这些函数主要包括:
* 管理缓冲区(buffer.c)。
* 响应系统调用fcntl() 和ioctl()(fcntl.c 和 ioctl.c)。
* 将管道和文件输入/输出映射到索引节点和缓冲区(fifo.c, pipe.c)。
* 对文件和记录加锁与解锁(locks.c)。
* 映射名字到索引节点(namei.c, open.c)。
* 实现select()函数(select.c)。
* 提供各种信息(stat.c)。
* 挂接和卸载文件系统(super.c)。
* 执行可执行代码和转储核心(exec.c)。
* 装入各种二进制格式(binfmt_*.c)。
VFS接口则由一系列相对高级的操作组成,这些操作由和文件系统无关的代码调用,并且由不同的文件系统执行。其中最主要的结构有inode_operations 和file_operations。file_system_type是系统内核中指向真正文件系统的结构。每挂接一次文件系统,都将使用file_system_type组成的数组。file_system_type组成的数组嵌入到了fs/filesystems.c中。相关文件系统的read_super函数负责填充super_block结构。(注: 在本文前面摘录的内核代码中, file_system_type 并不是数组, 而是通过 next 指针串成的单链表 file_systems。)
内存中关于地址的转换:
/* Original note: __PAGE_OFFSET is defined as (0xC0000000), i.e. the 3 GB mark. */
#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
#define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE)
/* Original note: "later: 128M" — presumably the value of __VMALLOC_RESERVE; confirm. */
#define __MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE)
#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE))
/* __pa subtracts PAGE_OFFSET from an address; __va adds it back and yields a pointer. */
#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
/* Index mem_map by the page frame number (__pa(kaddr) >> PAGE_SHIFT). The original note here just said "1024"; its meaning is not evident from this excerpt. */
#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT))
/* True when page's index within mem_map is below max_mapnr. */
#define VALID_PAGE(page) ((page - mem_map) < max_mapnr)
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
对齐的经典计算方法:
/*
 * Page-size alignment helpers (4 KiB pages).
 *
 * Every expansion is fully parenthesized so the macros stay correct inside
 * larger expressions. The earlier form `~(1<<12 - 1)` parsed as `~(1<<11)`
 * because binary minus binds tighter than <<.
 *
 * PAGE_SIZE             - bytes per page (2^PAGE_SHIFT).
 * PAGE_MASK             - mask with the low PAGE_SHIFT bits cleared; an
 *                         address ANDed with it is a multiple of PAGE_SIZE.
 * PAGE_ALIGN(addr)      - round addr UP to the next page boundary
 *                         (unchanged if already aligned).
 * PAGE_ALIGN_DOWN(addr) - round addr DOWN to the containing page boundary.
 *                         This used to be a second, conflicting definition
 *                         of PAGE_ALIGN; it now has its own name.
 */
#ifndef PAGE_SHIFT
#define PAGE_SHIFT 12
#endif
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
#define PAGE_ALIGN_DOWN(addr) ((addr) & PAGE_MASK)
----------------
另外相关的:
/* Alignment */
#define ROMFH_SIZE 16
#define ROMFH_PAD (ROMFH_SIZE-1)
#define ROMFH_MASK (~ROMFH_PAD) /* low 4 bits cleared */
/* ANDing with ROMFH_MASK forces the low 4 bits to 0, i.e. the result is a multiple of 16. */
还有一些关于bit 操作的:
/*
 * Device-number helpers: a device number keeps the minor in its low
 * MINORBITS bits and the major in the bits above them.
 */
#define MINORBITS 8
#define MINORMASK ((1U << MINORBITS) - 1) /* 0xff: the low 8 bits */
typedef unsigned short kdev_t;
#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) /* shift right by 8; any value in 0..255 yields major 0 */
#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) /* keep the low 8 bits (mask 255 = 11111111b) */
#define HASHDEV(dev) ((unsigned int) (dev))
#define NODEV 0
#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) /* inverse of MAJOR/MINOR */
#define B_FREE 0xffff /* yuk */
//从这里我们可以知道如何取低8bit的value ,和高8bit的value , 所以也就知道了如何 转换endian
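关于endian的补充:
对于应用程序, 一般不用考虑endian的问题(除了socket编程、读写二进制格式等场合), 但是在kernel和module中, 必须考虑。 如果spec中规定的是big-endian, 那么
数据存储在memory(或磁盘)中时, 必然是big-endian的字节序:
x86 是little-endian, 访问时必须做字节序转换;
big-endian 的 CPU 就不用转换了(注意: ARM 可配置大小端, 常见配置也是 little-endian, 那种情况下同样需要转换)。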
mount 一个文件系统时可以使用的标志值(flags):
/*
 * These are the fs-independent mount-flags: up to 32 flags are supported
 */
/* Each flag is a distinct single bit, so flags can be ORed together. */
#define MS_RDONLY 1 /* Mount read-only */
#define MS_NOSUID 2 /* Ignore suid and sgid bits */
#define MS_NODEV 4 /* Disallow access to device special files */
#define MS_NOEXEC 8 /* Disallow program execution */
#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
#define MS_REMOUNT 32 /* Alter flags of a mounted FS */
#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
#define MS_NOATIME 1024 /* Do not update access times. */
#define MS_NODIRATIME 2048 /* Do not update directory access times */
#define MS_BIND 4096
#define MS_MOVE 8192
#define MS_REC 16384
#define MS_VERBOSE 32768
#define MS_ACTIVE (1<<30)
/* NOTE(review): 1<<31 shifts into the sign bit of a 32-bit int; a (1U<<31) form would avoid relying on that — confirm before changing. */
#define MS_NOUSER (1<<31)
更有趣的,以后可借鉴的方式(比较字符串),
/* The basic structures of the romfs filesystem */
#define ROMBSIZE BLOCK_SIZE
#define ROMBSBITS BLOCK_SIZE_BITS
#define ROMBMASK (ROMBSIZE-1)
#define ROMFS_MAGIC 0x7275
#define ROMFS_MAXFN 128
/*
 * Build the two 32-bit magic words from characters so the signature can be
 * compared as integers instead of with a string compare:
 *   __mkw packs the low bytes of h and l into 16 bits (h in the high byte),
 *   __mkl packs two 16-bit halves into 32 bits (h in the high half),
 *   __mk4 packs four bytes a,b,c,d and passes the result through htonl().
 * The two words together spell "-rom1fs-".
 */
#define __mkw(h,l) (((h)&0x00ff)<< 8|((l)&0x00ff))
#define __mkl(h,l) (((h)&0xffff)<<16|((l)&0xffff))
#define __mk4(a,b,c,d) htonl(__mkl(__mkw(a,b),__mkw(c,d)))
#define ROMSB_WORD0 __mk4('-','r','o','m')
#define ROMSB_WORD1 __mk4('1','f','s','-')
创建一个新的super block
/**
 *	alloc_super	-	allocate and set up a fresh superblock
 *
 *	kmalloc()s a &struct super_block with GFP_USER, zeroes it, and
 *	initializes its list heads, semaphores and default operation tables.
 *	The new superblock is handed back with s_umount already taken for
 *	writing (down_write), s_count set to S_BIAS and s_active set to 1.
 *
 *	Returns the new superblock, or %NULL if the allocation failed.
 */
static struct super_block *alloc_super(void)
{
	static struct super_operations empty_sops = {};
	struct super_block *sb;

	sb = kmalloc(sizeof(*sb), GFP_USER);
	if (!sb)
		return NULL;

	memset(sb, 0, sizeof(*sb));

	/* list heads */
	INIT_LIST_HEAD(&sb->s_dirty);
	INIT_LIST_HEAD(&sb->s_locked_inodes);
	INIT_LIST_HEAD(&sb->s_files);
	INIT_LIST_HEAD(&sb->s_instances);

	/* locks and counters; s_umount is returned write-locked */
	init_rwsem(&sb->s_umount);
	sema_init(&sb->s_lock, 1);
	down_write(&sb->s_umount);
	sb->s_count = S_BIAS;
	atomic_set(&sb->s_active, 1);
	sema_init(&sb->s_vfs_rename_sem, 1);
	sema_init(&sb->s_nfsd_free_path_sem, 1);
	sema_init(&sb->s_dquot.dqio_sem, 1);
	sema_init(&sb->s_dquot.dqoff_sem, 1);

	/* defaults: empty super_operations until the filesystem sets its own */
	sb->s_maxbytes = MAX_NON_LFS;
	sb->s_op = &empty_sops;
	sb->dq_op = sb_dquot_ops;
	sb->s_qcop = sb_quotactl_ops;

	return sb;
}
通用的***********
关于如何初始化一个信号量:
/*
 * sema_init - initialize @sem at run time with an initial count of @val.
 *
 * Sets the atomic count to @val, clears the sleepers counter and
 * initializes the wait queue head; with WAITQUEUE_DEBUG the __magic field
 * is set to its own address.
 *
 * Original note: a semaphore is nothing special — wait_queue_head_t is
 * essentially one lock plus a list_head.
 */
static inline void sema_init (struct semaphore *sem, int val)
{
/*
 * *sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
 *
 * i'd rather use the more flexible initialization above, but sadly
 * GCC 2.7.2.3 emits a bogus warning. EGCS doesnt. Oh well.
 */
atomic_set(&sem->count, val);
sem->sleepers = 0; //the number of sleep processes
init_waitqueue_head(&sem->wait);
#if WAITQUEUE_DEBUG
sem->__magic = (int)&sem->__magic;
#endif
}
从这里可以看出, creat()系统调用相当于 open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
/*
 * For backward compatibility? Maybe this should be moved
 * into arch/i386 instead?
 */
/* sys_creat - thin wrapper: forwards to sys_open() with O_CREAT | O_WRONLY | O_TRUNC and the caller's @mode, returning its result. */
asmlinkage long sys_creat(const char * pathname, int mode)
{
return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
}
/* 小技巧 */
struct file *fd;
mm_segment_t fs;
fs = get_fs();
set_fs(KERNEL_DS);
if ((fd = filp_open( "/proc/miscio/speaker_enable", O_RDWR,0))){
fd->f_op->write(fd,"0",1,&fd->f_pos);
filp_close(fd,NULL);
}
set_fs(fs);
重要的文件 include/linux/fs.h
关于文件读写控制的。
#define FLOCK_VERIFY_READ 1
#define FLOCK_VERIFY_WRITE 2
要掌握sys_read() 与 sys_write()的方法和流程 。
关于信号量操作的
#ifndef _I386_SEMAPHORE_H
#define _I386_SEMAPHORE_H
#include <linux/linkage.h>
#ifdef __KERNEL__
/*
* SMP- and interrupt-safe semaphores..
*
* (C) Copyright 1996 Linus Torvalds
*
* Modified 1996-12-23 by Dave Grothe to fix bugs in
* the original code and to make semaphore waits
* interruptible so that processes waiting on
* semaphores can be killed.
* Modified 1999-02-14 by Andrea Arcangeli, split the sched.c helper
* functions in asm/sempahore-helper.h while fixing a
* potential and subtle race discovered by Ulrich Schmid
* in down_interruptible(). Since I started to play here I
* also implemented the `trylock' semaphore operation.
* 1999-07-02 Artur Skawina
* Optimized "0(ecx)" -> "(ecx)" (the assembler does not
* do this). Changed calling sequences from push/jmp to
* traditional call/ret.
* Modified 2001-01-01 Andreas Franck
* Some hacks to ensure compatibility with recent
* GCC snapshots, to avoid stack corruption when compiling
* with -fomit-frame-pointer. It's not sure if this will
* be fixed in GCC, as our previous implementation was a
* bit dubious.
*
* If you would like to see an analysis of this implementation, please
* ftp to gcom.com and download the file
* /pub/linux/src/semaphore/semaphore-2.0.24.tar.gz.
*
*/
#include <asm/system.h>
#include <asm/atomic.h>
#include <linux/wait.h>
#include <linux/rwsem.h>
/*
 * Counting semaphore.
 *
 * count    - the semaphore counter: down() decrements it and takes the
 *            slow path when the result is negative; up() increments it
 *            and wakes a waiter when the result is <= 0. (The original
 *            note called it "the number of processes in the wait queue",
 *            which does not match that usage.)
 * sleepers - auxiliary counter used together with count; zeroed by
 *            sema_init().
 * wait     - wait queue head for tasks blocked on the semaphore.
 *            Original note: wait_queue_head_t just holds two pointers —
 *            TODO confirm against <linux/wait.h>.
 * __magic  - debug-only self-check field (WAITQUEUE_DEBUG); initialized
 *            to its own address.
 */
struct semaphore {
atomic_t count;
int sleepers;
wait_queue_head_t wait;
#if WAITQUEUE_DEBUG
long __magic;
#endif
};
/*
 * Static initializers for struct semaphore.
 *
 * __SEM_DEBUG_INIT appends the initializer for __magic (the field's own
 * address) only when WAITQUEUE_DEBUG is enabled; otherwise it expands to
 * nothing.
 */
#if WAITQUEUE_DEBUG
# define __SEM_DEBUG_INIT(name) \
, (int)&(name).__magic
#else
# define __SEM_DEBUG_INIT(name)
#endif
/* Field order matches struct semaphore: count, sleepers (0), wait, [__magic]. */
#define __SEMAPHORE_INITIALIZER(name,count) \
{ ATOMIC_INIT(count), 0, __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \
__SEM_DEBUG_INIT(name) }
/* A mutex is a semaphore whose initial count is 1. */
#define __MUTEX_INITIALIZER(name) \
__SEMAPHORE_INITIALIZER(name,1)
/* Define a struct semaphore variable `name` with the given initial count. */
#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
/* DECLARE_MUTEX starts at count 1, DECLARE_MUTEX_LOCKED at count 0. */
#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0)
/*
 * sema_init - run-time counterpart of __SEMAPHORE_INITIALIZER: set the
 * count of @sem to @val, clear sleepers and initialize the wait queue
 * head. With WAITQUEUE_DEBUG, __magic is set to its own address.
 */
static inline void sema_init (struct semaphore *sem, int val)
{
/*
 * *sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
 *
 * i'd rather use the more flexible initialization above, but sadly
 * GCC 2.7.2.3 emits a bogus warning. EGCS doesnt. Oh well.
 */
atomic_set(&sem->count, val);
sem->sleepers = 0; //the number of sleep processes
init_waitqueue_head(&sem->wait);
#if WAITQUEUE_DEBUG
sem->__magic = (int)&sem->__magic;
#endif
}
/* init_MUTEX - initialize @sem as an unlocked mutex: sema_init() with a count of 1. */
static inline void init_MUTEX (struct semaphore *sem)
{
sema_init(sem, 1); /* exactly one unit of the resource is available */
}
/* init_MUTEX_LOCKED - initialize @sem as an already-locked mutex: sema_init() with a count of 0. */
static inline void init_MUTEX_LOCKED (struct semaphore *sem)
{
sema_init(sem, 0); /* nothing available until someone calls up() */
}
asmlinkage void __down_failed(void /* special register calling convention */);
asmlinkage int __down_failed_interruptible(void /* params in registers */);
asmlinkage int __down_failed_trylock(void /* params in registers */);
asmlinkage void __up_wakeup(void /* special register calling convention */);
asmlinkage void __down(struct semaphore * sem);
asmlinkage int __down_interruptible(struct semaphore * sem);
asmlinkage int __down_trylock(struct semaphore * sem);
asmlinkage void __up(struct semaphore * sem);
/*
 * This is ugly, but we want the default case to fall through.
 * "__down_failed" is a special asm handler that calls the C
 * routine that actually waits. See arch/i386/kernel/semaphore.c
 */
/*
 * down - acquire @sem, waiting if necessary (uninterruptible).
 *
 * Atomically decrements sem->count. If the result is negative ("js"), the
 * out-of-line stub calls __down_failed and then jumps back to label 1;
 * otherwise execution falls straight through. @sem is passed to the asm
 * through the "c" constraint.
 */
static inline void down(struct semaphore * sem)
{
#if WAITQUEUE_DEBUG
CHECK_MAGIC(sem->__magic);
#endif
__asm__ __volatile__(
"# atomic down operation\n\t"
LOCK "decl %0\n\t" /* --sem->count */
"js 2f\n"
"1:\n"
LOCK_SECTION_START("")
"2:\tcall __down_failed\n\t"
"jmp 1b\n"
LOCK_SECTION_END
:"=m" (sem->count)
:"c" (sem)
:"memory");
}
/*
 * Interruptible try to acquire a semaphore. If we obtained
 * it, return zero. If we were interrupted, returns -EINTR
 */
/*
 * Fast path: atomically decrement sem->count; if the result is not
 * negative, "xorl %0,%0" zeroes the result register and we return 0.
 * Slow path ("js"): call __down_failed_interruptible, whose return value
 * is left in the "=a" register and becomes our result.
 * (The original "//p" note presumably marks this as the classic
 * P/acquire operation.)
 */
static inline int down_interruptible(struct semaphore * sem)
{
int result;
#if WAITQUEUE_DEBUG
CHECK_MAGIC(sem->__magic);
#endif
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
LOCK "decl %1\n\t" /* --sem->count */
"js 2f\n\t"
"xorl %0,%0\n"
"1:\n"
LOCK_SECTION_START("")
"2:\tcall __down_failed_interruptible\n\t"
"jmp 1b\n"
LOCK_SECTION_END
:"=a" (result), "=m" (sem->count)
:"c" (sem)
:"memory");
return result;
}
/*
 * Non-blockingly attempt to down() a semaphore.
 * Returns zero if we acquired it
 */
/*
 * Same shape as down_interruptible(): decrement sem->count, return 0 on
 * the fast path, otherwise return whatever __down_failed_trylock leaves
 * in the "=a" register. The "# atomic interruptible down operation" text
 * inside the asm is only an assembler-level annotation carried over from
 * down_interruptible(); it does not affect behavior.
 */
static inline int down_trylock(struct semaphore * sem)
{
int result;
#if WAITQUEUE_DEBUG
CHECK_MAGIC(sem->__magic);
#endif
__asm__ __volatile__(
"# atomic interruptible down operation\n\t"
LOCK "decl %1\n\t" /* --sem->count */
"js 2f\n\t"
"xorl %0,%0\n"
"1:\n"
LOCK_SECTION_START("")
"2:\tcall __down_failed_trylock\n\t"
"jmp 1b\n"
LOCK_SECTION_END
:"=a" (result), "=m" (sem->count)
:"c" (sem)
:"memory");
return result;
}
/*
 * Note! This is subtle. We jump to wake people up only if
 * the semaphore was negative (== somebody was waiting on it).
 * The default case (no contention) will result in NO
 * jumps for both down() and up().
 */
/*
 * up - release @sem.
 *
 * Atomically increments sem->count. If the result is <= 0 ("jle"), the
 * out-of-line stub calls __up_wakeup and then jumps back to label 1;
 * otherwise execution falls straight through.
 */
static inline void up(struct semaphore * sem)
{
#if WAITQUEUE_DEBUG
CHECK_MAGIC(sem->__magic);
#endif
__asm__ __volatile__(
"# atomic up operation\n\t"
LOCK "incl %0\n\t" /* ++sem->count */
"jle 2f\n"
"1:\n"
LOCK_SECTION_START("")
"2:\tcall __up_wakeup\n\t"
"jmp 1b\n"
LOCK_SECTION_END
".subsection 0\n"
:"=m" (sem->count)
:"c" (sem)
:"memory");
}
/* sem_getcount - return the current value of @sem's counter, read with atomic_read(). */
static inline int sem_getcount(struct semaphore *sem)
{
return atomic_read(&sem->count);
}
#endif
#endif