文件系统 - 从路径名到目标节点2-thewayma-ChinaUnix博客

thewayma

首页　| 　博文目录　| 　关于我

thewayma

博客访问： 734025
博文数量： 183
博客积分： 2650
博客等级：少校
技术积分： 1428
用户组：普通用户
注册时间： 2008-11-22 17:02

文章分类

全部博文（183）

QEMU（9）
Xen（0）
虚拟化（5）
KVM（32）
系统优化（2）
嵌入式（5）
GPU架构（2）
硬件（4）
I2C总线（2）
构建嵌入式文件系（0）
MIPS架构研究（8）
Linux内核线程（2）
Linux 基础编程（2）
PCI总线（5）
Linux性能（3）
nsfocus产品（0）
Linux TCP/IP协议（6）
Linux内核初始化（5）
Linux文件系统（15）
Linux内核攻击（0）
内核同步（4）
Linux IPC（1）
Linux进程管理（2）
网络攻击（5）

IP分片攻击（3）
Linux rootkit（3）
Linux中断（1）
Linux设备驱动（19）

块设备（14）
Linux 内核重要算（7）

I/O调度分析（2）
GUN C（3）
算法设计（2）
Linux内存管理（24）
未分配的博文（5）

文章存档

2017年（1）

2015年（46）

2014年（4）

2013年（8）

2012年（2）

2011年（27）

2010年（35）

2009年（60）

我的朋友

相关博文

文件系统 - 从路径名到目标节点2

分类： LINUX

2010-01-28 17:15:18

Path_walk() -> cached_lookup() ./linux/fs/namei.c L243-258
/*
* Internal lookup() using the new generic dcache.
* SMP-safe
*/
static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
{
struct dentry * dentry = d_lookup(parent, name); //这才是真正的查找.

    if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { //重新验证函数是否存在.
        if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
           dput(dentry);
           dentry = NULL;
        }
    }
    return dentry;
}

Path_walk() -> cached_lookup() -> d_lookup() ./linux/fs/dcache.c L703 – 749
/**
* d_lookup - search for a dentry
* @parent: parent dentry
* @name: qstr of name we wish to find
*
* Searches the children of the parent dentry for the name in question. If
* the dentry is found its reference count is incremented and the dentry
* is returned. The caller must use d_put to free the entry when it has
* finished using it. %NULL is returned on failure.
*/

struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{
    unsigned int len = name->len;
    unsigned int hash = name->hash;
    const unsigned char *str = name->name;
    struct list_head *head = d_hash(parent,hash);
    struct list_head *tmp;

    spin_lock(&dcache_lock);
    tmp = head->next;
    for (;;) {
        struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
        // 这里是整个函数的重点. 首先, 在parent_hash队列中存储的是dentry结构中的d_hash对象指针, list_entry, 是通过d_hash指针及 d_hash在dentry中的位置来找出dentry的指针. Tmp是当前hash值所对应的所有denry的链表中的一个.
        if (tmp == head) //如果等于队头, 那说明将所有对应hash值的队列都循环一圈了.
           break;
        tmp = tmp->next; // 下一个对应hash值的d_hash指针.
        if (dentry->d_name.hash != hash) // 找到的dentry中的哈希值与给定的不一样.
           continue;
        if (dentry->d_parent != parent)   // 找到的dentry中的父节点与给定的不一致.
           continue;
        if (parent->d_op && parent->d_op->d_compare) { // 有自己的比较名字函数. 因为可有能的支持空格或其它的什么,
           if (parent->d_op->d_compare(parent, &dentry->d_name, name))
                continue;
        } else { // 没有自己的名字比较函数,
            if (dentry->d_name.len != len)
                continue;
            if (memcmp(dentry->d_name.name, str, len))
                continue;
        }
        __dget_locked(dentry);
        dentry->d_flags |= DCACHE_REFERENCED;
        spin_unlock(&dcache_lock);
        return dentry; // 都没有返回说明是找到了.
    }
    spin_unlock(&dcache_lock);
    return NULL; //跳出来的, 说明循环一圈没有找到.
}

/**
* list_entry - get the struct for this entry
* @ptr: the &struct list_head pointer.
* @type: the type of the struct this is embedded in.
* @member: the name of the list_struct within the struct.
*/
#define list_entry(ptr, type, member) \
((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
利用给定结构的某个成员的指针找到对应给定结构的首地址.

Path_walk()-> cached_lookup()->d_lookup()->d_hash() ./linux/fs/dcache.c L696 – 701
Path_walk()->real_lookup() ./linux/fs/namei.c L260-310
/*
* This is called when everything else fails, and we actually have
* to go to the low-level filesystem to find out what we should do..
*
* We get the directory semaphore, and after getting that we also
* make sure that nobody added the entry to the dcache in the meantime..
* SMP-safe
*/
static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
{
struct dentry * result;
struct inode *dir = parent->d_inode;

    down(&dir->i_sem); // 通过down进入临界区时可能会经历一段睡眠时间.
    /*
    * First re-do the cached lookup just in case it was created
    * while we waited for the directory semaphore..
    *
    * FIXME! This could use version numbering or similar to
    * avoid unnecessary cache lookups.
    */
    result = d_lookup(parent, name); //由于在上面donw 睡眠时, 对应节点dentry可能在内存中已经被其它进程建立好了. 所以这里再在内存中查找一遍.
    if (!result) { // 仍旧没有找到.
        struct dentry * dentry = d_alloc(parent, name); // 分配一个新的dentry;
        result = ERR_PTR(-ENOMEM);
        if (dentry) {
           lock_kernel();
           result = dir->i_op->lookup(dir, dentry); // 在物理设备在去查找, 用父节点的d_inode对象去查找.
           unlock_kernel();
           if (result)
                dput(dentry); // 找到了, 释放前面分配的.
           else
                result = dentry; // 没找到, ???
        }
        up(&dir->i_sem);
        return result;
    }

    /*
    * Uhhuh! Nasty case: the cache was re-populated while
    * we waited on the semaphore. Need to revalidate.
    */
    up(&dir->i_sem);
    if (result->d_op && result->d_op->d_revalidate) { // 重新验证.
        if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
           dput(result);
           result = ERR_PTR(-ENOENT);
        }
    }
    return result; //返回结果.
}

Path_walk()->real_lookup()->d_alloc() ./linux/fs/dcache.c L589-646
/**
* d_alloc - allocate a dcache entry
* @parent: parent of entry to allocate
* @name: qstr of the name
*
* Allocates a dentry. It returns %NULL if there is insufficient memory
* available. On a success the dentry is returned. The name passed in is
* copied and the copy passed in may be reused after this call.
*/

struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
{
char * str;
struct dentry *dentry;

    dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); // 从缓冲池中取出一个来.
    if (!dentry)
        return NULL;

    if (name->len > DNAME_INLINE_LEN-1) {
        str = kmalloc(NAME_ALLOC_LEN(name->len), GFP_KERNEL); // 分配名字所需空间.
        if (!str) {
           kmem_cache_free(dentry_cache, dentry);
           return NULL;
        }
    } else
        str = dentry->d_iname;

    memcpy(str, name->name, name->len); // 拷贝节点名称. 为啥还要拷贝上份, 全在内存空间,
    // 可能是怕传进来的名字指针在别的地方被释放. 所以这里才新分配一个新空间, 在本对象被释放时会释放这里分配的空间.
    str[name->len] = 0;

    atomic_set(&dentry->d_count, 1);
    dentry->d_flags = 0;
    dentry->d_inode = NULL;
    dentry->d_parent = NULL;
    dentry->d_sb = NULL;
    dentry->d_name.name = str;
    dentry->d_name.len = name->len;
    dentry->d_name.hash = name->hash;
    dentry->d_op = NULL;
    dentry->d_fsdata = NULL;
    INIT_LIST_HEAD(&dentry->d_vfsmnt);
    INIT_LIST_HEAD(&dentry->d_hash);
    INIT_LIST_HEAD(&dentry->d_lru);
    INIT_LIST_HEAD(&dentry->d_subdirs);
    INIT_LIST_HEAD(&dentry->d_alias);
    if (parent) {
        dentry->d_parent = dget(parent);
        dentry->d_sb = parent->d_sb;
        spin_lock(&dcache_lock);
        list_add(&dentry->d_child, &parent->d_subdirs); // 将自己加入到parent对象中的”子”队列中去.
        spin_unlock(&dcache_lock);
    } else
    INIT_LIST_HEAD(&dentry->d_child);

dentry_stat.nr_dentry++;
return dentry;
}

从磁盘上寻找的过程因文件系统而异, 所以要通过父节点inode 结构中的指针 i_op找到相应的inode_operations数据结构. 对于代表着目录的inode和代表文件的inode, 其inode_operations结构常常是不同的, 就Ext2而言, 对目录节点的函数跳转结构为ext2_dir_inode_operations, 定义在 ./linux/fs/ext2/namei.c;
/*
* directories can handle most operations...
*/
struct inode_operations ext2_dir_inode_operations = {
        create: ext2_create,
        lookup: ext2_lookup,
        link: ext2_link,
        unlink: ext2_unlink,
        symlink: ext2_symlink,
        mkdir: ext2_mkdir,
        rmdir: ext2_rmdir,
        mknod: ext2_mknod,
        rename: ext2_rename,
};
So Dentry->d_inode->i_op->lookup() 指向 ext2_lookup()

Path_walk()->real_lookup()->ext2_lookup()./linux/fs/ext2/namei.c; L163 - 184
static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry)
{
    struct inode * inode;
    struct ext2_dir_entry_2 * de;
    struct buffer_head * bh;

if (dentry->d_name.len > EXT2_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);

    bh = ext2_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &de);
    inode = NULL;
    if (bh) {
        unsigned long ino = le32_to_cpu(de->inode);
        brelse (bh);
        inode = iget(dir->i_sb, ino);

        if (!inode)
           return ERR_PTR(-EACCES);
    }
    d_add(dentry, inode);
    return NULL;
}

Path_walk()->real_lookup()->ext2_lookup()->ext2_find_entry()./linux/fs/ext2/namei.c; L52 - 161
/*
* ext2_find_entry()
*
* finds an entry in the specified directory with the wanted name. It
* returns the cache buffer in which the entry was found, and the entry
* itself (as a parameter - res_dir). It does NOT read the inode of the
* entry - you'll have to do that yourself if you want to.
*/
static struct buffer_head * ext2_find_entry (struct inode * dir,
          const char * const name, int namelen,
          struct ext2_dir_entry_2 ** res_dir)
{
    struct super_block * sb;
    struct buffer_head * bh_use[NAMEI_RA_SIZE];
    struct buffer_head * bh_read[NAMEI_RA_SIZE];
    unsigned long offset;
    int block, toread, i, err;

*res_dir = NULL;
sb = dir->i_sb;

if (namelen > EXT2_NAME_LEN)
return NULL;

    memset (bh_use, 0, sizeof (bh_use));
    toread = 0;
    for (block = 0; block < NAMEI_RA_SIZE; ++block) {
        struct buffer_head * bh;

        if ((block << EXT2_BLOCK_SIZE_BITS (sb)) >= dir->i_size)
           break;
        bh = ext2_getblk (dir, block, 0, &err); // 读入指定块. 暂时没有介绍具体代码.
        bh_use[block] = bh;
        if (bh && !buffer_uptodate(bh))
           bh_read[toread++] = bh;
    }

    for (block = 0, offset = 0; offset < dir->i_size; block++) { // 遍历刚读入的所有块.
        struct buffer_head * bh;
        struct ext2_dir_entry_2 * de;
        char * dlimit;

        if ((block % NAMEI_RA_BLOCKS) == 0 && toread) {
            ll_rw_block (READ, toread, bh_read);
            toread = 0;
        }
        bh = bh_use[block % NAMEI_RA_SIZE];
        if (!bh) {
#if 0
           ext2_error (sb, "ext2_find_entry",
                "directory #%lu contains a hole at offset %lu",
              dir->i_ino, offset);
#endif
          offset += sb->s_blocksize;
          continue;
      }
       wait_on_buffer (bh);
       if (!buffer_uptodate(bh)) {
          /*
           * read error: all bets are off
            */
           break;
      }

        de = (struct ext2_dir_entry_2 *) bh->b_data; // 每个块的开始都必定是个新结构对象的开始.
        dlimit = bh->b_data + sb->s_blocksize;
        while ((char *) de < dlimit) { // 在每个记录块中寻找inode.
          /* this code is executed quadratically often */
          /* do minimal checking `by hand' */
           int de_len;

          if ((char *) de + namelen <= dlimit &&
               ext2_match (namelen, name, de)) { // 比较当前目录项的名字.
            /* found a match -
               just to be sure, do a full check */
           if (!ext2_check_dir_entry("ext2_find_entry",
                 dir, de, bh, offset))
            goto failure;
            for (i = 0; i < NAMEI_RA_SIZE; ++i) {
                 if (bh_use[i] != bh)
                      brelse (bh_use[i]);
          }
            *res_dir = de; // 找到对应目录项.
            return bh;     // 返回结果.
          }
          /* prevent looping on a bad block */
          de_len = le16_to_cpu(de->rec_len); // 当前目录项长度.
          if (de_len <= 0)
              goto failure;
          offset += de_len;
          de = (struct ext2_dir_entry_2 *)
          ((char *) de + de_len); // 本块中下一个目录项.
       } // end while

        brelse (bh);
        if (((block + NAMEI_RA_SIZE) << EXT2_BLOCK_SIZE_BITS (sb)) >=
             dir->i_size)
        bh = NULL;
       else
        bh = ext2_getblk (dir, block + NAMEI_RA_SIZE, 0, &err);
        bh_use[block % NAMEI_RA_SIZE] = bh;
        if (bh && !buffer_uptodate(bh))
           bh_read[toread++] = bh;
       } // end for   下一个记录块.

    failure:
    for (i = 0; i < NAMEI_RA_SIZE; ++i)
        brelse (bh_use[i]);
    return NULL;
}
这个比较烦…

回到ext2_lookup的代码中, 下一步是根据查得的索引节点号通过iget找到或建立起所需的inode结构. 这里的iget()实际上是调用了iget4函数.
Path_walk()->real_lookup()->ext2_lookup()->iget() ./linux/include/fs.h L1185-1188
static inline struct inode *iget(struct super_block *sb, unsigned long ino)
{
return iget4(sb, ino, NULL, NULL);
}

Path_walk()->real_lookup()->ext2_lookup()->iget()->iget4() ./linux/fs/inode.c L774-794
struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque)
{
struct list_head * head = inode_hashtable + hash(sb,ino);
struct inode * inode;

    spin_lock(&inode_lock);
    inode = find_inode(sb, ino, head, find_actor, opaque); // 在内存中查找该node是否存在.
    if (inode) { // 找到了.
        __iget(inode);
        spin_unlock(&inode_lock);
        wait_on_inode(inode);
        return inode;
    }
    spin_unlock(&inode_lock);

/*
* get_new_inode() will do the right thing, re-trying the search
* in case it had to block at any point.
*/
return get_new_inode(sb, ino, head, find_actor, opaque); // 又到物理设备去查找, 不再追溯了.
}

找到了或者建立了所需的inode结构以后, 就返回到ext2_lookup(), 在那里还要通过 d_add() 将 inode 结构与dentry结构挂上钩, 并将dentry结构挂入杂凑表中的某个队列.
Path_walk()->real_lookup()->ext2_lookup()->d_add() ./linux/include/linux/dcache.h L191-204
/**
* d_add - add dentry to hash queues
* @entry: dentry to add
* @inode: The inode to attach to this dentry
*
* This adds the entry to the hash queues and initializes @inode.
* The entry was actually filled in earlier during d_alloc().
*/

static __inline__ void d_add(struct dentry * entry, struct inode * inode)
{
d_instantiate(entry, inode);
d_rehash(entry);
}

Path_walk()->real_lookup()->ext2_lookup()->d_add()->d_instantiate()   ./linux/fs/dcache.h L648-670
/**
* d_instantiate - fill in inode information for a dentry
* @entry: dentry to complete
* @inode: inode to attach to this dentry
*
* Fill in inode information in the entry.
*
* This turns negative dentries into productive full members
* of society.
*
* NOTE! This assumes that the inode count has been incremented
* (or otherwise set) by the caller to indicate that it is now
* in use by the dcache.
*/

void d_instantiate(struct dentry *entry, struct inode * inode)
{
    spin_lock(&dcache_lock);
    if (inode)
        list_add(&entry->d_alias, &inode->i_dentry); // 将entry结构中的d_alias队列头指针放入inode结构中的i_dentry队列中. 一个inode可通过i_dentry队列知道自己对应多少个dentry;
    entry->d_inode = inode; // 填充要返回的dentry.
    spin_unlock(&dcache_lock);
}

Path_walk()->real_lookup()->ext2_lookup()->d_add()->d_instantiate()   ./linux/fs/dcache.h L847-860
这个函数是将dentry结构挂入杂凑队列.
/**
* d_rehash - add an entry back to the hash
* @entry: dentry to add to the hash
*
* Adds a dentry to the hash according to its name.
*/

void d_rehash(struct dentry * entry)
{
    struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); // 找到对应的杂凑队列.
    spin_lock(&dcache_lock);
    list_add(&entry->d_hash, list); // 将自己挂入已找到的杂凑队列.
    spin_unlock(&dcache_lock);
}
在real_lookup()代码返回时, 已经找到或建立了所需的dentry结构, 接着就把回到path_walk中.

然而, 找到的节点是不是链接呢?
if ((lookup_flags & LOOKUP_FOLLOW)
      && inode && inode->i_op && inode->i_op->follow_link) {
   err = do_follow_link(dentry, nd);
   dput(dentry);
   if (err)
    goto return_err;
   inode = nd->dentry->d_inode;
}
如果是链接, 并且 inode->i_op->follow_link函数指针有值, 那么调用 do_follow_link函数.
Path_walk()->do_follow_link() ./linux/fs/namei.c L312-325

static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
{
    int err;
    if (current->link_count >= 8) // 在这里限定嵌套的层数..大于8层则不进行处理了.
        goto loop;
    current->link_count++;
    UPDATE_ATIME(dentry->d_inode);
    err = dentry->d_inode->i_op->follow_link(dentry, nd); // 真正的调用follow_link了.
    current->link_count--;
    return err;
loop:
    path_release(nd);
    return -ELOOP;
}

如果节点是链接的话, 对应节点的 d_inode->i_op应该被inode_operations 结构对象 ext2_fast_symlink_inode_operations赋值,
ext2_fast_symlink_inode_operations被定义为:
struct inode_operations ext2_fast_symlink_inode_operations {
readlink: ext2_readlink,
follow_link: ext2_follow_link,
};

Path_walk()->do_follow_link()->ext2_follow_link() ./linux/fs/ext2/symlink.c L29-33
static int ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
{
char *s = (char *)dentry->d_inode->u.ext2_i.i_data; // 找到本链接对应的真实结点路径.
return vfs_follow_link(nd, s); // 找真实结点的路径.
}
在函数 vfs_follow_link中直接调用了__vfs_follow_link函数. 在__vfs_follow_link函数中也是再用本链接指定的结点名再调用一遍path_walk函数.

阅读(1075) | 评论(0) | 转发(0) |

上一篇：文件系统 - 从路径名到目标节点1

下一篇：索引节点表及实例分析---ext2_read_inode

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6