Chinaunix首页 | 论坛 | 博客
  • 博客访问: 67700
  • 博文数量: 25
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 10
  • 用 户 组: 普通用户
  • 注册时间: 2013-04-25 16:11
个人简介

生命是个奇迹,我用时间来证明!

文章分类
文章存档

2013年(25)

我的朋友

分类: LINUX

2013-05-31 17:48:56

原文地址:link_path_walk()路径名查找 作者:tq08g2z

link_path_walk()路径名查找

link_path_walk()函数接收的参数为要解析的路径名指针name,以及拥有目录项信息和安装文件系统信息的nameidata数据结构的地址nd;此时nd的path字段存放的是查找的路径名的基目录的路径。其定义如下:

---------------------------------------------------------------------

fs/namei.c

/*

 * Name resolution.

 * This is the basic name resolution function, turning a pathname into

 * the final dentry. We expect 'base' to be positive and a directory.

 *

 * Returns 0 and nd will have valid dentry and mnt on success.

 * Returns error and drops reference to input namei data on failure.

 */

814 static int link_path_walk(const char *name, struct nameidata *nd)

815 {

816         struct path next;

817         struct inode *inode;

818         int err;

819         unsigned int lookup_flags = nd->flags;

820        

821         while (*name=='/')

822                 name++;

823         if (!*name)

824                 goto return_reval;

825

826         inode = nd->path.dentry->d_inode;

827         if (nd->depth)

828                 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);

829

830         /* At this point we know we have a real path component. */

831         for(;;) {

832                 unsigned long hash;

833                 struct qstr this;

834                 unsigned int c;

835

836                 nd->flags |= LOOKUP_CONTINUE;

837                 err = exec_permission(inode);

838                 if (err)

839                         break;

840

841                 this.name = name;

842                 c = *(const unsigned char *)name;

843

844                 hash = init_name_hash();

845                 do {

846                         name++;

847                         hash = partial_name_hash(c, hash);

848                         c = *(const unsigned char *)name;

849                 } while (c && (c != '/'));

850                 this.len = name - (const char *) this.name;

851                 this.hash = end_name_hash(hash);

852

853                 /* remove trailing slashes? */

854                 if (!c)

855                         goto last_component;

856                 while (*++name == '/');

857                 if (!*name)

858                         goto last_with_slashes;

859

860                 /*

861                  * "." and ".." are special - ".." especially so because it has

862                  * to be able to know about the current root directory and

863                  * parent relationships.

864                  */

865                 if (this.name[0] == '.') switch (this.len) {

866                         default:

867                                 break;

868                         case 2:

869                                 if (this.name[1] != '.')

870                                         break;

871                                 follow_dotdot(nd);

872                                 inode = nd->path.dentry->d_inode;

873                                 /* fallthrough */

874                         case 1:

875                                 continue;

876                 }

877                 /* This does the actual lookups.. */

878                 err = do_lookup(nd, &this, &next);

879                 if (err)

880                         break;

881

882                 err = -ENOENT;

883                 inode = next.dentry->d_inode;

884                 if (!inode)

885                         goto out_dput;

886

887                 if (inode->i_op->follow_link) {

888                         err = do_follow_link(&next, nd);

889                         if (err)

890                                 goto return_err;

891                         err = -ENOENT;

892                         inode = nd->path.dentry->d_inode;

893                         if (!inode)

894                                 break;

895                 } else

896                         path_to_nameidata(&next, nd);

897                 err = -ENOTDIR;

898                 if (!inode->i_op->lookup)

899                         break;

900                 continue;

901                 /* here ends the main loop */

902

903 last_with_slashes:

904                 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;

905 last_component:

906                 /* Clear LOOKUP_CONTINUE iff it was previously unset */

907                 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;

908                 if (lookup_flags & LOOKUP_PARENT)

909                         goto lookup_parent;

910                 if (this.name[0] == '.') switch (this.len) {

911                         default:

912                                 break;

913                         case 2:

914                                 if (this.name[1] != '.')

915                                         break;

916                                 follow_dotdot(nd);

917                                 inode = nd->path.dentry->d_inode;

918                                 /* fallthrough */

919                         case 1:

920                                 goto return_reval;

921                 }

922                 err = do_lookup(nd, &this, &next);

923                 if (err)

924                         break;

925                 inode = next.dentry->d_inode;

926                 if (follow_on_final(inode, lookup_flags)) {

927                         err = do_follow_link(&next, nd);

928                         if (err)

929                                 goto return_err;

930                         inode = nd->path.dentry->d_inode;

931                 } else

932                         path_to_nameidata(&next, nd);

933                 err = -ENOENT;

934                 if (!inode)

935                         break;

936                 if (lookup_flags & LOOKUP_DIRECTORY) {

937                         err = -ENOTDIR;

938                         if (!inode->i_op->lookup)

939                                 break;

940                 }

941                 goto return_base;

942 lookup_parent:

943                 nd->last = this;

944                 nd->last_type = LAST_NORM;

945                 if (this.name[0] != '.')

946                         goto return_base;

947                 if (this.len == 1)

948                         nd->last_type = LAST_DOT;

949                 else if (this.len == 2 && this.name[1] == '.')

950                         nd->last_type = LAST_DOTDOT;

951                 else

952                         goto return_base;

953 return_reval:

954                 /*

955                  * We bypassed the ordinary revalidation routines.

956                * We may need to check the cached dentry for staleness.

957                  */

958                 if (nd->path.dentry && nd->path.dentry->d_sb &&

959                     (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {

960                         err = -ESTALE;

961                         /* Note: we do not d_invalidate() */

962                         if (!nd->path.dentry->d_op->d_revalidate(

963                                         nd->path.dentry, nd))

964                                 break;

965                 }

966 return_base:

967                 return 0;

968 out_dput:

969                 path_put_conditional(&next, nd);

970                 break;

971         }

972         path_put(&nd->path);

973 return_err:

974         return err;

975 }

---------------------------------------------------------------------

这是一个非常长的函数。link_path_walk()执行下列步骤:

1、用nd->flags初始化lookup_flags局部变量(819行)。

 

2、跳过路径名第一个分量前的任何斜杠(/)(821行)。

 

3、如果剩余的路径名为空,则返回0。没有改变nameidata结构数据,nd->path中存放将要查找的路径名的基路径(823行)。

 

4、把将要查找的路径名的基路径的inode地址存放在局部变量inode中,即初始化最近一个所解析分量的索引节点对象的地址为将要查找的路径名的基路径的inode地址(826行)。

 

5、如果nd描述符中的depth字段(即符号链接嵌套的当前级别)的值为正(大于0),则把lookup_flags局部变量置为LOOKUP_FOLLOW标志,并保留nd->flags中原有的LOOKUP_CONTINUE标志(这个跟符号链接查找相关)(827-828行)。

 

6、执行一个循环,把name参数中传递的路径名分解为分量(中间的“/”被当做文件名分隔符对待)(831行);对于每个找到的分量,该函数:

a.设置nd->flags中的LOOKUP_CONTINUE标志(836行)。

 

b. 执行exec_permission(inode)函数检查存放到索引节点中的最近那个所解析分量的许可权是否允许执行(在Unix中,只有目录是可执行的,它才可以被遍历)(837行)。exec_permission()函数定义如下:

---------------------------------------------------------------------

fs/namei.c

463 static int exec_permission(struct inode *inode)

464 {

465         int ret;

466

467         if (inode->i_op->permission) {

468                 ret = inode->i_op->permission(inode, MAY_EXEC);

469                 if (!ret)

470                         goto ok;

471                 return ret;

472         }

473         ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl);

474         if (!ret)

475                 goto ok;

476

477         if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))

478                 goto ok;

479

480         return ret;

481 ok:

482         return security_inode_permission(inode, MAY_EXEC);

483 }

---------------------------------------------------------------------

如果文件系统提供了inode->i_op->permission方法,则exec_permission()调用该例程执行EXEC权限检查,如果不允许执行则返回错误码;若允许,则调用security_inode_permission(),使用LSM的security_ops->inode_permission()方法来执行权限检查,并返回该方法的返回值。

如果inode->i_op->permission方法不存在,则调用acl_permission_check()执行基本的POSIX ACL权限检查,若通过检查,则调用security_inode_permission(),使用LSM的security_ops->inode_permission()方法来执行权限检查,并返回该方法的返回值。

若不通过,则执行权能检查(CAP_DAC_OVERRIDE或CAP_DAC_READ_SEARCH),若同样不允许,则返回错误码。若允许,则调用security_inode_permission(),使用LSM的security_ops->inode_permission()方法来执行权限检查,并返回该方法的返回值。

如果最近所解析分量不允许执行,那么link_path_walk()跳出循环并返回一个错误码。

 

c. 考虑要解析的下一个分量(841-851行)。从它的名字,函数为目录项高速缓存散列表计算一个32位的散列值。

注意,这里用到了目录项名字数据结构qstr:

---------------------------------------------------------------------

include/linux/dcache.h

33 struct qstr {

34         unsigned int hash;

35         unsigned int len;

36         const unsigned char *name;

37 };

---------------------------------------------------------------------

当前要解析的路径分量存放到了类型为qstr结构的this局部变量中。

 

散列表的32位散列值如下计算:

---------------------------------------------------------------------

include/linux/dcache.h

50 #define init_name_hash()      0

51

52 /* partial hash update function. Assume roughly 4 bits per character */

53 static inline unsigned long

54 partial_name_hash(unsigned long c, unsigned long prevhash)

55 {

56         return (prevhash + (c << 4) + (c >> 4)) * 11;

57 }

 

63 static inline unsigned long end_name_hash(unsigned long hash)

64 {

65         return (unsigned int) hash;

66 }

---------------------------------------------------------------------

d. 如果要解析的分量是原路径名中的最后一个分量,则跳到last_component标号处去执行。后面“link_path_walk()对于路径名最后一个分量的处理”部分会有更详细的说明。

 

e. 如果“/”终止了要解析的分量名,则跳过“/”之后的任何尾部“/”。多么强大的处理路径名的能力啊,也就是说路径名中两个目录之间是可以插入多个“/”的。这一步为解析下一个分量做准备。而如果在一连串的“/”之后没有内容了,则跳转到标号last_with_slashes处执行。这是最后一个分量的特殊情况,也就是它必须是一个目录。同样在后面“link_path_walk()对于路径名最后一个分量的处理”部分说明。

 

f. 如果分量名是一个“.”(单个圆点),则继续下一个分量(“.”指的是当前目录,因此,这个点在目录内没有什么效果)(874行)。

 

g.如果分量名是“..”(两个圆点),则尝试回到父目录(871行)。这里面有个重要的follow_dotdot(nd)函数:

---------------------------------------------------------------------

fs/namei.c

670 static __always_inline void follow_dotdot(struct nameidata *nd)

671 {

672         set_root(nd);

673

674         while(1) {

675                 struct dentry *old = nd->path.dentry;

676

677                 if (nd->path.dentry == nd->root.dentry &&

678                     nd->path.mnt == nd->root.mnt) {

679                         break;

680                 }

681                 if (nd->path.dentry != nd->path.mnt->mnt_root) {

682                   /* rare case of legitimate dget_parent()... */

683                   nd->path.dentry = dget_parent(nd->path.dentry);

684                   dput(old);

685                   break;

686                 }

687                 if (!follow_up(&nd->path))

688                         break;

689         }

690         follow_mount(&nd->path);

691 }

---------------------------------------------------------------------

(1)、首先,调用set_root(nd)设置nd的root字段为当前进程的根路径。

 

(2)、如果最近解析的目录是进程的根目录(nd->path.dentry等于nd->root.dentry,而nd->path.mnt等于nd->root.mnt),那么再向上追踪是不允许的:在最近解析的分量上调用follow_mount()(见下面),继续下一个分量。

 

(3)、如果最近解析的目录不是nd->path.mnt文件系统的根目录(nd->path.dentry不等于nd->path.mnt->mnt_root。如果当前节点dentry不等于当前节点vfsmount对象的根设备的dentry,说明当前节点不是作为根节点被mount到其它设备上去的。在这里再来看vfsmount对象的mnt_mountpoint字段,它指向它挂载的目录的目录项,也就是原来的目录文件的信息),那么必须回到父目录:把nd->path.dentry置为其父目录的目录项,其实也就是nd->path.dentry->d_parent;然后在父目录上调用follow_mount(&nd->path)(见下面),继续下一个分量。

 

(4)、如果最近解析的目录是nd->path.mnt文件系统的根目录,则调用函数follow_up(&nd->path)来处理,这个函数定义如下:

---------------------------------------------------------------------

fs/namei.c

599 int follow_up(struct path *path)

600 {

601         struct vfsmount *parent;

602         struct dentry *mountpoint;

603         spin_lock(&vfsmount_lock);

604         parent = path->mnt->mnt_parent;

605         if (parent == path->mnt) {

606                 spin_unlock(&vfsmount_lock);

607                 return 0;

608         }

609         mntget(parent);

610         mountpoint = dget(path->mnt->mnt_mountpoint);

611         spin_unlock(&vfsmount_lock);

612         dput(path->dentry);

613         path->dentry = mountpoint;

614         mntput(path->mnt);

615         path->mnt = parent;

616         return 1;

617 }

---------------------------------------------------------------------

如果这个文件系统没有被安装在其他文件系统之上(path->mnt->mnt_parent等于path->mnt),那么 path->mnt文件系统通常就是进程命名空间的根文件系统:在这种情况下,再向上追踪是不可能的,因此在最近解析的分量上调用 follow_mount()(参见下面),继续下一个分量。(这种情况是不应该出现的,或者说这种情况应该是在follow_dotdot的步骤(2)中就已经检测出来的)。

 

如果这个文件系统被安装在其他文件系统之上,那么就需要文件系统交换。因此,把path->dentry置为path->mnt->mnt_mountpoint,且把path->mnt置为path->mnt->mnt_parent,然后重新开始第6g步(几个文件系统可以挂载在同一个挂载点上,在挂载的时候,原来的那个目录文件的vfsmount对象和目录项信息被保存在新的vfsmount对象的mnt_parent和mnt_mountpoint字段中)。

 

最后来看follow_mount()。follow_mount()定义如下:

---------------------------------------------------------------------

fs/namei.c

639 static void follow_mount(struct path *path)

640 {

641         while (d_mountpoint(path->dentry)) {

642                 struct vfsmount *mounted = lookup_mnt(path);

643                 if (!mounted)

644                         break;

645                 dput(path->dentry);

646                 mntput(path->mnt);

647                 path->mnt = mounted;

648                 path->dentry = dget(mounted->mnt_root);

649         }

650 }

---------------------------------------------------------------------

follow_mount()函数检查path->dentry是否是某文件系统的挂载点(path->dentry->d_mounted的值大于0),如果不是,则直接退出。如果是,则调用lookup_mnt(),它的定义如下:

---------------------------------------------------------------------

fs/namespace.c

57 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)

58 {

59         unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);

60         tmp += ((unsigned long)dentry / L1_CACHE_BYTES);

61         tmp = tmp + (tmp >> HASH_SHIFT);

62         return tmp & (HASH_SIZE - 1);

63 }

 

414 struct vfsmount *__lookup_mnt(struct vfsmount *mnt,

415                                  struct dentry *dentry, int dir)

416 {

417        struct list_head *head = mount_hashtable + hash(mnt, dentry);

418        struct list_head *tmp = head;

419        struct vfsmount *p, *found = NULL;

420

421        for (;;) {

422          tmp = dir ? tmp->next : tmp->prev;

423          p = NULL;

424          if (tmp == head)

425              break;

426          p = list_entry(tmp, struct vfsmount, mnt_hash);

427          if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {

428                  found = p;

429                  break;

430          }

431       }

432       return found;

433 }

 

439 struct vfsmount *lookup_mnt(struct path *path)

440 {

441         struct vfsmount *child_mnt;

442         spin_lock(&vfsmount_lock);

443         if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))

444                 mntget(child_mnt);

445         spin_unlock(&vfsmount_lock);

446         return child_mnt;

447 }

---------------------------------------------------------------------

对于一个vfsmount来说,哈希值是根据其父vfsmount对象的地址和挂载点地址来计算的。

 

follow_mount()函数就是要找到挂载在本路径上的文件系统,即vfsmount对象的地址和目录项对象地址。

 

h.如果分量名既不是“.”,也不是“..”,则调用do_lookup(nd, &this, &next)(878行),得到与给定的父目录(nd->path)和文件名(要解析的路径名分量&this)相关的目录项对象,存放在结果参数next中。这个函数完成实际的查找,是link_path_walk()函数的核心。后面会有更详细的说明。

 

i.检查刚解析的分量是否指向一个符号链接(next.dentry->d_inode具有一个i_op->follow_link方法)。将在后面“符号链接的查找”有更详细的说明。如果是则调用do_follow_link(&next, nd)做相应的处理。

 

j.如果刚解析的分量不是指向一个符号链接,则调用path_to_nameidata(&next, nd),把nd->path.dentry和nd->path.mnt分别置为next.dentry和next.mnt,然后继续路径名的下一个分量:

---------------------------------------------------------------------

fs/namei.c

523 static inline void path_to_nameidata(struct path *path, struct nameidata *nd)

524 {

525         dput(nd->path.dentry);

526         if (nd->path.mnt != path->mnt)

527                 mntput(nd->path.mnt);

528         nd->path.mnt = path->mnt;

529         nd->path.dentry = path->dentry;

530 }

---------------------------------------------------------------------

 

k. 检查刚解析的分量是否指向一个目录(其inode具有一个自定义的i_op->lookup方法)。如果没有,则跳出循环并返回错误码-ENOTDIR(因为这个分量位于原路径名的中间,必须是一个目录);否则continue继续路径名的下一个分量。主要的循环到此结束。

 

7、如果因出错而跳出循环,则减少对nd->path的引用计数(972行)并返回错误码。

阅读(2023) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~