Chinaunix首页 | 论坛 | 博客
  • 博客访问: 230325
  • 博文数量: 59
  • 博客积分: 1215
  • 博客等级: 少尉
  • 技术积分: 575
  • 用 户 组: 普通用户
  • 注册时间: 2011-11-09 02:18
文章分类

全部博文(59)

文章存档

2012年(53)

2011年(6)

分类: C/C++

2012-02-04 16:20:57

    昨晚分析了poll,通过代码的阅读可以发现,poll操作有很多可以优化的地方。epoll是eventpoll的简称,它的效率是非常高的,我们今天来看看它的实现。它的实现在fs/eventpoll.c,代码有1500多行,呵呵,怕了吧。
    大家都知道,epoll有三个系统调用,C库封装成以下三个:
  1. int epoll_create(int size);
  2. int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
  3. int epoll_wait(int epfd, struct epoll_event *events,int maxevents, int timeout);
epoll的源码这么多,我们就干脆跟着它们三个走着瞧。今天先搞定第一个---epoll_create。
 
第一个是
  1. /*
  2.  * Kernel part of the userspace epoll_create(2): it opens an eventpoll file
  3.  * descriptor. The "size" parameter is just a hint about how to size data
  4.  * structures; in this function it is only validated (> 0) and logged, so
  5.  * it won't prevent the user from storing more than "size" descriptors.
  6.  * Returns the new fd on success, or the error code of the failing step.
  7.  */
  8. asmlinkage long sys_epoll_create(int size)
  9. {
  10.     int error, fd;
  11.     struct inode *inode;
  12.     struct file *file;

  13.     DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
  14.          current, size));

  15.     /* Sanity check on the size parameter */
  16.     error = -EINVAL;
  17.     if (size <= 0)
  18.         goto eexit_1;

  19.     /*
  20.      * Creates all the items needed to setup an eventpoll file. That is,
  21.      * a file structure, an inode and a free file descriptor.
  22.      */
  23.     error = ep_getfd(&fd, &inode, &file);                  //(1)
  24.     if (error)
  25.         goto eexit_1;    /* ep_getfd() undoes its own partial work on failure */

  26.     /* Setup the file internal data structure ( "struct eventpoll" ) */
  27.     error = ep_file_init(file);                            //(2)
  28.     if (error)
  29.         goto eexit_2;


  30.     DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
  31.          current, size, fd));

  32.     return fd;

  33. eexit_2:
  34.     sys_close(fd);    /* ep_getfd() already installed the file on fd, so close it */
  35. eexit_1:
  36.     DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
  37.          current, size, error));
  38.     return error;
  39. }

(1)这里用到了一个ep_getfd函数,从注释我们知道,这个函数建立eventpoll相关的file,当然,一个file要包括文件描述符、inode、还有文件对象,这也是我们传的三个参数。废话不说,看源码:

  1. /*
  2.  * Creates the file, inode and descriptor used by the epoll interface; on success stores them in *efd, *einode, *efile and returns 0, otherwise returns an error code.
  3.  */
  4. static int ep_getfd(int *efd, struct inode **einode, struct file **efile)
  5. {
  6.     struct qstr this;
  7.     char name[32];
  8.     struct dentry *dentry;
  9.     struct inode *inode;
  10.     struct file *file;
  11.     int error, fd;

  12.     /* Get a ready to use file */
  13.     error = -ENFILE;
  14.     file = get_empty_filp();
  15.     if (!file)
  16.         goto eexit_1;

  17.     /* Allocates an inode from the eventpoll file system */
  18.     inode = ep_eventpoll_inode();
  19.     error = PTR_ERR(inode);    /* only used as the return value when IS_ERR() holds */
  20.     if (IS_ERR(inode))
  21.         goto eexit_2;

  22.     /* Allocates a free descriptor to plug the file onto */
  23.     error = get_unused_fd();
  24.     if (error < 0)
  25.         goto eexit_3;
  26.     fd = error;    /* a non-negative result is the descriptor itself */

  27.     /*
  28.      * Link the inode to a directory entry by creating a unique name
  29.      * using the inode number.
  30.      */
  31.     error = -ENOMEM;    /* returned if d_alloc() fails below */
  32.     sprintf(name, "[%lu]", inode->i_ino);
  33.     this.name = name;
  34.     this.len = strlen(name);
  35.     this.hash = inode->i_ino;    /* the inode number doubles as the name hash */
  36.     dentry = d_alloc(eventpoll_mnt->mnt_sb->s_root, &this);
  37.     if (!dentry)
  38.         goto eexit_4;
  39.     dentry->d_op = &eventpollfs_dentry_operations;
  40.     d_add(dentry, inode);
  41.     file->f_vfsmnt = mntget(eventpoll_mnt);
  42.     file->f_dentry = dentry;
  43.     file->f_mapping = inode->i_mapping;

  44.     file->f_pos = 0;
  45.     file->f_flags = O_RDONLY;
  46.     file->f_op = &eventpoll_fops;
  47.     file->f_mode = FMODE_READ;
  48.     file->f_version = 0;
  49.     file->private_data = NULL;    /* set later by ep_file_init() */

  50.     /* Install the new setup file into the allocated fd. */
  51.     fd_install(fd, file);

  52.     *efd = fd;
  53.     *einode = inode;
  54.     *efile = file;
  55.     return 0;

  /* Error unwinding: each label releases one resource, in reverse order of acquisition. */
  56. eexit_4:
  57.     put_unused_fd(fd);
  58. eexit_3:
  59.     iput(inode);
  60. eexit_2:
  61.     put_filp(file);
  62. eexit_1:
  63.     return error;
  64. }

这个函数的注释都比较全,这里简单提一下,况且因为涉及到的函数太多,要深究起来涉及的知识太多,也不可能逐一去列代码。不过这个函数个人觉得比较经典,这函数就是创建一个文件的流程。

首先,我们得拿到一个file结构体,通过内核分配给我们;然后我们要拿到inode,调用这个ep_eventpoll_inode()就可以了;接着是get_unused_fd()拿到文件描述符;接着d_alloc()函数为我们拿到一个dentry;d_add(dentry, inode)函数把dentry加入hash表并且绑定inode;后面是继续填充文件对象file;fd_install(fd, file)向进程注册文件,并通过这样的方式把文件描述符和文件对象关联起来。

(2)在跟踪ep_file_init函数之前,我们先来看一下eventpoll结构体:

  1. /*
  2.  * This structure is stored inside the "private_data" member of the file
  3.  * structure and represents the main data structure for the eventpoll
  4.  * interface.
  5.  */
  6. struct eventpoll {
  7.     /* Protects access to this structure */
  8.     rwlock_t lock;

  9.     /*
  10.      * This semaphore is used to ensure that files are not removed
  11.      * while epoll is using them. This is read-held during the event
  12.      * collection loop and it is write-held during the file cleanup
  13.      * path, the epoll file exit code and the ctl operations.
  14.      */
  15.     struct rw_semaphore sem;

  16.     /* Wait queue used by sys_epoll_wait() */
  17.     wait_queue_head_t wq;

  18.     /* Wait queue used by file->poll() */
  19.     wait_queue_head_t poll_wait;

  20.     /* List of ready file descriptors */
  21.     struct list_head rdllist;

  22.     /* RB-Tree root used to store monitored fd structs */
  23.     struct rb_root rbr;
  24. };

注释也是相当清楚。可以看得出来,这个eventpoll是epoll的核心,它将会存储你想要监听的文件描述符(保存在红黑树rbr中),这也是epoll高效之所在。

好,我们回到sys_epoll_create函数,开始跟踪ep_file_init函数:

  1. static int ep_file_init(struct file *file)    /* allocates and initializes the struct eventpoll of "file"; returns 0 or -ENOMEM */
  2. {
  3.     struct eventpoll *ep;

  4.     if (!(ep = kmalloc(sizeof(struct eventpoll), GFP_KERNEL)))
  5.         return -ENOMEM;    /* nothing has been attached to the file yet */

  6.     memset(ep, 0, sizeof(*ep));    /* start from an all-zero structure */
  7.     rwlock_init(&ep->lock);
  8.     init_rwsem(&ep->sem);
  9.     init_waitqueue_head(&ep->wq);
  10.     init_waitqueue_head(&ep->poll_wait);
  11.     INIT_LIST_HEAD(&ep->rdllist);    /* ready list starts out empty */
  12.     ep->rbr = RB_ROOT;    /* empty tree of monitored descriptors */

  13.     file->private_data = ep;    /* the eventpoll is reached through the file from now on */

  14.     DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_file_init() ep=%p\n",
  15.          current, ep));
  16.     return 0;
  17. }

其实也就是eventpoll结构体的初始化。

sys_epoll_create函数大概就这样了,明天接着看sys_epoll_ctl。

阅读(1586) | 评论(0) | 转发(3) |
给主人留下些什么吧!~~