Chinaunix首页 | 论坛 | 博客
  • 博客访问: 356938
  • 博文数量: 158
  • 博客积分: 52
  • 博客等级: 民兵
  • 技术积分: 613
  • 用 户 组: 普通用户
  • 注册时间: 2011-10-27 11:58
文章分类

全部博文(158)

文章存档

2017年(1)

2016年(5)

2015年(19)

2014年(8)

2013年(13)

2012年(80)

2011年(32)

分类:

2012-02-28 00:12:39

    好久没继续写了,今天来完成最后一个epoll系统调用epoll_wait的分析,这也是我在epoll上花时间最多的那一部分。
好~先来看epoll_wait的代码:
  1. /*
  2. * Implement the event wait interface for the eventpoll file. It is the kernel
  3. * part of the user space epoll_wait(2).
     *
     * epfd is the descriptor looked up with fget(), events is the user-space
     * buffer checked for writability, maxevents must be greater than zero, and
     * timeout is passed on to ep_poll().
     *
     * Returns the result of ep_poll() on the normal path. Fails with -EINVAL
     * when maxevents is not positive or the file behind epfd is not an
     * eventpoll file, with -EBADF when fget() yields no file, or with the
     * non-zero value returned by verify_area().
  4. */
  5. asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
  6. int maxevents, int timeout)
  7. {
  8. int error;
  9. struct file *file;
  10. struct eventpoll *ep;

  11. DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n",
  12. current, epfd, events, maxevents, timeout));

  13. /* The maximum number of event must be greater than zero */
      /* This early return skips the exit labels, so the closing DNPRINTK is not reached. */
  14. if (maxevents <= 0)
  15. return -EINVAL;

  16. /* Verify that the area passed by the user is writeable */
      /*
       * NOTE(review): maxevents has only been checked to be positive, so the
       * size computed here has no visible upper bound -- confirm that a very
       * large maxevents cannot wrap the multiplication.
       */
  17. if ((error = verify_area(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))))                                                              (1)
  18. goto eexit_1;

  19. /* Get the "struct file *" for the eventpoll file */
      /* error is preset so that a failed lookup leaves through eexit_1 with -EBADF. */
  20. error = -EBADF;
  21. file = fget(epfd);
  22. if (!file)
  23. goto eexit_1;

  24. /*
  25. * We have to check that the file structure underneath the fd
  26. * the user passed to us _is_ an eventpoll file.
  27. */
  28. error = -EINVAL;
  29. if (!IS_FILE_EPOLL(file))
  30. goto eexit_2;

  31. /*
  32. * At this point it is safe to assume that the "private_data" contains
  33. * our own data structure.
  34. */
  35. ep = file->private_data;

  36. /* Time to fish for events ... */
      /* Whatever ep_poll() returns becomes the return value of this call. */
  37. error = ep_poll(ep, events, maxevents, timeout);                            (2)

      /* Reached on every path where fget() succeeded: fput() the file it returned. */
  38. eexit_2:
  39. fput(file);
      /* Failures that happened before a file was obtained jump straight here. */
  40. eexit_1:
  41. DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n",
  42. current, epfd, events, maxevents, timeout, error));

  43. return error;
  44. }

这个函数前面都是参数和文件的检查,真正的关键在于最后调用的ep_poll函数:

  /*
   * Wait for events on ep and try to copy them to the user buffer.
   *
   * ep is the eventpoll instance whose rdllist and wq are used; events and
   * maxevents are passed through to ep_events_transfer(); timeout is in
   * milliseconds, with -1 meaning "infinite".
   *
   * Returns -EINTR if a signal is pending while waiting, otherwise the value
   * of ep_events_transfer() when it was called, or 0 when no event was
   * available before the timeout ran out.
   */
  1. static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
  2.          int maxevents, long timeout)
  3. {
  4.     int res, eavail;
  5.     unsigned long flags;
  6.     long jtimeout;
  7.     wait_queue_t wait;

  8.     /*
  9.      * Calculate the timeout by checking for the "infinite" value ( -1 )
  10.      * and the overflow condition. The passed timeout is in milliseconds,
  11.      * that why (t * HZ) / 1000.
  12.      */
         /* The "+ 999" rounds the converted value up instead of truncating it. */
  13.     jtimeout = timeout == -1 || timeout > (MAX_SCHEDULE_TIMEOUT - 1000) / HZ ?
  14.         MAX_SCHEDULE_TIMEOUT: (timeout * HZ + 999) / 1000;

  15. retry:
  16.     write_lock_irqsave(&ep->lock, flags);

  17.     res = 0;
  18.     if (list_empty(&ep->rdllist)) {
  19.         /*
  20.          * We don't have any available event to return to the caller.
  21.          * We need to sleep here, and we will be wake up by
  22.          * ep_poll_callback() when events will become available.
  23.          */
             /* (1) set up a wait queue entry for the current task; (2) add it to ep->wq. */
  24.         init_waitqueue_entry(&wait, current);                                (1)
  25.         add_wait_queue(&ep->wq, &wait);                                      (2)

             /*
              * (3) Loop until rdllist is non-empty, jtimeout reaches zero, or a
              * signal is pending (res = -EINTR).
              */
  26.         for (;;) {                                                           (3)
  27.             /*
  28.              * We don't want to sleep if the ep_poll_callback() sends us
  29.              * a wakeup in between. That's why we set the task state
  30.              * to TASK_INTERRUPTIBLE before doing the checks.
  31.              */
  32.             set_current_state(TASK_INTERRUPTIBLE);
  33.             if (!list_empty(&ep->rdllist) || !jtimeout)
  34.                 break;
  35.             if (signal_pending(current)) {
  36.                 res = -EINTR;
  37.                 break;
  38.             }

                 /*
                  * ep->lock is dropped around the sleep and re-taken before the
                  * conditions are checked again; the value returned by
                  * schedule_timeout() replaces jtimeout.
                  */
  39.             write_unlock_irqrestore(&ep->lock, flags);
  40.             jtimeout = schedule_timeout(jtimeout);
  41.             write_lock_irqsave(&ep->lock, flags);
  42.         }
             /* Every exit from the loop happens with ep->lock held. */
  43.         remove_wait_queue(&ep->wq, &wait);

             /* The loop always exits with the state set to TASK_INTERRUPTIBLE, so reset it. */
  44.         set_current_state(TASK_RUNNING);
  45.     }

  46.     /* Is it worth to try to dig for events ? */
  47.     eavail = !list_empty(&ep->rdllist);

  48.     write_unlock_irqrestore(&ep->lock, flags);

  49.     /*
  50.      * Try to transfer events to user space. In case we get 0 events and
  51.      * there's still timeout left over, we go trying again in search of
  52.      * more luck.
  53.      */
         /*
          * (4) ep_events_transfer() is only called when no error was recorded
          * and rdllist was non-empty; its result is stored in res.
          */
  54.     if (!res && eavail &&
  55.      !(res = ep_events_transfer(ep, events, maxevents)) && jtimeout)        (4)
  56.         goto retry;

  57.     return res;
  58. }

中间那个for (;;)看上去是个死循环,其实它只做一件事:睡眠等待。当rdllist不为空(由ep_poll_callback()唤醒)、超时时间用完,或者有信号到来时,就会跳出循环。高效吧,等待期间进程只是挂在ep->wq上sleep,等别人来叫醒。

好了,接下来就要把事件传给用户空间了。这里就不列ep_events_transfer的代码了,这个函数主要调用ep_collect_ready_items和ep_send_events两个函数:ep_collect_ready_items负责收集已就绪的epitem并加到传送列表里面,ep_send_events则负责把它们传到用户空间。

 

 

阅读(897) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~