好久没继续写了,今天来完成最后一个epoll系统调用epoll_wait的分析,这也是epoll花时间最多的那一部分。
好~先来看epoll_wait的代码:
- /*
- * Implement the event wait interface for the eventpoll file. It is the kernel
- * part of the user space epoll_wait(2).
- *
- * Parameters:
- *   epfd      - file descriptor that must refer to an eventpoll file.
- *   events    - user-space array handed to ep_poll() to receive events.
- *   maxevents - capacity of "events" in entries; must be greater than zero.
- *   timeout   - forwarded unchanged to ep_poll(), which treats it as
- *               milliseconds, with -1 meaning "no timeout".
- *
- * Returns -EINVAL when maxevents <= 0 or when epfd is not an eventpoll
- * file, -EBADF when fget() cannot resolve epfd, the non-zero result of
- * verify_area() when the user buffer check fails, and otherwise whatever
- * ep_poll() returns.
- */
- asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
- int maxevents, int timeout)
- {
- int error;
- struct file *file;
- struct eventpoll *ep;
- DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n",
- current, epfd, events, maxevents, timeout));
- /* The maximum number of event must be greater than zero */
- if (maxevents <= 0)
- return -EINVAL;
- /*
- * Verify that the area passed by the user is writeable.
- * NOTE(review): maxevents is only bounded from below, so the size
- * computed below looks like it can wrap for very large values and end
- * up checking a smaller area than the caller asked for - confirm
- * whether an upper bound on maxevents is required here.
- */
- if ((error = verify_area(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))) (1)
- goto eexit_1;
- /*
- * Get the "struct file *" for the eventpoll file.
- * A successful fget() is paired with the fput() at eexit_2.
- */
- error = -EBADF;
- file = fget(epfd);
- if (!file)
- goto eexit_1;
- /*
- * We have to check that the file structure underneath the fd
- * the user passed to us _is_ an eventpoll file.
- */
- error = -EINVAL;
- if (!IS_FILE_EPOLL(file))
- goto eexit_2;
- /*
- * At this point it is safe to assume that the "private_data" contains
- * our own data structure.
- */
- ep = file->private_data;
- /*
- * Time to fish for events ...
- * The value returned by ep_poll() becomes the return value of the
- * system call.
- */
- error = ep_poll(ep, events, maxevents, timeout); (2)
- eexit_2: /* reached by every path that obtained "file" via fget() */
- fput(file);
- eexit_1: /* paths that failed before fget() succeeded jump straight here */
- DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n",
- current, epfd, events, maxevents, timeout, error));
- return error;
- }
这个函数的主体很简单,真正的关键在于它最后调用的ep_poll函数:
- static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
- int maxevents, long timeout)
- {
- int res, eavail;
- unsigned long flags;
- long jtimeout;
- wait_queue_t wait;
- /*
- * Overview of ep_poll(): when ep->rdllist is empty, sleep on ep->wq
- * until the list becomes non-empty, the timeout runs out or a signal
- * is pending; afterwards hand any ready items to ep_events_transfer().
- * Returns -EINTR when a signal interrupted the wait, 0 when nothing
- * was available to transfer, and otherwise the result of
- * ep_events_transfer().
- *
- * Calculate the timeout by checking for the "infinite" value ( -1 )
- * and the overflow condition. The passed timeout is in milliseconds,
- * that why (t * HZ) / 1000.
- * The "+ 999" rounds a partial jiffy up. A timeout of 0 yields
- * jtimeout == 0, which makes the wait loop below exit on its first
- * check without sleeping.
- */
- jtimeout = timeout == -1 || timeout > (MAX_SCHEDULE_TIMEOUT - 1000) / HZ ?
- MAX_SCHEDULE_TIMEOUT: (timeout * HZ + 999) / 1000;
- retry: /* re-entered when a transfer produced 0 events and jtimeout is non-zero */
- write_lock_irqsave(&ep->lock, flags);
- res = 0;
- if (list_empty(&ep->rdllist)) {
- /*
- * We don't have any available event to return to the caller.
- * We need to sleep here, and we will be wake up by
- * ep_poll_callback() when events will become available.
- * The wait entry is queued on ep->wq before the loop re-checks
- * the ready list.
- */
- init_waitqueue_entry(&wait, current); (1)
- add_wait_queue(&ep->wq, &wait); (2)
- for (;;) { (3)
- /*
- * We don't want to sleep if the ep_poll_callback() sends us
- * a wakeup in between. That's why we set the task state
- * to TASK_INTERRUPTIBLE before doing the checks.
- */
- set_current_state(TASK_INTERRUPTIBLE);
- if (!list_empty(&ep->rdllist) || !jtimeout) /* events ready, or no time left */
- break;
- if (signal_pending(current)) {
- res = -EINTR;
- break;
- }
- write_unlock_irqrestore(&ep->lock, flags); /* do not hold ep->lock while sleeping */
- jtimeout = schedule_timeout(jtimeout); /* result is the budget for the next pass; 0 ends the loop */
- write_lock_irqsave(&ep->lock, flags);
- }
- remove_wait_queue(&ep->wq, &wait);
- set_current_state(TASK_RUNNING);
- }
- /*
- * Is it worth to try to dig for events ?
- * The ready list is sampled while ep->lock is still held; the transfer
- * below runs after the lock has been released.
- */
- eavail = !list_empty(&ep->rdllist);
- write_unlock_irqrestore(&ep->lock, flags);
- /*
- * Try to transfer events to user space. In case we get 0 events and
- * there's still timeout left over, we go trying again in search of
- * more luck.
- * A non-zero res from the wait loop (-EINTR) skips the transfer and
- * is returned as is.
- */
- if (!res && eavail &&
- !(res = ep_events_transfer(ep, events, maxevents)) && jtimeout) (4)
- goto retry;
- return res;
- }
中间的for(;;)就是一个死循环嘛,干嘛的呢?当rdllist为空时,当前进程把自己挂到ep->wq等待队列上睡眠,等到ep_poll_callback在有事件就绪时把它唤醒(或者超时、收到信号),发现rdllist不为空就跳出循环。高效吧,等待期间进程只是sleep,不用自己轮询,等别人来叫就行。
好了,接下来就要把事件传给用户空间了。这里就不列ep_events_transfer的代码了,这个函数主要调用ep_collect_ready_items和ep_send_events两个函数:ep_collect_ready_items的作用是把已经准备好的epitem收集起来加到传送列表里面,ep_send_events则负责把这些事件传到用户空间里面。
阅读(1836) | 评论(0) | 转发(1) |