epoll_wait系统调用的实现如下:
- asmlinkage long sys_epoll_wait(int epfd,struct epoll_event __user *events,
-
int maxevents,int timeout)
-
{
-
int error;
-
struct file *file;
-
struct eventpoll *ep;
-
//#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
-
//178956970(1.7亿)
-
if(maxevents <=0 || maxevents > EP_MAX_EVETNS)
-
return -EINVAL;
-
//判断返回事件数组是否合法
-
if(!access_ok(VERIFY_WRITE,events,
-
maxevents * sizeof(struct epoll_event)))
-
{
-
error = -EFAULT;
-
goto error_return;
-
}
-
-
error = -EBADF;
-
file = fget(epfd);
-
-
if(!file)
-
goto error_return;
-
error = -EINVAL;
-
if(!is_file_epoll(file))
-
goto error_fput;
-
//将epoll注册时设置的数据结构取出来,开始进行判断
-
ep = file->private_data;
-
error = ep_poll(ep,events,maxevents,timeout);
-
….......
-
}
现在又转入了ep_poll函数中:
- static int ep_poll(struct eventpoll *ep,struct epoll_event __user *events,
-
int maxevents,long timeout)
-
{
-
int res,avail;
-
unsigned long flags;
-
long jtimeout;
-
wait_queue_t wait;
-
-
//注册的0ms按0.999 Jiffies处理,并非真正的0s,HZ=100,
-
//jiffies/HZ 为s
-
jtimeout = (timeout<0 || timeout >= EP_MAX_MSTIMEO)?
-
MAX_SCHEDULE_TIMEOUT:(timeout*HZ+999)/1000;
-
-
retry:
-
spin_lock_irqsave(&ep->lock,flags);
-
-
res = 0;
-
//事件就绪队列为空,就监听poll
-
if(list_empty(&ep->rdllist))
-
{
-
//让当前进程挂在等待队列wait上,并将该等待队列加入到ep->wq(epoll_wait的 专属队列中),
-
init_waitqueue_entry(&wait,current);
-
wait.flags |= WQ_FLAG_EXCLUSIVE;
-
__add_wait_queue(&ep->wq,&wait);
-
-
for(;;){
-
//进程设置睡眠状态,等到信息时变唤醒
-
set_current_state(TASK_INTERRUPTIBLE);
-
if(!list_empty(&ep->rdllist) || !jtimeout)//只要事件到来,就返回
-
break;
-
if(signal_pending(current)) {//被信号中断就会返回
-
res = -EINTR;
-
break;
-
}
-
spin_unlock_irqrestore(&ep->lock,flags);
-
//进程进入睡眠状态直到规定的睡眠事件醒来或者注册的fd对应的poll驱动函数唤醒该 进程
-
jtimeout = schedule_timeout(jtimeout);
-
spin_lock_irqrestore(&ep->lock,flags);
-
}
-
//poll驱动唤醒了该进程,现在就将对应的poll从等待队列中清除出去,并设置为运行状态
-
__remove_wait_queue(&ep->wq,&wait);
-
set_current_state(TASK_RUNNING);
-
}
-
eavail = !list_empty(&ep->rdllist);
-
spin_unlock_irqrestore(&ep->lock,flags);
-
//没有被中断,有就绪事件,并且向用户空间发送成功,就返回
-
if(!res && eavail && !(res = ep_send_events(ep,events,maxevents))
-
&&jtimeout)
-
goto retry;
-
-
return res;
-
}
ep_send_events函数向用户空间发送就绪事件:
- static int ep_send_events(struct eventpoll *ep,struct epoll_event __user *events,int maxevents)
-
{
-
int eventcnt,error = -EFAULT,pwake = 0;
-
unsigned int revents;
-
unsigned long flags;
-
struct epitem *epi,*nepi;
-
struct list_head txlist;
-
-
INIT_LIST_HEAD(&txlist);
-
mutex_lock(&ep->mtx);
-
-
spin_lock_irqsave(&ep->lock,flags);
-
//将ep->rdllist链表加入到txlist链表中去,这样的话rdllist链表就为空了
-
list_splice(&ep->rdllist,&txlist);
-
INIT_LIST_HEAD(&ep->rdllist);
-
ep->ovflist = NULL;
-
spin_unlock_irqrestore(&ep->lock,flags);
-
//将rdllist链表中的每一项都发送至用户空间
-
for(eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) {
-
-
epi = list_first_entry(&txlist,struct epitem,rdllink);
-
list_del_init(&epi->rdllink);
-
//立刻返回当前文件的就绪事件
-
revents = epi->ffd.file->f_op->poll(epi->ffd.file,NULL);
-
revents &= epi->event.events;
-
-
if(revents) {
-
//将就绪事件的poll_event发送至用户空间
-
if(__put_user(revents,&events[eventcnt.].events) ||
-
__put_user(epi->event.data,&events[eventcnt].data))
-
-
goto errxit;
-
//#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
-
if(epi->event.events & EPOLLONESHOT)
-
epi->event.events &= EP_PRIVATE_BITS;
-
eventcnt++;
-
}
-
//非边缘触发,且事件就绪时,就将epi->rdllink加入到rdllist链表中,实际上就是将没有标记为ET模式的fd又放回到rdllist中,这样下次就绪时又能将其发送至用户空间了
-
if(!(epi->event.events & EPOLLET) && (revents &
-
epi->event.events))
-
list_add_tail(&epi->rdllink,&ep->rdllist);
-
}
-
error = 0;
-
errixt:
-
spin_lock_irqsave(&ep->lock,flags);
-
//在执行上面的代码期间,又有可能有就绪事件,这样的话就进入了ovflist队列,这样有需要再一次确认一次
-
for(nepi = ep->ovflist;(epi = nepi)!= NULL;
-
nepi = epi->next;epi->next = EP_UNACTIVE_PTR) {
-
//链表为空且没有ET事件发生,#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET),这里也和上面的一样
-
if(!ep_is_linked(&epi->rdllink) && (epi->event.events &
-
~EP_PRIVATE_BITS))
-
//又将rdllink其加入到rdllist中
-
list_add_tail(&epi->rdllink,&ep->rdllist);
-
}
-
//#define EP_UNACTIVE_PTR ((void*) -1L)
-
ep->ovflist = EP_UNACTIVE_PTR;
-
list_spice(&txlist,&ep->rdllist);//现在又将txlist链表加入到rdllist链表中去
-
if(!list_empty(&ep->rdllist))
-
{
-
//等待的队列不为空
-
if(waitqueue_active(&ep->wq))
-
-
__wake_up_locked(&ep->wq,TASK_UNINTERRUPTIBLE |
-
TASK_INTERRUPTIBLE);
-
//如果poll队列不为空,则唤醒的次数加1
-
if(waitqueue_active(&ep->poll_wait))
-
pwake++;
-
}
-
spin_unlock_irqrestore(&ep->lock,flags);
-
mutex_unlock(&ep->mtx);
-
if(pwake)
-
ep_poll_safewake(&psw,&ep->poll_wait);
-
return eventcnt == 0?error:eventcnt;
-
}
这样epoll_wait的调用顺序为:sys_epoll_wait → ep_poll → ep_send_events。
参考资料:
linux-2.6.24.3源代码
阅读(3223) | 评论(0) | 转发(0) |