内核版本2.6.21.5
1. recvfrom 函数原型
int recvfrom(int socket, void *buffer, size_t size, int flags, struct sockaddr *addr, socklen_t *length-ptr)
这个 void *buffer 是进程用户空间地址,可以是栈上的地址,也可以是你用 malloc 分配的堆地址。struct sockaddr *addr 和 socklen_t *length-ptr 分别用来存储对端的地址信息及其长度;注意这个长度参数是用指针传递的,别弄错了。
2. 看看这个函数的源代码
-
/*
 * System-call service routine behind recvfrom().
 *
 * Looks up the socket for fd, wraps the user buffer (ubuf, size) in a
 * single-element iovec, receives through sock_recvmsg() and, when the
 * receive succeeded and the caller supplied addr, copies the peer address
 * gathered in a kernel buffer out through move_addr_to_user().
 *
 * Returns the value of sock_recvmsg(), replaced by the error from
 * move_addr_to_user() if that step fails; -EBADF when fd yields no file;
 * or the error stored by sock_from_file() when the file is not a socket.
 */
asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
			     unsigned flags, struct sockaddr __user *addr,
			     int __user *addr_len)
{
	char peer[MAX_SOCK_ADDR];	/* kernel-side copy of the peer address */
	struct msghdr hdr;
	struct iovec vec;		/* the one buffer segment handed down */
	struct socket *sock;
	struct file *filp;
	int needs_fput;
	int copy_rc;
	int rc = -EBADF;

	filp = fget_light(fd, &needs_fput);
	if (!filp)
		return rc;

	sock = sock_from_file(filp, &rc);
	if (sock) {
		vec.iov_base = ubuf;
		vec.iov_len = size;

		hdr.msg_iov = &vec;
		hdr.msg_iovlen = 1;
		hdr.msg_control = NULL;
		hdr.msg_controllen = 0;
		hdr.msg_name = peer;
		hdr.msg_namelen = MAX_SOCK_ADDR;

		/*
		 * A file opened/marked O_NONBLOCK turns this call into a
		 * MSG_DONTWAIT receive; otherwise the flags are passed on
		 * unchanged.
		 */
		if (sock->file->f_flags & O_NONBLOCK)
			flags |= MSG_DONTWAIT;

		rc = sock_recvmsg(sock, &hdr, size, flags);

		/* Hand the peer address back to the caller if it asked for it. */
		if (rc >= 0 && addr != NULL) {
			copy_rc = move_addr_to_user(peer, hdr.msg_namelen,
						    addr, addr_len);
			if (copy_rc < 0)
				rc = copy_rc;
		}
	}

	fput_light(filp, needs_fput);
	return rc;
}
上面代码是与之对应的系统调用服务函数,直接看sock_recvmsg吧:
-
int sock_recvmsg(struct socket *sock, struct msghdr *msg,
-
size_t size, int flags)
-
{
-
struct kiocb iocb;
-
struct sock_iocb siocb;
-
int ret;
-
-
init_sync_kiocb(&iocb, NULL);
-
iocb.private = &siocb;
-
ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
-
if (-EIOCBQUEUED == ret)
-
ret = wait_on_sync_kiocb(&iocb);
-
return ret;
-
}
-
/*
 * Record the request parameters in the sock_iocb attached to iocb, run
 * the security_socket_recvmsg() hook, and then dispatch to the socket's
 * own recvmsg operation.
 *
 * Returns the hook's value when it is non-zero, otherwise the result of
 * sock->ops->recvmsg().
 */
static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
				 struct msghdr *msg, size_t size, int flags)
{
	struct sock_iocb *req = kiocb_to_siocb(iocb);
	int rc;

	req->sock = sock;
	req->scm = NULL;
	req->msg = msg;
	req->size = size;
	req->flags = flags;

	rc = security_socket_recvmsg(sock, msg, size, flags);
	if (rc == 0)
		rc = sock->ops->recvmsg(iocb, sock, msg, size, flags);

	return rc;
}
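还是直接看 sock->ops->recvmsg 在这里对应的 sock_common_recvmsg,以及它通过 sk->sk_prot->recvmsg 调用到的 udp_recvmsg 吧(以UDP socket为例):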
-
/*
 * Forward a receive request to the protocol's recvmsg handler.
 *
 * The MSG_DONTWAIT bit of flags is split off and passed as the separate
 * "noblock" argument; the remaining bits are passed as the handler's
 * flags. When the handler succeeds (result >= 0), the address length it
 * reported is stored in msg->msg_namelen.
 *
 * Returns whatever sk->sk_prot->recvmsg() returned.
 */
int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int noblock = flags & MSG_DONTWAIT;
	int rest = flags & ~MSG_DONTWAIT;
	int namelen = 0;
	int rc;

	rc = sk->sk_prot->recvmsg(iocb, sk, msg, size, noblock, rest,
				  &namelen);
	if (rc < 0)
		return rc;

	msg->msg_namelen = namelen;
	return rc;
}
-
/*
 * udp_recvmsg - receive one datagram on sk into msg.
 *
 * Obtains an skb from skb_recv_datagram(), copies at most len bytes of the
 * data that follows the UDP header into msg->msg_iov and, when
 * msg->msg_name is non-NULL, fills it in as a sockaddr_in built from the
 * skb's UDP source port and IP source address. If addr_len is non-NULL,
 * *addr_len is set to sizeof(struct sockaddr_in). With MSG_ERRQUEUE in
 * flags the whole call is delegated to ip_recv_error().
 *
 * Returns the number of bytes copied, or the full payload length
 * (skb->len minus the UDP header) when the caller passed MSG_TRUNC in
 * flags. On failure returns the error reported by skb_recv_datagram() or
 * by the copy routine, or -EAGAIN after a checksum failure when noblock
 * is set.
 */
int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
-
size_t len, int noblock, int flags, int *addr_len)
-
{
-
struct inet_sock *inet = inet_sk(sk);
-
struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; //store peer address
-
struct sk_buff *skb;
-
int copied, err, copy_only, is_udplite = IS_UDPLITE(sk);
-
-
/*
 * Check any passed addresses
 */
-
if (addr_len)
-
*addr_len=sizeof(*sin);
-
-
if (flags & MSG_ERRQUEUE) /* Fetch message from error queue. */
-
return ip_recv_error(sk, msg, len);
-
-
try_again:
-
skb = skb_recv_datagram(sk, flags, noblock, &err); // this call obtains the skb; on NULL, err carries the reason
-
if (!skb)
-
goto out;
-
-
copied = skb->len - sizeof(struct udphdr); // payload length; truncated below if it exceeds what the user asked for
-
if (copied > len) {
-
copied = len;
-
msg->msg_flags |= MSG_TRUNC; // e.g. the user asked for 100 bytes but the skb holds 1000: only the first 100 are copied and MSG_TRUNC is set in msg->msg_flags
-
}
-
-
/*
 * Decide whether to checksum and/or copy data.
 *
 * UDP: checksum may have been computed in HW,
 * (re-)compute it if message is truncated.
 * UDP-Lite: always needs to checksum, no HW support.
 */
-
copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
-
-
if (is_udplite || (!copy_only && msg->msg_flags&MSG_TRUNC)) {
-
if (__udp_lib_checksum_complete(skb))
-
goto csum_copy_err;
-
copy_only = 1;
-
}
-
-
if (copy_only)
-
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), // the payload copy into the user's iovec happens here
-
msg->msg_iov, copied );
-
else {
-
err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
-
-
/* -EINVAL from the combined copy+checksum routine is treated as a checksum failure */
if (err == -EINVAL)
-
goto csum_copy_err;
-
}
-
-
if (err)
-
goto out_free;
-
-
sock_recv_timestamp(msg, sk, skb); // attach the receive timestamp
-
-
/* Copy the address. */
-
if (sin)
-
{
-
sin->sin_family = AF_INET;
-
sin->sin_port = skb->h.uh->source;
-
sin->sin_addr.s_addr = skb->nh.iph->saddr; // the peer's IP address and port are taken from the packet headers here
-
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-
}
-
if (inet->cmsg_flags)
-
ip_cmsg_recv(msg, skb);
-
-
err = copied;
-
/* note: this tests the caller's flags argument, not msg->msg_flags set above */
if (flags & MSG_TRUNC)
-
err = skb->len - sizeof(struct udphdr);
-
-
out_free:
-
skb_free_datagram(sk, skb);
-
out:
-
return err; // on success err is the byte count set above; on the error paths it is the error code
-
-
/* checksum failure: count it, pass the skb to skb_kill_datagram(), then retry unless non-blocking */
csum_copy_err:
-
UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
-
-
skb_kill_datagram(sk, skb, flags);
-
-
if (noblock)
-
return -EAGAIN;
-
goto try_again;
-
}
3. 从上面的代码中我们看到有2个重要的函数:skb_recv_datagram和skb_copy_datagram_iovec,一个是取得skb, 一个是从skb中拷贝数据
-
/*
 * skb_recv_datagram - fetch the next skb from sk->sk_receive_queue.
 *
 * With MSG_PEEK in flags the head skb stays on the queue and only its
 * users count is incremented (under the queue lock); otherwise the skb is
 * dequeued. While the queue is empty the function loops through
 * wait_for_packet() until that returns non-zero or the timeout obtained
 * from sock_rcvtimeo() is zero.
 *
 * Returns the skb, or NULL. On the no_packet path *err is set to the
 * pending socket error or to -EAGAIN; when wait_for_packet() ends the
 * loop, *err holds whatever wait_for_packet() stored there.
 */
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
-
int noblock, int *err)
-
{
-
struct sk_buff *skb;
-
long timeo;
-
/*
 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
 */
-
int error = sock_error(sk);
-
-
if (error)
-
goto no_packet;
-
-
/*
 * In non-blocking mode timeo is 0; otherwise it is the receive timeout
 * set with setsockopt(). If none was set the default is to block
 * indefinitely: at socket creation sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT.
 */
timeo = sock_rcvtimeo(sk, noblock);
-
-
-
do {
-
/* Again only user level code calls this function, so nothing
 * interrupt level will suddenly eat the receive_queue.
 *
 * Look at current nfs client by the way...
 * However, this function was corrent in any case. 8)
 */
-
if (flags & MSG_PEEK) {
-
unsigned long cpu_flags;
-
-
spin_lock_irqsave(&sk->sk_receive_queue.lock,
-
cpu_flags);
-
skb = skb_peek(&sk->sk_receive_queue);
-
if (skb)
-
atomic_inc(&skb->users);
-
spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
-
cpu_flags);
-
} else
-
skb = skb_dequeue(&sk->sk_receive_queue); // take the skb off the receive queue (the queue the receive path hung it on earlier)
-
-
if (skb)
-
return skb; // blocking or not, an available skb is returned right away - the ideal case
-
-
/* User doesn't want to wait */
-
error = -EAGAIN;
-
if (!timeo) // non-blocking, or the requested timeout has run down to 0: stop waiting
-
goto no_packet;
-
-
} while (!wait_for_packet(sk, err, &timeo)); // the only place we wait: for the given timeout, or indefinitely
-
-
return NULL;
-
-
no_packet:
-
*err = error;
-
return NULL;
-
}
-
/*
 * Wait for a packet..
 *
 * Queues the current task on sk->sk_sleep and, unless one of the checks
 * below fires first, sleeps through schedule_timeout() for at most
 * *timeo_p, writing schedule_timeout()'s return value back to *timeo_p.
 *
 * Returns 0 when the caller should look at the receive queue again (the
 * queue is already non-empty, or the sleep has finished). Returns
 * non-zero to make the caller stop: an error code, also stored in *err,
 * for a socket error, -ENOTCONN or a pending signal; or 1 with *err = 0
 * when RCV_SHUTDOWN is set on the socket.
 */
-
static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
-
{
-
int error;
-
DEFINE_WAIT(wait);
-
-
prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); // put the current task on the socket's wait queue sk->sk_sleep
-
-
/* Socket errors? */
-
error = sock_error(sk);
-
if (error)
-
goto out_err;
-
-
if (!skb_queue_empty(&sk->sk_receive_queue)) // an skb is already queued: return with error == 0 so the caller goes and receives it
-
goto out;
-
-
/* Socket shut down? */
-
if (sk->sk_shutdown & RCV_SHUTDOWN) // receive side was shut down: return 1 with *err = 0, no skb will be received
-
goto out_noerr;
-
-
/* Sequenced packets can come disconnected.
 * If so we report the problem
 */
-
error = -ENOTCONN;
-
if (connection_based(sk) && // handling for connection-based sockets such as TCP; not relevant to the UDP path discussed here
-
!(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
-
goto out_err;
-
-
/* handle signals */
-
if (signal_pending(current)) // a signal is pending for the current task
-
goto interrupted;
-
-
error = 0;
-
*timeo_p = schedule_timeout(*timeo_p); // sleep; the remaining timeout is stored back for the caller
-
out:
-
finish_wait(sk->sk_sleep, &wait);
-
return error;
-
interrupted:
-
/*
 * ERESTARTSYS or EINTR is returned for the application to handle. User
 * code normally deals with the interrupted call by retrying it, e.g.:
 *     if (errno == EINTR) continue;
 */
error = sock_intr_errno(*timeo_p);
-
-
out_err:
-
*err = error;
-
goto out;
-
out_noerr:
-
*err = 0;
-
error = 1;
-
goto out;
-
}
上面2个代码段是取得skb的过程,从中我们看到它涉及到应用层设置的非阻塞标志、此系统调用的超时时间、以及被信号中断时的处理等问题,下面我们看看数据拷贝过程吧:
-
/*
 * skb_copy_datagram_iovec - copy len bytes of skb data, starting at byte
 * offset, into the iovec "to".
 *
 * The skb is walked in three stages: the linear area (skb->data, up to
 * skb_headlen()), the page fragments in skb_shinfo(skb)->frags[], and the
 * skbs chained on frag_list (handled by recursion). In the two loops,
 * start and end bracket the current piece, so offset - start is the
 * position inside that piece.
 *
 * Returns 0 once len bytes have been copied. Returns -EFAULT if
 * memcpy_toiovec() or a recursive call fails, or if the skb runs out of
 * data before len reaches 0.
 */
int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
-
struct iovec *to, int len)
-
{
-
int start = skb_headlen(skb); // per the article: total packet length minus the length held in fragments (NOTE(review): i.e. the linear part - confirm against skb_headlen())
-
int i, copy = start - offset;
-
-
/* Copy header. */
-
if (copy > 0) { // without fragments everything is copied here in one go; with fragments this copies only the leading part
-
if (copy > len)
-
copy = len;
-
if (memcpy_toiovec(to, skb->data + offset, copy))
-
goto fault;
-
if ((len -= copy) == 0)
-
return 0;
-
offset += copy;
-
}
-
-
/* Copy paged appendix. Hmm... why does this look so complicated? */ // data held in the page fragments is copied here; the article defers the details of fragments
-
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-
int end;
-
-
BUG_TRAP(start <= offset + len);
-
-
end = start + skb_shinfo(skb)->frags[i].size;
-
if ((copy = end - offset) > 0) {
-
int err;
-
u8 *vaddr;
-
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
struct page *page = frag->page;
-
-
if (copy > len)
-
copy = len;
-
/* the fragment's page is mapped only for the duration of the copy */
vaddr = kmap(page);
-
err = memcpy_toiovec(to, vaddr + frag->page_offset +
-
offset - start, copy);
-
kunmap(page);
-
if (err)
-
goto fault;
-
if (!(len -= copy))
-
return 0;
-
offset += copy;
-
}
-
start = end;
-
}
-
-
/* Finally walk the chained skbs on frag_list, recursing into each one */
if (skb_shinfo(skb)->frag_list) {
-
struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-
for (; list; list = list->next) {
-
int end;
-
-
BUG_TRAP(start <= offset + len);
-
-
end = start + list->len;
-
if ((copy = end - offset) > 0) {
-
if (copy > len)
-
copy = len;
-
if (skb_copy_datagram_iovec(list,
-
offset - start,
-
to, copy))
-
goto fault;
-
if ((len -= copy) == 0)
-
return 0;
-
offset += copy;
-
}
-
start = end;
-
}
-
}
-
/* nothing left to copy counts as success; leftover len falls through to the fault path */
if (!len)
-
return 0;
-
-
fault:
-
return -EFAULT;
-
}
over ...
阅读(14013) | 评论(0) | 转发(0) |