分类: LINUX
2008-08-24 18:00:27
TCP/IP Stack – UDP
Beta-song @
本文从流程上简要分析了UDP数据包发送和接收的过程,并没有深入协议细节。
UDP协议入口
net/ipv4/af_inet.c, UDP操作集
/*
 * Socket-layer (struct proto_ops) operation table for PF_INET datagram
 * sockets. Apart from .poll (udp_poll), the slots are filled with generic
 * inet_, sock_common_ and sock_no_ handlers; the generic handlers forward to
 * the per-protocol struct proto, e.g. inet_sendmsg ends up calling
 * sk->sk_prot->sendmsg.
 */
const struct proto_ops inet_dgram_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
.connect = inet_dgram_connect,
// NOTE(review): the sock_no_* entries look like "operation not supported"
// placeholders for calls that make no sense on a datagram socket - confirm.
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = inet_getname,
.poll = udp_poll, // the only UDP-specific handler in this table
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg, // forwards to sk->sk_prot->sendmsg (udp_sendmsg for UDP)
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
};
上述ops函数最终会对应到udp协议上的相关函数,比如inet_sendmsg在内部是通过如下调用实现的:sk->sk_prot->sendmsg(iocb, sk, msg, size)。其中,sk_prot(即struct sock_common中的成员struct proto *skc_prot)指向的是如下的udp_prot。
net/ipv4/udp.c,UDP协议
/*
 * Protocol-level (struct proto) operation table for UDP over IPv4.
 * The socket layer reaches these handlers through sk->sk_prot;
 * .sendmsg and .recvmsg are the entry points of the send and receive paths.
 */
struct proto udp_prot = {
.name = "UDP",
.owner = THIS_MODULE,
.close = udp_lib_close,
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
.destroy = udp_destroy_sock,
.setsockopt = udp_setsockopt,
.getsockopt = udp_getsockopt,
.sendmsg = udp_sendmsg, // send path entry point
.recvmsg = udp_recvmsg, // receive path entry point
.sendpage = udp_sendpage,
.backlog_rcv = udp_queue_rcv_skb, // same function that udp_rcv's call chain uses to queue an skb
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.get_port = udp_v4_get_port,
.obj_size = sizeof(struct udp_sock), // presumably the size of the per-socket object - confirm
};
发送过程对应函数udp_sendmsg,接收过程对应函数udp_recvmsg,接下来分别进行分析。
发送过程
/*
 * UDP send entry point (abridged excerpt: local declarations, most of the
 * route lookup, the locking that pairs with release_sock() and the return
 * statement are elided).
 * Outline: reuse or look up a route, mark the socket as having a pending
 * datagram, append the user data to the socket write queue, then either
 * push the queued frames out, flush them on error, or leave them corked.
 */
int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len){
// Step 1: check for pending frames; if there are any, jump to step 4 and send
// Step 2: get the route; if none exists, create one via ip_route_output_flow
rt = (struct rtable*)sk_dst_check(sk, 0); …………
// Step 3: mark the udp_sock as pending so that new data cannot be sent
// before the current datagram has been sent completely
up->pending = AF_INET;…………
// Step 4: gather the data and send it (the data may be spread over an
// iovec vector, so it has to be collected first)
// make one large IP datagram from many pieces of data.
// Each piece is held on the socket until ip_push_pending_frames() is called
// ip_append_data internally calls __skb_queue_tail(&sk->sk_write_queue, skb) to queue the packet on the write queue
err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
sizeof(struct udphdr), &ipc, rt,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_flush_pending_frames(sk); // error: drop all data of the current datagram and clear pending
else if (!corkreq)
err = udp_push_pending_frames(sk); // calls ip_push_pending_frames to send the data
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
up->pending = 0; // clear pending so that new data can be sent
release_sock(sk);
}
/*
 * Combine all pending IP fragments on the socket into one IP datagram and
 * push it out.
 * (Abridged excerpt: declarations, the body of the collection loop and the
 * return statement are elided.)
 */
int ip_push_pending_frames(struct sock *sk){
// Step 1: collect all the data from the write queue
while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {……}
// Step 2: send it
err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
skb->dst->dev, dst_output); // dst_output hands the data from the transport layer to the network layer
}
/*
 * Output packet to network from transport.
 * Dispatches through the output function pointer stored in the skb's
 * route entry (skb->dst) and returns its result.
 */
static inline int dst_output(struct sk_buff *skb){
return skb->dst->output(skb); // for this route entry the output function is the network layer's ip_output,
// which goes on to call ip_finish_output ...
}
接收过程
net/ipv4/udp.c
/*
 * UDP receive entry point (abridged excerpt: declarations, error handling,
 * address reporting and the return statement are elided).
 * Takes one datagram off the socket and copies its payload, skipping the
 * UDP header, into the caller's iovec.
 */
int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int noblock, int flags, int *addr_len){
skb = skb_recv_datagram(sk, flags, noblock, &err); // returns a datagram if one is available, otherwise blocks
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
msg->msg_iov, copied); // copy the data to user space
}
/*
 * Fetch the next datagram from the socket's receive queue, waiting for one
 * when necessary (abridged excerpt: declarations, the initialisation of
 * timeo, the exit from the loop once an skb was dequeued and the return
 * statement are elided).
 * The loop repeats for as long as wait_for_packet() returns 0.
 */
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err){
do {
skb = skb_dequeue(&sk->sk_receive_queue); // take a packet off the sock's receive queue
} while (!wait_for_packet(sk, err, &timeo)); // blocks when necessary
}
/*
 * Sleep until the socket's receive queue is non-empty or the timeout runs
 * out (abridged excerpt: the declaration and assignment of 'error' and the
 * error/signal checks are elided).
 * *timeo_p is updated with the value returned by schedule_timeout().
 */
static int wait_for_packet(struct sock *sk, int *err, long *timeo_p){
DEFINE_WAIT(wait); // wait-queue entry for the current process
// queue the entry on the socket's wait queue in TASK_INTERRUPTIBLE state
prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
if (!skb_queue_empty(&sk->sk_receive_queue))
goto out; // no need to wait, data is already there
*timeo_p = schedule_timeout(*timeo_p); // put this process to sleep
out:
finish_wait(sk->sk_sleep, &wait); // remove this process from the wait queue; the wait is over
return error;
}
关于timeo_p
关于休眠时间的问题,是这样设置的:noblock ? 0 : sk->sk_rcvtimeo;
如果非阻塞调用,休眠时间是0,就是不阻塞;否则阻塞sk->sk_rcvtimeo长的时间。
在sock_init_data中对该时间设置为MAX_SCHEDULE_TIMEOUT,而该值定义为LONG_MAX,即永不超时。
当没有数据可读的时候,udp进程就会阻塞等待;以下是net core接收数据流程,并唤醒udp进程的过程。
net/ipv4/ip_input.c
/*
 * Hand a locally destined IP packet to its transport protocol (abridged
 * excerpt: declarations, the computation of 'hash', the body of the if
 * statement and the return statement are elided).
 */
static inline int ip_local_deliver_finish(struct sk_buff *skb){
if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {} // locate the specific transport-layer protocol
ret = ipprot->handler(skb); // call its handler; for UDP the registered handler is udp_rcv (see udp_protocol)
}
net/ipv4/af_inet.c
/*
 * Initialisation of the inet protocol family (abridged excerpt: only the two
 * registrations relevant to UDP are shown; the failure branch and the
 * return statement are elided).
 */
static int __init inet_init(void){
(void)sock_register(&inet_family_ops); // register the PF_INET family; its .create hook is inet_create
if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0){} // register udp_protocol under IPPROTO_UDP
}
/*
 * Protocol descriptor registered for IPPROTO_UDP by inet_init();
 * ip_local_deliver_finish() calls .handler for each incoming UDP packet.
 */
static struct net_protocol udp_protocol = {
.handler = udp_rcv, // receive entry point called from the IP layer
.err_handler = udp_err,
.no_policy = 1,
};
net/ipv4/udp.c
int udp_rcv(struct sk_buff *skb)
→ __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
→ udp_queue_rcv_skb(sk, skb);
→ sock_queue_rcv_skb(sk,skb);
→ skb_queue_tail(&sk->sk_receive_queue, skb); //放到sock的接收队列
→ sk->sk_data_ready(sk, skb_len); //向上通知数据准备好了
针对inet协议簇,sk_data_ready就是sock_def_readable,见如下分析:
/*
 * Address-family descriptor for PF_INET, registered through sock_register()
 * in inet_init().
 */
static struct net_proto_family inet_family_ops = {
.family = PF_INET,
.create = inet_create, // inet_create calls sock_init_data(sock, sk)
.owner = THIS_MODULE,
};
/*
 * Set up the defaults of a new sock (abridged excerpt: only the two
 * assignments relevant to the receive path are shown).
 */
void sock_init_data(struct socket *sock, struct sock *sk){
sk->sk_data_ready = sock_def_readable; // default "data ready" callback, invoked after an skb is queued
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; // default receive timeout: the maximum, i.e. wait indefinitely
}
/*
 * Default sk_data_ready callback: wakes up whoever is waiting for data on
 * this socket. The 'len' argument is not used in the body shown here.
 */
static void sock_def_readable(struct sock *sk, int len){
read_lock(&sk->sk_callback_lock); // hold the callback lock for the whole notification
// only issue a wake-up when a wait queue exists and has waiters on it
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
wake_up_interruptible(sk->sk_sleep);
// NOTE(review): presumably notifies asynchronous (SIGIO-style) listeners that input is available - confirm
sk_wake_async(sk,1,POLL_IN);
read_unlock(&sk->sk_callback_lock);
}
可见,sock_def_readable唤醒等待在该socket上的udp进程,允许其继续读取数据。
欢迎转载,转载请注明出处,谢谢。
http://blog.chinaunix.net/u/8754/showart_1145897.html
2008-08-24