TCP读取数据学习-KWA2-ChinaUnix博客

KWA2

首页　| 　博文目录　| 　关于我

KWA2

博客访问： 428541
博文数量： 124
博客积分： 0
博客等级：民兵
技术积分： 872
用户组：普通用户
注册时间： 2018-03-29 14:38

个人简介

默默的一块石头

文章分类

全部博文（124）

其他（2）
mysql（0）
服务器学习（6）
设备系统（4）
虚拟机（2）
网络编程（81）
命名空间（1）
文件系统（5）
进程编程学习（5）
进程调度学习（3）
linux（14）
未分配的博文（1）

文章存档

2022年（26）

2021年（10）

2020年（28）

2019年（60）

我的朋友

相关博文

TCP读取数据学习

分类： LINUX

2020-10-20 21:39:19

Note: 以下内容基于 kernel 3.13.1，并结合自己学习过程中遇到的具体情况进行 debug 分析。
1.tcp_v4_rcv()
/*
* From net/ipv4/tcp_ipv4.c
*/
/*
 * tcp_v4_rcv() - abridged view of how an incoming TCP segment is delivered.
 *
 * Only the socket lookup and the delivery decision are shown. The
 * declarations of sk, th and ret, the no_tcp_socket label and all other
 * processing are not part of this excerpt.
 *
 * Delivery cases:
 *   (1) the socket is not owned by a user context and tcp_prequeue()
 *       accepts the skb (returns true);
 *   (2) the socket is not owned by a user context and tcp_prequeue()
 *       returns false, so tcp_v4_do_rcv() processes the skb directly;
 *   (3) the socket is owned by a user context, so the skb is handed to
 *       sk_add_backlog().
 *
 * Returns ret, which in the code shown is only assigned in case (2).
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))		/* recv case (1) */
			ret = tcp_v4_do_rcv(sk, skb);	/* recv case (2) */
	} else if (unlikely(sk_add_backlog(sk, skb,
					   sk->sk_rcvbuf + sk->sk_sndbuf))) {	/* recv case (3) */
		/*
		 * sk_add_backlog() returned non-zero; the handling of that
		 * case is not shown in this excerpt. The explicit braces keep
		 * the sock_put() below from becoming this branch's body.
		 */
	}

	sock_put(sk);
	return ret;
}

/*
 * __inet_lookup_skb() - abridged: only the first step is shown, which asks
 * skb_steal_sock() for a socket already attached to the skb.
 *
 * NOTE(review): the definition of skb_steal_sock() is pasted inside this
 * function's body purely to show what the call above does. A nested
 * function definition like this is not standard C, and no return statement
 * of __inet_lookup_skb() itself is shown.
 */
static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb,const __be16 sport,const __be16 dport)
{
struct sock *sk = skb_steal_sock(skb);
{
/*
 * skb_steal_sock() - if skb->sk is set, detach it from the skb (clearing
 * skb->destructor and skb->sk) and return it; otherwise return NULL.
 */
static inline struct sock *skb_steal_sock(struct sk_buff *skb)
{
if (skb->sk) {
struct sock *sk = skb->sk;
skb->destructor = NULL;
skb->sk = NULL;
return sk;
}
return NULL;
}
}
}
----------------------------------------------------------
recv情况（1）
/* Packet is added to VJ-style prequeue for processing in process
* context, if a reader task is waiting. Apparently, this exciting
* idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
* failed somewhere. Latency? Burstiness? Well, at least now we will
* see, why it failed. 8)8) --ANK
*
*/
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);

/* Returns false (skb not prequeued) when sysctl_tcp_low_latency is set or no reader task is recorded in ucopy.task. */
if (sysctl_tcp_low_latency || !tp->ucopy.task)
return false;

/* Also returns false when the skb is no longer than its TCP header and the prequeue is currently empty. */
if (skb->len <= tcp_hdrlen(skb) &&
skb_queue_len(&tp->ucopy.prequeue) == 0)
return false;

/* Append the skb to the prequeue and add its truesize to the prequeue memory counter. */
__skb_queue_tail(&tp->ucopy.prequeue, skb);
tp->ucopy.memory += skb->truesize;
if (tp->ucopy.memory > sk->sk_rcvbuf) {
/* Counter exceeds sk_rcvbuf: pass every queued skb to sk_backlog_rcv() and reset the counter. */
struct sk_buff *skb1;

while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
sk_backlog_rcv(sk, skb1);
}
tp->ucopy.memory = 0;
} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
/* This skb is the only one on the prequeue: wake waiters on sk_sleep(sk). */
wake_up_interruptible_sync_poll(sk_sleep(sk),POLLIN | POLLRDNORM | POLLRDBAND);
/* If no ACK is scheduled, arm the ICSK_TIME_DACK timer at 3/4 of tcp_rto_min(sk). */
if (!inet_csk_ack_scheduled(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,(3 * tcp_rto_min(sk)) / 4,TCP_RTO_MAX);
}
/* The skb was taken by the prequeue path (queued, or already processed by the drain above). */
return true;
}
Note:(1)通过 __skb_queue_tail(&tp->ucopy.prequeue, skb);把skb buffer加入tp->ucopy.prequeue列表
(2)通过wake_up_interruptible_sync_poll()唤醒睡眠中的tcp_recvmsg()来读取数据
----------------------------------------------------------
recv情况（2）
1.tcp_v4_do_rcv()
/* The socket must have it's spinlock held when we get
* here.
*
* We have a potential double-lock case here, so even when
* doing backlog processing we use the BH locking scheme.
* This is because we cannot sleep with the original spinlock
* held.
*/
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
/*
 * Abridged excerpt: only the TCP_ESTABLISHED fast path is shown.
 * NOTE(review): rsk is not used in the portion shown, and no return
 * statement is shown for sockets in any other state.
 */
struct sock *rsk;

if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst = sk->sk_rx_dst;

sock_rps_save_rxhash(sk, skb);
if (dst) {
/*
 * Release and clear the cached sk_rx_dst when the skb's incoming
 * interface index differs from rx_dst_ifindex, or when
 * dst->ops->check() returns NULL.
 */
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
dst->ops->check(dst, 0) == NULL) {
dst_release(dst);
sk->sk_rx_dst = NULL;
}
}
/* Hand the segment to tcp_rcv_established(); this path always returns 0. */
tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
return 0;
}
}
2.tcp_queue_rcv()
/*
 * tcp_queue_rcv() - place skb on sk->sk_receive_queue, or merge it into the
 * skb currently at the tail of that queue.
 *
 * hdrlen is passed to __skb_pull() before anything else happens.
 * fragstolen is forwarded unchanged to tcp_try_coalesce().
 *
 * Returns 1 when the queue had a tail skb and tcp_try_coalesce() succeeded
 * on it (skb is then not queued here); returns 0 when skb was appended to
 * the receive queue.
 */
static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
bool *fragstolen)
{
int eaten;
struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);

__skb_pull(skb, hdrlen);
/* Coalescing is attempted only when the receive queue already has a tail skb. */
eaten = (tail &&tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
/* rcv_nxt is advanced to this skb's end_seq whether or not it was coalesced. */
tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if (!eaten) {
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
}
return eaten;
}

----------------------------------------------------------
recv情况（3）
/* Used by processes to "lock" a socket state, so that
* interrupts and bottom half handlers won't change it
* from under us. It essentially blocks any incoming
* packets, so that we won't get any new data or any
* packets that change the state of the socket.
*
* While locked, BH processing will add new packets to
* the backlog queue. This queue is processed by the
* owner of the socket lock right before it is released.
*
* Since ~2.3.5 it is also exclusive sleep lock serializing
* accesses from user process context.
*/
// sk_lock.slock locked, owned = 0, BH disabled
// sk_lock.slock unlocked, owned = 1, BH enabled
/* Evaluates to the socket's sk_lock.owned field. */
#define sock_owned_by_user(sk) ((sk)->sk_lock.owned)

/*
 * Fragment repeated from tcp_v4_rcv() with the branch bodies removed:
 * when sk_lock.owned is set, the skb is passed to sk_add_backlog().
 */
if (!sock_owned_by_user(sk)) {
}else if (unlikely(sk_add_backlog(sk, skb,sk->sk_rcvbuf + sk->sk_sndbuf))) {
}

/* Same definition as above, repeated. */
#define sock_owned_by_user(sk) ((sk)->sk_lock.owned)
----------------------------------------------------------------------------------->2022.1.11

进程上下文在访问该传输控制块之前需要调用lock_sock()锁定，
在访问完成后调用release_sock()将其释放，这两个函数的实现如下：
(1)lock_sock()
/* lock_sock() - wrapper that calls lock_sock_nested() with subclass 0. */
static inline void lock_sock(struct sock *sk)
{
lock_sock_nested(sk, 0);
}

/*
 * lock_sock_nested() - mark the socket as owned by the calling process,
 * waiting in __lock_sock() first if it is already owned.
 * subclass is only passed on to mutex_acquire().
 */
void lock_sock_nested(struct sock *sk, int subclass)
{
// Note: calling lock_sock() may sleep
might_sleep();
// Take the spinlock and disable bottom halves
spin_lock_bh(&sk->sk_lock.slock);
// If owned is non-zero, another process holds this transport control block; wait in __lock_sock() (see below)
if (sk->sk_lock.owned)
__lock_sock(sk);
// When __lock_sock() returns the state has been restored: the spinlock is held and bottom halves are disabled.

// Set owned to 1: this process now holds the transport control block
sk->sk_lock.owned = 1;
// Release the spinlock, but do not re-enable bottom halves yet
spin_unlock(&sk->sk_lock.slock);
/*
* The sk_lock has mutex_lock() semantics here:
*/
mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
// Re-enable bottom halves
local_bh_enable();
}

// __lock_sock() puts the process on the wait queue wq inside sk->sk_lock and returns once no
// process holds the transport control block any more. Note: sk->sk_lock.slock is held on entry, released before sleeping, and re-taken before returning.
static void __lock_sock(struct sock *sk)
{
// Define a wait-queue entry
DEFINE_WAIT(wait);

// Loop until sock_owned_by_user() returns 0
for (;;) {
// Add the calling process to the lock's wait queue
prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
TASK_UNINTERRUPTIBLE);
// Release the spinlock and re-enable bottom halves
spin_unlock_bh(&sk->sk_lock.slock);
// Yield the CPU
schedule();
// Execution resumes here when scheduled again: first re-take the spinlock and disable bottom halves
spin_lock_bh(&sk->sk_lock.slock);
// If no process holds the transport control block any more, stop waiting
if (!sock_owned_by_user(sk))
break;
}
finish_wait(&sk->sk_lock.wq, &wait);
}

(2)release_sock()
/*
 * release_sock() - give up ownership of the socket: process any backlog,
 * clear sk_lock.owned and wake processes waiting on sk_lock.wq.
 */
void release_sock(struct sock *sk)
{
/*
* The sk_lock has mutex_unlock() semantics:
*/
// Debugging related; can be ignored
mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

// Take the spinlock and disable bottom halves
spin_lock_bh(&sk->sk_lock.slock);
// If the backlog queue is not empty, call __release_sock() to process the packets queued on it (see the packet receive path)
if (sk->sk_backlog.tail)
__release_sock(sk);
// Set owned to 0: the caller no longer holds the transport control block
sk->sk_lock.owned = 0;
// If the wait queue is not empty, wake the waiting processes
if (waitqueue_active(&sk->sk_lock.wq))
wake_up(&sk->sk_lock.wq);
// Release the spinlock and re-enable bottom halves
spin_unlock_bh(&sk->sk_lock.slock);
}
参考:blog.csdn.net/wangquan1992/article/details/108960282

----------------------------------------------------------
1.tcp_recvmsg()
/*
* This routine copies from a sock struct into the user buffer.
*
* Technical note: in 2.3 we work on _locked_ socket, so that
* tricks with *seq access order and skb->users are not required.
* Probably, code can be easily improved even more.
*/

int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int nonblock, int flags, int *addr_len)
{
/*
 * Abridged excerpt. The declarations and initialisation of tp, seq,
 * peek_seq, skb, copied, target, timeo, user_recv, used and err are not
 * shown.
 * NOTE(review): the code that assigns user_recv and tp->ucopy.task is
 * also not shown here.
 *
 * The whole copy loop runs with the socket locked: lock_sock() here,
 * release_sock() just before returning copied.
 */
lock_sock(sk);

seq = &tp->copied_seq; //u32 copied_seq; /* Head of yet unread data */
/* With MSG_PEEK, work on a local copy so tp->copied_seq itself is not advanced by "*seq += used" below. */
if (flags & MSG_PEEK) {
peek_seq = tp->copied_seq;
seq = &peek_seq;
}

do {
u32 offset;

/* Next get a buffer. */
skb_queue_walk(&sk->sk_receive_queue, skb) {
/* Now that we have two receive queues this
* shouldn't happen.
*/
if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
"recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
flags))
break;

/* offset is the position of *seq inside this skb; one less if the TCP header has SYN set. */
offset = *seq - TCP_SKB_CB(skb)->seq;
if (tcp_hdr(skb)->syn)
offset--;
if (offset < skb->len)
goto found_ok_skb;
}

/* No skb on the receive queue holds data at *seq. */
if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
/* Install new reader */
tp->ucopy.len = len;
/* Ugly... If prequeue is not empty, we have to
* process it before releasing socket, otherwise
* order will be broken at second iteration.
* More elegant solution is required!!!
*
* Look: we have the following (pseudo)queues:
*
* 1. packets in flight
* 2. backlog
* 3. prequeue
* 4. receive_queue
*
* Each queue can be processed only if the next ones
* are empty. At this point we have empty receive_queue.
* But prequeue _can_ be not empty after 2nd iteration,
* when we jumped to start of loop because backlog
* processing added something to receive_queue.
* We cannot release_sock(), because backlog contains
* packets arrived _after_ prequeued ones.
*
* Shortly, algorithm is clear --- to process all
* the queues in order. We could make it more directly,
* requeueing packets from backlog to prequeue, if
* is not empty. It is more elegant, but eats cycles,
* unfortunately.
*/
if (!skb_queue_empty(&tp->ucopy.prequeue))
goto do_prequeue;

/* __ Set realtime policy in scheduler __ */
}

if (copied >= target) {
/* Do not sleep, just process backlog. */
release_sock(sk);
lock_sock(sk);
} else
sk_wait_data(sk, &timeo);

if (user_recv) {
int chunk;

/* __ Restore normal policy in scheduler __ */
/* chunk is how far tp->ucopy.len has dropped below len; it is credited to copied and removed from len. */
if ((chunk = len - tp->ucopy.len) != 0) {
NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
len -= chunk;
copied += chunk;
}

/* Process the prequeue when rcv_nxt equals copied_seq and the prequeue is not empty; do_prequeue is also reached by the goto above. */
if (tp->rcv_nxt == tp->copied_seq &&
!skb_queue_empty(&tp->ucopy.prequeue)) {
do_prequeue:
tcp_prequeue_process(sk);
if ((chunk = len - tp->ucopy.len) != 0) {
len -= chunk;
copied += chunk;
}
}
}
/* Re-evaluate the loop condition and, if len is still positive, rescan the receive queue. */
continue;

found_ok_skb:
/* Ok so how much can we use? */
used = skb->len - offset;//offset = *seq - TCP_SKB_CB(skb)->seq;
if (len < used)
used = len;

/* Data is copied to msg->msg_iov only when MSG_TRUNC is not set. */
if (!(flags & MSG_TRUNC)) {
{
err = skb_copy_datagram_iovec(skb, offset,msg->msg_iov, used);
if (err) {
/* Exception. Bailout! */
if (!copied)
copied = -EFAULT;
break;
}
}
}

/* Advance the read position and the byte counters by the amount consumed. */
*seq += used;
copied += used;
len -= used;

tcp_rcv_space_adjust(sk);
} while (len > 0);

release_sock(sk);
return copied;
}
EXPORT_SYMBOL(tcp_recvmsg);

2.tcp_prequeue_process()
/*
 * tcp_prequeue_process() - drain tp->ucopy.prequeue, passing every skb to
 * sk_backlog_rcv(), then reset the prequeue memory counter.
 *
 * NOTE(review): abridged excerpt; the declarations of skb and tp are not
 * shown.
 */
static void tcp_prequeue_process(struct sock *sk)
{
/* RX process wants to run with disabled BHs, though it is not necessary */
local_bh_disable();
while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
sk_backlog_rcv(sk, skb);
local_bh_enable();

/* Clear memory counter. */
tp->ucopy.memory = 0;
}

tcp_prequeue(sk, skb) --------recv情况（1）====>延迟接收数据
(1)tcp_recvmsg()----> sk_wait_data();睡眠等待数据的到来
(2)tcp_v4_rcv()---->tcp_prequeue()--->__skb_queue_tail()/wake_up_interruptible_sync_poll();添加数据并唤醒tcp_recvmsg()
(3)tcp_recvmsg()---->if (!skb_queue_empty(&tp->ucopy.prequeue)) goto do_prequeue;---->tcp_prequeue_process()---->sk_backlog_rcv()---->tcp_v4_do_rcv()---->tcp_rcv_established()---->tcp_queue_rcv()接收数据

tcp_v4_do_rcv(sk, skb)--------recv情况（2）====>立即接收数据
(1)tcp_recvmsg()----> sk_wait_data();睡眠等待数据的到来
(2)tcp_v4_rcv()---->tcp_v4_do_rcv()---->tcp_rcv_established()---->tcp_queue_rcv()接收数据
(3)sk->sk_data_ready(sk, 0);唤醒tcp_recvmsg()
/*
 * sock_init_data() - abridged: only the line that installs
 * sock_def_readable as the socket's sk_data_ready callback is shown.
 */
void sock_init_data(struct socket *sock, struct sock *sk)
{
sk->sk_data_ready = sock_def_readable;
}
/*
 * sock_def_readable() - the sk_data_ready callback installed by
 * sock_init_data(). It wakes waiters on the socket's wait queue and then
 * calls sk_wake_async(). The len argument is not used in this body.
 */
static void sock_def_readable(struct sock *sk, int len)
{
struct socket_wq *wq;

/* sk->sk_wq is read with rcu_dereference(), so the whole access sits inside an RCU read-side section. */
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
/* The wake-up is issued only when wq_has_sleeper() reports a waiter. */
if (wq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
POLLRDNORM | POLLRDBAND);
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}

阅读(1667) | 评论(0) | 转发(0) |

上一篇：重传----超时重传

下一篇：QUIC和TCP

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6