Chinaunix首页 | 论坛 | 博客
  • 博客访问: 416954
  • 博文数量: 124
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 872
  • 用 户 组: 普通用户
  • 注册时间: 2018-03-29 14:38
个人简介

默默的一块石头

文章分类

全部博文(124)

文章存档

2022年(26)

2021年(10)

2020年(28)

2019年(60)

我的朋友

分类: LINUX

2020-10-20 21:39:19

Note:以下学习基于kernel 3.13.1,并结合自己学习中碰到的情况进行debug
1.tcp_v4_rcv()
/*
 * Abridged excerpt of tcp_v4_rcv(): softirq entry point for inbound IPv4
 * TCP segments. NOTE(review): this function is defined in
 * net/ipv4/tcp_ipv4.c; the original "From tcp_input.c" note appears
 * incorrect. Declarations of sk/th/ret and the no_tcp_socket label are
 * cut from this excerpt.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
/* Look up the socket owning this segment by source/dest port (from the
 * TCP header 'th'); bail out when no socket matches. */
sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
if (!sk)
goto no_tcp_socket;

/* No user process holds the socket lock: try the prequeue first (recv
 * case 1); if the skb was not prequeued, process it right away (case 2). */
if (!sock_owned_by_user(sk)) {
{
if (!tcp_prequeue(sk, skb)) --------recv情况(1)
ret = tcp_v4_do_rcv(sk, skb);--------recv情况(2)
}
/* A user process owns the lock: park the skb on the backlog queue
 * (case 3); it is drained later by release_sock(). */
} else if (unlikely(sk_add_backlog(sk, skb,sk->sk_rcvbuf + sk->sk_sndbuf))) ------recv情况(3)

sock_put(sk);
return ret;
}

/*
 * Excerpt of __inet_lookup_skb(): resolve the destination socket for an
 * incoming skb. NOTE(review): the author pasted the definition of
 * skb_steal_sock() inline inside the body as a reading aid — this is not
 * valid C; the full lookup fallback is omitted.
 */
static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
     struct sk_buff *skb,const __be16 sport,const __be16 dport)
{
/* Fast path: a socket may already be attached to the skb (presumably by
 * early demux — verify); skb_steal_sock() detaches and returns it. */
struct sock *sk = skb_steal_sock(skb);
{
static inline struct sock *skb_steal_sock(struct sk_buff *skb)
{
/* If a socket is attached, clear the destructor and skb->sk so the
 * caller takes over the reference, then return it. */
if (skb->sk) {
struct sock *sk = skb->sk;
skb->destructor = NULL;
skb->sk = NULL;
return sk;
}
return NULL;
}
}
}
----------------------------------------------------------
recv情况(1)
/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see, why it failed. 8)8)   --ANK
 *
 * Returns true when the skb was consumed (queued or force-flushed),
 * false when the caller must process it via tcp_v4_do_rcv() instead.
 */
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);

/* Prequeueing disabled by sysctl, or no reader task is waiting in
 * tcp_recvmsg() — decline. */
if (sysctl_tcp_low_latency || !tp->ucopy.task)
return false;

/* Segment with no payload beyond the header and an empty prequeue:
 * not worth queueing for the reader. */
if (skb->len <= tcp_hdrlen(skb) &&
    skb_queue_len(&tp->ucopy.prequeue) == 0)
return false;

/* Queue the skb and account its true size against the receive buffer. */
__skb_queue_tail(&tp->ucopy.prequeue, skb);
tp->ucopy.memory += skb->truesize;
if (tp->ucopy.memory > sk->sk_rcvbuf) {
/* Receive-buffer budget exceeded: flush the whole prequeue through
 * the normal receive path right here. */
struct sk_buff *skb1;

while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
sk_backlog_rcv(sk, skb1);
}
tp->ucopy.memory = 0;
} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
/* First skb on the prequeue: wake the sleeping reader... */
wake_up_interruptible_sync_poll(sk_sleep(sk),POLLIN | POLLRDNORM | POLLRDBAND);
/* ...and arm a delayed-ACK timer so an ACK still goes out even if
 * the reader drains the prequeue slowly. */
if (!inet_csk_ack_scheduled(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,(3 * tcp_rto_min(sk)) / 4,TCP_RTO_MAX);
}
return true;
}
Note:(1)通过 __skb_queue_tail(&tp->ucopy.prequeue, skb);把skb buffer加入tp->ucopy.prequeue列表
        (2)wake_up_interruptible_sync_poll唤醒tcp_recvmsg读取信息
----------------------------------------------------------
recv情况(2)
1.tcp_v4_do_rcv()

/* The socket must have it's spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
/* NOTE(review): abridged excerpt — only the ESTABLISHED fast path is
 * shown; the slow path (other TCP states, error handling via 'rsk')
 * is cut from the original function. */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct sock *rsk;

if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst = sk->sk_rx_dst;

sock_rps_save_rxhash(sk, skb);
/* Drop the cached input route if it no longer matches the skb's
 * ingress interface or fails revalidation. */
if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
    dst->ops->check(dst, 0) == NULL) {
dst_release(dst);
sk->sk_rx_dst = NULL;
}
}
/* Established-state segment processing; payload ends up queued via
 * tcp_queue_rcv(). */
tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
return 0;
}
}
2.tcp_queue_rcv()
/* Append an in-order data skb to sk_receive_queue, coalescing it into the
 * queue tail when possible. Returns 1 ("eaten") if the payload was merged
 * into the tail skb, 0 if the skb itself was queued. */
static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
  bool *fragstolen)
{
int eaten;
struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);

/* Strip the TCP header; only payload goes on the receive queue. */
__skb_pull(skb, hdrlen);
eaten = (tail &&tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
/* Advance rcv_nxt past this segment. */
tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if (!eaten) {
/* Not coalesced: queue the skb itself and charge it to the socket. */
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
}
return eaten;
}

----------------------------------------------------------
recv情况(3)
/* Used by processes to "lock" a socket state, so that
 * interrupts and bottom half handlers won't change it
 * from under us. It essentially blocks any incoming
 * packets, so that we won't get any new data or any
 * packets that change the state of the socket.
 *
 * While locked, BH processing will add new packets to
 * the backlog queue.  This queue is processed by the
 * owner of the socket lock right before it is released.
 *
 * Since ~2.3.5 it is also exclusive sleep lock serializing
 * accesses from user process context.
 */
 // Socket-lock state invariants:
 //  sk_lock.slock locked, owned = 0, BH disabled
 //  sk_lock.slock unlocked, owned = 1, BH enabled

/* Nonzero when a user process holds the socket lock via lock_sock(). */
#define sock_owned_by_user(sk) ((sk)->sk_lock.owned)

/* Skeleton of the dispatch in tcp_v4_rcv(): when a user process owns the
 * lock, the skb goes onto the backlog instead of being processed here. */
if (!sock_owned_by_user(sk)) {
}else if (unlikely(sk_add_backlog(sk, skb,sk->sk_rcvbuf + sk->sk_sndbuf))) {
}

#define sock_owned_by_user(sk) ((sk)->sk_lock.owned)
----------------------------------------------------------------------------------->2022.1.11
进程上下文在访问该传输控制块之前需要调用lock_sock()锁定,
在访问完成后调用release_sock()将其释放,这两个函数的实现如下:

(1)lock_sock()
/* Acquire the socket's process-context lock (may sleep); thin wrapper
 * around lock_sock_nested() with subclass 0. */
static inline void lock_sock(struct sock *sk)
{
lock_sock_nested(sk, 0);
}
 
void lock_sock_nested(struct sock *sk, int subclass)
{
/* Note: taking the socket lock may sleep. */
might_sleep();
/* Grab the spinlock and disable bottom halves. */
spin_lock_bh(&sk->sk_lock.slock);
/* If owned is nonzero another process holds the transport control
 * block; wait for it in __lock_sock() (see below). */
if (sk->sk_lock.owned)
__lock_sock(sk);
/* When __lock_sock() returns, the original state has been restored:
 * spinlock held and bottom halves disabled. */
 
/* Set owned to 1: this process now holds the control block. */
sk->sk_lock.owned = 1;
/* Drop the spinlock but keep bottom halves disabled for now. */
spin_unlock(&sk->sk_lock.slock);
/*
 * The sk_lock has mutex_lock() semantics here:
 */
mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
/* Re-enable bottom halves. */
    local_bh_enable();
}
 
/* __lock_sock() parks the calling process on the wait queue in sk->sk_lock
 * until no other process holds the transport control block. Note: called
 * with sk_lock.slock held; the lock is dropped before sleeping and
 * re-acquired before returning. */
static void __lock_sock(struct sock *sk)
{
/* Wait-queue entry for this task. */
DEFINE_WAIT(wait);
 
/* Loop until sock_owned_by_user() reports the block is free. */
for (;;) {
/* Add ourselves (exclusively) to the lock's wait queue. */
prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
TASK_UNINTERRUPTIBLE);
/* Release the spinlock and re-enable bottom halves before sleeping. */
spin_unlock_bh(&sk->sk_lock.slock);
/* Sleep until woken (by release_sock()). */
schedule();
/* Woken: re-take the spinlock and disable bottom halves. */
spin_lock_bh(&sk->sk_lock.slock);
/* If no process holds the control block any more, we are done. */
if (!sock_owned_by_user(sk))
break;
}
finish_wait(&sk->sk_lock.wq, &wait);
}
 
(2)release_sock()
void release_sock(struct sock *sk)
{
/*
 * The sk_lock has mutex_unlock() semantics:
 */
/* Lockdep bookkeeping only. */
mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
 
/* Take the spinlock and disable bottom halves. */
spin_lock_bh(&sk->sk_lock.slock);
/* If the backlog is non-empty, process its queued packets now via
 * __release_sock() (see the packet receive path). */
if (sk->sk_backlog.tail)
__release_sock(sk);
/* Clear owned: the caller no longer holds the control block. */
sk->sk_lock.owned = 0;
/* Wake any processes sleeping in __lock_sock(). */
if (waitqueue_active(&sk->sk_lock.wq))
wake_up(&sk->sk_lock.wq);
/* Drop the spinlock and re-enable bottom halves. */
spin_unlock_bh(&sk->sk_lock.slock);
}
参考:blog.csdn.net/wangquan1992/article/details/108960282

----------------------------------------------------------
1.tcp_recvmsg()
/*
 * This routine copies from a sock struct into the user buffer.
 *
 * Technical note: in 2.3 we work on _locked_ socket, so that
 * tricks with *seq access order and skb->users are not required.
 * Probably, code can be easily improved even more.
 *
 * NOTE(review): abridged excerpt — declarations of tp, skb, seq,
 * peek_seq, copied, target, timeo, user_recv, used and err, plus
 * several slow-path branches, are cut from the original function.
 */

int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int nonblock, int flags, int *addr_len)
{
 lock_sock(sk);

/* copied_seq marks the head of not-yet-read data; with MSG_PEEK we
 * advance a local copy instead so the real pointer stays untouched. */
seq = &tp->copied_seq; //u32 copied_seq; /* Head of yet unread data */
if (flags & MSG_PEEK) {
peek_seq = tp->copied_seq;
seq = &peek_seq;
}

do {
u32 offset;

/* Next get a buffer. */
skb_queue_walk(&sk->sk_receive_queue, skb) {
/* Now that we have two receive queues this
* shouldn't happen.
*/
if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
"recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
flags))
break;

/* Offset of the next unread byte within this skb; a SYN consumes
 * one sequence number but carries no data. */
offset = *seq - TCP_SKB_CB(skb)->seq;
if (tcp_hdr(skb)->syn)
offset--;
if (offset < skb->len)
goto found_ok_skb;
}

if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
/* Install new reader */
tp->ucopy.len = len;
/* Ugly... If prequeue is not empty, we have to
* process it before releasing socket, otherwise
* order will be broken at second iteration.
* More elegant solution is required!!!
*
* Look: we have the following (pseudo)queues:
*
* 1. packets in flight
* 2. backlog
* 3. prequeue
* 4. receive_queue
*
* Each queue can be processed only if the next ones
* are empty. At this point we have empty receive_queue.
* But prequeue _can_ be not empty after 2nd iteration,
* when we jumped to start of loop because backlog
* processing added something to receive_queue.
* We cannot release_sock(), because backlog contains
* packets arrived _after_ prequeued ones.
*
* Shortly, algorithm is clear --- to process all
* the queues in order. We could make it more directly,
* requeueing packets from backlog to prequeue, if
* is not empty. It is more elegant, but eats cycles,
* unfortunately.
*/
if (!skb_queue_empty(&tp->ucopy.prequeue))
goto do_prequeue;

/* __ Set realtime policy in scheduler __ */
}

if (copied >= target) {
/* Do not sleep, just process backlog. */
/* release_sock() drains the backlog; lock_sock() re-acquires. */
release_sock(sk);
lock_sock(sk);
} else
sk_wait_data(sk, &timeo);

if (user_recv) {
int chunk;

/* __ Restore normal policy in scheduler __ */
/* Account data copied directly from the backlog while we slept. */
if ((chunk = len - tp->ucopy.len) != 0) {
NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
len -= chunk;
copied += chunk;
}

/* Drain the prequeue once everything queued so far is consumed. */
if (tp->rcv_nxt == tp->copied_seq &&
    !skb_queue_empty(&tp->ucopy.prequeue)) {
do_prequeue:
tcp_prequeue_process(sk);
if ((chunk = len - tp->ucopy.len) != 0) {
len -= chunk;
copied += chunk;
}
}
}
continue;

found_ok_skb:
/* Ok so how much can we use? */
used = skb->len - offset;//offset = *seq - TCP_SKB_CB(skb)->seq;
if (len < used)
used = len;

/* Copy payload to the user iovec unless the caller asked to discard
 * it (MSG_TRUNC). */
if (!(flags & MSG_TRUNC)) {
{
err = skb_copy_datagram_iovec(skb, offset,msg->msg_iov, used);
if (err) {
/* Exception. Bailout! */
if (!copied)
copied = -EFAULT;
break;
}
}
}

/* Advance the read pointer and loop while the user buffer has room. */
*seq += used;
copied += used;
len -= used;

tcp_rcv_space_adjust(sk);
} while (len > 0);

release_sock(sk);
return copied;
}
EXPORT_SYMBOL(tcp_recvmsg);

2.tcp_prequeue_process()
/* Drain the prequeue in process context: feed every queued skb through the
 * backlog receive handler (which ends up in tcp_v4_do_rcv()). NOTE(review):
 * abridged — declarations of 'skb' and 'tp' are omitted in this excerpt. */
static void tcp_prequeue_process(struct sock *sk)
{
/* RX process wants to run with disabled BHs, though it is not necessary */
local_bh_disable();
while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
sk_backlog_rcv(sk, skb);
local_bh_enable();

/* Clear memory counter. */
tp->ucopy.memory = 0;
}

tcp_prequeue(sk, skb) --------recv情况(1)====>延迟接收数据
(1)tcp_recvmsg()----> sk_wait_data();睡眠等待数据的到来
(2)tcp_v4_rcv()---->
tcp_prequeue()--->__skb_queue_tail()/wake_up_interruptible_sync_poll();添加数据并唤醒tcp_recvmsg()
(3)
tcp_recvmsg()---->if (!skb_queue_empty(&tp->ucopy.prequeue)) goto do_prequeue;---->tcp_prequeue_process()---->tcp_v4_do_rcv()---->tcp_rcv_established()---->tcp_queue_rcv()接收数据

tcp_v4_do_rcv(sk, skb)--------recv情况(2)====>立即接收数据
(1)tcp_recvmsg()----> sk_wait_data();睡眠等待数据的到来
(2)tcp_v4_rcv()---->tcp_v4_do_rcv()---->tcp_rcv_established()---->tcp_queue_rcv()接收数据
(3)sk->sk_data_ready(sk, 0);唤醒tcp_recvmsg()
/* Excerpt: default socket callback setup — sk_data_ready is the hook
 * invoked to wake readers when new data arrives. */
void sock_init_data(struct socket *sock, struct sock *sk)
{
sk->sk_data_ready = sock_def_readable;
}
/* Default sk_data_ready callback: wake any process sleeping on the
 * socket's wait queue and notify async waiters. */
static void sock_def_readable(struct sock *sk, int len)
{
struct socket_wq *wq;

rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
/* Only issue a wakeup when someone is actually sleeping. */
if (wq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
POLLRDNORM | POLLRDBAND);
/* Notify async (SIGIO-style) waiters of incoming data. */
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
rcu_read_unlock();
}

阅读(1635) | 评论(0) | 转发(0) |
0

上一篇:重传----超时重传

下一篇:QUIC和TCP

给主人留下些什么吧!~~