Category: LINUX

2020-11-19 16:57:06

Kernel version: 3.13.1

skb_buffer allocation (device: Intel e1000, in a virtual machine)
1.e1000_clean_rx_irq
/**
 * e1000_clean_rx_irq - Send received data up the network stack; legacy
 * @adapter: board private structure
 * @rx_ring: ring to clean
 * @work_done: amount of napi work completed this call
 * @work_to_do: max amount of work allowed for this call to do
 */
static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
			       struct e1000_rx_ring *rx_ring,
			       int *work_done, int work_to_do)
{
	/* Excerpt: walk the descriptors the hardware has filled (DD bit set),
	 * take the pre-allocated skb out of buffer_info and push the data up. */
	while (rx_desc->status & E1000_RXD_STAT_DD) {
		struct sk_buff *skb;

		skb = buffer_info->skb;
		buffer_info->skb = NULL;
		length = le16_to_cpu(rx_desc->length);
		skb_put(skb, length);
	}

	/* Refill the ring: every descriptor consumed above gets a new skb. */
	cleaned_count = E1000_DESC_UNUSED(rx_ring);
	if (cleaned_count)
		adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
}

2.e1000_alloc_rx_buffers
/**
 * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended
 * @adapter: address of board private structure
 **/
static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
				   struct e1000_rx_ring *rx_ring,
				   int cleaned_count)
{
	/* Excerpt: for each unused descriptor, reuse the skb left in the slot
	 * or allocate a fresh one. */
	buffer_info = &rx_ring->buffer_info[i];
	skb = buffer_info->skb;
	skb = netdev_alloc_skb_ip_align(netdev, bufsz);
}
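
For context, the refill loop inside e1000_alloc_rx_buffers() looks roughly like the condensed sketch below (paraphrased from the 3.13 driver; error handling and alignment checks omitted): each cleaned descriptor is re-armed with a freshly allocated (or reused) skb whose data buffer is DMA-mapped and written back into the descriptor, so the NIC always has somewhere to put the next frame.

	while (cleaned_count--) {
		skb = buffer_info->skb;
		if (skb) {
			skb_trim(skb, 0);	/* reuse an skb left in the slot */
			goto map_skb;
		}

		skb = netdev_alloc_skb_ip_align(netdev, bufsz);
		if (unlikely(!skb)) {
			adapter->alloc_rx_buff_failed++;
			break;			/* better luck on the next refill */
		}

		buffer_info->skb = skb;
		buffer_info->length = adapter->rx_buffer_len;
map_skb:
		buffer_info->dma = dma_map_single(&pdev->dev, skb->data,
						  buffer_info->length,
						  DMA_FROM_DEVICE);

		/* hand the buffer to hardware via the descriptor */
		rx_desc = E1000_RX_DESC(*rx_ring, i);
		rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);

		if (unlikely(++i == rx_ring->count))
			i = 0;
		buffer_info = &rx_ring->buffer_info[i];
	}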

tcp_data_queue()---->tcp_try_rmem_schedule()---->sk_rmem_schedule()---->__sk_mem_schedule()
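
The middle link of this chain is a thin inline wrapper in include/net/sock.h; roughly (3.13, simplified sketch), sk_rmem_schedule() only falls through to __sk_mem_schedule() when the prepaid sk_forward_alloc cannot cover the skb:

static inline bool sk_rmem_schedule(struct sock *sk, struct sk_buff *skb,
				    int size)
{
	if (!sk_has_account(sk))	/* protocol does no memory accounting */
		return true;
	/* either the prepaid quota already covers this skb,
	 * or ask __sk_mem_schedule() to top it up */
	return size <= sk->sk_forward_alloc ||
		__sk_mem_schedule(sk, size, SK_MEM_RECV) ||
		skb_pfmemalloc(skb);
}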
1. tcp_prot
struct proto tcp_prot = {
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	/*
	 * sysctl_tcp_mem[] is filled in at boot by tcp_init_mem():
	 *
	 * static void tcp_init_mem(void)
	 * {
	 *	unsigned long limit = nr_free_buffer_pages() / 8;
	 *
	 *	limit = max(limit, 128UL);
	 *	sysctl_tcp_mem[0] = limit / 4 * 3;
	 *	sysctl_tcp_mem[1] = limit;
	 *	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
	 * }
	 */
	.sysctl_mem	= sysctl_tcp_mem,
	.sysctl_rmem	= sysctl_tcp_rmem,
};
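
A quick worked example (hypothetical numbers, assuming 4 KB pages): if nr_free_buffer_pages() returns 1,000,000 pages at boot, then limit = 125,000 pages and sysctl_tcp_mem becomes {93750, 125000, 187500} pages, i.e. roughly 366 MiB, 488 MiB and 732 MiB for the low, pressure and hard limits that __sk_mem_schedule() below compares against.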
/**
 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 * @sk: socket
 * @size: memory size to allocate
 * @kind: allocation type
 *
 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 * rmem allocation. This function assumes that protocols which have
 * memory_pressure use sk_wmem_queued as write buffer accounting.
 */
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	struct proto *prot = sk->sk_prot;
	int amt = sk_mem_pages(size);
	long allocated;
	int parent_status = UNDER_LIMIT;

	/* credit whole pages to the socket's prepaid quota first */
	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;

	/*
	static inline long
	sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
	{
		struct proto *prot = sk->sk_prot;
		return atomic_long_add_return(amt, prot->memory_allocated);
	}
	*/
	allocated = sk_memory_allocated_add(sk, amt, &parent_status);

	/* Under limit. */
	if (parent_status == UNDER_LIMIT &&
	    allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/*
	static inline long sk_prot_mem_limits(const struct sock *sk, int index)
	{
		long *prot = sk->sk_prot->sysctl_mem;
		return prot[index];
	}
	*/

	/* Under pressure. (we or our parents) */
	if ((parent_status > SOFT_LIMIT) ||
	    allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit (we or our parents) */
	if ((parent_status == OVER_LIMIT) ||
	    (allocated > sk_prot_mem_limits(sk, 2)))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
			return 1;
	}
	/* ... SK_MEM_SEND branch elided ... */

	if (sk_has_memory_pressure(sk)) {
		int alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) >
		    alloc * sk_mem_pages(sk->sk_wmem_queued +
					 atomic_read(&sk->sk_rmem_alloc) +
					 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:
	/* Alas. Undo changes. */
	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
	sk_memory_allocated_sub(sk, amt);

	return 0;
}
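
To make the quanta concrete (assuming a 4 KB page size): SK_MEM_QUANTUM is PAGE_SIZE and sk_mem_pages() rounds the request up to whole pages. Scheduling an skb whose truesize is, say, 2304 bytes therefore gives amt = 1, sk->sk_forward_alloc grows by 4096 and tcp_memory_allocated grows by one page; once the 2304 bytes are actually charged, the remaining 1792 bytes stay in sk_forward_alloc for the next skb.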

Notes: 1. The NIC allocates skb_buffer space on its own: as long as data arrives, an skb_buffer is allocated.
        2. __sk_mem_schedule() bounds sk->sk_forward_alloc through the various preset limits.
        3. TCP uses calculations over these preset values to bound the receive window, and thereby bounds how much data the NIC takes in (the two mechanisms interlock) <---- my own guess
--------------------------------------------------------------------


Two other fields on the sk are also very important: sk->sk_rmem_alloc is the receive-buffer memory currently in use, and sk->sk_forward_alloc is memory pre-allocated from the kernel. How do the two relate?
  By analogy, sk->sk_forward_alloc is like a prepaid card, while sk->sk_rmem_alloc records the actual spending. Whenever memory has to be spent, the kernel first checks whether sk->sk_forward_alloc still has a balance; if not, it requests memory from the system and credits it to sk->sk_forward_alloc, and later charges are simply deducted from that balance. When sk->sk_forward_alloc is used up, it gets topped up again.
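
In code, the "spend / refund" side of this analogy corresponds to a couple of small inline helpers in include/net/sock.h; roughly (3.13, simplified sketch):

static inline void sk_mem_charge(struct sock *sk, int size)
{
	if (!sk_has_account(sk))
		return;
	/* spend from the prepaid balance */
	sk->sk_forward_alloc -= size;
}

static inline void sk_mem_uncharge(struct sock *sk, int size)
{
	if (!sk_has_account(sk))
		return;
	/* refund when the skb is freed or consumed */
	sk->sk_forward_alloc += size;
}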

Kernel version: 3.13.1
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
	int eaten = -1;

	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
		if (tcp_receive_window(tp) == 0)
			goto out_of_window;

		/* Ok. In sequence. In window. */
		if (tp->ucopy.task == current &&
		    tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
		    sock_owned_by_user(sk) && !tp->urg_data) {
			int chunk = min_t(unsigned int, skb->len, tp->ucopy.len);

			if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
				tp->ucopy.len -= chunk;
				tp->copied_seq += chunk;
				eaten = (chunk == skb->len);
				tcp_rcv_space_adjust(sk);
			}
		}

		if (eaten <= 0) {
queue_and_out:
			/* Because of how eaten and this condition interact, there are
			 * paths where tcp_try_rmem_schedule() has not yet credited
			 * sk->sk_forward_alloc before tcp_queue_rcv() charges it,
			 * which can drive sk->sk_forward_alloc negative. */
			if (eaten < 0 && tcp_try_rmem_schedule(sk, skb, skb->truesize))
				goto drop;

			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
		}
		......
}

Kernel version: 4.14
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
	int eaten;

	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
		if (tcp_receive_window(tp) == 0)
			goto out_of_window;
queue_and_out:
		/* The first skb on an empty receive queue is force-charged, so the
		 * accounting always happens before tcp_queue_rcv(). */
		if (skb_queue_len(&sk->sk_receive_queue) == 0)
			sk_forced_mem_schedule(sk, skb->truesize);
		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
			goto drop;

		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
		.....
	}
	......
}
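
For comparison, sk_forced_mem_schedule() (net/core/sock.c in 4.14) credits sk_forward_alloc unconditionally, without checking the tcp_mem limits; roughly (simplified sketch, memcg accounting omitted):

void sk_forced_mem_schedule(struct sock *sk, int size)
{
	int amt;

	if (size <= sk->sk_forward_alloc)
		return;
	amt = sk_mem_pages(size);
	/* credit the pages without consulting sysctl_tcp_mem: the first skb on
	 * an empty receive queue must always be accepted */
	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
	sk_memory_allocated_add(sk, amt);
}

So for the first in-order skb on an empty receive queue the accounting is guaranteed to run before tcp_queue_rcv(), which addresses the negative sk_forward_alloc case noted in the 3.13 excerpt above.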