Chinaunix首页 | 论坛 | 博客
  • 博客访问: 4441968
  • 博文数量: 252
  • 博客积分: 5347
  • 博客等级: 大校
  • 技术积分: 13838
  • 用 户 组: 普通用户
  • 注册时间: 2009-09-30 10:13
文章分类
文章存档

2022年(12)

2017年(11)

2016年(7)

2015年(14)

2014年(20)

2012年(9)

2011年(20)

2010年(153)

2009年(6)

分类: 云计算

2017-04-06 09:56:02

 1.  TCP接收窗口的调整

上面配置的rcv_buf都是接收缓存的大小。在TCP层中,接收窗口不能完全占满接收缓存,因为TCP层描述的接收窗口仅仅是TCP层的数据(data),不包含数据包的header部分,也就是不包含TCP header、IP header和一些选项信息。

tcp三次握手的时候会通告各自的接收窗口,包括窗口的扩大因子(window scale)。在使用wireshark抓包的时候,如果没有抓到三次握手的数据报文,wireshark就无法准确获取窗口的扩大因子,在后续的报文中会有下面的提示信息。

tcp的发送函数tcp_transmit_skb中调用tcp_select_window函数,选择通告的窗口(win)大小。TCP头部的接收窗口字段只有16位,所以最大只能表示65535字节,约64KB;可以通过window scale选项扩大可通告的接收窗口(而不是接收缓存)的大小。

1.1  tcp_transmit_skb

点击(此处)折叠或打开

  1. static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
  2.              gfp_t gfp_mask)
  3. { /* Excerpt: only the option sizing and the window field setup are shown. */
  4. ……………………..
  5. if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) // SYN segments get their options from a dedicated helper
  6.         tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
  7.     else
  8.         tcp_options_size = tcp_established_options(sk, skb, &opts,
  9.                              &md5);
  10.     tcp_header_size = tcp_options_size + sizeof(struct tcphdr); /* options plus the fixed TCP header */
  11. ………..
  12. if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
  13.         /* RFC1323: The window in SYN & SYN/ACK segments
  14.          * is never scaled.
  15.          * During the handshake the window is sent as-is, capped at 65535. */
  16.         th->window    = htons(min(tp->rcv_wnd, 65535U));
  17.     } else {
  18.         th->window    = htons(tcp_select_window(sk)); /* value already shifted by rcv_wscale */
  19.     }
  20. …………………………
  21. }

  22. static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
  23.                 struct tcp_out_options *opts,
  24.                 struct tcp_md5sig_key **md5)
  25. { /* Fill *opts for a SYN segment and return the number of option bytes used. */
  26.     struct tcp_sock *tp = tcp_sk(sk);
  27.     unsigned int remaining = MAX_TCP_OPTION_SPACE; /* option bytes still available */
  28.     struct tcp_fastopen_request *fastopen = tp->fastopen_req;

  29. #ifdef CONFIG_TCP_MD5SIG
  30.     *md5 = tp->af_specific->md5_lookup(sk, sk);
  31.     if (*md5) {
  32.         opts->options |= OPTION_MD5;
  33.         remaining -= TCPOLEN_MD5SIG_ALIGNED;
  34.     }
  35. #else
  36.     *md5 = NULL;
  37. #endif

  38.     opts->mss = tcp_advertise_mss(sk); /* the MSS option is always accounted for */
  39.     remaining -= TCPOLEN_MSS_ALIGNED;

  40.     if (likely(sysctl_tcp_timestamps && !*md5)) { /* timestamps are skipped when an MD5 key is in use */
  41.         opts->options |= OPTION_TS;
  42.         opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
  43.         opts->tsecr = tp->rx_opt.ts_recent;
  44.         remaining -= TCPOLEN_TSTAMP_ALIGNED;
  45.     }
  46.     if (likely(sysctl_tcp_window_scaling)) { /* With sysctl_tcp_window_scaling enabled, the SYN carries the window scale option. */
  47.         opts->ws = tp->rx_opt.rcv_wscale;
  48.         opts->options |= OPTION_WSCALE;
  49.         remaining -= TCPOLEN_WSCALE_ALIGNED;
  50.     }
  51.     if (likely(sysctl_tcp_sack)) {
  52.         opts->options |= OPTION_SACK_ADVERTISE;
  53.         if (unlikely(!(OPTION_TS & opts->options))) /* space is charged only when no timestamp option is present; NOTE(review): presumably SACK-permitted shares the timestamp option's padding - confirm */
  54.             remaining -= TCPOLEN_SACKPERM_ALIGNED;
  55.     }

  56.     if (fastopen && fastopen->cookie.len >= 0) {
  57.         u32 need = fastopen->cookie.len;

  58.         need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
  59.                      TCPOLEN_FASTOPEN_BASE;
  60.         need = (need + 3) & ~3U; /* Align to 32 bits */
  61.         if (remaining >= need) { /* the cookie is added only if it still fits */
  62.             opts->options |= OPTION_FAST_OPEN_COOKIE;
  63.             opts->fastopen_cookie = &fastopen->cookie;
  64.             remaining -= need;
  65.             tp->syn_fastopen = 1;
  66.             tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
  67.         }
  68.     }

  69.     return MAX_TCP_OPTION_SPACE - remaining; /* bytes consumed by the selected options */
  70. }


1.2  tcp_select_window

tcp_select_window函数根据当前的接收缓存,确定最终通告的window的大小。

点击(此处)折叠或打开

  1. static u16 tcp_select_window(struct sock *sk)
  2. { /* Pick the window to advertise, record it in tp, and return it scaled for the header field. */
  3.     struct tcp_sock *tp = tcp_sk(sk);
  4.     u32 old_win = tp->rcv_wnd;
  5.     u32 cur_win = tcp_receive_window(tp); /* remaining receive window as reported by tcp_receive_window() (helper not shown) */
  6.     u32 new_win = __tcp_select_window(sk);/* new window computed from the free receive buffer */
  7.     
  8.     /* Never shrink the offered window: keep the current one, aligned to 1 << rcv_wscale. */
  9.     if (new_win < cur_win) {
  10.         if (new_win == 0)
  11.             NET_INC_STATS(sock_net(sk),
  12.                  LINUX_MIB_TCPWANTZEROWINDOWADV);
  13.         new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
  14.     }
  15.     tp->rcv_wnd = new_win;
  16.     tp->rcv_wup = tp->rcv_nxt;

  17.     /* Make sure we do not exceed the maximum possible
  18.      * scaled window.
  19.      * i.e. clamp to the largest window that can be advertised. */
  20.     if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
  21.         new_win = min(new_win, MAX_TCP_WINDOW);
  22.     else
  23.         new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));

  24.     /* RFC1323 scaling applied: shift right by the receive scale factor carried in our SYN options. */
  25.     new_win >>= tp->rx_opt.rcv_wscale;

  26.     /* If we advertise zero window, disable fast path. */
  27.     if (new_win == 0) {
  28.         tp->pred_flags = 0;
  29.         if (old_win)
  30.             NET_INC_STATS(sock_net(sk),
  31.                  LINUX_MIB_TCPTOZEROWINDOWADV);
  32.     } else if (old_win == 0) {
  33.         NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
  34.     }
  35.  /* Return the scaled window; this is the raw value placed in the TCP header, i.e. what a packet capture shows. */
  36.     return new_win;
  37. }

1.3  __tcp_select_window

点击(此处)折叠或打开

  1. u32 __tcp_select_window(struct sock *sk)
  2. { /* Compute the (unscaled) receive window from the free receive buffer space. */
  3.     struct inet_connection_sock *icsk = inet_csk(sk);
  4.     struct tcp_sock *tp = tcp_sk(sk);
  5.     /* MSS for the peer's data. Previous versions used mss_clamp
  6.      * here. I don't know if the value based on our guesses
  7.      * of peer's MSS is better for the performance. It's more correct
  8.      * but may be worse for the performance because of rcv_mss
  9.      * fluctuations. --SAW 1998/11/1
  10.      */
  11.     int mss = icsk->icsk_ack.rcv_mss; // estimated MSS of the peer's data (see the note above)
  12.     int free_space = tcp_space(sk); // window-usable share of the unused receive buffer: tcp_win_from_space(sk_rcvbuf - sk_rmem_alloc), i.e. half of it when tcp_adv_win_scale is 1
  13.     int allowed_space = tcp_full_space(sk); // window-usable share of the whole receive buffer (sk_rcvbuf / 2 when tcp_adv_win_scale is 1)
  14.     int full_space = min_t(int, tp->window_clamp, allowed_space); // upper bound: allowed_space capped by window_clamp
  15.     int window;

  16.     if (mss > full_space)
  17.         mss = full_space;
  18. /* Less than half of the full space is free: receive memory is getting tight. */
  19.     if (free_space < (full_space >> 1)) {
  20.         icsk->icsk_ack.quick = 0;

  21.         if (sk_under_memory_pressure(sk)) /* under memory pressure, clamp rcv_ssthresh to at most 4 * advmss */
  22.             tp->rcv_ssthresh = min(tp->rcv_ssthresh,
  23.                      4U * tp->advmss);

  24.         /* free_space might become our new window, make sure we don't
  25.          * increase it due to wscale.
  26.          */
  27.         free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
  28.    /* Too little free space (below 1/16 of allowed_space, or below one MSS): return a zero window. */
  29.         if (free_space < (allowed_space >> 4) || free_space < mss)
  30.             return 0;
  31.     }

  32.     if (free_space > tp->rcv_ssthresh)
  33.         free_space = tp->rcv_ssthresh;

  34.     /* Don't do rounding if we are using window scaling, since the
  35.      * scaled window will not line up with the MSS boundary anyway.
  36.      */
  37.     window = tp->rcv_wnd;
  38.     if (tp->rx_opt.rcv_wscale) { /* window scaling in use: start from free_space instead of rcv_wnd */
  39.         window = free_space;

  40.         /* Advertise enough space so that it won't get scaled away.
  41.          * Important case: prevent zero window announcement if
  42.          * 1<<rcv_wscale > mss.
  43.          * Round up to a multiple of 1 << rcv_wscale so the later right shift does not truncate it. */    
  44.         if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
  45.             window = (((window >> tp->rx_opt.rcv_wscale) + 1)
  46.                  << tp->rx_opt.rcv_wscale);
  47.     } else {
  48.     /* No window scaling: keep rcv_wnd unless it lags free_space by at least one MSS or exceeds it; then use free_space rounded down to a multiple of mss. */
  49.         if (window <= free_space - mss || window > free_space)
  50.             window = (free_space / mss) * mss;
  51.         else if (mss == full_space &&
  52.              free_space > window + (full_space >> 1))
  53.             window = free_space;
  54.     }
  55. /* Return the computed window; the caller applies the rcv_wscale shift. */
  56.     return window;
  57. }

1.4   tcp_space

点击(此处)折叠或打开

  1. /* Note: caller must be prepared to deal with negative returns */
  2. static inline int tcp_space(const struct sock *sk)
  3. { /* Window-usable share of the receive buffer that is not yet allocated. */
  4.     /* sk_rmem_alloc is the amount of receive-buffer space already taken by packets queued on this socket;
  5.  * see http://vger.kernel.org/~davem/skb_sk.html */
  6.     return tcp_win_from_space(sk->sk_rcvbuf -
  7.                  atomic_read(&sk->sk_rmem_alloc));
  8. }

  9. static inline int tcp_full_space(const struct sock *sk)
  10. { /* Window-usable share of the entire receive buffer (sk_rcvbuf). */
  11.     return tcp_win_from_space(sk->sk_rcvbuf);
  12. }

  13. static inline int tcp_win_from_space(int space)
  14. { /* Convert buffer space into window space: space >> -scale when scale <= 0, otherwise space minus space >> scale. */
  15. /* sysctl_tcp_adv_win_scale is 1 in newer kernels and was 2 before; upstream commit b49960a05e32121d29316cfdf653894b88ac9190 is the patch that changed it and explains why. With a value of 1 the share of the buffer usable for the window drops from 3/4 to 1/2, i.e. more room is reserved for per-packet overhead. */
  16.     return sysctl_tcp_adv_win_scale<=0 ?
  17.         (space>>(-sysctl_tcp_adv_win_scale)) :
  18.         space - (space>>sysctl_tcp_adv_win_scale);
  19. }


阅读(9905) | 评论(0) | 转发(1) |
给主人留下些什么吧!~~