/* Upper bound that tcp_bound_rto() applies to tp->rto: 120*HZ
 * (120 seconds, assuming HZ is the number of timer ticks per second). */
#define TCP_RTO_MAX (120*HZ)
/* HZ/5 (0.2 seconds under the same assumption); tcp_rtt_estimator() uses it
 * as the floor for mdev_max and for the initial rttvar. */
#define TCP_RTO_MIN (HZ/5)
#define TCP_TIMEOUT_INIT (3*HZ) /* RFC 1122 initial RTO value */
static __inline__ voidtcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt) 在响应处理中刷新RTT{
/* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
if (tp->saw_tstamp && tp->rcv_tsecr)
tcp_ack_saw_tstamp(tp, flag);
使用反射时戳
else if (seq_rtt >= 0)
tcp_ack_no_tstamp(tp, seq_rtt, flag); 不使用反射时戳}
/* Read draft-ietf-tcplw-high-performance before mucking * with this code. (Superceeds RFC1323) */
static void tcp_ack_saw_tstamp(struct tcp_opt *tp, int flag){ __u32 seq_rtt;
/* RTTM Rule: A TSecr value received in a segment is used to * update the averaged RTT measurement only if the segment * acknowledges some new data, i.e., only if it advances the * left edge of the send window. * * See draft-ietf-tcplw-high-performance-00, section 3.3. * 1998/04/10 Andrey V. Savochkin */
seq_rtt = tcp_time_stamp - tp->rcv_tsecr;
取时间差 tcp_rtt_estimator(tp, seq_rtt);
估算RTT tcp_set_rto(tp);
重置重发超时 if (tp->backoff) { 如果重发延长指数非零 if (!tp->retransmits || !(flag & FLAG_RETRANS_DATA_ACKED)) 如果重发数为零 或者 重发包已受到了响应 tp->backoff = 0; 复位延长指数 else tp->rto <<= tp->backoff; 延长重发超时 } tcp_bound_rto(tp); 重发超时最大不超过120秒}static void tcp_ack_no_tstamp(struct tcp_opt *tp, u32 seq_rtt, int flag){ /* We don't have a timestamp. Can only use * packets that are not retransmitted to determine * rtt estimates. Also, we must not reset the * backoff for rto until we get a non-retransmitted * packet. This allows us to deal with a situation * where the network delay has increased suddenly. * I.e. Karn's algorithm. (SIGCOMM '87, p5.) */ if (flag & FLAG_RETRANS_DATA_ACKED) 如果是重传数据的响应 return; tcp_rtt_estimator(tp, seq_rtt); tcp_set_rto(tp); if (tp->backoff) { /* To relax it? We have valid sample as soon as we are * here. Why not to clear backoff? */ if (!tp->retransmits) tp->backoff = 0; else tp->rto <<= tp->backoff; } tcp_bound_rto(tp); }/* Called to compute a smoothed rtt estimate. The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 * piece by Van Jacobson. * NOTE: the next three routines used to be one big routine. * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt){ long m = mrtt; /* RTT */ /* The following amusing code comes from Jacobson's * article in SIGCOMM '88. Note that rtt and mdev * are scaled versions of rtt and mean deviation. * This is designed to be as fast as possible * m stands for "measurement". * * On a 1990 paper the rto value is changed to: * RTO = rtt + 4 * mdev * * Funny. This algorithm seems to be very broken. * These formulae increase RTO, when it should be decreased, increase * too slowly, when it should be incresed fastly, decrease too fastly * etc. 
I guess in BSD RTO takes ONE value, so that it is absolutely * does not matter how to _calculate_ it. Seems, it was trap * that VJ failed to avoid. 8) */ if(m == 0) m = 1; if (tp->srtt != 0) { m -= (tp->srtt >> 3); /* m is now error in rtt est */ 求RTT均值偏差 tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */ if (m < 0) { 如果新的RTT比均值要小 m = -m; /* m is now abs(error) */ 取绝对值 m -= (tp->mdev >> 2); /* similar update on mdev */ 求与RTT平均误差的偏差 /* This is similar to one of Eifel findings. * Eifel blocks mdev updates when rtt decreases. * This solution is a bit different: we use finer gain * for mdev in this case (alpha*beta). * Like Eifel it also prevents growth of rto, * but also it limits too fast rto decreases, * happening in pure Eifel. */ if (m > 0) m >>= 3; 减小收敛速度 } else { m -= (tp->mdev >> 2); /* similar update on mdev */ } tp->mdev += m; /* mdev = 3/4 mdev + 1/4 new */ if (tp->mdev > tp->mdev_max) { tp->mdev_max = tp->mdev; 取最大值 if (tp->mdev_max > tp->rttvar) tp->rttvar = tp->mdev_max; } if (after(tp->snd_una, tp->rtt_seq)) { 如果是新应答 if (tp->mdev_max < tp->rttvar) tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2; 平滑最大RTT均偏差 tp->rtt_seq = tp->snd_una; 记录响应取样位置 tp->mdev_max = TCP_RTO_MIN; 0.2秒 } } else { 初始值 /* no previous measure. */ tp->srtt = m<<3; /* take the measured time to be rtt */ tp->mdev = m<<2; /* make sure rto = 3*rtt */ tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); tp->rtt_seq = tp->snd_nxt; }}/* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */static __inline__ void tcp_set_rto(struct tcp_opt *tp){ /* Old crap is replaced with new one. 8) * * More seriously: * 1. If rtt variance happened to be less 50msec, it is hallucination. * It cannot be less due to utterly erratic ACK generation made * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ * to do with delayed acks, because at cwnd>2 true delack timeout * is invisible. Actually, Linux-2.4 also generates erratic * ACKs in some curcumstances. 
*/ tp->rto = (tp->srtt >> 3) + tp->rttvar; /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, * all the algo is pure [censored] and should be replaced * with correct one. It is exaclty, which we pretend to do. */}/* NOTE: clamping at TCP_RTO_MIN is not required, current algo * guarantees that rto is higher. */static __inline__ void tcp_bound_rto(struct tcp_opt *tp){ if (tp->rto > TCP_RTO_MAX) tp->rto = TCP_RTO_MAX; }
|