本文主要分析:三次握手中最后一个ACK段到达时,服务器端的处理路径。
内核版本:3.6
Author:zhangskd @ csdn blog
函数路径
以下是第三次握手时,服务端接收到ACK后的处理路径。
接收入口
1. 状态为ESTABLISHED时,用tcp_rcv_established()接收处理。
2. 状态为LISTEN时,说明这个sock处于监听状态,用于被动打开的接收处理,包括SYN和ACK。
3. 当状态不为ESTABLISHED或TIME_WAIT时,用tcp_rcv_state_process()处理。
-
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
-
{
-
struct sock *rsk;
-
-
#ifdef CONFIG_TCP_MD5SIG
-
-
-
-
-
if (tcp_v4_inbound_md5_hash(sk, skb))
-
goto discard;
-
#endif
-
-
-
if (sk->sk_state == TCP_ESTABLISHED) {
-
struct dst_entry *dst = sk->sk_rx_dst;
-
sock_rps_save_rxhash(sk, skb);
-
-
if (dst) {
-
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || dst->ops->check(dst, 0) == NULL) {
-
dst_release(dst);
-
sk->sk_rx_dst = NULL;
-
}
-
}
-
-
-
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
-
rsk = sk;
-
goto reset;
-
}
-
return 0;
-
}
-
-
-
if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
-
goto csum_err;
-
-
-
if (sk->sk_state == TCP_LISTEN) {
-
-
-
-
-
-
struct sock *nsk = tcp_v4_hnd_req(sk, skb);
-
-
if (! nsk)
-
goto discard;
-
-
if (nsk != sk) {
-
sock_rps_save_rxhash(nsk, skb);
-
-
if (tcp_child_process(sk, nsk, skb)) {
-
rsk = nsk;
-
goto reset;
-
}
-
return 0;
-
}
-
} else
-
sock_rps_save_rx(sk, skb);
-
-
-
if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
-
rsk = sk;
-
goto reset;
-
}
-
return 0;
-
-
reset:
-
tcp_v4_send_reset(rsk, skb);
-
-
discard:
-
kfree_skb(skb);
-
return 0;
-
-
csum_err:
-
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
-
goto discard;
-
}
收到SYN段后,服务器端会分配一个连接请求块,并初始化这个连接请求块。
构造和发送SYNACK段。
然后把这个连接请求块链入半连接队列中,启动超时定时器。
之后如果再收到ACK,就能完成三次握手了。
-
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
-
{
-
struct tcphdr *th = tcp_hdr(skb);
-
const struct iphdr *iph = ip_hdr(skb);
-
struct sock *nsk;
-
struct request_sock **prev;
-
-
-
struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, iph->saddr, iph->daddr);
-
if (req)
-
return tcp_check_req(sk, skb, req, prev);
-
-
-
nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, th->source,
-
iph->daddr, th->dest, inet_iif(skb));
-
-
if (nsk) {
-
if (nsk->sk_state != TCP_TIME_WAIT) {
-
bh_lock_sock(nsk);
-
return nsk;
-
}
-
-
inet_twsk_put(inet_twsk(nsk));
-
return NULL;
-
}
-
-
#ifdef CONFIG_SYN_COOKIES
-
-
if (! th->syn)
-
sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
-
#endif
-
-
return sk;
-
}
在表示半连接队列的哈希表中,寻找符合条件的连接请求块。
-
struct request_sock *inet_csk_search_req(const struct sock *sk, struct request_sock ***prevp,
-
const __be16 rport, const __be32 raddr, const __be32 laddr)
-
{
-
const struct inet_connection_sock *icsk = inet_csk(sk);
-
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-
struct request_sock *req, **prev;
-
-
-
for(prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, lopt->nr_table_entries)];
-
(req = *prev) != NULL; prev = &req->dl_next) {
-
const struct inet_request_sock *ireq = inet_rsk(req);
-
-
if (ireq->rmt_port == rport && ireq->rmt_addr == raddr && ireq->loc_addr = laddr
-
&& AF_INET_FAMILY(req->rsk_ops->family)) {
-
WARN_ON(req->sk);
-
*prevp = prev;
-
break;
-
}
-
}
-
-
return req;
-
}
第三次握手
inet_csk_search_req()在半连接队列中查找是否已有符合的连接请求块,如果有,则说明这可能是三次握手的最后一个ACK。
接着调用tcp_check_req()来进行验证,如果合法,则完成三次握手。
-
-
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req,
-
struct request_sock **prev)
-
{
-
struct tcp_options_received tmp_opt;
-
const u8 *hash_location;
-
struct sock *child;
-
const struct tcphdr *th = tcp_hdr(skb);
-
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_ACK);
-
bool paws_reject = false;
-
-
tmp_opt.saw_tstamp = 0;
-
-
-
if (th->doff > (sizeof(struct tcphdr) >> 2)) {
-
tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
-
if (tmp_opt.saw_tstamp) {
-
tmp_opt.ts_recent = req->ts_recent;
-
-
-
-
-
tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ) << req->retrans);
-
paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
-
}
-
}
-
-
-
if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn && flg == TCP_FLAG_SYN
-
&& ! paws_reject) {
-
-
-
-
req->rsk_ops->rtx_syn_ack(sk, req, NULL);
-
return NULL;
-
}
-
-
-
-
-
-
if ((flg & TCP_FLAG_ACK) && (TCP_SKB_CB(skb)->ack_seq !=
-
tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
-
return sk;
-
-
-
if (paws_reject || ! tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-
tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
-
-
if (! (flg & TCP_FLAG_RST))
-
-
-
-
req->rsk_ops->send_ack(sk, skb, req);
-
-
if (paws_reject)
-
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
-
-
return NULL;
-
}
-
-
-
if (tmp_opt.saw_tstamp && ! after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
-
req->ts_recent = tmp_opt.rcv_tsval;
-
-
if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
-
-
flg &= ~TCP_FLAG_SYN;
-
}
-
-
if (flg & (TCP_FLAG_RST | TCP_FLAG_SYN)) {
-
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
-
goto embryonic_reset;
-
}
-
-
-
-
-
if (! (flg & TCP_FLAG_ACK))
-
return NULL;
-
-
-
-
-
if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
-
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
-
inet_rsk(req)->acked = 1;
-
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
-
return NULL;
-
}
-
-
if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
-
tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
-
else if (req->retrans)
-
tcp_rsk(req)->snt_synack = 0;
-
-
-
-
-
-
-
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
-
-
if (child == NULL)
-
goto listen_overflow;
-
-
inet_csk_reqsk_queue_unlink(sk, req, prev);
-
inet_csk_reqsk_queue_removed(sk, req);
-
-
-
inet_csk_reqsk_queue_add(sk, req, child);
-
-
return child;
-
-
listen_overflow:
-
-
if (! sysctl_tcp_abort_on_overflow) {
-
inet_rsk(req)->acked = 1;
-
return NULL;
-
}
-
-
embryonic_reset:
-
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
-
if (! (flg & TCP_FLAG_RST))
-
-
req->rsk_ops->send_reset(sk, skb);
-
-
-
inet_csk_reqsk_queue_drop(sk, req, prev);
-
-
return NULL;
-
}
检查时间戳是否发生了回绕(PAWS检测)。
-
static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt, int rst)
-
{
-
if (tcp_paws_check(rx_opt, 0))
-
return false;
-
-
-
if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
-
return false;
-
-
return true;
-
}
检查客户端的时间戳是否合法。
要求客户端发送SYN时的时间戳 <= 客户端重传SYN时的时间戳,且 <= 客户端发送ACK时的时间戳。
-
static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt, int paws_win)
-
{
-
if ((s32) (rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
-
return true;
-
-
-
if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
-
return true;
-
-
-
-
-
-
if (! rx_opt->ts_recent)
-
return true;
-
}
检查序号是否合法。
-
-
-
-
-
-
/*
 * tcp_in_window - test whether the segment [seq, end_seq) is acceptable for
 * the window [s_win, e_win).
 *
 * Accepted when any of the following holds:
 *  - the segment starts exactly at the left edge (seq == s_win);
 *  - it ends after the left edge and starts before the right edge;
 *  - it is empty (seq == end_seq) and sits exactly on the right edge.
 */
static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
{
	return seq == s_win ||
	       (after(end_seq, s_win) && before(seq, e_win)) ||
	       (seq == e_win && seq == end_seq);
}
连接请求块操作
request_sock_ops为处理连接请求块的函数指针表,对于TCP,它的实例为tcp_request_sock_ops。
-
/*
 * Operations table used for TCP (IPv4) connection requests: each member
 * points at the TCP-specific handler for that operation.
 */
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family          = PF_INET,
	.obj_size        = sizeof(struct tcp_request_sock),
	.rtx_syn_ack     = tcp_v4_rtx_synack,       /* retransmit SYNACK */
	.send_ack        = tcp_v4_reqsk_send_ack,   /* send an ACK */
	.destructor      = tcp_v4_reqsk_destructor, /* release per-request data */
	.send_reset      = tcp_v4_send_reset,       /* send an RST */
	.syn_ack_timeout = tcp_syn_ack_timeout,     /* SYNACK timeout accounting */
};
(1) 重传SYNACK段
-
static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp)
-
{
-
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
-
return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
-
}
我们在上一篇中已分析过tcp_v4_send_synack(),它主要用于构造和发送SYNACK段。
(2) 发送ACK段
在tcp_check_req()中,如果接收到的ACK段时间戳不合法、或者序号不在接收窗口内,且不含RST标志,
则需要给客户端发送一个ACK。
-
static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req)
-
{
-
tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
-
req->rcv_wnd, req->ts_recent, 0,
-
tcp_md5_do_lookup(sk, (union tcp_md5_addr *) &ip_hdr(skb)->daddr, AF_INET),
-
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos);
-
}
-
/*
 * tcp_v4_send_ack - build and send a bare ACK in reply to @skb.
 * @skb:         the segment being answered (ports and addresses are swapped)
 * @seq:         sequence number to send
 * @ack:         acknowledgment number to send
 * @win:         window to advertise
 * @ts:          timestamp to echo; 0 means no timestamp option is added
 * @oif:         when non-zero, stored as the reply's bound_dev_if
 * @key:         MD5 key, or NULL (only used with CONFIG_TCP_MD5SIG)
 * @reply_flags: stored as the flags of the reply argument
 * @tos:         stored as the TOS of the reply argument
 */
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
			    u32 ts, int oif, struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	/* Reply buffer: TCP header followed by room for the options. */
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(rep.th));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len = sizeof(rep.th);

	/* Optional timestamp option: NOP, NOP, TIMESTAMP, then the two values. */
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the ports of the incoming segment and fill in the header. */
	rep.th.dest = th->source;
	rep.th.source = th->dest;
	rep.th.doff = arg.iov[0].iov_len / 4;
	rep.th.seq = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack = 1;
	rep.th.window = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		/* The MD5 option goes after the timestamp words, if any. */
		int idx = ts ? 3 : 0;

		rep.opt[idx] = htonl((TCPOPT_NOP << 24) |
				     (TCPOPT_NOP << 16) |
				     (TCPOPT_MD5SIG << 8) |
				     TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_addr((__u8 *)&rep.opt[idx + 1], key,
				     iph->saddr, iph->daddr, &rep.th);
	}
#endif

	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(iph->daddr, iph->saddr,
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;

	ip_send_unicast_reply(net, skb, iph->saddr, iph->daddr,
			      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}
-
struct ip_reply_arg {
-
struct kvec iov[1];
-
int flags;
-
__wsum csum;
-
int csumoffset;
-
int bound_dev_if;
-
u8 tos;
-
};
-
-
struct kvec {
-
void *iov_base;
-
size_t iov_len;
-
-
};
(3) 发送RST段
检测到对端异常时,发送RST段。
-
static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
-
{
-
const struct tcphdr *th = tcp_hdr(skb);
-
struct {
-
struct tcphdr th;
-
#ifdef CONFIG_TCP_MD5SIG
-
__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2) ];
-
#endif
-
} rep;
-
struct ip_reply_arg arg;
-
#ifdef CONFIG_TCP_MD5SIG
-
struct tcp_md5sig_key *key;
-
const __u8 *hash_location = NULL;
-
unsigned char newhash[16];
-
int genhash;
-
struct sock *sk1 = NULL;
-
#endif
-
struct net *net;
-
-
-
if (th->rst)
-
return;
-
-
if (skb_rtable(skb)->rt_type != RTN_LOCAL)
-
return;
-
-
-
memset(&rep, 0, sizeof(rep));
-
rep.th.dest = th->source;
-
rep.th.source = th->dest;
-
rep.th.doff = sizeof(struct tcphdr) / 4;
-
rep.th.rst = 1;
-
-
if (th->ack) {
-
rep.th.seq = th->ack_seq;
-
} else {
-
rep.th.ack = 1;
-
rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2));
-
}
-
-
memset(&arg, 0, sizeof(arg));
-
arg.iov[0].iov_base = (unsigned char *) &rep;
-
arg.iov[0].iov_len = sizeof(rep.th);
-
-
#ifdef CONFIG_TCP_MD5SIG
-
-
...
-
#endif
-
-
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr,
-
arg.iov[0].iov_len, IPPROTO_TCP, 0);
-
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
-
arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
-
-
-
-
-
if (sk)
-
arg.bound_dev_if = sk->sk_bound_dev_if;
-
net = dev_net(skb_dst(skb)->dev);
-
arg.tos = ip_hdr(skb)->tos;
-
-
-
ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
-
-
TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
-
TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
-
-
#ifdef CONFIG_TCP_MD5SIG
-
-
...
-
#endif
-
}
(4) 析构函数
释放request_sock实例前调用。
-
-
static void tcp_v4_reqsk_destructor(struct request_sock *req)
-
{
-
kfree(inet_rsk(req)->opt);
-
}
(5) 超时处理函数
不是真正的SYNACK超时处理函数,简单更新下统计变量。
-
void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req)
-
{
-
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
-
}