本文主要分析:服务器端如何构造和发送SYNACK段。
内核版本:3.6
Author:zhangskd @ csdn blog
发送入口
tcp_v4_send_synack()用于发送SYNACK段,在tcp_v4_conn_request()中被调用。
首先调用tcp_make_synack()构造SYNACK段,主要是构造TCP报头和初始化skb中的一些字段。
然后调用ip_build_and_send_pkt()添加IP报头后发送出去。
-
-
-
-
-
/*
 * Build a SYN-ACK answering the connection request @req and pass it to the
 * IP layer.
 *
 * When the caller supplies no route (@dst is NULL) one is looked up with
 * inet_csk_route_req(). The segment itself is produced by tcp_make_synack();
 * its checksum field is then prepared by __tcp_v4_send_check(), the TX queue
 * is taken from @queue_mapping, and ip_build_and_send_pkt() prepends the IP
 * header and transmits it.
 *
 * Returns -1 if no route or no skb could be obtained; otherwise the result of
 * ip_build_and_send_pkt() after being filtered through net_xmit_eval().
 *
 * NOTE(review): @nocache is not referenced anywhere in this body - confirm
 * against the kernel tree whether the route lookup is supposed to receive it.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp,
			      u16 queue_mapping, bool nocache)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct sk_buff *synack;
	struct flowi4 fl4;
	int rc;

	/* Grab a route first, unless the caller already holds one. */
	if (!dst) {
		dst = inet_csk_route_req(sk, &fl4, req);
		if (dst == NULL)
			return -1;
	}

	synack = tcp_make_synack(sk, dst, req, rvp);
	if (!synack)
		return -1;

	__tcp_v4_send_check(synack, ireq->loc_addr, ireq->rmt_addr);
	skb_set_queue_mapping(synack, queue_mapping);

	rc = ip_build_and_send_pkt(synack, sk, ireq->loc_addr,
				   ireq->rmt_addr, ireq->opt);
	return net_xmit_eval(rc);
}
构造SYNACK段
构造一个SYNACK段,初始化TCP报头和skb中的一些字段。
-
-
-
-
-
-
-
-
-
-
-
-
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req,
-
struct request_values *rvp)
-
{
-
struct tcp_out_options opts;
-
struct tcp_extend_values *xvp = tcp_xv(rvp);
-
struct inet_request_sock *ireq = inet_rsk(req);
-
struct tcp_sock *tp = tcp_sk(sk);
-
const struct tcp_cookie_values *cvp = tp->cookie_values;
-
struct tcphdr *th;
-
struct sk_buff *skb;
-
struct tcp_md5sig_key *md5;
-
int tcp_header_size;
-
int mss;
-
int s_data_desired = 0;
-
-
if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
-
s_data_desired = cvp->s_data_desired;
-
-
-
skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, sk_gfp_atomic(sk, GFP_ATOMIC));
-
if (unlikely(! skb)) {
-
dst_release(dst);
-
return NULL;
-
}
-
-
-
-
-
skb_reserve(skb, MAX_TCP_HEADER);
-
skb_dst_set(skb, dst);
-
-
-
mss = dst_metric_advmss(dst);
-
if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
-
mss = tp->rx_opt.user_mss;
-
-
-
-
-
if (req->rcv_wnd == 0) {
-
__u8 rcv_wscale;
-
-
req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
-
-
-
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
-
(req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
-
req->window_clamp = tcp_full_space(sk);
-
-
-
tcp_select_initial_window(tcp_full_space(sk), mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
-
&req->rcv_wnd, &req->window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND));
-
ireq->rcv_wscale = rcv_wscale;
-
}
-
memset(&opts, 0, sizeof(opts));
-
-
#ifdef CONFIG_SYN_COOKIES
-
-
-
-
if (unlikely(req->cookie_ts))
-
TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
-
else
-
#endif
-
TCP_SKB_CB(skb)->when = tcp_time_stamp;
-
-
-
tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, xvp) + sizeof(*th);
-
-
-
skb_push(skb, tcp_header_size);
-
skb_reset_transport_header(skb);
-
-
th = tcp_hdr(skb);
-
memset(th, 0, sizeof(struct tcphdr));
-
th->syn = 1;
-
th->ack = 1;
-
-
TCP_ECN_make_synack(req, th);
-
th->source = ireq->loc_port;
-
th->dest = ireq->rmt_port;
-
-
tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, TCPHDR_SYN | TCPHDR_ACK);
-
-
-
if (OPTION_COOKIE_EXTENSION & opts.options) {
-
...
-
}
-
-
th->seq = htonl(TCP_SKB_CB(skb)->seq);
-
th->ack_seq = htonl(tcp_rsk(seq)->rcv_isn + 1);
-
-
th->window = htons(min(req->rcv_wnd, 65535U));
-
-
-
tcp_options_write((__u32 *) (th + 1), tp, &opts);
-
th->doff = (tcp_header_size >> 2);
-
TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
-
-
#ifdef CONFIG_TCP_MD5SIG
-
-
if (md5)
-
tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, md5, NULL, req, skb);
-
#endif
-
-
return skb;
-
}
-
-
/* Headroom, in bytes, that tcp_make_synack() allocates and reserves in front of the skb data. */
#define MAX_TCP_HEADER (128 + MAX_HEADER)
-
/* Byte budget tcp_synack_options() starts from when accounting for TCP options. */
#define MAX_TCP_OPTION_SPACE 40
-
-
/*
 * Compute the allocation flags for an atomic allocation made on behalf of
 * @sk: the caller's @gfp_mask, plus __GFP_MEMALLOC when the socket's
 * sk_allocation carries that bit.
 *
 * @gfp_mask is honoured instead of being replaced by a hard-coded
 * GFP_ATOMIC, so modifier flags supplied by a caller are no longer silently
 * dropped. A caller passing plain GFP_ATOMIC gets the same value as before.
 */
static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask)
{
	return gfp_mask | (sk->sk_allocation & __GFP_MEMALLOC);
}
如果SYNACK段使用SYN Cookie,并且使用时间戳选项,则把TCP选项信息(窗口扩大因子、SACK、ECN)
编码到SYNACK段中tsval的低6位。
-
-
-
-
-
-
__u32 cookie_init_timestamp(struct request_sock *req)
-
{
-
struct inet_request_sock *ireq;
-
u32 ts, ts_now = tcp_time_stamp;
-
u32 options = 0;
-
ireq = inet_rsk(req);
-
-
options = ireq->wscale_ok ? ireq->snd_wscale : 0xf;
-
options |= ireq->sack_ok << 4;
-
options |= ireq->ecn_ok << 5;
-
-
ts = ts_now & ~TSMASK;
-
ts |= options;
-
-
if (ts > ts_now) {
-
ts >>= TSBITS;
-
ts--;
-
ts <<= TSBITS;
-
ts |= options;
-
}
-
return ts;
-
}
-
-
/* Number of low-order timestamp bits that cookie_init_timestamp() replaces with option flags. */
#define TSBITS 6
-
/* Mask covering the low TSBITS bits of a 32-bit timestamp. */
#define TSMASK (((__u32) 1 << TSBITS) - 1)
TCP Cookie Transaction (TCPCT) 选项的功能类似于SYN Cookie,于2009年加入内核,
2013年3月从内核中移除,本文不对其进行分析。
-
/* Flag bits OR-ed into tcp_out_options.options. */
/* Set by tcp_synack_options() when the request has sack_ok. */
#define OPTION_SACK_ADVERTISE (1 << 0)
-
/* Set by tcp_synack_options() when the request has tstamp_ok. */
#define OPTION_TS (1 << 1)
-
/* Set by tcp_synack_options() when an MD5 key is found for the request. */
#define OPTION_MD5 (1 << 2)
-
/* Set by tcp_synack_options() when the request has wscale_ok. */
#define OPTION_WSCALE (1 << 3)
-
/* Tested by tcp_make_synack() to decide whether cookie-extension handling runs. */
#define OPTION_COOKIE_EXTENSION (1 << 4)
-
/* Not referenced by the code shown here; presumably the TCP Fast Open cookie option - TODO confirm. */
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
-
-
-
struct tcp_out_options {
-
u16 options;
-
u16 mss;
-
u8 ws;
-
u8 num_sack_blocks;
-
u8 hash_size;
-
__u8 *hash_location;
-
__u32 tsval, tsecr;
-
struct tcp_fastopen_cookie *fastopen_cookie;
-
};
填充TCP选项结构体tcp_out_options,用于构造SYNACK段;返回值是这些选项占用的字节数。
-
-
static unsigned int tcp_synack_options(struct sock *sk, struct request_sock *req,
-
unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts,
-
struct tcp_md5sig_key **md5, struct tcp_extend_values *xvp)
-
{
-
struct inet_request_sock *ireq = inet_rsk(req);
-
unsigned int remaining = MAX_TCP_OPTION_SPACE;
-
u8 cookie_plus = (xvp != NULL && ! xvp->cookie_out_never) ? xvp->cookie_plus : 0;
-
-
#define CONFIG_TCP_MD5SIG
-
*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
-
if (*md5) {
-
opts->options |= OPTION_MD5;
-
remaining -= TCPOLEN_MD5SIG_ALIGNED;
-
-
-
-
-
-
ireq->tstamp_ok &= ! ireq->sack_ok;
-
}
-
#else
-
*md5 = NULL;
-
#endif
-
-
-
opt->mss = mss;
-
remaining -= TCPOLEN_MSS_ALIGNED;
-
-
if (likely(ireq->wscale_ok)) {
-
opts->ws = ireq->rcv_wscale;
-
opts->options |= OPTION_WSCALE;
-
remaining -= TCPOLEN_WSCALE_ALIGNED;
-
}
-
-
if (likely(ireq->tstamp_ok)) {
-
opts->options |= OPTION_TS;
-
opts->tsval = TCP_SKB_CB(skb)->when;
-
opts->tsecr = req->ts_recent;
-
remaining -= TCPOLEN_TSTAMP_ALIGNED;
-
}
-
-
if (likely(ireq->sack_ok)) {
-
opts->options |= OPTION_SACK_ADVERTISE;
-
if (unlikely(! ireq->tstamp_ok))
-
remaining -= TCPOLEN_SACKPERM_ALIGNED;
-
}
-
-
-
if (*md5 == NULL && ireq->tstamp_ok && cookie_plus > TCPOLEN_COOKIE_BASE) {
-
...
-
}
-
-
return MAX_TCP_OPTION_SPACE - remaining;
-
}
初始化不携带数据skb的一些控制字段。
-
-
-
-
-
/*
 * Initialise the control fields of an skb that carries no payload.
 *
 * @skb:   the segment to set up
 * @seq:   sequence number of the segment
 * @flags: TCP header flags (TCPHDR_*) stored in the control block
 *
 * The skb is marked CHECKSUM_PARTIAL with a zero csum, the GSO fields
 * describe a single segment, and end_seq is @seq advanced by one when SYN
 * or FIN is set (otherwise it equals @seq).
 */
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
	/* A SYN or FIN flag makes the segment span one sequence number. */
	const u32 end_seq = (flags & (TCPHDR_SYN | TCPHDR_FIN)) ? seq + 1 : seq;

	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum = 0;

	TCP_SKB_CB(skb)->tcp_flags = flags;
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->seq = seq;
	TCP_SKB_CB(skb)->end_seq = end_seq;

	skb_shinfo(skb)->gso_segs = 1;
	skb_shinfo(skb)->gso_size = 0;
	skb_shinfo(skb)->gso_type = 0;
}
发送到IP层
TCP报头中的校验和字段还没赋值,用__tcp_v4_send_check()来计算。
-
/*
 * Fill in the checksum field of the TCP header of @skb for the address pair
 * @saddr / @daddr.
 *
 * For a CHECKSUM_PARTIAL skb only the complemented pseudo-header sum is
 * stored, and csum_start / csum_offset record where the checksum field
 * lives (presumably so a later stage can complete the sum - confirm).
 * Otherwise the checksum is computed in full here, over the TCP header
 * combined with skb->csum.
 */
static void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed != CHECKSUM_PARTIAL) {
		/* Header sum (doff is in 32-bit words) added on top of skb->csum. */
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th, th->doff << 2, skb->csum));
		return;
	}

	/* Seed the field with the complemented pseudo-header checksum. */
	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
}
给skb添加一个IP报头,然后发送出去。
-
-
/*
 * Prepend an IPv4 header (plus IP options, if any) to @skb and send it.
 *
 * @skb:   packet whose data currently starts at the transport header
 * @sk:    socket supplying tos, protocol, priority and mark
 * @saddr: source address
 * @daddr: destination address; when @opt carries a source route (srr), the
 *         header's destination becomes opt->opt.faddr instead
 * @opt:   IP options to include, may be NULL
 *
 * Returns the result of ip_local_out().
 */
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
			  __be32 saddr, __be32 daddr,
			  struct ip_options_rcu *opt)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = skb_rtable(skb);
	struct iphdr *iph;

	/* Make room for the IP header and any options, then mark its start. */
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
	skb_reset_network_header(skb);

	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = 5;	/* fixed header, in 32-bit words; increased below when options are present */
	iph->tos = inet->tos;

	if (ip_dont_fragment(sk, &rt->dst))
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;

	iph->ttl = ip_select_ttl(inet, &rt->dst);
	iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
	iph->saddr = saddr;
	iph->protocol = sk->sk_protocol;
	ip_select_ident(iph, &rt->dst, sk);

	if (opt && opt->opt.optlen) {
		/* Account for the options in the header length and write them out. */
		iph->ihl += opt->opt.optlen >> 2;
		ip_options_build(skb, &opt->opt, daddr, rt, 0);
	}

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	return ip_local_out(skb);
}