- /*
-
* This IP datagram is too large to be sent in one piece. Break it up into
-
* smaller pieces (each of size equal to IP header plus
-
* a block of the data of the original IP data part) that will yet fit in a
-
* single device frame, and queue such a frame for sending.
-
*/
-
-
int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
-
{
-
struct iphdr *iph;
-
int ptr;
-
struct net_device *dev;
-
struct sk_buff *skb2;
-
unsigned int mtu, hlen, left, len, ll_rs;
-
int offset;
-
__be16 not_last_frag;
-
struct rtable *rt = skb_rtable(skb);
-
int err = 0;
-
-
dev = rt->dst.dev;
-
-
/*
-
* Point into the IP datagram header.
-
*/
-
/* 得到IP报文头的指针 */
-
iph = ip_hdr(skb);
-
-
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
- /* 禁止分片,增加错误计数 */
-
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
-
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-
htonl(ip_skb_dst_mtu(skb)));
-
kfree_skb(skb);
-
return -EMSGSIZE;
-
}
-
-
/*
-
* Setup starting values.
-
*/
/* 得到IP报文总长度 */
-
hlen = iph->ihl * 4;
- /* 这里的mtu为真正的MTU-IP报文头,即允许的最大IP数据长度 */
-
mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */
-
#ifdef CONFIG_BRIDGE_NETFILTER
-
if (skb->nf_bridge)
-
mtu -= nf_bridge_mtu_reduction(skb);
-
#endif
- /* 为这个skb_buff置上分片完成的标志 */
-
IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
-
-
/* When frag_list is given, use it. First, check its validity:
-
* some transformers could create wrong frag_list or break existing
-
* one, it is not prohibited. In this case fall back to copying.
-
*
-
* LATER: this step can be merged to real generation of fragments,
-
* we can switch to copy when see the first bad fragment.
-
*/
- /* 根据前面的学习,我们知道4层有可能会将数据包分片。这些分片存放在skb的frag_list中*/
-
if (skb_has_frags(skb)) {
- /* skb_buffer已经有了一个frag list */
-
struct sk_buff *frag, *frag2;
- /* 拿到数据包的长度 */
-
int first_len = skb_pagelen(skb);
/*
1.数据包的长度超过了MTU;
2.数据包长度没有按8字节对齐;
3.数据包设置了IP_MF或者IP_OFFSET位
这样,进入slow_path
*/
-
if (first_len - hlen > mtu ||
-
((first_len - hlen) & 7) ||
-
(iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
-
skb_cloned(skb))
-
goto slow_path; //跳到slow_path
/* 遍历每一个分片 */
-
skb_walk_frags(skb, frag) {
- /* 检查每个分片,如果有一个分片不符合要求,就只能使用slow path */
-
/* Correct geometry. */
-
if (frag->len > mtu ||
-
((frag->len & 7) && frag->next) ||
-
skb_headroom(frag) < hlen)
-
goto slow_path_clean;
-
-
/* Partially cloned skb? */
-
if (skb_shared(frag))
-
goto slow_path_clean;
-
-
BUG_ON(frag->sk);
-
if (skb->sk) {
-
frag->sk = skb->sk;
-
frag->destructor = sock_wfree;
-
}
-
skb->truesize -= frag->truesize;
-
}
-
-
/* Everything is OK. Generate! */
-
/* 现在可以进行fast path了*/
-
err = 0;
-
offset = 0;
- /* 拿到frag list */
-
frag = skb_shinfo(skb)->frag_list;
- /* 重置原来的frag list,相当于从skb_buff上取走了frag list */
-
skb_frag_list_init(skb);
- /*
- 得到实际的数据长度,置分片标志位和校验和
- */
-
skb->data_len = first_len - skb_headlen(skb);
-
skb->len = first_len;
-
iph->tot_len = htons(first_len);
-
iph->frag_off = htons(IP_MF);
-
ip_send_check(iph);
/* 分别处理每一个分片 */
-
for (;;) {
-
/* Prepare header of the next frame,
-
* before previous one went down. */
-
if (frag) {
- /* 表示checksm已经算好*/
-
frag->ip_summed = CHECKSUM_NONE;
- /* 设置传输层*/
-
skb_reset_transport_header(frag);
-
__skb_push(frag, hlen);
- /* 设置网络层 */
-
skb_reset_network_header(frag);
-
memcpy(skb_network_header(frag), iph, hlen);
-
iph = ip_hdr(frag);
-
iph->tot_len = htons(frag->len);
-
ip_copy_metadata(frag, skb);
-
if (offset == 0)
-
ip_options_fragment(frag);
-
offset = skb->len - hlen;
-
iph->frag_off = htons(offset>>3);
-
if (frag->next != NULL)
-
iph->frag_off |= htons(IP_MF);
-
/* Ready, complete checksum */
- /* 计算分片的校验和 */
-
ip_send_check(iph);
-
}
/* 发送 */
-
err = output(skb);
-
-
if (!err)
-
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
-
if (err || !frag)
-
break;
-
-
skb = frag;
-
frag = skb->next;
-
skb->next = NULL;
-
}
-
-
if (err == 0) {
-
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
-
return 0;
-
}
/* 出错是否内存 */
-
while (frag) {
-
skb = frag->next;
-
kfree_skb(frag);
-
frag = skb;
-
}
-
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
-
return err;
-
-
slow_path_clean:
- /* 清除shared sk_buff */
-
skb_walk_frags(skb, frag2) {
-
if (frag2 == frag)
-
break;
-
frag2->sk = NULL;
-
frag2->destructor = NULL;
-
skb->truesize = frag2->truesize;
-
}
-
}
-
-
slow_path:
-
left = skb->len - hlen; /* Space per frame */
-
ptr = hlen; /* Where to start from */
-
-
/* for bridged IP traffic encapsulated inside f.e. a vlan header,
-
* we need to make room for the encapsulating header
-
*/
-
ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));
-
-
/*
-
* Fragment the datagram.
-
*/
/* 得到偏移 */
-
offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
- /* 通过IP_MF标志位,判断是否是最后一个分片 */
-
not_last_frag = iph->frag_off & htons(IP_MF);
-
-
/*
-
* Keep copying data until we run out.
-
*/
-
-
while (left > 0) {
- /* 计算分片长度 */
-
len = left;
-
/* IF: it doesn't fit, use 'mtu' - the data space left */
-
if (len > mtu)
-
len = mtu;
-
/* IF: we are not sending upto and including the packet end
-
then align the next start on an eight byte boundary */
-
if (len < left) {
-
len &= ~7;
-
}
-
/*
-
* Allocate buffer.
-
*/
/* 为分片申请该分片申请一个sk_buff */
-
if ((skb2 = alloc_skb(len hlen ll_rs, GFP_ATOMIC)) == NULL) {
-
NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
-
err = -ENOMEM;
-
goto fail;
-
}
-
-
/*
-
* Set up data on packet
-
*/
/* 复制数据,以及运输层 */
-
ip_copy_metadata(skb2, skb);
-
skb_reserve(skb2, ll_rs);
-
skb_put(skb2, len hlen);
-
skb_reset_network_header(skb2);
-
skb2->transport_header = skb2->network_header hlen;
-
-
/*
-
* Charge the memory for the fragment to any owner
-
* it might possess
-
*/
-
-
if (skb->sk)
-
skb_set_owner_w(skb2, skb->sk);
-
-
/*
-
* Copy the packet header into the new buffer.
-
*/
-
-
skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
-
-
/*
-
* Copy a block of the IP datagram.
-
*/
-
if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
-
BUG();
-
left -= len;
-
-
/*
-
* Fill in the new header fields.
-
*/
- /* 填充网络层 */
-
iph = ip_hdr(skb2);
-
iph->frag_off = htons((offset >> 3));
-
-
/* ANK: dirty, but effective trick. Upgrade options only if
-
* the segment to be fragmented was THE FIRST (otherwise,
-
* options are already fixed) and make it ONCE
-
* on the initial skb, so that all the following fragments
-
* will inherit fixed options.
-
*/
- /* 如果是第一个分片, 填充ip option */
-
if (offset == 0)
-
ip_options_fragment(skb);
-
-
/*
-
* Added AC : If we are fragmenting a fragment that's not the
-
* last fragment then keep MF on each bit
-
*/
- /* 设置IP_MF标志位 */
-
if (left > 0 || not_last_frag)
-
iph->frag_off |= htons(IP_MF);
-
ptr = len;
-
offset = len;
-
-
/*
-
* Put this fragment into the sending queue.
-
*/
-
iph->tot_len = htons(len hlen);
/* 计算校验和 */
-
ip_send_check(iph);
/* 发送该分片 */
-
err = output(skb2);
-
if (err)
-
goto fail;
-
-
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
-
}
/* 释放sk_buff */
-
kfree_skb(skb);
-
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
-
return err;
-
-
fail:
-
kfree_skb(skb);
-
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
-
return err;
-
}