Chinaunix首页 | 论坛 | 博客
  • 博客访问: 85709
  • 博文数量: 15
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 210
  • 用 户 组: 普通用户
  • 注册时间: 2014-01-05 15:27
文章分类

全部博文(15)

文章存档

2014年(15)

我的朋友

分类: LINUX

2014-04-03 23:38:38

当一个ip报文超过MTU的时候,发送端会进行fragment,接收端反过来需要进行defrag

发送端:

在函数ip_finish_output中会判断是否进行fragment,以udp发送10k的数据为例:
  /*
   * ip_finish_output - last step before the packet is handed to the
   * neighbour layer: decide whether it has to be fragmented first.
   *
   * Returns whatever dst_output(), ip_fragment() or ip_finish_output2()
   * returns for the chosen path.
   */
  1. static int ip_finish_output(struct sk_buff *skb)
  2. {
  3. #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
  4.     /* SNAT produced a new xfrm policy: flag the skb as rerouted and
  5.      * send it through dst_output() again. */
  6.     if (skb_dst(skb)->xfrm != NULL) {
  7.         IPCB(skb)->flags |= IPSKB_REROUTED;
  8.         return dst_output(skb);
  9.     }
  10. #endif
  11.     /* Fits into the path MTU, or is GSO: no fragmentation needed.
  12.      * Example from the article: skb->len=10008, mtu=1500, gso=0,
  13.      * so this test fails and the packet is fragmented below. */
  14.     if (skb->len <= ip_skb_dst_mtu(skb) || skb_is_gso(skb))
  15.         return ip_finish_output2(skb);
  16.
  17.     return ip_fragment(skb, ip_finish_output2);
  18. }

  1. int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
  2. {
  3.     struct iphdr *iph;
  4.     int ptr;
  5.     struct net_device *dev;
  6.     struct sk_buff *skb2;
  7.     unsigned int mtu, hlen, left, len, ll_rs;
  8.     int offset;
  9.     __be16 not_last_frag;
  10.     struct rtable *rt = skb_rtable(skb);
  11.     int err = 0;

  12.     dev = rt->dst.dev;

  13.     /*
  14.      * Point into the IP datagram header.
  15.      */

  16.     iph = ip_hdr(skb);

  17.     if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {  //如果置了不可分片的标志位,则发送icmp信息
  18.         IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
  19.         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
  20.               htonl(ip_skb_dst_mtu(skb)));
  21.         kfree_skb(skb);
  22.         return -EMSGSIZE;
  23.     }

  24.     /*
  25.      * Setup starting values.
  26.      */

  27.     hlen = iph->ihl * 4;
  28.     mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */  //1480
  29. #ifdef CONFIG_BRIDGE_NETFILTER
  30.     if (skb->nf_bridge)
  31.         mtu -= nf_bridge_mtu_reduction(skb);
  32. #endif
  33.     IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;

  34.     /* When frag_list is given, use it. First, check its validity:
  35.      * some transformers could create wrong frag_list or break existing
  36.      * one, it is not prohibited. In this case fall back to copying.
  37.      *
  38.      * LATER: this step can be merged to real generation of fragments,
  39.      * we can switch to copy when see the first bad fragment.
  40.      */
  41.     if (skb_has_frag_list(skb)) {  //如果有frag list,说明上层有可能已经做了一些事情,可以走fast流程
  42.         struct sk_buff *frag, *frag2;
  43.         int first_len = skb_pagelen(skb);  //skb的线性区和shinfo中的frags的总大小,不包括fraglist,1500

  44.         if (first_len - hlen > mtu ||
  45.             ((first_len - hlen) & 7) ||
  46.             ip_is_fragment(iph) ||
  47.             skb_cloned(skb))
  48.             goto slow_path;

  49.         skb_walk_frags(skb, frag) {
  50.             /* Correct geometry. */
  51.             if (frag->len > mtu ||
  52.                 ((frag->len & 7) && frag->next) ||
  53.                 skb_headroom(frag) < hlen)
  54.                 goto slow_path_clean;

  55.             /* Partially cloned skb? */
  56.             if (skb_shared(frag))
  57.                 goto slow_path_clean;

  58.             BUG_ON(frag->sk);
  59.             if (skb->sk) {
  60.                 frag->sk = skb->sk;  //后续每个frag都将独立,因此赋值相应的分量
  61.                 frag->destructor = sock_wfree;
  62.             }
  63.             skb->truesize -= frag->truesize;   //如果fraglist中的skb满足条件的话,从总的skb大小中减掉
  64.         }

  65.         /* Everything is OK. */

  66.         err = 0;
  67.         offset = 0;
  68.         frag = skb_shinfo(skb)->frag_list;
  69.         skb_frag_list_init(skb);
  70.         skb->data_len = first_len - skb_headlen(skb);    //当前skb不再包含fraglist的大小
  71.         skb->len = first_len;
  72.         iph->tot_len = htons(first_len);
  73.         iph->frag_off = htons(IP_MF);  //设置第一个分片的标志,没有offset
  74.         ip_send_check(iph);
  75.         for (;;) {
  76.             /* Prepare header of the next frame,
  77.              * before previous one went down. */
  78.             if (frag) {
  79.                 frag->ip_summed = CHECKSUM_NONE;
  80.                 skb_reset_transport_header(frag);
  81.                 __skb_push(frag, hlen);
  82.                 skb_reset_network_header(frag);
  83.                 memcpy(skb_network_header(frag), iph, hlen); //把上一个skb的ip头拷贝到这一个skb
  84.                 iph = ip_hdr(frag);
  85.                 iph->tot_len = htons(frag->len);
  86.                 ip_copy_metadata(frag, skb);
  87.                 if (offset == 0)
  88.                     ip_options_fragment(frag);
  89.                 offset += skb->len - hlen;  //offset为L3层看到的负载,不包含L3的头
  90.                 iph->frag_off = htons(offset>>3);
  91.                 if (frag->next != NULL)
  92.                     iph->frag_off |= htons(IP_MF); //除非是最后一个分片,否则都置为MF标志
  93.                 /* Ready, complete checksum */
  94.                 ip_send_check(iph);
  95.             }

  96.             err = output(skb); //对每一个skb进行发送 :ip_finish_output2

  97.             if (!err)
  98.                 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
  99.             if (err || !frag)
  100.                 break;

  101.             skb = frag;
  102.             frag = skb->next;
  103.             skb->next = NULL;
  104.         }

  105.         if (err == 0) {
  106.             IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
  107.             return 0; //如果一切顺利的话就到这
  108.         }

  109.         while (frag) {
  110.             skb = frag->next;
  111.             kfree_skb(frag);
  112.             frag = skb;
  113.         }
  114.         IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
  115.         return err;

  116. slow_path_clean:
  117.         skb_walk_frags(skb, frag2) {
  118.             if (frag2 == frag)
  119.                 break;
  120.             frag2->sk = NULL;
  121.             frag2->destructor = NULL;
  122.             skb->truesize += frag2->truesize;
  123.         }
  124.     }

  125. slow_path:
  126.     left = skb->len - hlen; /* Space per frame */
  127.     ptr = hlen; /* Where to start from */

  128.     /* for bridged IP traffic encapsulated inside f.e. a vlan header,
  129.      * we need to make room for the encapsulating header
  130.      */
  131.     ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));

  132.     /*
  133.      * Fragment the datagram.
  134.      */

  135.     offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
  136.     not_last_frag = iph->frag_off & htons(IP_MF);

  137.     /*
  138.      * Keep copying data until we run out.
  139.      */

  140.     while (left > 0) {
  141.         len = left;
  142.         /* IF: it doesn't fit, use 'mtu' - the data space left */
  143.         if (len > mtu)
  144.             len = mtu;
  145.         /* IF: we are not sending up to and including the packet end
  146.            then align the next start on an eight byte boundary */
  147.         if (len < left) {
  148.             len &= ~7;
  149.         }
  150.         /*
  151.          * Allocate buffer.
  152.          */

  153.         if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) { //分配一个新的skb,数据都放在线性区
  154.             NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
  155.             err = -ENOMEM;
  156.             goto fail;
  157.         }

  158.         /*
  159.          * Set up data on packet
  160.          */

  161.         ip_copy_metadata(skb2, skb);
  162.         skb_reserve(skb2, ll_rs)
  163.         skb_put(skb2, len + hlen);
  164.         skb_reset_network_header(skb2);  //data和network_header指向L3的头
  165.         skb2->transport_header = skb2->network_header + hlen;

  166.         /*
  167.          * Charge the memory for the fragment to any owner
  168.          * it might possess
  169.          */

  170.         if (skb->sk)
  171.             skb_set_owner_w(skb2, skb->sk);

  172.         /*
  173.          * Copy the packet header into the new buffer.
  174.          */

  175.         skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen); //拷贝L3头

  176.         /*
  177.          * Copy a block of the IP datagram.
  178.          */
  179.         if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len)) //拷贝负载数据,可能从skb的线性区、frag或者list中获取
  180.             BUG();                                                    //由ptr即offset决定
  181.         left -= len;

  182.         /*
  183.         /*
  184.          * Fill in the new header fields.
  185.          */
  186.         iph = ip_hdr(skb2);
  187.         iph->frag_off = htons((offset >> 3));

  188.         /* ANK: dirty, but effective trick. Upgrade options only if
  189.          * the segment to be fragmented was THE FIRST (otherwise,
  190.          * options are already fixed) and make it ONCE
  191.          * on the initial skb, so that all the following fragments
  192.          * will inherit fixed options.
  193.          */
  194.         if (offset == 0)
  195.             ip_options_fragment(skb);

  196.         /*
  197.          * Added AC : If we are fragmenting a fragment that's not the
  198.          * last fragment then keep MF on each bit
  199.          */
  200.         if (left > 0 || not_last_frag)
  201.             iph->frag_off |= htons(IP_MF);
  202.         ptr += len;
  203.         offset += len;

  204.         /*
  205.          * Put this fragment into the sending queue.
  206.          */
  207.         iph->tot_len = htons(len + hlen);

  208.         ip_send_check(iph);

  209.         err = output(skb2); //fast path中,入参skb是第一个发送的分片,slow path中入参skb是不会动的,它只负责提供数据
  210.         if (err)
  211.             goto fail;

  212.         IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
  213.     }
  214.     kfree_skb(skb);
  215.     IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
  216.     return err;

  217. fail:
  218.     kfree_skb(skb);
  219.     IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
  220.     return err;
  221. }

简单总结:ip_fragment根据上层传入的skb的情况做不同的处理:如果skb带有frag_list,并且其中每个成员的长度、8字节对齐和headroom都满足要求(fast path),则只需给每个成员补上L3头,然后对每一个分片调用发包函数;否则(slow path)为每个分片新分配skb,拷贝L3头和数据,然后对每一个分片调用发包函数。

接收端:

发送10K的数据的时候,接收端tcpdump输出如下:
  1. 22:39:47.521105 IP 100.100.100.11.51389 > 100.100.100.2.8888: UDP, length 10000
  2. 22:39:47.521189 IP 100.100.100.11 > 100.100.100.2: ip-proto-17
  3. 22:39:47.521329 IP 100.100.100.11 > 100.100.100.2: ip-proto-17
  4. 22:39:47.521450 IP 100.100.100.11 > 100.100.100.2: ip-proto-17
  5. 22:39:47.521567 IP 100.100.100.11 > 100.100.100.2: ip-proto-17
  6. 22:39:47.521697 IP 100.100.100.11 > 100.100.100.2: ip-proto-17
  7. 22:39:47.521791 IP 100.100.100.11 > 100.100.100.2: ip-proto-17
第一个报文和第二个报文的详细信息如下:
  1. 22:45:34.400082 IP 100.100.100.11.37617 > 100.100.100.2.8888: UDP, length 10000
  2.     0x0000: 60eb 6945 291b 3c97 0ecd a46d 0800 4500 `.iE).<....m..E.
  3.     0x0010: 05dc 8e6c 2000 4011 35cf 6464 640b 6464 ...l..@.5.ddd.dd  
  4.     0x0020: 6402 92f1 22b8 2718 cfa2 6767 6767 6767 d...".'...gggggg  //0x2718=10008=负载+一个UDP头
  1. 22:45:34.400161 IP 100.100.100.11 > 100.100.100.2: ip-proto-17
  2.     0x0000: 60eb 6945 291b 3c97 0ecd a46d 0800 4500 `.iE).<....m..E.
  3.     0x0010: 05dc 8e6c 20b9 4011 3516 6464 640b 6464 ...l..@.5.ddd.dd
  4.     0x0020: 6402 6767 6767 6767 6767 6767 6767 6767 d.gggggggggggggg  //不包含UDP头
对于ip分片而言,IP头结构中有两个字节与之相关,即上面两个报文十六进制输出中IP头第6~7字节处的2000和20b9(原文中标红的字段)。这16bit中高三位为标志位,低13位为偏移量/8。
因此20b9表示标记位为001(表示MF),偏移为0xb9×8=1480(MTU-IP头的长度),即第一个分片包含了1480个字节(包括8字节UDP头)。
看代码是如何分辨一个skb是否是一个分片:
  /*
   * ip_is_fragment - tell whether an IP header belongs to a fragment.
   *
   * A packet is a fragment when MF is set or its fragment offset is non-zero.
   */
  1. static inline bool ip_is_fragment(const struct iphdr *iph)
  2. {
  3.     if (iph->frag_off & htons(IP_MF | IP_OFFSET))
  4.         return true;
  5.
  6.     return false;
  7. }
在收包函数ip_local_deliver中会进行判断,如果是的话,会调用ip_defrag进行分片重组:
  /*
   * ip_defrag - feed one received fragment into the reassembly machinery.
   * @skb:  the fragment
   * @user: passed through to ip_find() when looking up the queue
   *
   * Returns the result of ip_frag_queue(), or -ENOMEM (after freeing @skb)
   * when no queue could be found or created.
   */
  1. int ip_defrag(struct sk_buff *skb, u32 user)
  2. {
  3.     struct net *net;
  4.     struct ipq *qp;
  5.     int ret;

  6.     if (skb->dev)
  7.         net = dev_net(skb->dev);
  8.     else
  9.         net = dev_net(skb_dst(skb)->dev);
  10.     IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);

  11.     /* Reclaim memory first if the fragment cache is above its high threshold. */
  12.     if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
  13.         ip_evictor(net);

  14.     /* Find the queue matching this fragment; per the article a new one
  15.      * is created when this is the first fragment seen. */
  16.     qp = ip_find(net, ip_hdr(skb), user);
  17.     if (qp == NULL) {
  18.         IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
  19.         kfree_skb(skb);
  20.         return -ENOMEM;
  21.     }

  22.     spin_lock(&qp->q.lock);
  23.     ret = ip_frag_queue(qp, skb);
  24.     spin_unlock(&qp->q.lock);

  25.     ipq_put(qp);
  26.     return ret;
  27. }
涉及的数据结构如下:

找到或者新建完ipq的数据结构后,把当前的skb放到对应的数据结构:

  /*
   * ip_frag_queue - add one received fragment to the reassembly queue @qp.
   * @qp:  reassembly queue the fragment belongs to
   * @skb: the fragment; freed here on every error path
   *
   * Trims any overlap with already queued fragments, inserts @skb into
   * qp->q.fragments (kept sorted by offset) and updates the queue
   * accounting. Once both the first and the last fragment have arrived and
   * qp->q.meat equals qp->q.len, reassembly is done via ip_frag_reasm().
   *
   * Returns the result of ip_frag_reasm() when the datagram is complete,
   * -EINPROGRESS while fragments are still missing, or a negative errno.
   */
  1. static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
  2. {
  3.     struct sk_buff *prev, *next;
  4.     struct net_device *dev;
  5.     int flags, offset;
  6.     int ihl, end;
  7.     int err = -ENOENT;
  8.     u8 ecn;

  9.     if (qp->q.last_in & INET_FRAG_COMPLETE)  // per the article, this flag is only set once the ipq is being released
  10.         goto err;

  11.     if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
  12.         unlikely(ip_frag_too_far(qp)) &&
  13.         unlikely(err = ip_frag_reinit(qp))) {
  14.         ipq_kill(qp);
  15.         goto err;
  16.     }

  17.     ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
  18.     offset = ntohs(ip_hdr(skb)->frag_off);
  19.     flags = offset & ~IP_OFFSET;
  20.     offset &= IP_OFFSET;
  21.     offset <<= 3; /* offset is in 8-byte chunks */
  22.     ihl = ip_hdrlen(skb);

  23.     /* Determine the position of this fragment. */
  24.     end = offset + skb->len - ihl;
  25.     err = -EINVAL;

  26.     /* Is this the final fragment? */
  27.     if ((flags & IP_MF) == 0) {
  28.         /* If we already have some bits beyond end
  29.          * or have different end, the segment is corrupted.
  30.          */
  31.         if (end < qp->q.len ||
  32.             ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len))
  33.             goto err;
  34.         qp->q.last_in |= INET_FRAG_LAST_IN;  // remember that the last fragment has arrived
  35.         qp->q.len = end;  // total payload length of the datagram
  36.     } else {
  37.         if (end&7) {  // a non-final fragment must end on an 8-byte boundary: round down
  38.             end &= ~7;
  39.             if (skb->ip_summed != CHECKSUM_UNNECESSARY)
  40.                 skb->ip_summed = CHECKSUM_NONE;
  41.         }
  42.         if (end > qp->q.len) {
  43.             /* Some bits beyond end -> corruption. */
  44.             if (qp->q.last_in & INET_FRAG_LAST_IN)
  45.                 goto err;
  46.             qp->q.len = end;
  47.         }
  48.     }
  49.     if (end == offset)
  50.         goto err;
  51.     err = -ENOMEM;
  52.     if (pskb_pull(skb, ihl) == NULL)  // strip the IP header from the fragment
  53.         goto err;

  54.     err = pskb_trim_rcsum(skb, end - offset);
  55.     if (err)
  56.         goto err;

  57.     /* Find out which fragments are in front and at the back of us
  58.      * in the chain of fragments so far. We must know where to put
  59.      * this fragment, right?
  60.      */
  61.     prev = qp->q.fragments_tail;
  62.     if (!prev || FRAG_CB(prev)->offset < offset) { // empty queue (this skb is the first one) or skb belongs after the current tail: append
  63.         next = NULL;
  64.         goto found;
  65.     }
  66.     prev = NULL;
  67.     for (next = qp->q.fragments; next != NULL; next = next->next) { // fragments are kept sorted by ascending offset
  68.         if (FRAG_CB(next)->offset >= offset)
  69.             break; /* found the insertion point */
  70.         prev = next;
  71.     }

  72. found:
  73.     /* We found where to put this one. Check for overlap with
  74.      * preceding fragment, and, if needed, align things so that
  75.      * any overlaps are eliminated.
  76.      */
  77.     if (prev) {
  78.         int i = (FRAG_CB(prev)->offset + prev->len) - offset; // overlap with prev: prev wins, the new skb is trimmed

  79.         if (i > 0) {
  80.             offset += i;
  81.             err = -EINVAL;
  82.             if (end <= offset)
  83.                 goto err;
  84.             err = -ENOMEM;
  85.             if (!pskb_pull(skb, i))
  86.                 goto err;
  87.             if (skb->ip_summed != CHECKSUM_UNNECESSARY)
  88.                 skb->ip_summed = CHECKSUM_NONE;
  89.         }
  90.     }

  91.     err = -ENOMEM;

  92.     while (next && FRAG_CB(next)->offset < end) { // overlap with next: the new skb wins, next is trimmed or dropped
  93.         int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */

  94.         if (i < next->len) {
  95.             /* Eat head of the next overlapped fragment
  96.              * and leave the loop. The next ones cannot overlap.
  97.              */
  98.             if (!pskb_pull(next, i))
  99.                 goto err;
  100.             FRAG_CB(next)->offset += i;
  101.             qp->q.meat -= i;
  102.             if (next->ip_summed != CHECKSUM_UNNECESSARY)
  103.                 next->ip_summed = CHECKSUM_NONE;
  104.             break;
  105.         } else {
  106.             struct sk_buff *free_it = next;

  107.             /* Old fragment is completely overridden with
  108.              * new one drop it.
  109.              */
  110.             next = next->next;

  111.             if (prev)
  112.                 prev->next = next;
  113.             else
  114.                 qp->q.fragments = next;

  115.             qp->q.meat -= free_it->len;
  116.             frag_kfree_skb(qp->q.net, free_it);
  117.         }
  118.     }

  119.     FRAG_CB(skb)->offset = offset;

  120.     /* Insert this fragment in the chain of fragments. */
  121.     skb->next = next;
  122.     if (!next)
  123.         qp->q.fragments_tail = skb;  // nothing follows: skb is the new tail of the list
  124.     if (prev)
  125.         prev->next = skb;
  126.     else
  127.         qp->q.fragments = skb;

  128.     dev = skb->dev;
  129.     if (dev) {
  130.         qp->iif = dev->ifindex;
  131.         skb->dev = NULL;
  132.     }
  133.     qp->q.stamp = skb->tstamp;
  134.     qp->q.meat += skb->len;
  135.     qp->ecn |= ecn;
  136.     atomic_add(skb->truesize, &qp->q.net->mem);
  137.     if (offset == 0)
  138.         qp->q.last_in |= INET_FRAG_FIRST_IN; // offset 0 means this is the first fragment

  139.     if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && // both the first and the last fragment have arrived
  140.         qp->q.meat == qp->q.len) // len is final once the last fragment is in; meat grows with every queued skb, so equality means nothing is missing
  141.         return ip_frag_reasm(qp, prev, dev); // reassemble

  142.     write_lock(&ip4_frags.lock);
  143.     list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list); // move this ipq to the LRU tail so it is less likely to be evicted
  144.     write_unlock(&ip4_frags.lock);
  145.     return -EINPROGRESS;  // per the article, a non-zero return makes the caller stop processing this skb
  146. err:
  147.     kfree_skb(skb);
  148.     return err;
  149. }
需要关注ip_frag_reasm函数的参数:由于ip_frag_queue的入参是skb,并且没有额外的输出参数把重组结果带回给调用者,而后续的处理又必须基于一个sk_buff结构,因此不管最后一次调用ip_frag_queue时传入的skb是哪一个分片,函数返回时这个skb都必须代表整个数据包。这个过程是在函数ip_frag_reasm中完成的:

  /*
   * ip_frag_reasm - glue all queued fragments of @qp into one datagram.
   * @qp:   the complete reassembly queue
   * @prev: fragment queued just before the skb received last, or NULL when
   *        that skb is already the head of the list
   * @dev:  device stored into the resulting skb
   *
   * The skb received last is turned into the head of the datagram; all
   * other fragments are attached to its frag_list and its len, data_len,
   * truesize and checksum state are updated to cover the whole packet.
   *
   * Returns 0 on success, -EINVAL for an invalid ECN combination, -ENOMEM
   * on allocation failure, or -E2BIG when the datagram exceeds 65535 bytes.
   */
  1. static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
  2.              struct net_device *dev)
  3. {
  4.     struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
  5.     struct iphdr *iph;
  6.     struct sk_buff *fp, *head = qp->q.fragments;
  7.     int len;
  8.     int ihlen;
  9.     int err;
  10.     u8 ecn;

  11.     ipq_kill(qp);

  12.     ecn = ip4_frag_ecn_table[qp->ecn];
  13.     if (unlikely(ecn == 0xff)) {
  14.         err = -EINVAL;
  15.         goto out_fail;
  16.     }
  17.     /* Make the one we just received the head. */
  18.     if (prev) {
  19.         head = prev->next;  // head is the skb just received; the steps below make it carry the first fragment's contents
  20.         fp = skb_clone(head, GFP_ATOMIC);
  21.         if (!fp)
  22.             goto out_nomem;

  23.         fp->next = head->next;
  24.         if (!fp->next)
  25.             qp->q.fragments_tail = fp;
  26.         prev->next = fp;

  27.         skb_morph(head, qp->q.fragments);
  28.         head->next = qp->q.fragments->next;

  29.         kfree_skb(qp->q.fragments);
  30.         qp->q.fragments = head;
  31.     }

  32.     WARN_ON(head == NULL);
  33.     WARN_ON(FRAG_CB(head)->offset != 0);

  34.     /* Allocate a new buffer for the datagram. */
  35.     ihlen = ip_hdrlen(head);
  36.     len = ihlen + qp->q.len;
  37.     err = -E2BIG;
  38.     if (len > 65535)
  39.         goto out_oversize;

  40.     /* Head of list must not be cloned. */
  41.     if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
  42.         goto out_nomem;

  43.     /* If the first fragment is fragmented itself, we split
  44.      * it to two chunks: the first with data and paged part
  45.      * and the second, holding only fragments. */
  46.     if (skb_has_frag_list(head)) {
  47.         struct sk_buff *clone;
  48.         int i, plen = 0;

  49.         if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
  50.             goto out_nomem;
  51.         clone->next = head->next;
  52.         head->next = clone;
  53.         skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
  54.         skb_frag_list_init(head);
  55.         for (i=0; i<skb_shinfo(head)->nr_frags; i++)
  56.             plen += skb_shinfo(head)->frags[i].size;
  57.         clone->len = clone->data_len = head->data_len - plen;
  58.         head->data_len -= clone->len;
  59.         head->len -= clone->len;
  60.         clone->csum = 0;
  61.         clone->ip_summed = head->ip_summed;
  62.         atomic_add(clone->truesize, &qp->q.net->mem);
  63.     }

  64.     skb_shinfo(head)->frag_list = head->next;// effectively moves the rest of the fragments list onto head's frag_list
  65.     skb_push(head, head->data - skb_network_header(head));  // the head skb must expose the IP header again

  66.     for (fp=head->next; fp; fp = fp->next) {
  67.         head->data_len += fp->len;   // head now represents the whole datagram, not just one fragment, so fold every fragment in
  68.         head->len += fp->len;
  69.         if (head->ip_summed != fp->ip_summed)
  70.             head->ip_summed = CHECKSUM_NONE;
  71.         else if (head->ip_summed == CHECKSUM_COMPLETE)
  72.             head->csum = csum_add(head->csum, fp->csum);
  73.         head->truesize += fp->truesize;
  74.     }
  75.     atomic_sub(head->truesize, &qp->q.net->mem);

  76.     head->next = NULL;
  77.     head->dev = dev;
  78.     head->tstamp = qp->q.stamp;

  79.     iph = ip_hdr(head);
  80.     iph->frag_off = 0;  // reassembled: clear the fragment flags and offset
  81.     iph->tot_len = htons(len);
  82.     iph->tos |= ecn;
  83.     IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
  84.     qp->q.fragments = NULL;
  85.     qp->q.fragments_tail = NULL;
  86.     return 0;

  87. out_nomem:
  88.     LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
  89.                   "queue %p\n", qp);
  90.     err = -ENOMEM;
  91.     goto out_fail;
  92. out_oversize:
  93.     if (net_ratelimit())
  94.         printk(KERN_INFO "Oversized IP packet from %pI4.\n",
  95.             &qp->saddr);
  96. out_fail:
  97.     IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
  98.     return err;
  99. }
简单来说,重组过程就是先把所有的分片skb按offset顺序加到qp->q.fragments对应的链表中,当所有分片都到齐的时候,再把该链表中的其余skb都挂到一个统一的skb(head)的frag_list上,更新这个统一skb的len、data_len、truesize等分量,然后交由函数ip_local_deliver_finish继续处理。







阅读(4214) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~