Chinaunix首页 | 论坛 | 博客
  • 博客访问: 965195
  • 博文数量: 58
  • 博客积分: 10192
  • 博客等级: 上将
  • 技术积分: 1845
  • 用 户 组: 普通用户
  • 注册时间: 2010-11-22 21:24
文章分类

全部博文(58)

文章存档

2011年(11)

2010年(12)

2009年(20)

2008年(15)

分类: C/C++

2010-11-29 11:38:12

一. 前言

IP碎片的重组是防火墙提高安全性的一个重要手段,通过提前进行碎片重组,可以有效防御各种碎片攻击,Linux内核的防火墙netfilter就自动对IP碎片包进行了重组,本文介绍Linux内核中的IP重组过程,内核代码版本2.6.11

 

二. IP碎片重组过程:

1.重要的数据结构struct ipq:

 

ipq队列节点结构: 

/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
 /* Next entry on the same hash chain */
 struct ipq *next; /* linked list pointers */
 /* Membership in the least-recently-used list (ipq_lru_list) */
 struct list_head lru_list; /* lru list member */

 /*
  * Lookup key: ip_find() matches a fragment to its queue by comparing
  * user, saddr, daddr, id and protocol.
  */
 u32 user;
 u32 saddr;
 u32 daddr;
 u16 id;
 u8 protocol;

 /* State flags: bitwise OR of the values defined below */
 u8 last_in;
#define COMPLETE 4 /* queue is finished; ip_frag_queue() refuses further fragments */

#define FIRST_IN 2 /* the fragment with offset 0 has arrived */

#define LAST_IN 1 /* the fragment without IP_MF (the last one) has arrived */

 /* Fragments received so far, linked through skb->next in offset order */
 struct sk_buff *fragments; /* linked list of received fragments */
 /* Largest end offset seen so far; final once LAST_IN is set */
 int len; /* total length of original datagram */
 /* Sum of the lengths of the fragments currently queued */
 int meat;
 /* Held by ip_defrag() while the queue is being modified */
 spinlock_t lock;
 /* Reference count: taken in ip_find(), dropped in ipq_put() */
 atomic_t refcnt;
 /* Expiry timer */
 struct timer_list timer; /* when will this queue expire? */
 /* Presumably the address of the pointer that points at this entry
  * in its hash chain (not shown being used here -- confirm) */
 struct ipq **pprev;
 /* ifindex of the device the most recent fragment arrived on */
 int iif;
 /* Timestamp of the most recently queued fragment */
 struct timeval stamp;
};

系统定义了一个大小为IPQ_HASHSZ=64的ipq_hash表:static struct ipq *ipq_hash[IPQ_HASHSZ]; 每个数组元素就是一个具有相同hash值的链表。每个链表上的节点就代表着同一个链接的IP的碎片。见下图:

 

2. 实现IP重组的基本函数为ip_defrag(),在net/ipv4/ip_fragment.c中实现,基本过程是建立碎片处理队列,队列中每个节点(struct ipq)是一个链表,这个链表保存同一个连接的碎片,当碎片都到达之后进行数据包重组,或者在一定时间(缺省30秒)内所有碎片包不能到达而释放掉。

 

/*
 * Process an incoming IP datagram fragment.
 *
 * Looks up (or creates) the reassembly queue for the fragment and adds the
 * fragment to it under the queue lock. When both the first and the last
 * fragment have been seen and the queued byte count equals the datagram
 * length, the datagram is reassembled.
 *
 * Returns the value of ip_frag_reasm() when reassembly was attempted,
 * otherwise NULL. If no queue could be obtained the skb is freed and a
 * reassembly failure is counted.
 */
struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
{
    struct iphdr *iph = skb->nh.iph;
    struct sk_buff *result = NULL;
    struct net_device *dev;
    struct ipq *queue;

    IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);

    /* Run the evictor once fragment memory exceeds the high threshold. */
    if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
        ip_evictor();

    /* Remember the device now: ip_frag_queue() clears skb->dev. */
    dev = skb->dev;

    /* Find the queue this fragment belongs to, creating one if needed. */
    queue = ip_find(iph, user);
    if (queue == NULL) {
        IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
        kfree_skb(skb);
        return NULL;
    }

    spin_lock(&queue->lock);

    ip_frag_queue(queue, skb);

    /* First and last fragments present and no holes left: reassemble. */
    if (queue->last_in == (FIRST_IN|LAST_IN) && queue->meat == queue->len)
        result = ip_frag_reasm(queue, dev);

    spin_unlock(&queue->lock);

    /* Drop the reference obtained from ip_find(). */
    ipq_put(queue, NULL);
    return result;
}


3.  ip_find()函数:在hash表中查找属于同一链接的节点,如果没有找到,则新建一个节点。

 

/*
 * Find the reassembly queue for the datagram described by iph and user.
 *
 * The hash chain selected by ipqhashfn(id, saddr, daddr, protocol) is walked
 * under the ipfrag_lock read lock. A matching entry has its reference count
 * incremented before the lock is released and is returned. If nothing
 * matches, the result of ip_frag_create() is returned instead.
 */
static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
{
    __u16 id = iph->id;
    __u32 saddr = iph->saddr;
    __u32 daddr = iph->daddr;
    __u8 protocol = iph->protocol;
    unsigned int hash = ipqhashfn(id, saddr, daddr, protocol);
    struct ipq *entry;

    read_lock(&ipfrag_lock);

    /*
     * Different datagrams can hash to the same chain, so every key field
     * has to be compared before an entry is accepted.
     */
    for (entry = ipq_hash[hash]; entry != NULL; entry = entry->next) {
        if (entry->id != id)
            continue;
        if (entry->saddr != saddr)
            continue;
        if (entry->daddr != daddr)
            continue;
        if (entry->protocol != protocol)
            continue;
        if (entry->user != user)
            continue;
        break;
    }

    /* Take the reference while the read lock is still held. */
    if (entry != NULL)
        atomic_inc(&entry->refcnt);

    read_unlock(&ipfrag_lock);

    if (entry != NULL)
        return entry;

    return ip_frag_create(hash, iph, user);
}


4. ip_frag_queue()函数:把新到达的分片加入到属于同一链接的节点中。

 

static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
    struct sk_buff *prev, *next;
    int flags, offset;
    int ihl, end;

    /* last_in状态标志,COMPLETE代表数据已经完整 */
    if (qp->last_in & COMPLETE)
        goto err;

    /* frag_off为16bits,包含3bits标志+13bits片偏移 */
     offset = ntohs(skb->nh.iph->frag_off);
    /* 获取3bit的标志 */
    flags = offset & ~IP_OFFSET;
    /* 计算13bits的片偏移 */
    offset &= IP_OFFSET;
    offset <<= 3;        /* offset is in 8-byte chunks */
     ihl = skb->nh.iph->ihl * 4;

    /* Determine the position of this fragment. */
              /* 计算出分片的偏移地址,分片数据大小+片偏移 */
     end = offset + skb->len - ihl;

    /* Is this the final fragment? */
    /* 如果是分片包的话,IP_MF位置1 */
    if ((flags & IP_MF) == 0) {
        /* If we already have some bits beyond end
         * or have different end, the segment is corrrupted.
         */

        /*
         * qp->len 原始数据包的总长度
         * LAST_IN 最后一个包到达
         */

        if (end < qp->len ||
         ((qp->last_in & LAST_IN) && end != qp->len))
            goto err;
        qp->last_in |= LAST_IN;
        qp->len = end;
    } else

        /* 每个分片(除最后一个外)都必须是8字节倍数 */
        if (end&7) {
            end &= ~7;
            if (skb->ip_summed != CHECKSUM_UNNECESSARY)
                skb->ip_summed = CHECKSUM_NONE;
        }
        if (end > qp->len) {
            /* Some bits beyond end -> corruption. */
            if (qp->last_in & LAST_IN)
                goto err;
            qp->len = end;
        }
    }
    if (end == offset)
        goto err;

    if (pskb_pull(skb, ihl) == NULL)
        goto err;
    if (pskb_trim(skb, end-offset))
        goto err;

    /* Find out which fragments are in front and at the back of us
     * in the chain of fragments so far. We must know where to put
     * this fragment, right?
     */

    /* 找出应该把分片放在哪里 */
    prev = NULL;
    for(next = qp->fragments; next != NULL; next = next->next) {
        if (FRAG_CB(next)->offset >= offset)
            break;    /* bingo!找到退出 */
        prev = next;
    }

    /* We found where to put this one. Check for overlap with
     * preceding fragment, and, if needed, align things so that
     * any overlaps are eliminated.
     */

    /* 查看与前分片是否有数据重叠,找出并清除 */
    if (prev) {
        int i = (FRAG_CB(prev)->offset + prev->len) - offset;
        if (i > 0) {
            offset += i;
            if (end <= offset)
                goto err;
            if (!pskb_pull(skb, i)) /* 向后移动长度为i的距离 */
                goto err;
            if (skb->ip_summed != CHECKSUM_UNNECESSARY)
                skb->ip_summed = CHECKSUM_NONE;
        }
    }
    /* 查看与后分片是否有数据重叠 */
    while (next && FRAG_CB(next)->offset < end) {
        int i = end - FRAG_CB(next)->offset; /* 重叠i个字节的数据overlap is 'i' bytes */
        if (i < next->len) {
            /* Eat head of the next overlapped fragment
             * and leave the loop. The next ones cannot overlap.
             */

            if (!pskb_pull(next, I)) /* 向后移动长度为i的距离 */
                goto err;
            FRAG_CB(next)->offset += i;
            qp->meat -= i; /* meat是所有碎片实际长度的累加 */
            if (next->ip_summed != CHECKSUM_UNNECESSARY)
                next->ip_summed = CHECKSUM_NONE;
            break;
        } else {
            struct sk_buff *free_it = next;

            /* Old fragmnet is completely overridden with
             * new one drop it.
             * 已有的分片完全包含新到达的分片,所以丢弃新到的分片
             */

            next = next->next;
            if (prev)
                prev->next = next;
            else
                qp->fragments = next;
            qp->meat -= free_it->len;
            frag_kfree_skb(free_it, NULL);
        }
    }
    FRAG_CB(skb)->offset = offset;
    /* Insert this fragment in the chain of fragments. */
    /* 位置已经找到,插入到链表中 */
    skb->next = next;
    if (prev)
        prev->next = skb;
    else
        qp->fragments = skb;

     if (skb->dev)
         qp->iif = skb->dev->ifindex;
    skb->dev = NULL;
    qp->stamp = skb->stamp;
    qp->meat += skb->len;
    /* 原子操作增加已经使用的分片重组内存 */
    atomic_add(skb->truesize, &ip_frag_mem);
    if (offset == 0)
        qp->last_in |= FIRST_IN;
    write_lock(&ipfrag_lock);
    /* 把此链表移动到 ipq_lru_list的末尾 */
    list_move_tail(&qp->lru_list, &ipq_lru_list);
    write_unlock(&ipfrag_lock);
    return;
err:
    kfree_skb(skb);
}


5. ip_frag_reasm()函数:把属于同一个链接中的所有分片组成一个新的IP数据包。

 

static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
{
    struct iphdr *iph;
    struct sk_buff *fp, *head = qp->fragments;
    int len;
    int ihlen;

    /*remove it from list,all fragments were here.*/
    /* 将节点从链表中断开,删除定时器 */
    ipq_kill(qp);

    BUG_TRAP(head != NULL);
    BUG_TRAP(FRAG_CB(head)->offset == 0);

    /* Allocate a new buffer for the datagram. */
    ihlen = head->nh.iph->ihl*4;
    len = ihlen + qp->len;

    if (len > 65535)
        goto out_oversize;

    /* 头部不能被克隆,因为下面将可能对其进行修改. */
    if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
        goto out_nomem;

    /* If the first fragment is fragmented itself, we split
     * it to two chunks: the first with data and paged part
     * and the second, holding only fragments.
               */

    /*
     * 如果第一个分片已经被自身分片,我们需要把它分成两块;
     * 第一块(head)包含数据和页块,第二块(clone)只指向frag_list。
     */

    if (skb_shinfo(head)->frag_list) {
        struct sk_buff *clone;
        int i, plen = 0;
    
        if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
            goto out_nomem;
        clone->next = head->next;
        head->next = clone;
        /* clone指向frag_list */
        skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
        skb_shinfo(head)->frag_list = NULL;
        for (i=0; i<skb_shinfo(head)->nr_frags; i++)
            plen += skb_shinfo(head)->frags[i].size;
        clone->len = clone->data_len = head->data_len - plen;/* 最后生成了一个自身数据为0.不包含任何数据. frag_list指向所有的分片的clone */
        head->data_len -= clone->len;
        head->len -= clone->len;
        clone->csum = 0;
        clone->ip_summed = head->ip_summed;
        /* 原子操作增加已经使用的分片重组内存 */
        atomic_add(clone->truesize, &ip_frag_mem);
    }

    skb_shinfo(head)->frag_list = head->next;
    skb_push(head, head->data ? head->nh.raw);
               /* 原子操作减少已经使用的分片重组内存 */
    atomic_sub(head->truesize, &ip_frag_mem);

    /* 计算出所有分片的长度并减少分片重组内存 */
    for (fp=head->next; fp; fp = fp->next) {
        head->data_len += fp->len;
        head->len += fp->len;
        if (head->ip_summed != fp->ip_summed)
            head->ip_summed = CHECKSUM_NONE;
        else if (head->ip_summed == CHECKSUM_HW)
            head->csum = csum_add(head->csum, fp->csum);
        head->truesize += fp->truesize;
        atomic_sub(fp->truesize, &ip_frag_mem);
    }

    head->next = NULL;
    head->dev = dev;
    head->stamp = qp->stamp;

    /* 对IP头中的长度和偏移标志进行重置 */
    iph = head->nh.iph;
    iph->frag_off = 0;
    iph->tot_len = htons(len);
    IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
       /* 各碎片skb已经得到处理,在释放qp时将不再重新释放了*/
    qp->fragments = NULL;
    return head;

out_nomem:
     NETDEBUG(if (net_ratelimit())
     printk(KERN_ERR
            "IP: queue_glue: no memory for gluing queue %p\n",
            qp));
    goto out_fail;
out_oversize:
    if (net_ratelimit())
        printk(KERN_INFO
            "Oversized IP packet from %d.%d.%d.%d.\n",
            NIPQUAD(qp->saddr));
out_fail:
    IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
    return NULL;
}


6. ipq_put()函数:将节点的引用计数减1;当引用计数减为0时,调用ip_frag_destroy()释放节点里的所有分片,然后释放自身节点。

 

/*
 * Drop one reference to ipq. When the reference count reaches zero the
 * queue is destroyed via ip_frag_destroy(); work is passed through to it.
 */
static __inline__ void ipq_put(struct ipq *ipq, int *work)
{
    /* Other holders remain: nothing more to do. */
    if (!atomic_dec_and_test(&ipq->refcnt))
        return;

    ip_frag_destroy(ipq, work);
}

/* Complete destruction of ipq. */
static void ip_frag_destroy(struct ipq *qp, int *work)
{
    struct sk_buff *fp;
    
    /* 在ipq_kill()中对last_in设置为COMPLETE */
    BUG_TRAP(qp->last_in&COMPLETE);
    /* 判断删除定时器 */
    BUG_TRAP(del_timer(&qp->timer) == 0);

    /* 释放所有分片数据 */
    fp = qp->fragments;
    while (fp) {
        struct sk_buff *xp = fp->next;
        frag_kfree_skb(fp, work);
        fp = xp;
    }

    /* 最后释放自身*/
    frag_free_queue(qp, work);
}


参考《LINUX网络技术内幕》

参考 独孤阁的《IP碎片重组过程分析》

阅读(4538) | 评论(0) | 转发(0) |
0

上一篇:没有了

下一篇:Linux下Loopback流程分析

给主人留下些什么吧!~~