包缓冲的分配操作
================
(1) 包缓冲由包描述结构和数据缓冲区两部分组成, 当多个进程正在引用同一包结构时, 称为该包是共享的, 当不同的包结构指向同一数据缓冲区时, 这些包称为是克隆的, 克隆的包可以传递到不同包处理器中作不同的处理.
(2) alloc_skb(size,gfp_mask)用来分配一个数据区为size字节的包缓冲, kfree_skb(skb)用来回收一个包缓冲, 当最后一个克隆包结构被回收时, 包缓冲的数据区将被回收.
(3) 为了加速包结构的分配过程, 包结构的内存缓冲分配器使用了skb_headerinit()作为包结构的初始化函数, 此外每一CPU还设立了一个包结构队列(skb_head_pool)来作为局部分配和回收缓冲池, 最多可以容纳128(sysctl_hot_list_len)个包结构.
一些内嵌函数 ------------ skb_datarefp(skb) 返回数据区引用计数变量的指针 skb_realloc_headroom(skb, nhr) 将skb的包体重新分配到头部空隙为nhr字节的包缓冲上去 skb_get(skb) 引用包结构 kfree_skb(skb) 释放包结构的引用, 当最后一个进程释放包结构时, 包结构将被回收 kfree_skb_fast(skb) 快速释放包结构引用, 包结构内容不被初始化, 最后一个进程释放包结构时, 包结构将被回收 skb_cloned(skb) 是否为克隆包 skb_shared(skb) skb是否被多个进程共享 skb_share_check(skb, gfp_mask) 当包被共享时, 建立包的克隆包, 并释放对原包的引用 skb_unshare(skb, pri) 当包被克隆时, 建立克隆包的完全拷贝, 并释放与克隆包的引用 skb_orphan(skb) 使包成为不属于任何套接字的孤包 dev_alloc_skb(length) 分配数据区长度为(length+16), 包体开始于16字节的包缓冲 skb_cow(skb, headroom) 当包体的前部空隙小于headroom或者是克隆包时, 重建包缓冲并释放原包引用
包缓冲由sk_buff结构描述, 包缓冲数据区由其head和end成员界定, 而包数据体则由包数据区内data和tail界定的子区域来描述, 采用这种结构可以使添加或去除包头的操作变得非常方便.
skb_put(skb,len) 在包体尾部扩展长度为len的数据块, 返回扩展块的地址, __skb_put()为未校验版本 skb_push(skb,len) 在包体前部扩展长度为len的数据块, 返回扩展块的地址, __skb_push()为未校验版本 skb_pull(skb,len) 去除包体前部长度为len的数据块, 返回新包体的起始地址, __skb_pull()为未校验版本 skb_headroom(skb) 返回包体前部距离包区开始的长度 skb_tailroom(skb) 返回包体尾部距离包区结束的长度 skb_reserve(skb,len) 设置包体起始位置为包区开始的len字节 skb_trim(skb,len) 将包体截断为len字节, __skb_trim()为未校验版本
; include/linux/skbuff.h:
/* Backwards compatibility: reallocating headroom is implemented as a full
 * copy that keeps the current tailroom and provides nhr bytes of headroom. */
#define skb_realloc_headroom(skb, nhr) skb_copy_expand(skb, nhr, skb_tailroom(skb), GFP_ATOMIC)
/* Internal: return a pointer to the shared-data reference counter.  The
 * atomic_t refcount for the data area lives immediately past the end of
 * the data buffer (at skb->end); see the kmalloc in alloc_skb(). */
static inline atomic_t *skb_datarefp(struct sk_buff *skb)
{
	return (atomic_t *)(skb->end);
}
/**
 * skb_get - reference buffer
 * @skb: buffer to reference
 *
 * Makes another reference to a socket buffer and returns a pointer
 * to the buffer, so the call can be used inline in expressions.
 */
static inline struct sk_buff *skb_get(struct sk_buff *skb)
{
	atomic_inc(&skb->users);
	return skb;
}
/*
 * If users==1, we are the only owner and can avoid redundant
 * atomic change.
 */

/**
 * kfree_skb - free an sk_buff
 * @skb: buffer to free
 *
 * Drop a reference to the buffer and free it if the usage count has
 * hit zero.
 */
static inline void kfree_skb(struct sk_buff *skb)
{
	/* Reclaim the buffer when the refcount is 1, or when decrementing
	 * the refcount reaches zero. */
	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
		__kfree_skb(skb);
}
/*
 * kfree_skb_fast - fast-path release of an skb reference.
 *
 * Use this only if you did not touch the skb state (fast switching):
 * the head is handed to kfree_skbmem() without the cleanup and
 * reinitialisation performed by __kfree_skb().
 */
static inline void kfree_skb_fast(struct sk_buff *skb)
{
	int sole_owner = (atomic_read(&skb->users) == 1);

	if (sole_owner || atomic_dec_and_test(&skb->users))
		kfree_skbmem(skb);
}
/**
 * skb_cloned - is the buffer a clone
 * @skb: buffer to check
 *
 * Returns true if the buffer was generated with skb_clone() and is
 * one of multiple shared copies of the buffer. Cloned buffers are
 * shared data so must not be written to under normal circumstances.
 */
static inline int skb_cloned(struct sk_buff *skb)
{
	/* Only counts as cloned while the data area is still shared,
	 * i.e. its refcount has not dropped back to 1. */
	return skb->cloned && atomic_read(skb_datarefp(skb)) != 1;
}
/**
 * skb_shared - is the buffer shared
 * @skb: buffer to check
 *
 * Returns true when more than one holder has a reference to this
 * buffer, i.e. the user count is not exactly one.
 */
static inline int skb_shared(struct sk_buff *skb)
{
	int users = atomic_read(&skb->users);

	return users != 1;
}
/**
 * skb_share_check - check if buffer is shared and if so clone it
 * @skb: buffer to check
 * @pri: priority for memory allocation
 *
 * If the buffer is shared it is cloned and the reference to the old
 * copy is dropped; the new clone, holding a single reference, is
 * returned.  An unshared buffer is returned unchanged.  When called
 * from interrupt state or with spinlocks held, @pri must be
 * %GFP_ATOMIC.
 *
 * %NULL is returned on a memory allocation failure.
 */
static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
{
	struct sk_buff *clone;

	if (!skb_shared(skb))
		return skb;

	clone = skb_clone(skb, pri);
	kfree_skb(skb);
	return clone;
}
/*
 * Copy-on-write for shared buffers: packets with both a local reader
 * and a forwarding path (the usual case is tcpdumping a packet that is
 * being forwarded) need a private copy before they may be modified.
 */

/**
 * skb_unshare - make a copy of a shared buffer
 * @skb: buffer to check
 * @pri: priority for memory allocation
 *
 * If the buffer is a clone, a fresh copy of the data is made, the
 * reference to the clone is dropped and the copy is returned with its
 * reference count at 1.  A buffer that is not a clone is returned
 * as-is.  When called with a spinlock held or from interrupt state
 * @pri must be %GFP_ATOMIC.
 *
 * %NULL is returned on a memory allocation failure.
 */
static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
{
	struct sk_buff *private_copy;

	if (!skb_cloned(skb))
		return skb;

	private_copy = skb_copy(skb, pri);
	kfree_skb(skb);	/* Free our shared copy */
	return private_copy;
}
/**
 * skb_orphan - orphan a buffer
 * @skb: buffer to orphan
 *
 * If a buffer currently has an owner then we call the owner's
 * destructor function and make the @skb unowned. The buffer continues
 * to exist but is no longer charged to its former owner.
 */
static inline void skb_orphan(struct sk_buff *skb)
{
	/* Run the owner's destructor (if any) before severing ownership. */
	if (skb->destructor)
		skb->destructor(skb);
	skb->destructor = NULL;
	skb->sk = NULL;	/* no longer owned by any socket */
}
/** * dev_alloc_skb - allocate an skbuff for sending * @length: length to allocate * * Allocate a new &sk_buff and assign it a usage count of one. The * buffer has unspecified headroom built in. Users should allocate * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. * * %NULL is returned in there is no free memory. Although this function * allocates memory it can be called from an interrupt. */
static inline struct sk_buff *dev_alloc_skb(unsigned int length) { struct sk_buff *skb;
skb = alloc_skb(length+16, GFP_ATOMIC); if (skb) skb_reserve(skb,16); return skb; }
/** * skb_cow - copy a buffer if need be * @skb: buffer to copy * @headroom: needed headroom * * If the buffer passed lacks sufficient headroom or is a clone then * it is copied and the additional headroom made available. If there * is no free memory %NULL is returned. The new buffer is returned if * a copy was made (and the old one dropped a reference). The existing * buffer is returned otherwise. * * This function primarily exists to avoid making two copies when making * a writable copy of a buffer and then growing the headroom. */
static inline struct sk_buff * skb_cow(struct sk_buff *skb, unsigned int headroom) { headroom = (headroom+15)&~15;
if ((unsigned)skb_headroom(skb) < headroom || skb_cloned(skb)) { struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom); kfree_skb(skb); skb = skb2; } return skb; }
; net/core/skbuff.c:
/* Upper bound on the number of recycled sk_buff heads kept per CPU. */
int sysctl_hot_list_len = 128;

/* Slab cache from which sk_buff heads are allocated. */
static kmem_cache_t *skbuff_head_cache;

/* Per-CPU pool of recycled sk_buff heads, used as a local fast path for
 * allocation and release.  Each entry is padded to SMP_CACHE_BYTES so
 * the per-CPU lists do not share a cache line. */
static union {
	struct sk_buff_head list;
	char pad[SMP_CACHE_BYTES];
} skb_head_pool[NR_CPUS];
/*
 * skb_init - boot-time initialisation of the sk_buff allocator.
 *
 * Creates the slab cache for sk_buff heads, with skb_headerinit() as
 * the constructor so freshly allocated heads come pre-initialised, and
 * initialises every per-CPU hot-list recycling pool.  Panics if the
 * cache cannot be created, since networking cannot run without it.
 */
void __init skb_init(void)
{
	int i;

	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
					      sizeof(struct sk_buff),
					      0,
					      SLAB_HWCACHE_ALIGN,
					      skb_headerinit, NULL);
	if (!skbuff_head_cache)
		panic("cannot create skbuff cache");

	/* The loop bound was garbled in this listing; restored to iterate
	 * over all possible CPUs, matching the skb_head_pool[NR_CPUS]
	 * declaration above. */
	for (i = 0; i < NR_CPUS; i++)
		skb_queue_head_init(&skb_head_pool[i].list);
}
/*
 * alloc_skb - allocate a packet buffer whose data area holds @size bytes
 * (rounded up to a multiple of 16).  The head is taken from the per-CPU
 * hot list when possible, falling back to the slab cache.  Returns NULL
 * on allocation failure.
 */
struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
{
	struct sk_buff *skb;
	u8 *data;

	/* Sleeping allocations are illegal in interrupt context: complain
	 * (loudly, but at most a handful of times) and strip __GFP_WAIT. */
	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
		static int count = 0;
		if (++count < 5) {
			printk(KERN_ERR "alloc_skb called nonatomically "
			       "from interrupt %p\n", NET_CALLER(size));
			BUG();
		}
		gfp_mask &= ~__GFP_WAIT;
	}

	/* Get the HEAD */
	skb = skb_head_from_pool();
	if (skb == NULL) {
		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (skb == NULL)
			goto nohead;
	}

	/* Get the DATA. Size must match skb_add_mtu(). */
	size = ((size + 15) & ~15);	/* data area length is a multiple of 16 bytes */
	/* One extra atomic_t past the data area stores the data refcount
	 * (see skb_datarefp()). */
	data = kmalloc(size + sizeof(atomic_t), gfp_mask);
	if (data == NULL)
		goto nodata;

	/* XXX: does not include slab overhead */
	skb->truesize = size + sizeof(struct sk_buff);	/* combined length of head and data */

	/* Load the data pointers. */
	skb->head = data;
	skb->data = data;
	skb->tail = data;
	skb->end = data + size;

	/* Set up other state */
	skb->len = 0;
	skb->cloned = 0;

	atomic_set(&skb->users, 1);	/* head structure reference count */
	atomic_set(skb_datarefp(skb), 1);	/* data area reference count */
	return skb;

nodata:
	skb_head_to_pool(skb);
nohead:
	return NULL;
}

/*
 * skb_head_from_pool - grab a recycled sk_buff head from this CPU's hot
 * list, or NULL when the list is empty.  Interrupts are disabled around
 * the dequeue because the list is also manipulated from irq context.
 */
static __inline__ struct sk_buff *skb_head_from_pool(void)
{
	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

	if (skb_queue_len(list)) {
		struct sk_buff *skb;
		unsigned long flags;

		local_irq_save(flags);
		skb = __skb_dequeue(list);
		local_irq_restore(flags);
		return skb;
	}
	return NULL;
}

/*
 * __kfree_skb - really release a buffer whose user count has reached
 * zero: drop the route-cache reference, run the destructor, reset the
 * head to a clean state and recycle it (and possibly the data area).
 */
void __kfree_skb(struct sk_buff *skb)
{
	if (skb->list) {
		printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
		       "on a list (from %p).\n", NET_CALLER(skb));
		BUG();
	}

	dst_release(skb->dst);
	if (skb->destructor) {
		if (in_irq()) {
			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
			       NET_CALLER(skb));
		}
		skb->destructor(skb);
	}
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
#endif
	skb_headerinit(skb, NULL, 0);	/* clean state */
	kfree_skbmem(skb);
}

/*
 * kfree_skbmem - release the head (always) and the data area (when this
 * buffer is not a clone, or when the data refcount drops to zero).
 */
void kfree_skbmem(struct sk_buff *skb)
{
	if (!skb->cloned || atomic_dec_and_test(skb_datarefp(skb)))
		kfree(skb->head);

	skb_head_to_pool(skb);	/* recycle the head into the per-CPU pool */
}
/*
 * skb_head_to_pool - recycle an sk_buff head onto this CPU's hot list,
 * unless the list is already at its limit (sysctl_hot_list_len), in
 * which case the head goes back to the slab allocator.
 */
static __inline__ void skb_head_to_pool(struct sk_buff *skb)
{
	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

	if (skb_queue_len(list) < sysctl_hot_list_len) {
		unsigned long flags;

		local_irq_save(flags);
		__skb_queue_head(list, skb);
		local_irq_restore(flags);

		return;
	}
	kmem_cache_free(skbuff_head_cache, skb);
}

/*
 * skb_headerinit - sk_buff head constructor: reset every field to its
 * default state.  Registered as the slab cache constructor in
 * skb_init() and also called from __kfree_skb() before recycling.
 */
static inline void skb_headerinit(void *p, kmem_cache_t *cache,
				  unsigned long flags)
{
	struct sk_buff *skb = p;

	skb->next = NULL;
	skb->prev = NULL;
	skb->list = NULL;
	skb->sk = NULL;
	skb->stamp.tv_sec = 0;	/* No idea about time */
	skb->dev = NULL;
	skb->dst = NULL;
	memset(skb->cb, 0, sizeof(skb->cb));
	skb->pkt_type = PACKET_HOST;	/* Default type */
	skb->ip_summed = 0;
	skb->priority = 0;
	skb->security = 0;	/* By default packets are insecure */
	skb->destructor = NULL;

#ifdef CONFIG_NETFILTER
	skb->nfmark = skb->nfcache = 0;
	skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 0;
#endif
#endif
#ifdef CONFIG_NET_SCHED
	skb->tc_index = 0;
#endif
}
/*
 * skb_clone - duplicate an sk_buff head.  The clone and the original
 * share the same data area (its refcount is bumped); only the head
 * structure is new, and both buffers are flagged as cloned.
 */
struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	n = skb_head_from_pool();
	if (!n) {
		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;
	}

	memcpy(n, skb, sizeof(*n));	/* full byte-copy of the head structure */
	atomic_inc(skb_datarefp(skb));	/* one more reference on the shared data area */
	skb->cloned = 1;	/* the original now shares its data too */

	dst_clone(n->dst);	/* the clone holds its own dst reference */
	n->cloned = 1;
	n->next = n->prev = NULL;	/* reset the clone's queue linkage */
	n->list = NULL;
	n->sk = NULL;	/* the clone is owned by no socket */
	atomic_set(&n->users, 1);
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	nf_conntrack_get(skb->nfct);
#endif
	return n;
}
/**
 * skb_copy - copy an sk_buff
 * @skb: buffer to copy
 * @gfp_mask: allocation priority
 *
 * Make a copy of both an &sk_buff and its data. This is used when the
 * caller wishes to modify the data and needs a private copy of the
 * data to alter. Returns %NULL on failure or the pointer to the buffer
 * on success. The returned buffer has a reference count of 1.
 *
 * You must pass %GFP_ATOMIC as the allocation priority if this function
 * is called from an interrupt.
 */
struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	/*
	 * Allocate the copy buffer
	 */
	n = alloc_skb(skb->end - skb->head, gfp_mask);	/* same data-area size as the original */
	if (n == NULL)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n, skb->data - skb->head);
	/* Set the tail pointer and length */
	skb_put(n, skb->len);
	/* Copy the bytes */
	memcpy(n->head, skb->head, skb->end - skb->head);	/* copies the whole data area, headroom included */
	n->csum = skb->csum;
	copy_skb_header(n, skb);	/* copy the remaining head fields */

	return n;
}

/*
 * copy_skb_header - copy the bookkeeping fields of @old into @new,
 * rebasing the protocol header pointers onto the new data area.
 */
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
	/*
	 * Shift between the two data areas in bytes
	 */
	unsigned long offset = new->data - old->data;

	new->list = NULL;
	new->sk = NULL;
	new->dev = old->dev;
	new->priority = old->priority;
	new->protocol = old->protocol;
	new->dst = dst_clone(old->dst);
	new->h.raw = old->h.raw + offset;	/* transport-layer header pointer, rebased */
	new->nh.raw = old->nh.raw + offset;	/* network-layer header pointer, rebased */
	new->mac.raw = old->mac.raw + offset;	/* link-layer header pointer, rebased */
	memcpy(new->cb, old->cb, sizeof(old->cb));
	new->used = old->used;
	atomic_set(&new->users, 1);
	new->pkt_type = old->pkt_type;
	new->stamp = old->stamp;
	new->destructor = NULL;
	new->security = old->security;
#ifdef CONFIG_NETFILTER
	new->nfmark = old->nfmark;
	new->nfcache = old->nfcache;
	new->nfct = old->nfct;
	nf_conntrack_get(new->nfct);
#ifdef CONFIG_NETFILTER_DEBUG
	new->nf_debug = old->nf_debug;
#endif
#endif
#ifdef CONFIG_NET_SCHED
	new->tc_index = old->tc_index;
#endif
}
/**
 * skb_copy_expand - copy and expand sk_buff
 * @skb: buffer to copy
 * @newheadroom: new free bytes at head
 * @newtailroom: new free bytes at tail
 * @gfp_mask: allocation priority
 *
 * Make a copy of both an &sk_buff and its data and while doing so
 * allocate additional space.
 *
 * This is used when the caller wishes to modify the data and needs a
 * private copy of the data to alter as well as more space for new fields.
 * Returns %NULL on failure or the pointer to the buffer
 * on success. The returned buffer has a reference count of 1.
 *
 * You must pass %GFP_ATOMIC as the allocation priority if this function
 * is called from an interrupt.
 */
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
				int newheadroom,
				int newtailroom,
				int gfp_mask)
{
	struct sk_buff *n;

	/*
	 * Allocate the copy buffer: requested headroom + current packet
	 * body + requested tailroom.
	 */
	n = alloc_skb(newheadroom + (skb->tail - skb->data) + newtailroom,
		      gfp_mask);
	if (n == NULL)
		return NULL;

	skb_reserve(n, newheadroom);

	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	/* Copy the data only. */
	memcpy(n->data, skb->data, skb->len);

	copy_skb_header(n, skb);
	return n;
}