包缓冲的分配操作
================
(1) 包缓冲由包描述结构和数据缓冲区两部分组成, 当多个进程正在引用同一包结构时, 称为该包是共享的, 当不同的包结构指向同一数据缓冲区时, 这些包称为是克隆的, 克隆的包可以传递到不同包处理器中作不同的处理.
(2) alloc_skb(size,gfp_mask)用来分配一个数据区为size字节的包缓冲, kfree_skb(skb)用来回收一个包缓冲, 当最后一个克隆包结构被回收时, 包缓冲的数据区将被回收.
(3) 为了加速包结构的分配过程, 包结构的内存缓冲分配器使用了skb_headerinit()作为包结构的初始化函数, 此外每一CPU还设立了一个包结构队列(skb_head_pool)来作为局部分配和回收缓冲池, 最多可以容纳128(sysctl_hot_list_len)个包结构.
一些内嵌函数 ------------ skb_datarefp(skb) 返回数据区引用计数变量的指针 skb_realloc_headroom(skb, nhr) 将skb的包体重新分配到头部空隙为nhr字节的包缓冲上去 skb_get(skb) 引用包结构 kfree_skb(skb) 释放包结构的引用, 当最后一个进程释放包结构时, 包结构将被回收 kfree_skb_fast(skb) 快速释放包结构引用, 包结构内容不被初始化, 最后一个进程释放包结构时, 包结构将被回收 skb_cloned(skb) 是否为克隆包 skb_shared(skb) skb是否被多个进程共享 skb_share_check(skb, gfp_mask) 当包被共享时, 建立包的克隆包, 并释放对原包的引用 skb_unshare(skb, pri) 当包被克隆时, 建立克隆包的完全拷贝, 并释放与克隆包的引用 skb_orphan(skb) 使包成为不属于任何套接字的孤包 dev_alloc_skb(length) 分配数据区长度为(length+16), 包体开始于16字节的包缓冲 skb_cow(skb, headroom) 当包体的前部空隙小于headroom或者是克隆包时, 重建包缓冲并释放原包引用
包缓冲由sk_buff结构描述, 包缓冲数据区由其head和end成员界定, 而包数据体则由包数据区内data和tail界定的子区域来描述, 采用这种结构可以使添加或去除包头的操作变得非常方便.
skb_put(skb,len) 在包体尾部扩展长度为len的数据块, 返回扩展块的地址, __skb_put()为未校验版本 skb_push(skb,len) 在包体前部扩展长度为len的数据块, 返回扩展块的地址, __skb_push()为未校验版本 skb_pull(skb,len) 去除包体前部长度为len的数据块, 返回新包体的起始地址, __skb_pull()为未校验版本 skb_headroom(skb) 返回包体前部距离包区开始的长度 skb_tailroom(skb) 返回包体尾部距离包区结束的长度 skb_reserve(skb,len) 设置包体起始位置为包区开始的len字节 skb_trim(skb,len) 将包体截断为len字节, __skb_trim()为未校验版本
; include/linux/skbuff.h:
/* Backwards compatibility: reallocating headroom is implemented as a full
 * copy that keeps the current tailroom and provides nhr bytes of headroom. */
#define skb_realloc_headroom(skb, nhr) skb_copy_expand(skb, nhr, skb_tailroom(skb), GFP_ATOMIC)
/* Internal: return a pointer to the shared-data reference counter.  The
 * atomic_t refcount for the data area lives immediately past the end of
 * the data buffer (at skb->end); see the kmalloc in alloc_skb(). */
static inline atomic_t *skb_datarefp(struct sk_buff *skb)
{
	return (atomic_t *)(skb->end);
}
/**
 * skb_get - reference buffer
 * @skb: buffer to reference
 *
 * Makes another reference to a socket buffer and returns a pointer
 * to the buffer, so the call can be used inline in expressions.
 */
static inline struct sk_buff *skb_get(struct sk_buff *skb)
{
	atomic_inc(&skb->users);
	return skb;
}
/*
 * If users==1, we are the only owner and can avoid redundant
 * atomic change.
 */

/**
 * kfree_skb - free an sk_buff
 * @skb: buffer to free
 *
 * Drop a reference to the buffer and free it if the usage count has
 * hit zero.
 */
static inline void kfree_skb(struct sk_buff *skb)
{
	/* Reclaim the buffer when the refcount is 1, or when decrementing
	 * the refcount reaches zero. */
	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
		__kfree_skb(skb);
}
/*
 * kfree_skb_fast - fast-path release of an skb reference.
 *
 * Use this only if you did not touch the skb state (fast switching):
 * the head is handed to kfree_skbmem() without the cleanup and
 * reinitialisation performed by __kfree_skb().
 */
static inline void kfree_skb_fast(struct sk_buff *skb)
{
	int sole_owner = (atomic_read(&skb->users) == 1);

	if (sole_owner || atomic_dec_and_test(&skb->users))
		kfree_skbmem(skb);
}
/**
 * skb_cloned - is the buffer a clone
 * @skb: buffer to check
 *
 * Returns true if the buffer was generated with skb_clone() and is
 * one of multiple shared copies of the buffer. Cloned buffers are
 * shared data so must not be written to under normal circumstances.
 */
static inline int skb_cloned(struct sk_buff *skb)
{
	/* Only counts as cloned while the data area is still shared,
	 * i.e. its refcount has not dropped back to 1. */
	return skb->cloned && atomic_read(skb_datarefp(skb)) != 1;
}
/**
 * skb_shared - is the buffer shared
 * @skb: buffer to check
 *
 * Returns true when more than one holder has a reference to this
 * buffer, i.e. the user count is not exactly one.
 */
static inline int skb_shared(struct sk_buff *skb)
{
	int users = atomic_read(&skb->users);

	return users != 1;
}
/**
 * skb_share_check - check if buffer is shared and if so clone it
 * @skb: buffer to check
 * @pri: priority for memory allocation
 *
 * If the buffer is shared it is cloned and the reference to the old
 * copy is dropped; the new clone, holding a single reference, is
 * returned.  An unshared buffer is returned unchanged.  When called
 * from interrupt state or with spinlocks held, @pri must be
 * %GFP_ATOMIC.
 *
 * %NULL is returned on a memory allocation failure.
 */
static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
{
	struct sk_buff *clone;

	if (!skb_shared(skb))
		return skb;

	clone = skb_clone(skb, pri);
	kfree_skb(skb);
	return clone;
}
/*
 * Copy-on-write for shared buffers: packets with both a local reader
 * and a forwarding path (the usual case is tcpdumping a packet that is
 * being forwarded) need a private copy before they may be modified.
 */

/**
 * skb_unshare - make a copy of a shared buffer
 * @skb: buffer to check
 * @pri: priority for memory allocation
 *
 * If the buffer is a clone, a fresh copy of the data is made, the
 * reference to the clone is dropped and the copy is returned with its
 * reference count at 1.  A buffer that is not a clone is returned
 * as-is.  When called with a spinlock held or from interrupt state
 * @pri must be %GFP_ATOMIC.
 *
 * %NULL is returned on a memory allocation failure.
 */
static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
{
	struct sk_buff *private_copy;

	if (!skb_cloned(skb))
		return skb;

	private_copy = skb_copy(skb, pri);
	kfree_skb(skb);	/* Free our shared copy */
	return private_copy;
}
/**
 * skb_orphan - orphan a buffer
 * @skb: buffer to orphan
 *
 * If a buffer currently has an owner then we call the owner's
 * destructor function and make the @skb unowned. The buffer continues
 * to exist but is no longer charged to its former owner.
 */
static inline void skb_orphan(struct sk_buff *skb)
{
	/* Run the owner's destructor (if any) before severing ownership. */
	if (skb->destructor)
		skb->destructor(skb);
	skb->destructor = NULL;
	skb->sk = NULL;	/* no longer owned by any socket */
}
/** * dev_alloc_skb - allocate an skbuff for sending * @length: length to allocate * * Allocate a new &sk_buff and assign it a usage count of one. The * buffer has unspecified headroom built in. Users should allocate * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. * * %NULL is returned in there is no free memory. Although this function * allocates memory it can be called from an interrupt. */
static inline struct sk_buff *dev_alloc_skb(unsigned int length) { struct sk_buff *skb;
skb = alloc_skb(length+16, GFP_ATOMIC); if (skb) skb_reserve(skb,16); return skb; }
/** * skb_cow - copy a buffer if need be * @skb: buffer to copy * @headroom: needed headroom * * If the buffer passed lacks sufficient headroom or is a clone then * it is copied and the additional headroom made available. If there * is no free memory %NULL is returned. The new buffer is returned if * a copy was made (and the old one dropped a reference). The existing * buffer is returned otherwise. * * This function primarily exists to avoid making two copies when making * a writable copy of a buffer and then growing the headroom. */
static inline struct sk_buff * skb_cow(struct sk_buff *skb, unsigned int headroom) { headroom = (headroom+15)&~15;
if ((unsigned)skb_headroom(skb) < headroom || skb_cloned(skb)) { struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom); kfree_skb(skb); skb = skb2; } return skb; }
; net/core/skbuff.c:
/* Upper bound on the number of recycled sk_buff heads kept per CPU. */
int sysctl_hot_list_len = 128;

/* Slab cache from which sk_buff heads are allocated. */
static kmem_cache_t *skbuff_head_cache;

/* Per-CPU pool of recycled sk_buff heads, used as a local fast path for
 * allocation and release.  Each entry is padded to SMP_CACHE_BYTES so
 * the per-CPU lists do not share a cache line. */
static union {
	struct sk_buff_head list;
	char pad[SMP_CACHE_BYTES];
} skb_head_pool[NR_CPUS];
/*
 * skb_init - boot-time initialisation of the sk_buff allocator.
 *
 * Creates the slab cache for sk_buff heads, with skb_headerinit() as
 * the constructor so freshly allocated heads come pre-initialised, and
 * initialises every per-CPU hot-list recycling pool.  Panics if the
 * cache cannot be created, since networking cannot run without it.
 */
void __init skb_init(void)
{
	int i;

	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
					      sizeof(struct sk_buff),
					      0,
					      SLAB_HWCACHE_ALIGN,
					      skb_headerinit, NULL);
	if (!skbuff_head_cache)
		panic("cannot create skbuff cache");

	/* The loop bound was garbled in this listing; restored to iterate
	 * over all possible CPUs, matching the skb_head_pool[NR_CPUS]
	 * declaration above. */
	for (i = 0; i < NR_CPUS; i++)
		skb_queue_head_init(&skb_head_pool[i].list);
}
/*
 * alloc_skb - allocate a packet buffer whose data area holds @size bytes
 * (rounded up to a multiple of 16).  The head is taken from the per-CPU
 * hot list when possible, falling back to the slab cache.  Returns NULL
 * on allocation failure.
 */
struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
{
	struct sk_buff *skb;
	u8 *data;

	/* Sleeping allocations are illegal in interrupt context: complain
	 * (loudly, but at most a handful of times) and strip __GFP_WAIT. */
	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
		static int count = 0;
		if (++count < 5) {
			printk(KERN_ERR "alloc_skb called nonatomically "
			       "from interrupt %p\n", NET_CALLER(size));
			BUG();
		}
		gfp_mask &= ~__GFP_WAIT;
	}

	/* Get the HEAD */
	skb = skb_head_from_pool();
	if (skb == NULL) {
		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (skb == NULL)
			goto nohead;
	}

	/* Get the DATA. Size must match skb_add_mtu(). */
	size = ((size + 15) & ~15);	/* data area length is a multiple of 16 bytes */
	/* One extra atomic_t past the data area stores the data refcount
	 * (see skb_datarefp()). */
	data = kmalloc(size + sizeof(atomic_t), gfp_mask);
	if (data == NULL)
		goto nodata;

	/* XXX: does not include slab overhead */
	skb->truesize = size + sizeof(struct sk_buff);	/* combined length of head and data */

	/* Load the data pointers. */
	skb->head = data;
	skb->data = data;
	skb->tail = data;
	skb->end = data + size;

	/* Set up other state */
	skb->len = 0;
	skb->cloned = 0;

	atomic_set(&skb->users, 1);	/* head structure reference count */
	atomic_set(skb_datarefp(skb), 1);	/* data area reference count */
	return skb;

nodata:
	skb_head_to_pool(skb);
nohead:
	return NULL;
}

/*
 * skb_head_from_pool - grab a recycled sk_buff head from this CPU's hot
 * list, or NULL when the list is empty.  Interrupts are disabled around
 * the dequeue because the list is also manipulated from irq context.
 */
static __inline__ struct sk_buff *skb_head_from_pool(void)
{
	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

	if (skb_queue_len(list)) {
		struct sk_buff *skb;
		unsigned long flags;

		local_irq_save(flags);
		skb = __skb_dequeue(list);
		local_irq_restore(flags);
		return skb;
	}
	return NULL;
}

/*
 * __kfree_skb - really release a buffer whose user count has reached
 * zero: drop the route-cache reference, run the destructor, reset the
 * head to a clean state and recycle it (and possibly the data area).
 */
void __kfree_skb(struct sk_buff *skb)
{
	if (skb->list) {
		printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
		       "on a list (from %p).\n", NET_CALLER(skb));
		BUG();
	}

	dst_release(skb->dst);
	if (skb->destructor) {
		if (in_irq()) {
			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
			       NET_CALLER(skb));
		}
		skb->destructor(skb);
	}
#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
#endif
	skb_headerinit(skb, NULL, 0);	/* clean state */
	kfree_skbmem(skb);
}

/*
 * kfree_skbmem - release the head (always) and the data area (when this
 * buffer is not a clone, or when the data refcount drops to zero).
 */
void kfree_skbmem(struct sk_buff *skb)
{
	if (!skb->cloned || atomic_dec_and_test(skb_datarefp(skb)))
		kfree(skb->head);

	skb_head_to_pool(skb);	/* recycle the head into the per-CPU pool */
}
/*
 * skb_head_to_pool - recycle an sk_buff head onto this CPU's hot list,
 * unless the list is already at its limit (sysctl_hot_list_len), in
 * which case the head goes back to the slab allocator.
 */
static __inline__ void skb_head_to_pool(struct sk_buff *skb)
{
	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

	if (skb_queue_len(list) < sysctl_hot_list_len) {
		unsigned long flags;

		local_irq_save(flags);
		__skb_queue_head(list, skb);
		local_irq_restore(flags);

		return;
	}
	kmem_cache_free(skbuff_head_cache, skb);
}

/*
 * skb_headerinit - sk_buff head constructor: reset every field to its
 * default state.  Registered as the slab cache constructor in
 * skb_init() and also called from __kfree_skb() before recycling.
 */
static inline void skb_headerinit(void *p, kmem_cache_t *cache,
				  unsigned long flags)
{
	struct sk_buff *skb = p;

	skb->next = NULL;
	skb->prev = NULL;
	skb->list = NULL;
	skb->sk = NULL;
	skb->stamp.tv_sec = 0;	/* No idea about time */
	skb->dev = NULL;
	skb->dst = NULL;
	memset(skb->cb, 0, sizeof(skb->cb));
	skb->pkt_type = PACKET_HOST;	/* Default type */
	skb->ip_summed = 0;
	skb->priority = 0;
	skb->security = 0;	/* By default packets are insecure */
	skb->destructor = NULL;

#ifdef CONFIG_NETFILTER
	skb->nfmark = skb->nfcache = 0;
	skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 0;
#endif
#endif
#ifdef CONFIG_NET_SCHED
	skb->tc_index = 0;
#endif
}
/*
 * skb_clone - duplicate an sk_buff head.  The clone and the original
 * share the same data area (its refcount is bumped); only the head
 * structure is new, and both buffers are flagged as cloned.
 */
struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	n = skb_head_from_pool();
	if (!n) {
		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
		if (!n)
			return NULL;
	}

	memcpy(n, skb, sizeof(*n));	/* full byte-copy of the head structure */
	atomic_inc(skb_datarefp(skb));	/* one more reference on the shared data area */
	skb->cloned = 1;	/* the original now shares its data too */

	dst_clone(n->dst);	/* the clone holds its own dst reference */
	n->cloned = 1;
	n->next = n->prev = NULL;	/* reset the clone's queue linkage */
	n->list = NULL;
	n->sk = NULL;	/* the clone is owned by no socket */
	atomic_set(&n->users, 1);
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	nf_conntrack_get(skb->nfct);
#endif
	return n;
}
/**
 * skb_copy - copy an sk_buff
 * @skb: buffer to copy
 * @gfp_mask: allocation priority
 *
 * Make a copy of both an &sk_buff and its data. This is used when the
 * caller wishes to modify the data and needs a private copy of the
 * data to alter. Returns %NULL on failure or the pointer to the buffer
 * on success. The returned buffer has a reference count of 1.
 *
 * You must pass %GFP_ATOMIC as the allocation priority if this function
 * is called from an interrupt.
 */
struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n;

	/*
	 * Allocate the copy buffer
	 */
	n = alloc_skb(skb->end - skb->head, gfp_mask);	/* same data-area size as the original */
	if (n == NULL)
		return NULL;

	/* Set the data pointer */
	skb_reserve(n, skb->data - skb->head);
	/* Set the tail pointer and length */
	skb_put(n, skb->len);
	/* Copy the bytes */
	memcpy(n->head, skb->head, skb->end - skb->head);	/* copies the whole data area, headroom included */
	n->csum = skb->csum;
	copy_skb_header(n, skb);	/* copy the remaining head fields */

	return n;
}

/*
 * copy_skb_header - copy the bookkeeping fields of @old into @new,
 * rebasing the protocol header pointers onto the new data area.
 */
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
	/*
	 * Shift between the two data areas in bytes
	 */
	unsigned long offset = new->data - old->data;

	new->list = NULL;
	new->sk = NULL;
	new->dev = old->dev;
	new->priority = old->priority;
	new->protocol = old->protocol;
	new->dst = dst_clone(old->dst);
	new->h.raw = old->h.raw + offset;	/* transport-layer header pointer, rebased */
	new->nh.raw = old->nh.raw + offset;	/* network-layer header pointer, rebased */
	new->mac.raw = old->mac.raw + offset;	/* link-layer header pointer, rebased */
	memcpy(new->cb, old->cb, sizeof(old->cb));
	new->used = old->used;
	atomic_set(&new->users, 1);
	new->pkt_type = old->pkt_type;
	new->stamp = old->stamp;
	new->destructor = NULL;
	new->security = old->security;
#ifdef CONFIG_NETFILTER
	new->nfmark = old->nfmark;
	new->nfcache = old->nfcache;
	new->nfct = old->nfct;
	nf_conntrack_get(new->nfct);
#ifdef CONFIG_NETFILTER_DEBUG
	new->nf_debug = old->nf_debug;
#endif
#endif
#ifdef CONFIG_NET_SCHED
	new->tc_index = old->tc_index;
#endif
}
/**
 * skb_copy_expand - copy and expand sk_buff
 * @skb: buffer to copy
 * @newheadroom: new free bytes at head
 * @newtailroom: new free bytes at tail
 * @gfp_mask: allocation priority
 *
 * Make a copy of both an &sk_buff and its data and while doing so
 * allocate additional space.
 *
 * This is used when the caller wishes to modify the data and needs a
 * private copy of the data to alter as well as more space for new fields.
 * Returns %NULL on failure or the pointer to the buffer
 * on success. The returned buffer has a reference count of 1.
 *
 * You must pass %GFP_ATOMIC as the allocation priority if this function
 * is called from an interrupt.
 */
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
				int newheadroom,
				int newtailroom,
				int gfp_mask)
{
	struct sk_buff *n;

	/*
	 * Allocate the copy buffer: requested headroom + current packet
	 * body + requested tailroom.
	 */
	n = alloc_skb(newheadroom + (skb->tail - skb->data) + newtailroom,
		      gfp_mask);
	if (n == NULL)
		return NULL;

	skb_reserve(n, newheadroom);

	/* Set the tail pointer and length */
	skb_put(n, skb->len);

	/* Copy the data only. */
	memcpy(n->data, skb->data, skb->len);

	copy_skb_header(n, skb);
	return n;
}