主要源文件:linux-2.6.37/include/linux/skbuff.h
linux-2.6.37/net/core/skbuff.c
一些相关数据结构
-
在include/linux/ktime.h中,
-
union ktime {
-
s64 tv64 ;
-
#if BITS_PER_LONG != 64 && !defined(CONFIG_KTIME_SCALAR)
-
struct {
-
# ifdef __BIG_ENDIAN
-
s32 sec , nsec ;
-
#else
-
s32 nsec , sec ;
-
#endif
-
} tv ;
-
#endif
-
} ;
-
-
typedef union ktime ktime_t ;
-
-
struct sk_buff_head {
-
-
struct sk_buff *next;
-
struct sk_buff *prev;
-
__u32 qlen;
-
spinlock_t lock;
-
};
-
-
-
# if BITS_PER_LONG > 32
-
# define NET_SKBUFF_DATA_USES_OFFSET 1
-
# endif
-
-
# ifdef NET_SKBUFF_DATA_USES_OFFSET
-
typedef unsigned int sk_buff_data_t ;
-
# else
-
typedef unsigned char *sk_buff_data_t ;
-
#endif
在include/linux/ktime.h中,
/*
 * union ktime - time value stored in a single signed 64-bit word (tv64).
 *
 * On builds where BITS_PER_LONG is not 64 and CONFIG_KTIME_SCALAR is not
 * defined, the same storage can also be accessed as a pair of signed
 * 32-bit fields, tv.sec and tv.nsec.
 */
union ktime {
s64 tv64 ;
#if BITS_PER_LONG != 64 && !defined(CONFIG_KTIME_SCALAR)
struct {
/* The member order is swapped between big-endian and other builds. */
# ifdef __BIG_ENDIAN
s32 sec , nsec ;
#else
s32 nsec , sec ;
#endif
} tv ;
#endif
} ;
/* Short alias for union ktime. */
typedef union ktime ktime_t ;
/*
 * struct sk_buff_head - holds next/prev sk_buff pointers, a 32-bit qlen
 * counter and a spinlock.
 * NOTE(review): presumably the anchor of a doubly linked sk_buff queue,
 * with qlen counting queued buffers and lock guarding the list -- confirm.
 */
struct sk_buff_head {
/* These two members must be first. */
struct sk_buff *next;
struct sk_buff *prev;
__u32 qlen;
spinlock_t lock;
};
/*
 * About sk_buff_data_t:
 * it is an unsigned int when NET_SKBUFF_DATA_USES_OFFSET is defined (which
 * is done here whenever BITS_PER_LONG > 32), and an unsigned char pointer
 * otherwise.
 */
# if BITS_PER_LONG > 32
# define NET_SKBUFF_DATA_USES_OFFSET 1
# endif
# ifdef NET_SKBUFF_DATA_USES_OFFSET
typedef unsigned int sk_buff_data_t ;
# else
typedef unsigned char *sk_buff_data_t ;
#endif
==================================================================================================
sk_buff结构体
-
-
struct sk_buff {
-
-
struct sk_buff *next ;
-
struct sk_buff *prev ;
-
-
ktime_t tstamp ;
-
-
struct sock *sk ;
-
-
-
-
-
struct net_device *dev ;
-
-
-
-
-
char cb[48] __aligned (8) ;
-
...
-
-
-
-
-
unsigned int len , data_len ;
-
-
-
__be16 protocol ;
-
...
-
-
-
-
unsigned char *head , *data ;
-
...
-
-
sk_buff_data_t transport_header ;
-
sk_buff_data_t network_header ;
-
sk_buff_data_t mac_header ;
-
-
sk_buff_data_t tail ;
-
sk_buff_data_t end ;
-
atomic_t users ;
-
-
-
unsigned int truesize ;
-
};
-
-
-
-
-
struct skb_shared_info {
-
-
-
-
unsigned short nr_frags;
-
-
...
-
-
-
-
-
-
struct sk_buff *frag_list ;
-
-
-
-
-
-
atomic_t dataref ;
-
-
-
-
-
-
skb_frag_t frags[MAX_SKB_FRAGS] ;
-
};
-
-
-
-
-
#define MAX_SKB_FRAGS (65536 / PAGE_SIZE + 2 )
-
-
typedef struct skb_frag_struct skb_frag_t ;
-
struct skb_frag_struct {
-
struct page *page ;
-
-
#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
-
__u32 page_offset ;
-
__u32 size;
-
#else
-
__u16 page_offset ;
-
__u16 size ;
-
#endif
-
};
/* struct sk_buff - socket buffer */
struct sk_buff {
/* These two members must be first */
struct sk_buff *next ; /* Next buffer in list */
struct sk_buff *prev ; /* Previous buffer in list */
ktime_t tstamp ; /* Time we arrived,记录接收或发送报文的时间戳*/
struct sock *sk ; /* Socket we are owned by */
/* Device we arrived on / are leaving by
* 通过该设备接收或发送,记录网络接口的信息和完成操作
*/
struct net_device *dev ;
/* This is the control buffer. It is free to use for every
* layer. Please put your private variables there.
*/
char cb[48] __aligned (8) ;
...
/* data_len为分页数据所包含的全部报文长度
* len为某时刻的报文总长度
* 那么,线性数据的长度为:skb->len - skb->data_len
*/
unsigned int len , data_len ;
/* 保存了下一个协议层的信息,在处理报文时由当前协议层设置 */
__be16 protocol ;
...
/* head指向线性数据区的开始
* data指向驻留线性数据区中数据的起始位置
*/
unsigned char *head , *data ;
...
/* 协议头表示 */
sk_buff_data_t transport_header ; /* 传输层协议头 */
sk_buff_data_t network_header ; /* 网络层协议头 */
sk_buff_data_t mac_header ; /* 链路层协议头 */
sk_buff_data_t tail ; /* 指向驻留在线性数据区的最后一字节数据*/
sk_buff_data_t end ; /* 指向线性数据区的结尾,确保不超出可用存储缓冲区 */
atomic_t users ; /* 引用该sk_buff的数量*/
/* 该缓冲区所分配的总内存,包括sk_buff结构大小 + 数据块大小 (应该不包括分页大小?)*/
unsigned int truesize ;
};
/* This data is invariant across clones and lives at
* the end of the header data, ie. at skb->end.
*/
struct skb_shared_info {
/* number of fragments belonged to this sk_buff
* 此sk_buff分页段的数目,它表示frags[]数组的元素数量,该数组包含sk_buff的分页数据
*/
unsigned short nr_frags;
...
/* 指向其分段列表,此sk_buff的总长度为frag_list链表中每个分段长度(skb->len)的和,
* 再加上原始的sk_buff的长度
* 通过此域可进行报文分段!!
*/
struct sk_buff *frag_list ;
/*
* Warning : all fields before dataref are cleared in __alloc_skb()
* 此sk_buff被引用的次数
*/
atomic_t dataref ;
/*
* must be last field
* 分段的数组,包含sk_buff的分页数据
*/
skb_frag_t frags[MAX_SKB_FRAGS] ;
};
/* To allow 64K frame to be packed as single skb without frag_list
* 允许小于64K的数据不用分段,即不使用frag_list
*/
#define MAX_SKB_FRAGS (65536 / PAGE_SIZE + 2 )
/* skb_frag_t is the element type of skb_shared_info.frags[]. */
typedef struct skb_frag_struct skb_frag_t ;
/*
 * struct skb_frag_struct - a page pointer together with an offset and a
 * size.
 */
struct skb_frag_struct {
struct page *page ; /* the page's virtual address can be obtained with page_address() */
/*
 * 32-bit offset/size when BITS_PER_LONG > 32 or PAGE_SIZE >= 65536,
 * 16-bit fields otherwise.
 */
#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
__u32 page_offset ;
__u32 size;
#else
__u16 page_offset ;
__u16 size ;
#endif
};
注意:分段和分页是两个不同的概念。
分页,即使用非线性数据区,非线性区的含义是包含在sk_buff中的数据长度超过了线性数据区
所能容纳的界限(一般为一页)。包含在非线性数据区中的数据是sk_buff结构中end域所指数据
的连续,全部数据的总长度包含在线性和非线性数据区中。
sk_buff数据的总长度存储在len域,非线性数据的长度存储在sk_buff的data_len域。
分页的实现:
在skb_shared_info中,skb_frag_t frags[MAX_SKB_FRAGS]
通过分页,使得一个sk_buff最多能存:64K的数据(非线性区)+ 一页数据(线性区)。
当DMA支持物理分散页的分散-聚集操作时,才有可能存在分页数据区。如果支持,就为线性数据区
分配一页的数据,其他数据则保存在分页数据区中,随后数据的每个sk_buff分段都会分配一页的数据。
如果不支持,就尝试在线性数据区为整个sk_buff数据分配连续的物理内存。
分段,主要指IP分段的实现。当一个数据报过大时,需要分为多个。即一个sk_buff分为多个
sk_buff,这些sk_buff形成一个链表。
分段的实现:
在skb_shared_info中,struct sk_buff *frag_list
通过frag_list可以遍历分段列表。
======================================================================================================
sk_buff的操作
1. alloc_skb
-
static inline struct sk_buff *alloc_skb( unsigned int size ,
-
gfp_t priority)
-
{
-
return __alloc_skb(size , priority , 0 , NUMA_NO_NODE) ;
-
}
/**
 * alloc_skb - allocate a socket buffer
 * @size: size argument handed to __alloc_skb()
 * @priority: allocation mask handed to __alloc_skb()
 *
 * Thin wrapper around __alloc_skb() that passes 0 as the third argument
 * and NUMA_NO_NODE as the node. Returns whatever __alloc_skb() returns.
 */
static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority)
{
	/* The kernel constant is NUMA_NO_NODE; "NUMA_NO_NONE" does not exist. */
	return __alloc_skb(size, priority, 0, NUMA_NO_NODE);
}
size是数据包的大小。
The returned buffer has no headroom and a tail room of size bytes.
2. skb_reserve
用来为协议头预留空间。拓展head room。
-
-
-
-
-
-
-
-
-
-
static inline void skb_reserve( struct sk_buff *skb , int len )
-
{
-
skb->data += len ;
-
skb->tail += len ;
-
}
/**
 * skb_reserve - adjust headroom
 * @skb: buffer to alter
 * @len: number of bytes to move the markers by
 *
 * Advances both the data and the tail marker of @skb by @len, so the
 * headroom grows while the tailroom shrinks by the same amount. This is
 * only allowed for an empty buffer.
 */
static inline void skb_reserve(struct sk_buff *skb, int len)
{
	skb->tail += len;
	skb->data += len;
}
此时,head room 大小为len,data room 大小0,tail room大小为原长 - len。
当构造一个报文时,要为协议头预留最大可能的空间。
如,MAX_TCP_HEADER = 最大TCP头长度 + 最大IP头长度 + LL_MAX_HEADER(内核中定义为 128 + MAX_HEADER)
3. skb_put
用来拓展data room。当要向data room增加数据时,先增加data room的可使用空间。
-
-
-
-
-
-
-
-
-
-
-
unsigned char *skb_put( struct sk_buff *skb , unsigned int len )
-
{
-
unsigned char *tmp = skb_tail_pointer(skb) ;
-
-
SKB_LINEAR_ASSERT(skb) ;
-
skb->tail += len ;
-
skb->len += len ;
-
if (unlikely(skb->tail > skb->end ))
-
skb_over_panic(skb , len , __builtin_return_address(0)) ;
-
return tmp ;
-
}
/**
 * skb_put - append room for data at the tail of a buffer
 * @skb: buffer to use
 * @len: number of bytes to add
 *
 * Grows the used data area of @skb by @len bytes at its tail and returns
 * a pointer to the first of the newly added bytes. If the tail would move
 * past the end of the buffer, skb_over_panic() is called.
 */
unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
{
	unsigned char *old_tail = skb_tail_pointer(skb);

	/* Reports a bug if a non-linear area exists, i.e. data_len > 0. */
	SKB_LINEAR_ASSERT(skb);

	skb->len += len;
	skb->tail += len;
	if (unlikely(skb->tail > skb->end)) {
		skb_over_panic(skb, len, __builtin_return_address(0));
	}

	return old_tail;
}
4. skb_push
用来拓展data room。和skb_put不同的是,它不是向tail room扩展,而是向head room扩展。
-
-
-
-
-
-
-
-
-
-
-
unsigned char *skb_push( struct sk_buff *skb , unsigned int len )
-
{
-
skb->data -= len ;
-
skb->len += len ;
-
if ( unlikely(skb->data < skb->head ) )
-
skb_under_panic(skb , len , __builtin_return_address(0)) ;
-
return skb->data ;
-
}
/**
 * skb_push - prepend room for data at the head of a buffer
 * @skb: buffer to use
 * @len: number of bytes to add
 *
 * Grows the used data area of @skb by @len bytes at its start by moving
 * the data marker backwards, and returns the new start of the data. If
 * the data marker would fall before the head of the buffer,
 * skb_under_panic() is called.
 */
unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
{
	skb->len += len;
	skb->data -= len;
	if (unlikely(skb->head > skb->data)) {
		skb_under_panic(skb, len, __builtin_return_address(0));
	}

	return skb->data;
}
注意:
发送报文一般要调用alloc_skb、skb_reserve、skb_put、skb_push。
发送报文时,在不同协议层处理数据时,该数据要添加相应的协议头。
因此,最高层添加数据和自身的协议头。alloc_skb用来申请一个sk_buff。
skb_reserve用来创建头空间。skb_put用来创建用户数据空间,用户数据复制到skb->data
指向的数据区。接下来是在用户数据的前面加上协议头,使用skb_push。
5. skb_pull
在报文到达时访问协议头,接收报文时调用。使head room向data room扩展。
-
-
-
-
-
-
-
-
-
-
-
unsigned char *skb_pull( struct sk_buff *skb , unsigned int len )
-
{
-
return skb_pull_inline(skb , len ) ;
-
}
-
-
static inline unsigned char *skb_pull_inline(struct sk_buff *skb , unsigned int len)
-
{
-
return unlikely(len > skb->len ) ? NULL : __skb_pull(skb , len) ;
-
}
-
-
static inline unsigned char *__skb_pull(struct sk_buff *skb , unsigned int len)
-
{
-
skb->len -= len ;
-
BUG_ON(skb->len < skb->data_len ) ;
-
return skb->data += len ;
-
}
/*
 * __skb_pull - unchecked removal of @len bytes from the start of @skb.
 *
 * Shrinks skb->len by @len, triggers BUG_ON() if the remaining length is
 * smaller than skb->data_len, then advances skb->data by @len and returns
 * the new skb->data.
 *
 * Defined ahead of its callers so that every helper is declared before
 * it is used.
 */
static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
{
	skb->len -= len;
	BUG_ON(skb->len < skb->data_len);
	skb->data += len;
	return skb->data;
}

/*
 * skb_pull_inline - checked variant of __skb_pull().
 *
 * Returns NULL and leaves @skb untouched when @len is larger than
 * skb->len; otherwise returns the result of __skb_pull().
 */
static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int len)
{
	if (unlikely(len > skb->len))
		return NULL;

	return __skb_pull(skb, len);
}

/**
 * skb_pull - remove data from the start of a buffer
 * @skb: buffer to use
 * @len: amount of data to remove
 *
 * This function removes data from the start of a buffer, returning the
 * memory to the headroom. A pointer to the next data in the buffer is
 * returned, or NULL when @len exceeds the buffer length. Once the data
 * has been pulled, future pushes will overwrite the old data.
 */
unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
{
	return skb_pull_inline(skb, len);
}
====================================================================================================
-
# ifdef NET_SKBUFF_DATA_USES_OFFSET
-
static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
-
{
-
return skb->head + skb->transport_header ;
-
}
-
-
static inline void skb_reset_transport_header(struct sk_buff *skb)
-
{
-
skb->transport_header = skb->data - skb->head ;
-
}
-
# else
-
-
static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
-
{
-
return skb->transport_header ;
-
}
-
-
static inline void skb_reset_transport_header(struct sk_buff *skb)
-
{
-
skb->transport_header = skb->data ;
-
}
-
# endif
-
static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
-
{
-
return (struct tcphdr *) skb_transport_header(skb) ;
-
}
# ifdef NET_SKBUFF_DATA_USES_OFFSET
static inline unsigned char *skb_transport_header(const struct sk_buff skb)
{
return skb->head + skb->transport_header ;
}
static inline void skb_reset_transport_header(struct sk_buff *skb)
{
skb->transport_header = skb->data - skb->head ;
}
# else
static inline unsigned char *skb_transport_header(const struct sk_buff skb)
{
return skb->transport_header ;
}
static inline void skb_reset_transport_header(struct sk_buff *skb)
{
skb->transport_header = skb->data ;
}
static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
{
return (struct tcphdr *) skb_transport_header(skb) ;
}
sk_buff中tcp协议头的表示:
sk_buff_data_t transport_header ;
用函数tcp_hdr(skb)来获取。
当tcp协议头地址有变化时,用skb_reset_transport_header(skb)来更新transport_header。
===============================================================================================
向下遍历协议层(即发送数据包)时,构建协议头
1. 添加TCP头
TCP调用tcp_transmit_skb()来为TCP数据段构建一个TCP头。
首先计算TCP头的长度,要考虑当前TCP连接所使用的选项。一旦完成该操作,就需要调用
skb_push()来为TCP头分配空间。
-
-
-
-
-
-
-
-
-
-
-
static int tcp_transmit_skb(struct sock *sk , struct sk_buff *skb , int clone_it ,
-
gfp_t gfp_mask)
-
{
-
...
-
struct inet_sock *inet = inet_sk(sk) ;
-
unsigned tcp_option_size, tcp_header_size ;
-
struct tcphdr *th ;
-
...
-
tcp_header_size = tcp_option_size + sizeof(struct tcphdr) ;
-
...
-
skb_push(skb , tcp_header_size) ;
-
skb_reset_transport_header(skb) ;
-
...
-
-
th = tcp_hdr(skb) ;
-
th->source = inet->inet_sport ;
-
th->dest = inet->inet_dport ;
-
...
-
}
/* This routine actually transmit TCP packets queued in by tcp_do_sendmsg().
* This is used by both the initial transmission and possible later retransmissions.
* All SKB's seen here are completely headerless. It is our job to build the TCP
* header, and pass the packet down to IP so it can do the same plus pass the
* packet off to the device.
*
* We are working here with either a clone of the original SKB, or a fresh unique
* copy made by the retransmit engine.
*/
static int tcp_transmit_skb(struct sock *sk , struct sk_buff *skb , int clone_it ,
gfp_t gfp_mask)
{
...
struct inet_sock *inet = inet_sk(sk) ;
unsigned tcp_option_size, tcp_header_size ;
struct tcphdr *th ;
...
tcp_header_size = tcp_option_size + sizeof(struct tcphdr) ;
...
skb_push(skb , tcp_header_size) ;
skb_reset_transport_header(skb) ;
...
/* Build TCP header and checksum it. */
th = tcp_hdr(skb) ;
th->source = inet->inet_sport ;
th->dest = inet->inet_dport ;
...
}
2. 添加IP头
ip_build_and_send_pkt()构造报文的IP头,并发送给链路层。
-
-
-
-
int ip_build_and_send_pkt(struct sk_buff *skb , struct sock *sk ,
-
__be32 saddr , __be32 daddr , struct ip_options *opt)
-
{
-
struct inet_sock *inet = inet_sk(sk) ;
-
...
-
struct iphdr *iph ;
-
-
skb_push(skb , sizeof(struct iphdr) + (opt ? opt->optlen : 0) ) ;
-
skb_reset_network_header(skb) ;
-
iph = ip_hdr(skb) ;
-
iph->version = 4 ;
-
iph->ihl = 5 ;
-
iph->tos = inet->tos ;
-
...
-
}
/*
* Add an ip header to a sk_buff and send it out.
*/
int ip_build_and_send_pkt(struct sk_buff *skb , struct sock *sk ,
__be32 saddr , __be32 daddr , struct ip_options *opt)
{
struct inet_sock *inet = inet_sk(sk) ;
...
struct iphdr *iph ;
/* Build the IP header. */
skb_push(skb , sizeof(struct iphdr) + (opt ? opt->optlen : 0) ) ;
skb_reset_network_header(skb) ;
iph = ip_hdr(skb) ;
iph->version = 4 ;
iph->ihl = 5 ;
iph->tos = inet->tos ;
...
}
3. 添加链路层头
eth_header构造以太网帧协议头。
-
#define ETH_HLEN 14
-
-
-
-
-
-
-
-
-
-
-
-
-
int eth_header(struct sk_buff *skb , struct net_device *dev ,
-
unsigned short type , const void *daddr , const void *saddr,
-
unsigned len)
-
{
-
struct ethhdr *eth = (struct ethhdr *) skb_push(skb , ETH_HLEN) ;
-
...
-
}
#define ETH_HLEN 14
/**
* eth_header - create the Ethernet header
* @skb : buffer to alter
* @dev : source device
* @type : Ethernet type field
* @daddr : destination address
* @saddr : source address
* @len : packet length (<= skb->len)
*
* Set the protocol type. For a packet of type ETH_P_802_3/2 we put
* the length in here instead.
*/
int eth_header(struct sk_buff *skb , struct net_device *dev ,
unsigned short type , const void *daddr , const void *saddr,
unsigned len)
{
struct ethhdr *eth = (struct ethhdr *) skb_push(skb , ETH_HLEN) ;
...
}
=======================================================================================================
向上遍历协议层(接收数据包)时,解析协议头
1. 解析以太网头
当新报文到达时,要为新报文分配一个新的sk_buff,其大小等于报文的长度。sk_buff
的data域指向报文的起始位置(以太网头)。使用skb_pull来提取不同的协议层头。
该例程在sk_buff到IP backlog队列排队之前完成。
-
-
-
-
-
-
-
-
-
-
__be16 eth_type_trans(struct sk_buff *skb , struct net_device *dev )
-
{
-
struct ethhdr *eth ;
-
skb->dev = dev ;
-
skb_reset_mac_header(skb) ;
-
skb_pull_inline(skb , ETH_HLEN) ;
-
eth = eth_hdr(skb) ;
-
...
-
}
/**
* eth_type_trans - determine the packet's protocol ID.
* @skb : received socket data
* @dev : receiving network device
*
* The rule here is that we
* assume 802.3 if the type field is short enough to be a length.
* This is normal practice and works for any 'now in use' protocol.
*/
__be16 eth_type_trans(struct sk_buff *skb , struct net_device *dev )
{
struct ethhdr *eth ;
skb->dev = dev ;
skb_reset_mac_header(skb) ; /* 更新mac_header */
skb_pull_inline(skb , ETH_HLEN) ; /* 此后data指向IP头 */
eth = eth_hdr(skb) ;
...
}
2. 解析IP头
现在sk_buff处于IP backlog队列中,由netif_receive_skb()负责处理,该函数将sk_buff
从backlog队列中取出。
netif_receive_skb() 是接收数据包的主要处理函数。
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
int netif_receive_skb(struct sk_buff *skb)
/**
* netif_receive_skb - process receive buffer from network
* @skb : buffer to process
* netif_receive_skb() is the main receive data processing function.
* It always succeeds. The buffer may be dropped during processing
* for congestion control or by the protocol layers.
*
* This function may only be called from softirq context and interrupts
* should be enabled.
*
* Return values (usually ignored) :
* NET_RX_SUCCESS : no congestion
* NET_RX_DROP : packet was dropped
*/
int netif_receive_skb(struct sk_buff *skb)
3. 解析tcp头
网络层处理完报文,在将data指针指向传输层起始位置,并更新transport_header后,
将报文递交给传输层,这些工作由ip_local_deliver_finish()来完成。
-
static int ip_local_deliver_finish(struct sk_buff *skb)
-
{
-
...
-
__skb_pull(skb , ip_hdrlen(skb)) ;
-
skb_reset_transport_header(skb) ;
-
...
-
}
-
-
static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
-
{
-
return ip_hdr(skb)->ihl * 4 ;
-
}
static int ip_local_deliver_finish(struct sk_buff *skb)
{
...
__skb_pull(skb , ip_hdrlen(skb)) ;
skb_reset_transport_header(skb) ;
...
}
static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
{
return ip_hdr(skb)->ihl * 4 ;
}
传输层调用tcp_v4_do_rcv()处理传输层头报文。如果连接已建立,并且TCP报文中有数据,
就调用skb_copy_datagram_iovec()将从skb->data偏移tcp_header_len开始的数据复制给
用户应用程序。如果由于某些原因不能复制数据给用户应用程序,就将sk_buff的data指针
向前移动tcp_header_len,再将其发往套接字的接收队列排队。