Chinaunix首页 | 论坛 | 博客
  • 博客访问: 136196
  • 博文数量: 38
  • 博客积分: 2510
  • 博客等级: 少校
  • 技术积分: 376
  • 用 户 组: 普通用户
  • 注册时间: 2010-05-07 22:44
文章分类
文章存档

2010年(38)

我的朋友

分类: LINUX

2010-06-01 23:36:15

首先了解网桥处理包遵循着以下几条原则:
1 在一个接口上接收到的包不会再在那个接口上发送这个数据包。
2 每个接收到的数据包都要学习其源MAC 地址。
3 如果数据包是多播包或广播包,则要在同一个网段中除了接收端口外的其他所有端口
发送这个数据包,如果上层协议栈对多播包感兴趣,则需要把数据包提交给上层协议栈。
4 如果数据包的目的MAC 地址不能在CAM 表中找到,则要在同一个网段中除了接收端
口外的其他所有端口发送这个数据包。
5 如果能够在CAM 表中查询到目的MAC 地址,则在特定的端口上发送这个数据包,如
果发送端口和接收端口是同一端口,则不发送。
网桥以混杂方式工作,它接收与之连接的所有LAN 传送的每一帧。当一帧到达时,网
桥必须决定将其丢弃还是转发。如果要转发,则必须决定发往哪个LAN。
这需要通过查询网桥中一张大型散列表里的目的地址而作出决定。该表可列出每个可能
的目的地,以及它属于哪一条输出线路(LAN)。在插入网桥之初,所有的散列表均为空。由
于网桥不知道任何目的地的位置,因而采用扩散算法(flooding algorithm):把每个到来的、
目的地不明的帧输出到连在此网桥的所有LAN 中(除了发送该帧的LAN)。随着时间的推移,
网桥将了解每个目的地的位置。一旦知道了目的地位置,发往该处的帧就只放到适当的LAN
上,而不再散发。
网桥采用的算法是逆向学习法(backward learning)。网桥按混杂的方式工作,故它能看见
所连接的任一LAN 上传送的帧。查看源地址即可知道在哪个LAN 上可访问哪台机器,于是
在散列表中添上一项。
建立一个逻辑网段之后,我们还需要为这个网段分配特定的端口。在Linux 中,一个端
口实际上就是一个物理网卡。网桥的每个物理网卡作为一个端口,运行于混杂模式,而且
是在链路层工作,所以就不需要IP 了。
在Linux 内核网桥的实现中,一个逻辑网段用net_bridge 结构体表示。

一个逻辑网段需要保留的信息有:
net_bridge 结构
struct net_bridge
{
spinlock_t lock;
struct list_head port_list;//网桥内的端口列表(即物理网卡列表)的表头

//一个逻辑网段中可以具有很多个端口,所有的端口都挂在以port_list 为链表头的链表上。

//list_head 在linux/list.h 中定义,是一个简单的双向链表的数据结构

struct net_device *dev;// //网桥都会有一个虚拟设备用来进行管理,就是它了。

即操作中的br0
struct net_device_stats statistics;// 本逻辑网段虚拟网卡的统计数据

//按照Linux 网卡驱动的接口,一个网卡的统计信息是由每个网卡的私有数据处理的。一般

的写法是用dev‐>priv 来指向每个网卡的统计数据。网卡的get_stats 方法就是用来读取统计
数据。
spinlock_t hash_lock;// //hash 表的读写锁,这个表就是用于存放桥的

MAC‐PORT 对应表
struct hlist_head hash[BR_HASH_SIZE];// 本网段中CAM 表

(hash[BR_HASH_SIZE]
//CAM 表中的每个项用net_bridge_fdb_entry 结构体代表

//一个逻辑网段中的所有表项形成一个CAM 表,他们之间的组织关系是一个HASH 链

表。HASH 链的个数为BR_HASH_SIZE(256)。
struct list_head age_list; // //网桥链表

unsigned long feature_mask;
/* STP *///一个网段的生成树(STP)信息

bridge_id designated_root;
bridge_id bridge_id;
u32 root_path_cost;
unsigned long max_age;
unsigned long hello_time;
unsigned long forward_delay;
unsigned long bridge_max_age;
unsigned long ageing_time;
unsigned long bridge_hello_time;
unsigned long bridge_forward_delay;
//////STP Info End/////////////////////////////////////////////////////////////////

u8 group_addr[ETH_ALEN];
u16 root_port;
enum {
BR_NO_STP, /* no spanning tree */
BR_KERNEL_STP, /* old STP in kernel */
BR_USER_STP, /* new RSTP in userspace */
} stp_enabled;
unsigned char topology_change;
unsigned char topology_change_detected;
struct timer_list hello_timer;
struct timer_list tcn_timer;
struct timer_list topology_change_timer;
struct timer_list gc_timer;
struct kobject ifobj;
};
net_bridge_port 结构
接收该数据包的网桥的端口的相关信息.
这个结构对应了内核缓存中的skb->dev->br_port;
struct net_bridge_port
{
struct net_bridge *br;//本端口所属的逻辑网段(br) //当前端口(接收数据包这个)

所在的桥组
struct net_device *dev;//本端口所指向的物理网卡(dev)

struct list_head list;// //网桥端口组中的下一个端口

/* STP */用于生成树管理的信息
u8 priority;
u8 state; //端口所在的网桥的状态

u16 port_no;// 本端口在网桥中的编号(port_no)

unsigned char topology_change_ack;
unsigned char config_pending;
port_id port_id;
port_id designated_port;
bridge_id designated_root;
bridge_id designated_bridge;
u32 path_cost;
u32 designated_cost;
struct timer_list forward_delay_timer;
struct timer_list hold_timer;
struct timer_list message_age_timer;
struct kobject kobj;
struct rcu_head rcu;
};
net_bridge_fdb_entry 结构
/*
 * One entry of a bridge's CAM (forwarding) table: maps a MAC address to the
 * port behind which that address lives.
 */
struct net_bridge_fdb_entry
{
/* presumably links the entry into one of net_bridge's hash[] chains */
struct hlist_node hlist;
struct net_bridge_port *dst;// port this entry points to

struct rcu_head rcu;
atomic_t use_count; // current reference count of this entry

unsigned long ageing_timer;// used for MAC address ageing (timeout)

mac_addr addr; // the MAC address itself

unsigned char is_local;// non-zero if this is one of the host's own MAC addresses

unsigned char is_static;// non-zero if this is a static MAC entry

};
事实上,整个桥的处理,就是一个根据源地址学习(写表),根据目的地址确定发送端
口(查表)的过程。当然,还有一个插曲,如处理发给本机的数据,需要交到上层,如果开
启了STP,还要处理STP 协议
net_device 结构
本逻辑网段用于和外部通信的虚拟网络设备(dev)
Linux 网桥可以在网桥上为每个逻辑网段配置一个IP,用于和外部通信。实际上这个IP
不是配置在一个特定的物理网卡上面, 而是建立一个虚拟的网卡,虚拟网卡可以附在每个
同一逻辑网段的物理网卡上,让这个网卡可以象所有的物理网卡一样工作。从而使网桥可以
和外部通信。(因为在物理网卡的中断处理程序中,如果定义了网桥或网桥模块,则克隆skb,
然后进行处理。)
代码结构
网桥代码分为两个部分,一部分是内核部分的模块,一个是用户空间的配置工具。
用户空间的配置工具实际上是对内核模块的调用。
用户空间的配置工具核心命令流程
brctl addbr br0 / br_add_bridge 函数
brctl.c ==> brctl_cmd.c ==> libbridge_if.c(br_add_bridge)
/*
 * Ask the kernel bridge module to create a bridge named @brname.
 *
 * The new-style SIOCBRADDBR ioctl is tried first (when the headers define
 * it); if it fails, or is unavailable, the old multiplexed SIOCSIFBR ioctl
 * with the BRCTL_ADD_BRIDGE sub-command is used.  Both requests are served by
 * the module's br_ioctl_deviceless_stub().
 *
 * br_socket_fd is the handle to the bridge module, defined in
 * libbridge_init.c.
 *
 * Returns 0 on success, otherwise the errno value left by the failing ioctl.
 */
int br_add_bridge(const char *brname)
{
	int ret;

#ifdef SIOCBRADDBR
	ret = ioctl(br_socket_fd, SIOCBRADDBR, brname);
	if (ret < 0)
#endif
	{
		char _br[IFNAMSIZ];
		unsigned long arg[3]
			= { BRCTL_ADD_BRIDGE, (unsigned long) _br };

		strncpy(_br, brname, IFNAMSIZ);
		/*
		 * strncpy() does not terminate the destination when the source
		 * is IFNAMSIZ characters or longer; terminate explicitly so the
		 * buffer handed to the kernel is always a valid C string.
		 */
		_br[IFNAMSIZ - 1] = '\0';
		ret = ioctl(br_socket_fd, SIOCSIFBR, arg);
	}

	return ret < 0 ? errno : 0;
}
Bridge 模块定义函数(br.c)
网桥模块的创建与清理功能。
/* br_init is the bridge module's initialisation routine. */
module_init(br_init)
/* br_deinit is the bridge module's cleanup routine. */
module_exit(br_deinit)
MODULE_LICENSE("GPL");
MODULE_VERSION(BR_VERSION);
模块初始化(br_init)(br.c)
• static int __init br_init(void)
  - br_stp_sap = llc_sap_open(LLC_SAP_BSPAN, br_stp_rcv);
  - err = br_fdb_init(); /* br_fdb.c */
/*
 * Set up the forwarding-database support: create the slab cache from which
 * net_bridge_fdb_entry objects are allocated and seed fdb_salt with random
 * bytes.
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created.
 */
int __init br_fdb_init(void)
{
	br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
					 sizeof(struct net_bridge_fdb_entry),
					 0,
					 SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!br_fdb_cache)
		return -ENOMEM;	/* was a U+2010 hyphen, not an ASCII minus */

	get_random_bytes(&fdb_salt, sizeof(fdb_salt));
	return 0;
}
  - err = br_netfilter_init();
  - err = register_netdevice_notifier(&br_device_notifier);
  - err = br_netlink_init();
  - brioctl_set(br_ioctl_deviceless_stub);
  - br_handle_frame_hook = br_handle_frame; /*网桥处理函数*/ /* br_input.c */
  - br_fdb_get_hook = br_fdb_get;
  - br_fdb_put_hook = br_fdb_put;
模块的清理(br_deinit)(br.c)
• static void __exit br_deinit(void)
模块控制函数(br_ioctl_deviceless_stub)(br_ioctl.c)
/*
 * Entry point for bridge ioctls that are not tied to an existing device.
 *
 * SIOCGIFBR / SIOCSIFBR are handed to old_deviceless().  SIOCBRADDBR /
 * SIOCBRDELBR require CAP_NET_ADMIN, copy an interface name of IFNAMSIZ bytes
 * from user space, force NUL termination, then add or delete the bridge of
 * that name.
 *
 * Returns the handler's result, -EPERM without the capability, -EFAULT if
 * the name cannot be copied, -EOPNOTSUPP for any other command.
 */
int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
{
	char ifname[IFNAMSIZ];

	if (cmd == SIOCGIFBR || cmd == SIOCSIFBR)
		return old_deviceless(uarg);

	if (cmd != SIOCBRADDBR && cmd != SIOCBRDELBR)
		return -EOPNOTSUPP;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (copy_from_user(ifname, uarg, IFNAMSIZ))
		return -EFAULT;

	/* user space may hand over an unterminated name */
	ifname[IFNAMSIZ-1] = 0;

	return (cmd == SIOCBRADDBR) ? br_add_bridge(ifname)
				    : br_del_bridge(ifname);
}
br_add_bridge 添加网桥的功能函数(br_if.c)
/*
 * Create and register a new bridge device called @name.
 *
 * A '%' in the name is treated as a format template and resolved with
 * dev_alloc_name().  The device is then registered as a network device and
 * its sysfs entries are added, all under the RTNL lock.
 *
 * Returns 0 on success, -ENOMEM if the device cannot be allocated, otherwise
 * the error from the failing step.
 *
 * Fix: the device is now freed when register_netdevice() fails as well.
 * Previously only the dev_alloc_name() failure path called free_netdev(), so
 * a failed registration leaked the net_device.
 */
int br_add_bridge(const char *name)
{
	struct net_device *dev;
	int ret;

	dev = new_bridge_dev(name);
	if (!dev)
		return -ENOMEM;

	rtnl_lock();
	if (strchr(dev->name, '%')) {
		ret = dev_alloc_name(dev, dev->name);
		if (ret < 0)
			goto out_free;
	}

	/* register the network device */
	ret = register_netdevice(dev);
	if (ret)
		goto out_free;

	ret = br_sysfs_addbr(dev);
	if (ret)
		/* NOTE(review): the device is presumably released by the unregister path - confirm */
		unregister_netdevice(dev);
out:
	rtnl_unlock();
	return ret;

out_free:
	/* never registered, so it has to be released by hand */
	free_netdev(dev);
	goto out;
}
new_bridge_dev(br_if.c)主要是设置网桥的参数
网桥依附于一个网络设备中。网桥数据作为这个虚拟网络设备的私有数据而存在。
/*
 * Allocate a net_device whose private area holds a struct net_bridge and
 * fill in the bridge's default parameters.  The bridge data lives as the
 * private data of the virtual device.
 *
 * Returns the new device, or NULL if allocation fails.  The device is not
 * registered here.
 */
static struct net_device *new_bridge_dev(const char *name)
{
	struct net_device *dev = alloc_netdev(sizeof(struct net_bridge), name,
					      br_dev_setup);
	struct net_bridge *br;

	if (!dev)
		return NULL;

	br = netdev_priv(dev);
	br->dev = dev;

	/* locks and port list */
	spin_lock_init(&br->lock);
	INIT_LIST_HEAD(&br->port_list);
	spin_lock_init(&br->hash_lock);

	/* bridge priority bytes 0x80, 0x00 */
	br->bridge_id.prio[0] = 0x80;
	br->bridge_id.prio[1] = 0x00;

	memcpy(br->group_addr, br_group_address, ETH_ALEN);

	br->feature_mask = dev->features;
	br->stp_enabled = BR_NO_STP;

	/* a fresh bridge names itself as root, at zero cost, with no root port */
	br->designated_root = br->bridge_id;
	br->root_path_cost = 0;
	br->root_port = 0;

	/* timer defaults, in jiffies; current and configured values start equal */
	br->max_age = 20 * HZ;
	br->bridge_max_age = br->max_age;
	br->hello_time = 2 * HZ;
	br->bridge_hello_time = br->hello_time;
	br->forward_delay = 15 * HZ;
	br->bridge_forward_delay = br->forward_delay;

	br->topology_change = 0;
	br->topology_change_detected = 0;
	br->ageing_time = 300 * HZ;
	INIT_LIST_HEAD(&br->age_list);

	br_stp_timer_init(br);

	return dev;
}

单元流程
调用入口netif_receive_skb/handle_bridge(dev.c)
在src/net/core/dev.c 的int netif_receive_skb(struct sk_buff *skb)中:
//现在看到的是有数据进入的网卡,这个网卡可能是网桥的其中一个端口(网卡),也可能不
是网桥的其中的一个端口(网卡),当这个网卡的中断处理函数处理,并将接收到的网络包
向链路层传送的时候,就会执行netif_receive_skb,网桥就在这里触发。

netif_receive_skb (dev.c)
skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);//

if (!skb)//如果skb==NULL 就goto out;

goto out;
/*
* Called via br_handle_frame_hook.
* Return NULL if skb is handled
* note: already called with rcu_read_lock (preempt_disabled)
*/

/*//通过查看系统中是否配置了网桥或网桥模块,来执行不同的handle_bridge 函数。
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
br_handle_frame_hook 在 net/core/dev.c 中调用。
如果没有定义网桥或网桥模块,handle_bridge 是个空函数,不做任何处理。
如果定义了网桥或网桥模块,则定义三个HOOK 函数。
br_fdb_get_hook 函数
br_fdb_put_hook 函数
br_handle_frame_hook 函数
*/
// Prototypes of the three hook pointers. br_init() (br.c) points them at
// br_fdb_get, br_fdb_put and br_handle_frame respectively.

/* Hook: look up the forwarding-table entry for addr on bridge br. */
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,unsigned char *addr);
/* Hook: release an entry obtained through br_fdb_get_hook. */
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
/*
 * If bridge module is loaded call bridging hook.
 * returns NULL if packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
struct sk_buff *skb) __read_mostly;
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
struct net_bridge_port *port;
////如果是回环数据或者skb->dev->br_port :接收该数据包的端口是网桥端口组的

一员,如果接收当前数据包的接口不是网桥的某一物理端口,则其值
不做任何修改就返回。
if (skb‐>pkt_type == PACKET_LOOPBACK ||
(port = rcu_dereference(skb‐>dev‐>br_port)) == NULL)//如果这个设备的br_port 为

空,说明它不属于网桥
return skb;
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
//定义了网桥处理函数这段代码将数据包进行转向,转向的后的处理函数是钩子函数

br_handle_frame_hook
//br_handle_frame_hook 是网桥处理的主函数HOOK。

return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
#endif
*/
网络包在网桥中的处理
br_handle_frame(br_input.c)
/*
* Called via br_handle_frame_hook.
* Return NULL if skb is handled
* note: already called with rcu_read_lock (preempt_disabled)
*/

//p 是接收该数据包的网桥的端口

struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
{
const unsigned char *dest = eth_hdr(skb)->h_dest;
// //源地址不能使广播地址或者0 地址

if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
goto drop;
// //见后面分析:更新cam 表

if (unlikely(is_link_local(dest))) {
/* Pause frames shouldn't be passed up by driver anyway */
if (skb->protocol == htons(ETH_P_PAUSE))
goto drop;
/* Process STP BPDU's through normal netif_receive_skb() path */
if (p->br->stp_enabled != BR_NO_STP) {
if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
NULL, br_handle_local_finish))
return NULL;
else
return skb;
}
}
//察看端口所在的网桥的状态

// /*众所周之,网桥之所以是网桥,比HUB 更智能,是因为它有一个MAC-PORT 的

表,
//这样转发数据就不用广播,而查表定端口就可以了.每次收到一个包,网桥都会学习其

来源MAC,
//添加进这个表。Linux 中这个表叫CAM 表(这个名字是其它资料上看的)。

//如果桥的状态是LEARNING 或FORWARDING(学习或转发),

//则学习该包的源地址skb->mac.ethernet->h_source, 将其添加到 CAM 表中*/

switch (p->state) {
case BR_STATE_FORWARDING:
if (br_should_route_hook) {
if (br_should_route_hook(&skb))
return skb;
dest = eth_hdr(skb)->h_dest;
}
/* fall through */
case BR_STATE_LEARNING://学习源MAC 地址

if (!compare_ether_addr(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;
NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
br_handle_frame_finish);
break;
default:
drop:
kfree_skb(skb);
}
return NULL;
}
br_handle_frame_finish(br_input.c)
/*
 * Learn the source address of @skb, then deliver it locally, forward it to
 * one port, or flood it.
 *
 * note: already called with rcu_read_lock (preempt_disabled)
 *
 * Always returns 0; the skb is consumed on every path.
 *
 * Fix: wrapped comment text that sat outside comment syntax has been folded
 * back into comments.
 */
int br_handle_frame_finish(struct sk_buff *skb)
{
	const unsigned char *dest = eth_hdr(skb)->h_dest;
	struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
	struct net_bridge *br;
	struct net_bridge_fdb_entry *dst;
	int passedup = 0;	/* set once a copy has been passed up the stack */

	/* drop if the receiving device has no bridge port or the port is disabled */
	if (!p || p->state == BR_STATE_DISABLED)
		goto drop;

	/* insert into forwarding database after filtering to avoid spoofing */
	br = p->br;		/* bridge the receiving port belongs to */
	br_fdb_update(br, p, eth_hdr(skb)->h_source);	/* update the CAM table */

	/* a LEARNING port learns addresses but does not forward */
	if (p->state == BR_STATE_LEARNING)
		goto drop;

	/*
	 * If the bridge's virtual device is in promiscuous mode, every
	 * received frame is cloned and the clone is passed up.
	 */
	if (br->dev->flags & IFF_PROMISC) {
		struct sk_buff *skb2;

		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (skb2 != NULL) {
			passedup = 1;
			br_pass_frame_up(br, skb2);
		}
	}

	/*
	 * Broadcast/multicast destination: flood to the ports and also give
	 * the frame to the local stack, unless a copy already went up.  When
	 * nothing went up yet, the flood works on a clone (third argument
	 * non-zero) so the original skb is still available to pass up.
	 */
	if (is_multicast_ether_addr(dest)) {
		br->statistics.multicast++;
		br_flood_forward(br, skb, !passedup);
		if (!passedup)
			br_pass_frame_up(br, skb);
		goto out;
	}

	/* look the destination up in the CAM table */
	dst = __br_fdb_get(br, dest);

	/* destination found and local: goes to the upper layers */
	if (dst != NULL && dst->is_local) {
		if (!passedup)
			br_pass_frame_up(br, skb);
		else
			kfree_skb(skb);
		goto out;
	}

	/* destination found on a port: forward straight to it */
	if (dst != NULL) {
		br_forward(dst->dst, skb);
		goto out;
	}

	/* not in the table: flood */
	br_flood_forward(br, skb, 0);

out:
	return 0;
drop:
	kfree_skb(skb);
	goto out;
}
br_forward(转发到对应的接口)(br_forward.c)
/*
 * Forward @skb out of port @to if should_deliver() allows it; otherwise the
 * skb is freed.
 *
 * called with rcu_read_lock
 */
void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
{
	/* NOTE(review): should_deliver() is not shown here; the article calls it a netfilter-rule check - confirm */
	if (!should_deliver(to, skb)) {
		kfree_skb(skb);
		return;
	}

	__br_forward(to, skb);
}
__br_forward(br_forward.c)
/*
 * Retarget @skb from the device it arrived on to the device of port @to and
 * run it through the NF_BR_FORWARD hook, continuing in br_forward_finish().
 */
static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
{
	/* remember the ingress device before it is overwritten */
	struct net_device *ingress = skb->dev;

	/* from here on skb->dev is the egress device */
	skb->dev = to->dev;
	skb_forward_csum(skb);

	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, ingress, skb->dev,
		br_forward_finish);
}
br_forward_finish(br_forward.c)
/*
 * Second stage of forwarding: run the skb through the NF_BR_POST_ROUTING
 * hook with skb->dev as the output device, continuing in
 * br_dev_queue_push_xmit().  Returns the value produced by NF_HOOK.
 */
int br_forward_finish(struct sk_buff *skb)
{
return NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
}
br_dev_queue_push_xmit
int br_dev_queue_push_xmit(struct sk_buff *skb)//skb‐dev 表示的是目的设备

{
/* drop mtu oversized packets except gso */如果包的mtu 值大于设备的mtu 值,就抛弃
它。
if (packet_length(skb) > skb‐>dev‐>mtu && !skb_is_gso(skb))
kfree_skb(skb);
else {
/* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
if (nf_bridge_maybe_copy_header(skb))
kfree_skb(skb);
else {
skb_push(skb, ETH_HLEN);
dev_queue_xmit(skb);
}
}
return 0;
}
skb_push(src/include/linux/skbuff.h)
/**
* skb_push ‐ add data to the start of a buffer
* @skb: buffer to use
* @len: amount of data to add
*
* This function extends the used data area of the buffer at the buffer
* start. If this would exceed the total buffer headroom the kernel will
* panic. A pointer to the first byte of the extra data is returned.
*/

static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
{
skb‐>data ‐= len;
skb‐>len += len;
if (unlikely(skb‐>data<skb‐>head))
skb_under_panic(skb, len, current_text_addr());
return skb‐>data;
}
dev_queue_xmit(src/net/core/dev.c)
/**
* dev_queue_xmit ‐ transmit a buffer
* @skb: buffer to transmit
*
* Queue a buffer for transmission to a network device. The caller must
* have set the device and priority and built the buffer before calling
* this function. The function can be called from an interrupt.
*
* A negative errno code is returned on a failure. A success does not
* guarantee the frame will be transmitted as it may be dropped due
* to congestion or traffic shaping.
*
* ‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐
* I notice this method can also return errors from the queue disciplines,
* including NET_XMIT_DROP, which is a positive value. So, errors can also
* be positive.
*
* Regardless of the return value, the skb is consumed, so it is currently
* difficult to retry a send to this method. (You can bump the ref count
* before sending to hold a reference for retry if you are careful.)
*
* When calling this method, interrupts MUST be enabled. This is because
* the BH enable code must have IRQs enabled so that it will not deadlock.
* ‐‐BLG
*/

int dev_queue_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb‐>dev;//包要传送的目的设备

struct Qdisc *q;
int rc = ‐ENOMEM;
/* GSO will handle the following emulations directly. */
if (netif_needs_gso(dev, skb))
goto gso;
if (skb_shinfo(skb)>frag_list &&
!(dev‐>features & NETIF_F_FRAGLIST) &&
__skb_linearize(skb))
goto out_kfree_skb;
/* Fragmented skb is linearized if device does not support SG,
* or if at least one of fragments is in highmem and device
* does not support DMA from it.
*/

if (skb_shinfo(skb)>nr_frags &&
(!(dev‐>features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
__skb_linearize(skb))
goto out_kfree_skb;
/* If packet is not checksummed and device does not support
* checksumming for this protocol, complete checksumming here.
*/

if (skb‐>ip_summed == CHECKSUM_PARTIAL) {
skb_set_transport_header(skb, skb‐>csum_start ‐
skb_headroom(skb));
if (!(dev‐>features & NETIF_F_GEN_CSUM) &&
(!(dev‐>features & NETIF_F_IP_CSUM) ||
skb‐>protocol != htons(ETH_P_IP)))
if (skb_checksum_help(skb))
goto out_kfree_skb;
}
gso:
spin_lock_prefetch(&dev‐>queue_lock);
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/

rcu_read_lock_bh();
/* Updates of qdisc are serialized by queue_lock.
* The struct Qdisc which is pointed to by qdisc is now a
* rcu structure ‐ it may be accessed without acquiring
* a lock (but the structure may be stale.) The freeing of the
* qdisc will be deferred until it's known that there are no
* more references to it.
*
* If the qdisc has an enqueue function, we still need to
* hold the queue_lock before calling it, since queue_lock
* also serializes access to the device queue.
*/

q = rcu_dereference(dev‐>qdisc);
#ifdef CONFIG_NET_CLS_ACT
skb‐>tc_verd = SET_TC_AT(skb‐>tc_verd,AT_EGRESS);
#endif
if (q‐>enqueue) {
/* Grab device queue */
spin_lock(&dev‐>queue_lock);
q = dev‐>qdisc;
if (q‐>enqueue) {
rc = q‐>enqueue(skb, q);
qdisc_run(dev);
spin_unlock(&dev‐>queue_lock);
rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
goto out;
}
spin_unlock(&dev‐>queue_lock);
}
/* The device has no queue. Common case for software devices:
loopback, all the sorts of tunnels...
Really, it is unlikely that netif_tx_lock protection is necessary
here. (f.e. loopback and IP tunnels are clean ignoring statistics
counters.)
However, it is possible, that they rely on protection
made by us here.
Check this and shot the lock. It is not prone from deadlocks.
Either shot noqueue qdisc, it is even simpler 8)
*/

if (dev‐>flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
if (dev‐>xmit_lock_owner != cpu) {
HARD_TX_LOCK(dev, cpu);
if (!netif_queue_stopped(dev)) {
rc = 0;
if (!dev_hard_start_xmit(skb, dev)) {
HARD_TX_UNLOCK(dev);
goto out;
}
}
HARD_TX_UNLOCK(dev);
if (net_ratelimit())
printk(KERN_CRIT "Virtual device %s asks to "
"queue packet!\n", dev‐>name);
} else {
/* Recursion is detected! It is possible,
* unfortunately */

if (net_ratelimit())
printk(KERN_CRIT "Dead loop on virtual device "
"%s, fix it urgently!\n", dev‐>name);
}
}
rc = ‐ENETDOWN;
rcu_read_unlock_bh();
out_kfree_skb:
kfree_skb(skb);
return rc;
out:
rcu_read_unlock_bh();
return rc;
}
dev_hard_start_xmit(设备的发送函数)
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
if (likely(!skb‐>next)) {
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
if (netif_needs_gso(dev, skb)) {
if (unlikely(dev_gso_segment(skb)))
goto out_kfree_skb;
if (skb‐>next)
goto gso;
}
return dev‐>hard_start_xmit(skb, dev);
}
gso:
do {
struct sk_buff *nskb = skb‐>next;
int rc;
skb‐>next = nskb‐>next;
nskb‐>next = NULL;
rc = dev‐>hard_start_xmit(nskb, dev);
if (unlikely(rc)) {
nskb‐>next = skb‐>next;
skb‐>next = nskb;
return rc;
}
if (unlikely(netif_queue_stopped(dev) && skb‐>next))
return NETDEV_TX_BUSY;
} while (skb‐>next);
skb‐>destructor = DEV_GSO_CB(skb)>destructor;
out_kfree_skb:
kfree_skb(skb);
return 0;
}
br_flood_forward(转发到所有其他的接口)(br_forward.c)
/*
 * Flood skb to the ports of bridge br by calling br_flood() with
 * __br_forward as the per-port transmit function. clone is passed through
 * unchanged; when non-zero, br_flood() works on a clone of skb.
 *
 * called under bridge lock
 */
void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone)
{
br_flood(br, skb, clone, __br_forward);
}
__br_forward(br_forward.c)(见前面的源码)
br_flood(br_forward.c)
/*
 * Send @skb to every port of @br for which should_deliver() is true.
 *
 * @clone:         when non-zero, work on a clone and leave the caller's skb
 *                 untouched; if that clone cannot be allocated, tx_dropped
 *                 is bumped and nothing is sent
 * @__packet_hook: per-port transmit function (e.g. __br_forward)
 *
 * Delivery runs one port behind the iteration: each earlier eligible port
 * receives a fresh clone, and the last one receives the skb itself, so no
 * extra copy is made.  If no port is eligible the skb is freed.
 *
 * called under bridge lock
 *
 * Fix: wrapped comment text that sat outside comment syntax has been folded
 * back into comments.
 */
static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone,
	void (*__packet_hook)(const struct net_bridge_port *p,
			      struct sk_buff *skb))
{
	struct net_bridge_port *p;
	struct net_bridge_port *prev;

	if (clone) {
		struct sk_buff *skb2;

		if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
			br->statistics.tx_dropped++;
			return;
		}

		skb = skb2;
	}

	prev = NULL;

	/* walk every port (NIC) on the bridge's port list */
	list_for_each_entry_rcu(p, &br->port_list, list) {
		/* NOTE(review): should_deliver() is not shown here; the article calls it a netfilter-rule check - confirm */
		if (should_deliver(p, skb)) {
			if (prev != NULL) {
				struct sk_buff *skb2;

				if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
					br->statistics.tx_dropped++;
					kfree_skb(skb);
					return;
				}

				/* the previous eligible port gets the clone */
				__packet_hook(prev, skb2);
			}

			prev = p;
		}
	}

	/* the last eligible port gets the skb itself */
	if (prev != NULL) {
		__packet_hook(prev, skb);
		return;
	}

	kfree_skb(skb);
}


阅读(1556) | 评论(0) | 转发(0) |
0

上一篇:Linux虚拟网卡TUN/TAP

下一篇:net_rx_action详解

给主人留下些什么吧!~~