Bridge转发逻辑
——lvyilong316
本文主要介绍linux bridge的转发流程,以及bridge设计的几个hook点。首先看一张完整的转发图。
下面逐一看每个函数的分析
● netif_receive_skb:网卡接收函数
/net/core/dev.c
-
int netif_receive_skb(struct sk_buff *skb)
-
{
-
//…
-
skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-
//…
-
}
● handle_bridge:网桥处理函数
/net/core/dev.c
-
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-
struct packet_type **pt_prev, int *ret,
-
struct net_device *orig_dev)
-
{
-
struct net_bridge_port *port;
-
if (skb->pkt_type == PACKET_LOOPBACK ||
-
(port = rcu_dereference(skb->dev->br_port)) == NULL)// A注意这个判断
-
return skb;
-
-
if (*pt_prev) { //一般来说pt_prev为NULL
-
*ret = deliver_skb(skb, *pt_prev, orig_dev);
-
*pt_prev = NULL;
-
}
-
//调用bridge挂载函数,该函数在bridge模块装载时初始化
-
return br_handle_frame_hook(port, skb);
-
}
-
br_handle_frame_hook函数在bridge模块装载时初始化
-
/* net/bridge/br.c */
-
static int __init br_init(void)
-
{
-
//......
-
br_handle_frame_hook = br_handle_frame;
-
//......
-
}
● br_handle_frame
/net/bridge/br_input.c
-
struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
-
{
-
const unsigned char *dest = eth_hdr(skb)->h_dest;
-
int (*rhook)(struct sk_buff *skb);
-
//…
-
if (unlikely(is_link_local(dest))) {//如果是本地多播地址(形如:01:80:c2:00:00:0X)
-
//…
-
// 自身包进入PF_BRIDGE的INPUT点, 一般处理的包数不多
-
if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
-
NULL, br_handle_local_finish))
-
return NULL; /* frame consumed by filter */
-
else
-
return skb; /* continue processing */
-
}
-
//进入转发逻辑
-
forward:
-
switch (p->state) {
-
case BR_STATE_FORWARDING:
-
rhook = rcu_dereference(br_should_route_hook);
-
if (rhook != NULL) {
-
if (rhook(skb))
-
return skb;
-
dest = eth_hdr(skb)->h_dest;
-
}
-
/* fall through */
-
case BR_STATE_LEARNING://如果数据包的目的mac为bridge的mac
-
if (!compare_ether_addr(p->br->dev->dev_addr, dest))
-
skb->pkt_type = PACKET_HOST;
-
//bridge的PRE_ROUTING
-
NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
-
br_handle_frame_finish);
-
break;
-
default:
-
drop:
-
kfree_skb(skb);
-
}
-
return NULL;
-
}
● br_handle_frame_finish:这个函数完成更新mac表、查找mac表确定出口dev。
/net/bridge/br_input.c
-
int br_handle_frame_finish(struct sk_buff *skb)
-
{
-
const unsigned char *dest = eth_hdr(skb)->h_dest;
-
struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
-
struct net_bridge *br;
-
struct net_bridge_fdb_entry *dst;
-
struct sk_buff *skb2;
-
/* insert into forwarding database after filtering to avoid spoofing */
-
br = p->br;
-
br_fdb_update(br, p, eth_hdr(skb)->h_source);
-
/* The packet skb2 goes to the local host (NULL to skip). */
-
skb2 = NULL;
-
if (br->dev->flags & IFF_PROMISC) //如果网桥设备被设置为混杂模式
-
skb2 = skb;
-
dst = NULL;
-
if (is_multicast_ether_addr(dest)) {//如果是多播(首字节最低位为1,广播地址也属于此类)
-
br->dev->stats.multicast++;
-
skb2 = skb;
-
} else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { //如果目的mac为本机mac
-
skb2 = skb;
-
/* Do not forward the packet since it's local. */
-
skb = NULL; //skb2为要发往本机上层协议栈的,skb为要转发的
-
}
-
-
if (skb2 == skb)
-
skb2 = skb_clone(skb, GFP_ATOMIC);
-
if (skb2) //发往本机上层协议栈
-
br_pass_frame_up(br, skb2);
-
if (skb) { //转发
-
if (dst)
-
br_forward(dst->dst, skb);
-
else
-
br_flood_forward(br, skb);
-
}
-
out:
-
return 0;
-
drop:
-
kfree_skb(skb);
-
goto out;
-
}
● br_pass_frame_up:发往本地
// net/bridge/br_input.c
-
static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb)
-
{
-
struct net_device *indev, *brdev = br->dev;
-
brdev->stats.rx_packets++;
-
brdev->stats.rx_bytes += skb->len;
-
indev = skb->dev;
-
skb->dev = brdev;
-
//bridge的LOCAL_IN
-
NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
-
netif_receive_skb);
-
}
这段代码非常简单,对net_bridge的数据统计进行更新以后,再更新skb->dev,最后通过NF_HOOK在NF_BR_LOCAL_IN挂接点上调用回了netif_receive_skb。
前面已经提到,在netif_receive_skb函数中,调用了handle_bridge函数,并且触发了网桥的处理流程,现在发往网桥虚拟设备的数据包又回到了netif_receive_skb,那么网桥的处理过程会不会又被调用到呢?
在 linux/net/bridge/br_if.c里面可以看到br_add_if函数,实际上的操作是将某一网口(dev)加入网桥组,这个函数调用了new_nbp(br, dev); 用以填充net_bridge以及dev结构的重要成员,里面将dev->br_port(这里dev是加入bridge的dev而不是bridge自身对应的dev)设定为一个新建的net_bridge_port结构。而上面的br_pass_frame_up函数将skb->dev赋成了br->dev,实际上skb->dev变成了网桥建立的虚拟设备(bridge自身对应的dev),这个设备是网桥本身而不是桥组的某一端口,系统没有为其调用br_add_if,所以这个net_device结构的br_port指针没有进行赋值。
在handle_bridge中有这样的检查
if (skb->pkt_type == PACKET_LOOPBACK ||
(port = rcu_dereference(skb->dev->br_port)) == NULL)
return skb;
经过br_pass_frame_up 函数后,skb->dev->br_port为空,所以将直接返回skb而不进行网桥处理。
另外,我们看到,系统在NF_BR_LOCAL_IN挂接点上调用了netif_receive_skb,但是netif_receive_skb还会调用ip_rcv函数,所以数据包在NF_IP_LOCAL_IN还可以被捕获到。
● br_forward:转发
// net/bridge/br_forward.c
-
void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
-
{
-
//接口检查,确认端口处于BR_STATE_FORWARDING状态,网桥允许转发,并且转发的出口和入口的dev不相等
-
if (should_deliver(to, skb)) {
-
__br_forward(to, skb);
-
return;
-
}
-
kfree_skb(skb);
-
}
● __br_forward
// net/bridge/br_forward.c
-
static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
-
{
-
struct net_device *indev;
-
indev = skb->dev;
-
skb->dev = to->dev; //修改skb->dev为目的出口对应的dev
-
skb_forward_csum(skb); //并不重新计算校验和:若ip_summed为CHECKSUM_COMPLETE,则将其置为CHECKSUM_NONE
-
//bridge的FORWARD
-
NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
-
br_forward_finish);
-
}
● br_forward_finish
-
int br_forward_finish(struct sk_buff *skb)
-
{ //bridge的POST_ROUTING
-
return NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
-
br_dev_queue_push_xmit);
-
}
● br_dev_queue_push_xmit
// net/bridge/br_forward.c
-
int br_dev_queue_push_xmit(struct sk_buff *skb)
-
{
-
/* drop mtu oversized packets except gso */
-
if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
-
kfree_skb(skb);
-
else {
-
/* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
-
if (nf_bridge_maybe_copy_header(skb))
-
kfree_skb(skb);
-
else {
-
skb_push(skb, ETH_HLEN);
-
dev_queue_xmit(skb);
-
}
-
}
-
return 0;
-
}
在dev_queue_xmit()中会判断skb中的dev字段,根据这个字段指示的设备调用该设备的发送函数hard_start_xmit来对skb进行转发。其实到这里bridge的转发逻辑基本就完成了,但是如果目的dev依然是bridge呢,那就调用bridge的hard_start_xmit,而bridge的hard_start_xmit 在bridge初始化中由br_dev_setup设置。
/* net/bridge/br_device.c */
-
void br_dev_setup(struct net_device *dev)
-
{
-
//......
-
dev->hard_start_xmit = br_dev_xmit;
-
//......
-
}
● br_dev_xmit
/* net/bridge/br_device.c */
/* net device transmit always called with no BH (preempt_disabled) */
-
int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
-
{
-
struct net_bridge *br = netdev_priv(dev);
-
const unsigned char *dest = skb->data;
-
struct net_bridge_fdb_entry *dst;
-
br->statistics.tx_packets++;
-
br->statistics.tx_bytes += skb->len;
-
skb->mac.raw = skb->data;
-
skb_pull(skb, ETH_HLEN);
-
if (dest[0] & 1)
-
// 多播/广播发送(目的mac首字节最低位为1,向所有端口泛洪)
-
br_flood_deliver(br, skb, 0);
-
else if ((dst = __br_fdb_get(br, dest)) != NULL) //查转发表
-
// 单播发送
-
br_deliver(dst->dst, skb);
-
else
-
// 转发表中未找到目的mac(未知单播),向所有端口泛洪
-
br_flood_deliver(br, skb, 0);
-
// 这些发送函数最终都会调用__br_deliver()函数
-
return 0;
-
}
● __br_deliver
/* net/bridge/br_forward.c */
-
static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
-
{
-
skb->dev = to->dev; //设置为出口dev
-
// 此处是PF_BRIDGE的OUTPUT点
-
NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
-
br_forward_finish);
-
}
注意这里调用完成后又要调用 br_forward_finish,但这不是循环,因为__br_deliver中skb->dev已经改变,下一轮调用的hard_start_xmit 也会不同。最后再看一遍全局的转发图,应该就比较清晰了。
阅读(5340) | 评论(0) | 转发(0) |