关于桥的知识,网上有很多文章,网桥从开始的硬件设备,到现在linux内核的软实现,包括后来的vlan,switch芯片.这里也是参考了《深入理解linux网络内幕》中桥的部分.
参考内核2.6.32.61 kernel/net/bridge/*
这里先普及下知识:
HUB:整个HUB就是一个冲突域,采用CSMA/CD机制检测和侦听,从一个端口进来的数据包不经分析就会被转发到其它所有端口发送出去,连在此HUB上的设备共享带宽,利用率低,效率低,有距离限制,任意一个时刻只能有2台计算机之间可以通信。
网桥:建立桥接表(MAC),根据MAC表来决定向哪个端口进行数据转发,每个端口为一个冲突域,每台设备将享用一个端口的带宽。
交换机:网桥和交换机都是一个广播域,每个端口都是一个冲突域,并形成MAC表来指导帧转发,不同之处:交换机端口数量多,可以划分VLAN来将整个广播域分割为多个广播域。
其实最先接触桥,是在测试网络桥接性能的时候,当时在代码dev.c中看到:
-
#ifdef CONFIG_NET_CLS_ACT
-
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
-
if (!skb)
-
goto out;
-
ncls:
-
#endif
-
-
skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-
if (!skb)
-
goto out;
-
skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-
if (!skb)
-
goto out;
-
-
type = skb->protocol;
-
list_for_each_entry_rcu(ptype,
-
&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
-
if (ptype->type == type &&
-
(ptype->dev == null_or_orig || ptype->dev == skb->dev ||
-
ptype->dev == orig_dev)) {
-
if (pt_prev)
-
ret = deliver_skb(skb, pt_prev, orig_dev);
-
pt_prev = ptype;
-
}
-
}
这里截取了netif_receive_skb函数的部分代码. 桥的处理函数为handle_bridge,当然更新的内核这里调用的接口有些变化(比如3.1中改为通过设备上注册的rx_handler回调):
-
rx_handler = rcu_dereference(skb->dev->rx_handler);
我们来看看handle_bridge
-
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-
#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
-
/* This hook is defined here for ATM LANE */
-
int (*br_fdb_test_addr_hook)(struct net_device *dev,
-
unsigned char *addr) __read_mostly;
-
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
-
#endif
-
-
/*
-
* If bridge module is loaded call bridging hook.
-
* returns NULL if packet was consumed.
-
*/
-
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-
struct sk_buff *skb) __read_mostly;
-
EXPORT_SYMBOL_GPL(br_handle_frame_hook);
-
-
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-
struct packet_type **pt_prev, int *ret,
-
struct net_device *orig_dev)
-
{
-
struct net_bridge_port *port;
-
-
if (skb->pkt_type == PACKET_LOOPBACK ||
-
(port = rcu_dereference(skb->dev->br_port)) == NULL)
-
return skb;
-
-
if (*pt_prev) {
-
*ret = deliver_skb(skb, *pt_prev, orig_dev);
-
*pt_prev = NULL;
-
}
-
-
return br_handle_frame_hook(port, skb);
-
}
-
#else
-
#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
-
#endif
这里CONFIG_BRIDGE和CONFIG_BRIDGE_MODULE必须至少定义一个,否则handle_bridge只是一个直接返回skb的空宏,不做任何桥处理.
skb->pkt_type是在驱动层判断的,一般由eth.c中的eth_type_trans来初始化. 这里如果报文类型是PACKET_LOOPBACK,或者接收设备不是桥端口(skb->dev->br_port为空),则直接返回skb,不进入桥处理。
-
/**
-
* eth_type_trans - determine the packet's protocol ID.
-
* @skb: received socket data
-
* @dev: receiving network device
-
*
-
* The rule here is that we
-
* assume 802.3 if the type field is short enough to be a length.
-
* This is normal practice and works for any 'now in use' protocol.
-
*/
-
__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
-
{
-
struct ethhdr *eth;
-
unsigned char *rawp;
-
-
skb->dev = dev;
-
skb_reset_mac_header(skb);
-
skb_pull(skb, ETH_HLEN);
-
eth = eth_hdr(skb);
-
-
if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
-
if (!compare_ether_addr_64bits(eth->h_dest, dev->broadcast))
-
skb->pkt_type = PACKET_BROADCAST;
-
else
-
skb->pkt_type = PACKET_MULTICAST;
-
}
-
-
/*
-
* This ALLMULTI check should be redundant by 1.4
-
* so don't forget to remove it.
-
*
-
* Seems, you forgot to remove it. All silly devices
-
* seems to set IFF_PROMISC.
-
*/
-
-
else if (1 /*dev->flags&IFF_PROMISC */ ) {
-
if (unlikely(compare_ether_addr_64bits(eth->h_dest, dev->dev_addr)))
-
skb->pkt_type = PACKET_OTHERHOST;
-
}
-
-
/*
-
* Some variants of DSA tagging don't have an ethertype field
-
* at all, so we check here whether one of those tagging
-
* variants has been configured on the receiving interface,
-
* and if so, set skb->protocol without looking at the packet.
-
*/
-
if (netdev_uses_dsa_tags(dev))
-
return htons(ETH_P_DSA);
-
if (netdev_uses_trailer_tags(dev))
-
return htons(ETH_P_TRAILER);
-
-
if (ntohs(eth->h_proto) >= 1536)
-
return eth->h_proto;
-
-
rawp = skb->data;
-
-
/*
-
* This is a magic hack to spot IPX packets. Older Novell breaks
-
* the protocol design and runs IPX over 802.3 without an 802.2 LLC
-
* layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
-
* won't work for fault tolerant netware but does for the rest.
-
*/
-
if (*(unsigned short *)rawp == 0xFFFF)
-
return htons(ETH_P_802_3);
-
-
/*
-
* Real 802.2 LLC
-
*/
-
return htons(ETH_P_802_2);
-
}
而pt_prev默认为空,它是嗅探器的处理,前面我们讲过;如果不为空,则先用deliver_skb把报文交给它,再把它置为NULL. 最后就是调用br_handle_frame_hook了。
在br.c中 br_init函数中对它进行了初始化
-
br_handle_frame_hook = br_handle_frame;
-
/*
-
* Called via br_handle_frame_hook.
-
* Return NULL if skb is handled
-
* note: already called with rcu_read_lock (preempt_disabled)
-
*/
-
struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
-
{
-
const unsigned char *dest = eth_hdr(skb)->h_dest;
-
int (*rhook)(struct sk_buff *skb);
-
-
if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) //检查源地址的合法性,非0、非全F,非多播地址. 这个函数可以展开.
-
goto drop;
-
-
skb = skb_share_check(skb, GFP_ATOMIC);
-
if (!skb)
-
return NULL;
-
-
if (unlikely(is_link_local(dest))) { // 判断目的地址是不是多播地址01:80:c2:00:00:0X,如果是进入处理 ;01:80:c2:00:00:00,由802.1D stp所使用.
-
/* Pause frames shouldn't be passed up by driver anyway */
-
if (skb->protocol == htons(ETH_P_PAUSE))
-
goto drop;
-
-
/* If STP is turned off, then forward */
-
if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) //桥默认初始化的stp默认没有开启.,并且是stp地址. 那么直接转发.
-
goto forward;
-
-
if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, // 这里不关注netfilter(ebtables),br_handle_local_finish调用br_fdb_update更新报文的源mac与端口的关系(fdb),也是mac-port对应模式的更新. 它固定返回0值,所以除非有钩子过滤使返回值为真,返回null,否则返回skb.
-
NULL, br_handle_local_finish))
-
return NULL; /* frame consumed by filter */
-
else
-
return skb; /* continue processing */
-
}
-
-
forward:
-
switch (p->state) { // 根据桥端口状态不同处理.
-
case BR_STATE_FORWARDING:
-
rhook = rcu_dereference(br_should_route_hook); // 这里关于br_should_route_hook是ebtables的东西。在ebtables_broute.c中初始化的.当然需要开启它.转发后,返回skb.
-
-
static int __init ebtable_broute_init(void)
-
{
-
int ret;
-
-
ret = register_pernet_subsys(&broute_net_ops);
-
if (ret < 0)
-
return ret;
-
/* see br_input.c */
-
rcu_assign_pointer(br_should_route_hook, ebt_broute);
-
return 0;
-
}
-
if (rhook != NULL) {
-
if (rhook(skb))
-
return skb;
-
dest = eth_hdr(skb)->h_dest;
-
}
-
/* fall through */
-
case BR_STATE_LEARNING: //主要关注learning状态的处理.
-
if (!compare_ether_addr(p->br->dev->dev_addr, dest)) //判断目的mac是否发往本地的.如果是则设置pkt_type为PACKET_HOST
-
skb->pkt_type = PACKET_HOST;
-
-
NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
-
br_handle_frame_finish); //它需要单独分析.
-
break;
-
default:
-
drop:
-
kfree_skb(skb);
-
}
-
return NULL;
-
}
该函数在br_input.c中定义. 我们首先看参数p,它来自handle_bridge中的port,而port来自 port = rcu_dereference(skb->dev->br_port); 而br_port的赋值涉及网桥的创建,以及向网桥添加接口的操作.
上面代码进行了简单的注释和分析.最后看br_handle_frame_finish
-
/* note: already called with rcu_read_lock (preempt_disabled) */
-
int br_handle_frame_finish(struct sk_buff *skb)
-
{
-
const unsigned char *dest = eth_hdr(skb)->h_dest;
-
struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
-
struct net_bridge *br;
-
struct net_bridge_fdb_entry *dst;
-
struct sk_buff *skb2;
-
-
if (!p || p->state == BR_STATE_DISABLED)
-
goto drop;
-
-
/* insert into forwarding database after filtering to avoid spoofing */
-
br = p->br;
-
br_fdb_update(br, p, eth_hdr(skb)->h_source); // 更新fdb
-
-
if (p->state == BR_STATE_LEARNING)
-
goto drop;
-
-
/* The packet skb2 goes to the local host (NULL to skip). */
-
skb2 = NULL;
-
-
if (br->dev->flags & IFF_PROMISC)
-
skb2 = skb;
-
-
dst = NULL;
-
-
if (is_multicast_ether_addr(dest)) { //多播
-
br->dev->stats.multicast++;
-
skb2 = skb;
-
} else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { //本地
-
skb2 = skb;
-
/* Do not forward the packet since it's local. */
-
skb = NULL;
-
}
-
-
if (skb2 == skb)
-
skb2 = skb_clone(skb, GFP_ATOMIC);
-
-
if (skb2)
-
br_pass_frame_up(br, skb2); // 发往本地的处理,把设备接口赋值为桥接口,调用netif_recevice_skb继续处理.
-
-
if (skb) {
-
if (dst)
-
br_forward(dst->dst, skb); // 查询fdb找到mac-port信息,转发,通过br_forward_finish调用br_dev_queue_push_xmit发送出去.最后调用了dev_queue_xmit.
-
else
-
br_flood_forward(br, skb); //没有查询到,广播,即循环桥每个端口,br_flood,用br_forward发送出去.
-
}
-
-
out:
-
return 0;
-
drop:
-
kfree_skb(skb);
-
goto out;
-
}
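上面讲了这么多,只是针对接收到的数据帧的桥处理。这里我们没有发现stp的处理:对于目的地址为01:80:c2:00:00:0X的帧,br_handle_frame在过完NF_BR_LOCAL_IN钩子后直接返回skb,交给上层协议栈继续处理。现在内核貌似已经把stp报文(BPDU)的接收剥离到其他模块了(应该是经由802.2 LLC层,交给br_init中通过stp_proto_register注册的br_stp_rcv处理,有待进一步确认).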
阅读(989) | 评论(0) | 转发(0) |