分类:
2012-07-04 21:44:53
原文地址:linux协议栈之网桥实现之二 作者:danforn
前面已经分析了,将接口添进网桥时,用户空间调用ioctl(br_socket_fd, SIOCBRADDIF, &ifr)
注意到在void br_dev_setup(struct net_device *dev)中已经对dev->do_ioctl进行了赋值,即:
dev->do_ioctl = br_dev_ioctl
通过ioctl进行访问的时候,会进入br_dev_ioctl (net/bridge/br_ioctl.c):
/*
 * ioctl entry point installed on the bridge net_device.
 *
 * SIOCDEVPRIVATE is forwarded to old_dev_ioctl().  SIOCBRADDIF and
 * SIOCBRDELIF add or remove the interface whose index is rq->ifr_ifindex
 * through add_del_if().  Any other command is reported via pr_debug() and
 * rejected with -EOPNOTSUPP.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
	struct net_bridge *br = netdev_priv(dev);

	if (cmd == SIOCDEVPRIVATE)
		return old_dev_ioctl(dev, rq, cmd);

	/* Add and delete share one helper; the last argument selects "add". */
	if (cmd == SIOCBRADDIF)
		return add_del_if(br, rq->ifr_ifindex, 1);
	if (cmd == SIOCBRDELIF)
		return add_del_if(br, rq->ifr_ifindex, 0);

	pr_debug("Bridge does not support ioctl 0x%x\n", cmd);
	return -EOPNOTSUPP;
}
我们在用户空间使用的标志是SIOCBRADDIF。所以流程进入add_del_if()
/*
 * Add (isadd != 0) or remove (isadd == 0) the device with interface index
 * ifindex to/from bridge br.
 *
 * Returns -EPERM when the caller lacks CAP_NET_ADMIN, -EINVAL when
 * dev_get_by_index() finds no device, otherwise whatever br_add_if() or
 * br_del_if() returns.
 */
static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
{
	struct net_device *port_dev;
	int rc;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	port_dev = dev_get_by_index(ifindex);
	if (!port_dev)
		return -EINVAL;

	rc = isadd ? br_add_if(br, port_dev) : br_del_if(br, port_dev);

	/* dev_put() the device looked up above, whatever the outcome. */
	dev_put(port_dev);
	return rc;
}
因为cmd == SIOCBRADDIF为真,所以调用br_add_if():
int br_add_if(struct net_bridge *br, struct net_device *dev) (net/brige/br_if.c))
{
struct net_bridge_port *p;
int err = 0;
//回环。或者非以及网接口
if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER)
return -EINVAL;
//构造数据包函数为网桥类型
if (dev->hard_start_xmit == br_dev_xmit)
return -ELOOP;
//此接口已经存在于网桥
if (dev->br_port != NULL)
return -EBUSY;
//为dev 创建网桥接口.dev->br_port。指向所属网桥端口
//dev->br_port->br:指向它所属的网桥
//为该接口创建net_bridge_port
if (IS_ERR(p = new_nbp(br, dev, br_initial_port_cost(dev))))
return PTR_ERR(p);
//更新port->MAC对应表
if ((err = br_fdb_insert(br, p, dev->dev_addr, 1)))
destroy_nbp(p);
else if ((err = br_sysfs_addif(p)))
del_nbp(p);
else {
//设置接口为混杂模式
dev_set_promiscuity(dev, 1);
//将p->list更新至br->port_list中
list_add_rcu(&p->list, &br->port_list);
spin_lock_bh(&br->lock);
br_stp_recalculate_bridge_id(br);
if ((br->dev->flags & IFF_UP)
&& (dev->flags & IFF_UP) && netif_carrier_ok(dev))
br_stp_enable_port(p);
spin_unlock_bh(&br->lock);
dev_set_mtu(br->dev, br_min_mtu(br));
}
return err;
}
为接口创建net_bridge_port的函数为new_nbp。这个函数比较简单:
/*
 * Allocate and initialise the net_bridge_port that represents dev inside
 * bridge br, with the given initial path cost.
 *
 * Returns the new port, or an ERR_PTR(): the negative value returned by
 * find_portno(), or -ENOMEM when the allocation fails.  On success
 * dev->br_port points at the new port and dev_hold() has been called on dev.
 */
static struct net_bridge_port *new_nbp(struct net_bridge *br,
				       struct net_device *dev,
				       unsigned long cost)
{
	struct net_bridge_port *port;
	int portno = find_portno(br);

	if (portno < 0)
		return ERR_PTR(portno);

	port = kmalloc(sizeof(*port), GFP_KERNEL);
	if (!port)
		return ERR_PTR(-ENOMEM);
	memset(port, 0, sizeof(*port));

	/* Statement order below is kept: br_init_port() runs after setup. */
	port->br = br;
	dev_hold(dev);
	port->dev = dev;
	port->path_cost = cost;
	port->priority = 0x8000 >> BR_PORT_BITS;
	dev->br_port = port;
	port->port_no = portno;
	br_init_port(port);
	port->state = BR_STATE_DISABLED;
	kobject_init(&port->kobj);

	return port;
}
之后,把要加入的接口对应的MAC与接口作为本机静态项加入到port-MAC对应表。这是在br_fdb_insert()中实现的:
/*
 * Locked wrapper around fdb_insert(): takes br->hash_lock (bottom halves
 * disabled) for the duration of the insert and passes the result through.
 */
int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
		  const unsigned char *addr, int is_local)
{
	int rc;

	spin_lock_bh(&br->hash_lock);
	rc = fdb_insert(br, source, addr, is_local);
	spin_unlock_bh(&br->hash_lock);

	return rc;
}
操作存在异步性,在插入之前加锁。具体的插入在fdb_insert中实现
/*
 * Insert or refresh the forwarding-table entry for addr on port source.
 * br_fdb_insert() calls this with br->hash_lock held.
 *
 * is_local != 0 marks the address as belonging to one of the bridge's own
 * interfaces; such entries are also flagged static and are not put on the
 * age list.
 *
 * Returns 0 on success (including "nothing to do"), or a negative errno:
 *   -EADDRNOTAVAIL  addr fails is_valid_ether_addr()
 *   -EEXIST         a learned address collides with an existing local entry
 *   -ENOMEM         no memory for a new entry
 */
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
		      const unsigned char *addr, int is_local)
{
	struct hlist_node *h;
	struct net_bridge_fdb_entry *fdb;
	int hash = br_mac_hash(addr);

	/* Refuse addresses that are not valid Ethernet addresses. */
	if (!is_valid_ether_addr(addr))
		return -EADDRNOTAVAIL;

	hlist_for_each_entry(fdb, h, &br->hash[hash], hlist) {
		/* The table already holds an entry for this address. */
		if (!memcmp(fdb->addr.addr, addr, ETH_ALEN)) {
			/* attempt to update an entry for a local interface */
			if (fdb->is_local) {
				/* it is okay to have multiple ports with same
				 * address, just don't allow to be spoofed.
				 */
				if (is_local)
					return 0;

				if (net_ratelimit())
					printk(KERN_WARNING "%s: received packet with "
					       " own address as source address\n",
					       source->dev->name);
				return -EEXIST;
			}

			/* A local address replaces a previously learned one. */
			if (is_local) {
				printk(KERN_WARNING "%s adding interface with same address "
				       "as a received packet\n",
				       source->dev->name);
				goto update;
			}

			/* Existing static entries are left untouched. */
			if (fdb->is_static)
				return 0;

			/* move to end of age list */
			list_del(&fdb->u.age_list);
			goto update;
		}
	}

	fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
	if (!fdb)
		return -ENOMEM;	/* negative errno, like the other error paths */

	memcpy(fdb->addr.addr, addr, ETH_ALEN);
	atomic_set(&fdb->use_count, 1);
	hlist_add_head_rcu(&fdb->hlist, &br->hash[hash]);

	/* Arm the gc timer if it is not already pending. */
	if (!timer_pending(&br->gc_timer)) {
		br->gc_timer.expires = jiffies + hold_time(br);
		add_timer(&br->gc_timer);
	}

update:
	fdb->dst = source;
	fdb->is_local = is_local;
	fdb->is_static = is_local;
	fdb->ageing_timer = jiffies;
	/* Only non-local entries go on the age list. */
	if (!is_local)
		list_add_tail(&fdb->u.age_list, &br->age_list);

	return 0;
}
此函数先判断要插入的项是否已存在:若已存在且不是静态项,则更新对应项;若不存在该项,则分配一个net_bridge_fdb_entry,插入到CAM表。
先来分析一下net_bridge_port的结构:
/*
 * One port of a bridge: ties a net_device to the bridge it has been added
 * to and carries that port's spanning-tree (STP) state.
 */
struct net_bridge_port
{
/* Bridge this port belongs to (set by new_nbp()). */
struct net_bridge *br;
/* Network device behind this port (set by new_nbp()). */
struct net_device *dev;
/* Link in the owning bridge's port_list (added in br_add_if()). */
struct list_head list;
/* STP */
u8 priority;
/* Port state, e.g. BR_STATE_DISABLED / LEARNING / FORWARDING. */
u8 state;
/* Port number; new_nbp() takes it from find_portno(). */
u16 port_no;
unsigned char topology_change_ack;
unsigned char config_pending;
port_id port_id;
port_id designated_port;
bridge_id designated_root;
bridge_id designated_bridge;
/* Path cost; new_nbp() initialises it from its cost argument. */
u32 path_cost;
u32 designated_cost;
struct timer_list forward_delay_timer;
struct timer_list hold_timer;
struct timer_list message_age_timer;
/* Embedded kobject, initialised with kobject_init() in new_nbp(). */
struct kobject kobj;
/* NOTE(review): presumably used for RCU-deferred freeing -- confirm. */
struct rcu_head rcu;
};
对应的net_bridge_fdb_entry结构:
/*
 * One entry of the bridge forwarding database (the "CAM table"): maps a
 * MAC address to the port it is reachable through.
 */
struct net_bridge_fdb_entry
{
/* Link in the per-hash bucket list br->hash[]. */
struct hlist_node hlist;
/* Port this address is associated with (fdb_insert() sets it to source). */
struct net_bridge_port *dst;
union {
/* Link in br->age_list; fdb_insert() adds only non-local entries. */
struct list_head age_list;
/* NOTE(review): presumably used for RCU-deferred freeing -- confirm. */
struct rcu_head rcu;
} u;
/* Reference count; fdb_insert() starts it at 1. */
atomic_t use_count;
/* Value of jiffies when the entry was last inserted or updated. */
unsigned long ageing_timer;
/* MAC address this entry describes. */
mac_addr addr;
/* Non-zero when the address belongs to one of the bridge's own interfaces. */
unsigned char is_local;
/* Non-zero for static entries; fdb_insert() sets it equal to is_local. */
unsigned char is_static;
};
/*
 * Per-port bridge state.
 * NOTE(review): this listing repeats the struct net_bridge_port definition
 * shown earlier in this text verbatim; confirm whether the repeat is
 * intentional.
 */
struct net_bridge_port
{
/* Owning bridge. */
struct net_bridge *br;
/* Network device that this port represents. */
struct net_device *dev;
/* Entry in the owning bridge's list of ports. */
struct list_head list;
/* STP */
u8 priority;
u8 state;
u16 port_no;
unsigned char topology_change_ack;
unsigned char config_pending;
port_id port_id;
port_id designated_port;
bridge_id designated_root;
bridge_id designated_bridge;
u32 path_cost;
u32 designated_cost;
struct timer_list forward_delay_timer;
struct timer_list hold_timer;
struct timer_list message_age_timer;
struct kobject kobj;
struct rcu_head rcu;
};
这样,就往桥中添加了一个接口,从上图中可以反映出接口与桥之间的关系。我们可以用brctl show指令看到当前所有的桥,以及桥里相应的接口。用ifconfig br0可以看当前桥的状态,如果细心一点可以看到,br0已经有了对应的MAC。这是怎么来的呢?
桥MAC地址的更新:
注意到在br_add_if中调用了函数br_stp_recalculate_bridge_id()
在上面的代码分析中,为了简化分析,把stp的相关流程忽略掉了,现在我们看下这个函数做了些什么
/* called under bridge lock */
/*
 * Pick the numerically smallest MAC address (memcmp order) among all ports
 * of br and, if it differs from the current bridge id address, switch the
 * bridge over to it via br_stp_change_bridge_id().  With no ports the
 * candidate stays br_mac_zero.
 */
void br_stp_recalculate_bridge_id(struct net_bridge *br)
{
	const unsigned char *lowest = br_mac_zero;
	struct net_bridge_port *port;

	list_for_each_entry(port, &br->port_list, list) {
		const unsigned char *mac = port->dev->dev_addr;

		/* The first port always wins over the br_mac_zero placeholder. */
		if (lowest == br_mac_zero || memcmp(mac, lowest, ETH_ALEN) < 0)
			lowest = mac;
	}

	if (memcmp(br->bridge_id.addr, lowest, ETH_ALEN) != 0)
		br_stp_change_bridge_id(br, lowest);
}
这个函数比较简单,它就是遍历桥对应的所有接口,然后取最小的MAC。然后判断最小MAC跟现在的MAC是否相同
继续跟踪br_stp_change_bridge_id
/*
 * Replace the bridge's id address with addr.
 *
 * The same address is copied into the bridge device's dev_addr, so this is
 * where the bridge interface gets its MAC.  Ports whose designated bridge
 * or designated root carried the old address are rewritten to the new one,
 * then the STP configuration and port states are recomputed.  If the bridge
 * was not root before and is root afterwards, br_become_root_bridge() runs.
 */
static void br_stp_change_bridge_id(struct net_bridge *br,
				    const unsigned char *addr)
{
	unsigned char previous[ETH_ALEN];
	struct net_bridge_port *port;
	int was_root = br_is_root_bridge(br);

	/* Remember the old id before overwriting it. */
	memcpy(previous, br->bridge_id.addr, ETH_ALEN);
	memcpy(br->bridge_id.addr, addr, ETH_ALEN);
	/* The bridge device's own MAC address is updated here. */
	memcpy(br->dev->dev_addr, addr, ETH_ALEN);

	list_for_each_entry(port, &br->port_list, list) {
		if (memcmp(port->designated_bridge.addr, previous, ETH_ALEN) == 0)
			memcpy(port->designated_bridge.addr, addr, ETH_ALEN);

		if (memcmp(port->designated_root.addr, previous, ETH_ALEN) == 0)
			memcpy(port->designated_root.addr, addr, ETH_ALEN);
	}

	br_configuration_update(br);
	br_port_state_selection(br);

	if (!was_root && br_is_root_bridge(br))
		br_become_root_bridge(br);
}
看到上面的注释了吧,桥的MAC就是在这里得到更新的。所以,桥的MAC地址取的是所有接口MAC中的最小值。
网桥对接收数据的处理:
回到本章的开始的handle_bridge函数,会调用br_handle_frame_hook进行接收数据的处理
在网桥的初始化代码中,把br_handle_frame_hook赋值为了br_handle_frame
没错,这就是网桥的处理函数。跟进这个函数:
/*
 * br_handle_frame - receive-side handler for a frame arriving on port p.
 *
 * Drops the frame when the port is disabled or the source MAC has the
 * group bit set.  In LEARNING or FORWARDING state the source MAC is learned
 * into the forwarding table as a non-local entry.  A frame addressed to the
 * STP group address (bridge_ula, last octet 0) is handed to
 * br_stp_handle_bpdu() when STP is enabled; otherwise, on a FORWARDING port,
 * the frame goes through the NF_BR_PRE_ROUTING hook to
 * br_handle_frame_finish().
 *
 * Returns 0 only when br_should_route_hook() returns non-zero for the
 * frame; returns 1 in every other case, including when the frame is freed.
 */
int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	/* Destination MAC address of the frame. */
	const unsigned char *dest = eth_hdr(skb)->h_dest;

	/* Port is disabled: drop. */
	if (p->state == BR_STATE_DISABLED)
		goto err;

	/*
	 * Source MAC with the lowest bit of the first octet set is a
	 * multicast/broadcast address, never a valid source: drop.
	 */
	if (eth_hdr(skb)->h_source[0] & 1)
		goto err;

	/* Learn the source MAC (is_local = 0) while learning or forwarding. */
	if (p->state == BR_STATE_LEARNING ||
	    p->state == BR_STATE_FORWARDING)
		br_fdb_insert(p->br, p, eth_hdr(skb)->h_source, 0);

	/*
	 * STP handling: bridge_ula is the STP multicast address.  Only the
	 * first five octets are compared; the low nibble of the sixth octet
	 * is checked separately.
	 */
	if (p->br->stp_enabled &&
	    !memcmp(dest, bridge_ula, 5) &&
	    !(dest[5] & 0xF0)) {
		if (!dest[5]) {
			NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
				NULL, br_stp_handle_bpdu);
			return 1;
		}
		/* Other addresses in this range fall through and are dropped. */
	}
	else if (p->state == BR_STATE_FORWARDING) {
		/* Optional hook; when set and it returns non-zero, return 0. */
		if (br_should_route_hook) {
			if (br_should_route_hook(pskb))
				return 0;
			/* The hook takes pskb, so reload skb and dest from it. */
			skb = *pskb;
			dest = eth_hdr(skb)->h_dest;
		}

		/* Frame addressed to the bridge's own MAC: mark it PACKET_HOST. */
		if (!memcmp(p->br->dev->dev_addr, dest, ETH_ALEN))
			skb->pkt_type = PACKET_HOST;

		/* Netfilter processing at the bridge NF_BR_PRE_ROUTING hook. */
		NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
			br_handle_frame_finish);
		return 1;
	}

err:
	kfree_skb(skb);
	return 1;
}
在这个函数里,进行相关的入口判断之后,会把当前数据包的源MAC与接口的对应关系更新到CAM表中。更新函数br_fdb_insert()在前面已经分析过了,不太明白的可以倒回去看一下。不过要注意,这里不是作为静态项插入的(is_local参数为0)。
接着判断包是不是传给本机的,如果是,则置包的pkt_type为PACKET_HOST
关于NF_HOOK()宏,我们在以后的netfilter部分中有专题分析。这里我们只要知道,正常的数据包会流进br_handle_frame_finish()进行处理
/* note: already called with rcu_read_lock (preempt_disabled) */
/*
 * Decide what to do with a frame received on a bridge port:
 *   - bridge device in promiscuous mode: pass a clone up first;
 *   - group (multicast/broadcast) destination: flood and pass up;
 *   - destination found as a local entry: pass up;
 *   - destination found on another port: forward to that port;
 *   - destination unknown: flood.
 * Always returns 0.
 */
int br_handle_frame_finish(struct sk_buff *skb)
{
	const unsigned char *dest = eth_hdr(skb)->h_dest;
	struct net_bridge_port *port = skb->dev->br_port;
	struct net_bridge *br = port->br;
	struct net_bridge_fdb_entry *entry;
	int delivered = 0;	/* set once a copy has been passed up */

	/*
	 * When the bridge's own device is promiscuous, a clone of the frame
	 * is passed up before any forwarding decision is made.
	 */
	if (br->dev->flags & IFF_PROMISC) {
		struct sk_buff *copy = skb_clone(skb, GFP_ATOMIC);

		if (copy != NULL) {
			delivered = 1;
			br_pass_frame_up(br, copy);
		}
	}

	/* Group address: flood, and pass up unless that already happened. */
	if (dest[0] & 1) {
		/* NOTE(review): third argument presumably asks for a clone -- confirm. */
		br_flood_forward(br, skb, !delivered);
		if (!delivered)
			br_pass_frame_up(br, skb);
		return 0;
	}

	/* Unicast: look the destination up in the forwarding table. */
	entry = __br_fdb_get(br, dest);

	/* Unknown destination: send on the other ports. */
	if (entry == NULL) {
		br_flood_forward(br, skb, 0);
		return 0;
	}

	/* Local destination: pass up, or free skb if a clone already went up. */
	if (entry->is_local) {
		if (delivered)
			kfree_skb(skb);
		else
			br_pass_frame_up(br, skb);
		return 0;
	}

	/* Known, non-local destination: forward out of its port. */
	br_forward(entry->dst, skb);
	return 0;
}
在这个函数里,通过查找CAM表取得发送端口;如果当前CAM表里没有到目的MAC的端口,则在其它端口上都发送此数据包。
在这个函数里,我们看到,查询CAM表的函数为:__br_fdb_get()
接着分析一下此函数
/*
 * Look addr up in the bridge's forwarding table.
 *
 * Walks the hash bucket selected by br_mac_hash(addr) and stops at the
 * first entry whose address matches.  Returns that entry, or NULL when it
 * has expired (per has_expired()) or when no entry matches.
 */
struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
					  const unsigned char *addr)
{
	struct hlist_node *pos;
	struct net_bridge_fdb_entry *entry;

	hlist_for_each_entry_rcu(entry, pos, &br->hash[br_mac_hash(addr)], hlist) {
		if (memcmp(entry->addr.addr, addr, ETH_ALEN) != 0)
			continue;

		/* First match decides: an expired entry counts as a miss. */
		if (unlikely(has_expired(br, entry)))
			return NULL;
		return entry;
	}

	return NULL;
}
这个函数非常容易,首先取得目的MAC对应的哈希项。然后再遍历里面的数据,查看是否含有目的地址的项。如果是送给本机的数据包,则传至上层协议,如不是,则需要转发。关于上层怎么处理,以及如何转发。