上一节只是说了关于网桥的接收处理,这里分析下网桥模块的初始化工作. 对于桥的用户空间配置工具一般来说有brctl和ebtables.
参考内核 2.6.32.61 kernel/net/bridge/*
我们来看一下br.c中br_init函数
-
/*
 * br_init - bridge module initialisation (br.c), annotated excerpt.
 *
 * Performs, in order: stp_proto_register(), br_fdb_init(),
 * register_pernet_subsys(), br_netfilter_init(),
 * register_netdevice_notifier() and br_netlink_init(); then installs the
 * ioctl stub and the br_handle_frame hook. Returns 0 on success. On failure
 * it jumps to the matching err_out* label, which undoes the steps that had
 * already succeeded in reverse order, and returns the error code.
 *
 * NOTE: the definitions of br_stp_proto and br_netfilter_init() are pasted
 * inline right after their first use for reading convenience; they are
 * separate file-scope definitions, not part of the body of br_init().
 */
static int __init br_init(void)
-
{
-
int err;
-
-
err = stp_proto_register(&br_stp_proto); // register the STP protocol handler; per the article author it is placed in garp_protos and used by stp_pdu_rcv in net/802/stp.c (NOTE(review): storage location is not visible here - confirm)
-
-
/* [inline excerpt] definition of br_stp_proto, shown for reference */
static const struct stp_proto br_stp_proto = {
-
.rcv = br_stp_rcv,
-
};
-
/* [end of excerpt] back in br_init(): check the result of stp_proto_register() */
if (err < 0) {
-
printk(KERN_ERR "bridge: can't register sap for STP\n");
-
return err;
-
}
-
-
err = br_fdb_init(); // create the fdb cache
-
if (err)
-
goto err_out;
-
-
err = register_pernet_subsys(&br_net_ops); // register the bridge pernet subsystem (mainly network-namespace handling)
-
if (err)
-
goto err_out1;
-
-
err = br_netfilter_init(); // initialise the bridge part of netfilter; per the author this is what the ebtables tool configures
-
-
/*
 * [inline excerpt] definition of br_netfilter_init(), shown for reference.
 * Registers the br_nf_ops hooks and, with CONFIG_SYSCTL, the sysctl table;
 * if the sysctl registration fails the hooks are unregistered again and
 * -ENOMEM is returned.
 */
int __init br_netfilter_init(void)
-
{
-
int ret;
-
-
ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
-
if (ret < 0)
-
return ret;
-
#ifdef CONFIG_SYSCTL
-
brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table);
-
if (brnf_sysctl_header == NULL) {
-
printk(KERN_WARNING
-
"br_netfilter: can't register to sysctl.\n");
-
nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
-
return -ENOMEM;
-
}
-
#endif
-
printk(KERN_NOTICE "Bridge firewalling registered\n");
-
return 0;
-
}
-
/* [end of excerpt] back in br_init(): check the result of br_netfilter_init() */
if (err)
-
goto err_out2;
-
-
err = register_netdevice_notifier(&br_device_notifier); // register the notifier for the netdevice events the bridge cares about
-
if (err)
-
goto err_out3;
-
-
err = br_netlink_init(); // netlink
-
if (err)
-
goto err_out4;
-
-
brioctl_set(br_ioctl_deviceless_stub); // ioctl related to socket/netlink, not the ioctl of the device itself
-
br_handle_frame_hook = br_handle_frame;
-
-
#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
-
br_fdb_test_addr_hook = br_fdb_test_addr;
-
#endif
-
-
return 0;
-
/* Error unwinding: each label undoes one earlier step and falls through to the next. */
err_out4:
-
unregister_netdevice_notifier(&br_device_notifier);
-
err_out3:
-
br_netfilter_fini();
-
err_out2:
-
unregister_pernet_subsys(&br_net_ops);
-
err_out1:
-
br_fdb_fini();
-
err_out:
-
stp_proto_unregister(&br_stp_proto);
-
return err;
-
}
这里没有分析什么,初始化的部分内容不是很多,但是都很重要.
下面分析一下桥的创建以及添加接口. 在br_if.c中
-
/*
 * br_add_bridge - create and register a bridge device called @name in @net.
 *
 * Returns 0 on success, -ENOMEM when new_bridge_dev() yields NULL, otherwise
 * the error from dev_alloc_name(), register_netdevice() or br_sysfs_addbr().
 * Name allocation and registration run under rtnl_lock(). A failure before
 * the device is registered frees it with free_netdev(); a failure in
 * br_sysfs_addbr() unregisters the already-registered device instead.
 */
int br_add_bridge(struct net *net, const char *name)
-
{
-
struct net_device *dev;
-
int ret;
-
-
dev = new_bridge_dev(net, name); // the core of this function: allocate the net_device and initialise it
-
if (!dev)
-
return -ENOMEM;
-
-
rtnl_lock();
-
if (strchr(dev->name, '%')) {
-
ret = dev_alloc_name(dev, dev->name); // name contains '%': allocate a concrete name from the template
-
if (ret < 0)
-
goto out_free;
-
}
-
-
SET_NETDEV_DEVTYPE(dev, &br_type);
-
-
ret = register_netdevice(dev); // register the device
-
if (ret)
-
goto out_free;
-
-
ret = br_sysfs_addbr(dev); // sysfs-related setup
-
if (ret)
-
unregister_netdevice(dev);
-
out:
-
rtnl_unlock();
-
return ret;
-
-
/* Reached only before a successful register_netdevice(): free, then leave via out. */
out_free:
-
free_netdev(dev);
-
goto out;
-
}
我们看new_bridge_dev
-
/*
 * new_bridge_dev - allocate a bridge net_device and fill in its defaults.
 *
 * Allocates a net_device whose private area holds a struct net_bridge
 * (alloc_netdev() with br_dev_setup as the setup callback), binds it to @net
 * and initialises the bridge fields: locks, port list, bridge id priority
 * bytes 0x80/0x00, group address, STP disabled, max_age 20*HZ,
 * hello_time 2*HZ, forward_delay 15*HZ, ageing_time 300*HZ, the fake route
 * table, the age list and the timers. Returns the device, or NULL when
 * alloc_netdev() fails.
 *
 * NOTE: the definitions of br_dev_setup(), br_netfilter_rtable_init() and
 * br_stp_timer_init() are pasted inline next to their use for reading
 * convenience; they are separate functions, not part of this body.
 */
static struct net_device *new_bridge_dev(struct net *net, const char *name)
-
{
-
struct net_bridge *br;
-
struct net_device *dev;
-
-
dev = alloc_netdev(sizeof(struct net_bridge), name, // allocate the device and initialise it via br_dev_setup (same pattern as an ordinary NIC driver, per the author)
-
-
/* [inline excerpt] definition of br_dev_setup(), the callback passed as the last alloc_netdev() argument */
void br_dev_setup(struct net_device *dev)
-
{
-
random_ether_addr(dev->dev_addr);
-
ether_setup(dev);
-
-
dev->netdev_ops = &br_netdev_ops; // install the bridge's netdev operations
-
dev->destructor = free_netdev;
-
SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
-
dev->tx_queue_len = 0;
-
dev->priv_flags = IFF_EBRIDGE;
-
-
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
-
NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
-
NETIF_F_NETNS_LOCAL | NETIF_F_GSO;
-
}
-
/* [end of excerpt] the line below closes the alloc_netdev() call started above */
br_dev_setup);
-
-
if (!dev)
-
return NULL;
-
dev_net_set(dev, net); // network namespace
-
-
br = netdev_priv(dev); // br points at the net_device private area
-
br->dev = dev;
-
-
spin_lock_init(&br->lock);
-
INIT_LIST_HEAD(&br->port_list);
-
spin_lock_init(&br->hash_lock);
-
-
br->bridge_id.prio[0] = 0x80; // default bridge priority is 0x8000
-
br->bridge_id.prio[1] = 0x00;
-
-
memcpy(br->group_addr, br_group_address, ETH_ALEN);
-
-
br->feature_mask = dev->features; // initialisation of the br fields
-
br->stp_enabled = BR_NO_STP; // STP is off by default
-
br->designated_root = br->bridge_id;
-
br->root_path_cost = 0;
-
br->root_port = 0; // root port defaults to 0
-
br->bridge_max_age = br->max_age = 20 * HZ;
-
br->bridge_hello_time = br->hello_time = 2 * HZ;
-
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
-
br->topology_change = 0;
-
br->topology_change_detected = 0;
-
br->ageing_time = 300 * HZ;
-
-
br_netfilter_rtable_init(br); // initialise the bridge's route info (PMTU); see that function's own comment below
-
-
/* [inline excerpt] br_netfilter_rtable_init() together with its original kernel comment */
/*
-
* Initialize bogus route table used to keep netfilter happy.
-
* Currently, we fill in the PMTU entry because netfilter
-
* refragmentation needs it, and the rt_flags entry because
-
* ipt_REJECT needs it. Future netfilter modules might
-
* require us to fill additional fields.
-
*/
-
void br_netfilter_rtable_init(struct net_bridge *br)
-
{
-
struct rtable *rt = &br->fake_rtable;
-
-
atomic_set(&rt->u.dst.__refcnt, 1);
-
rt->u.dst.dev = br->dev;
-
rt->u.dst.path = &rt->u.dst;
-
rt->u.dst.metrics[RTAX_MTU - 1] = 1500;
-
rt->u.dst.flags = DST_NOXFRM;
-
rt->u.dst.ops = &fake_dst_ops;
-
}
-
-
/* [end of excerpt] back in new_bridge_dev() */
INIT_LIST_HEAD(&br->age_list); // initialise the ageing list
-
-
br_stp_timer_init(br); // the STP-related timers, four of them: hello_timer, tcn_timer, topology_change_timer, gc_timer
-
-
/* [inline excerpt] definition of br_stp_timer_init(); every timer gets br as its callback argument */
void br_stp_timer_init(struct net_bridge *br)
-
{
-
setup_timer(&br->hello_timer, br_hello_timer_expired,
-
(unsigned long) br);
-
-
setup_timer(&br->tcn_timer, br_tcn_timer_expired,
-
(unsigned long) br);
-
-
setup_timer(&br->topology_change_timer,
-
br_topology_change_timer_expired,
-
(unsigned long) br);
-
-
setup_timer(&br->gc_timer, br_fdb_cleanup, (unsigned long) br);
-
}
-
/* [end of excerpt] back in new_bridge_dev() */
return dev;
-
}
这里简单说下定时器的作用:它主要跟stp相关
hello timer:用于定期产生配置bpdu。只有根网桥可以使用该定时器
tcn timer:由检测到拓扑变化而且必须通知根网桥的网桥使用.
Topology change timer: (拓扑变化 TC)
由根网桥使用,以便记住要在其配置bpdu中设定一个特殊标志。此标志用于将拓扑变化通知其他网桥(非根)
Aging 定时器 (gc timer)
用于从转发数据库中清除无效的地址。该定时器由网桥使用,而不论其是否启用stp
我们继续看看往桥里添加一个接口:
关于这个函数的调用,可以参考brctl 命令的使用 例如:brctl addif br0 eth0
之所以要提这一点,主要是因为刚才我们注册的br本身也是一个netdevice设备,那么struct net_bridge在哪里呢?它保存在netdevice的私有空间里(通过netdev_priv取得),看下ioctl的具体操作就明白了。
-
/*
 * br_dev_ioctl - ioctl handler of a bridge net_device.
 *
 * The struct net_bridge is obtained from the device's private area with
 * netdev_priv(). SIOCDEVPRIVATE is forwarded to old_dev_ioctl();
 * SIOCBRADDIF / SIOCBRDELIF call add_del_if() with the interface index taken
 * from @rq and a flag that is true for "add". Any other command is logged
 * with pr_debug() and rejected with -EOPNOTSUPP.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
-
{
-
struct net_bridge *br = netdev_priv(dev);
-
-
switch(cmd) {
-
case SIOCDEVPRIVATE:
-
return old_dev_ioctl(dev, rq, cmd);
-
-
/* add and delete share one helper; the last argument selects which */
case SIOCBRADDIF:
-
case SIOCBRDELIF:
-
return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);
-
-
}
-
-
pr_debug("Bridge does not support ioctl 0x%x\n", cmd);
-
return -EOPNOTSUPP;
-
}
添加桥端口函数:br_add_if
-
/*
 * br_add_if - attach net_device @dev to bridge @br as a new port.
 * Called with RTNL held.
 *
 * Returns 0 on success. Returns -EINVAL for loopback or non-Ethernet
 * devices, -ELOOP when @dev is itself a bridge, -EBUSY when @dev already has
 * a br_port, or the error from new_nbp(), dev_set_promiscuity(),
 * kobject_init_and_add(), br_fdb_insert() or br_sysfs_addif().
 * The err2/err1/err0/put_back labels undo the completed steps in reverse
 * order; each label falls through to the next.
 */
-
int br_add_if(struct net_bridge *br, struct net_device *dev)
-
{
-
struct net_bridge_port *p;
-
int err = 0;
-
-
/* Don't allow bridging non-ethernet like devices */ // bail out for devices that are not Ethernet-like
-
if ((dev->flags & IFF_LOOPBACK) ||
-
dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN)
-
return -EINVAL;
-
-
/* No bridging of bridges */
-
if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) // a bridge device cannot be added to a bridge
-
return -ELOOP;
-
-
/* Device is already being bridged */ // a device that is already a port of some bridge cannot join another one
-
if (dev->br_port != NULL)
-
return -EBUSY;
-
-
p = new_nbp(br, dev); // create the bridge port; per the author the port state defaults to blocking, it is a designated port, and the port timers are initialised (new_nbp body not shown here)
-
if (IS_ERR(p))
-
return PTR_ERR(p);
-
-
err = dev_set_promiscuity(dev, 1);
-
if (err)
-
goto put_back;
-
-
err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
-
SYSFS_BRIDGE_PORT_ATTR);
-
if (err)
-
goto err0;
-
-
err = br_fdb_insert(br, p, dev->dev_addr); // add the device's own MAC to the fdb (MAC -> port)
-
if (err)
-
goto err1;
-
-
err = br_sysfs_addif(p);
-
if (err)
-
goto err2;
-
-
rcu_assign_pointer(dev->br_port, p);
-
dev_disable_lro(dev);
-
-
list_add_rcu(&p->list, &br->port_list); // add the port to br's port_list
-
-
spin_lock_bh(&br->lock);
-
br_stp_recalculate_bridge_id(br);
-
br_features_recompute(br);
-
-
if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
-
(br->dev->flags & IFF_UP))
-
br_stp_enable_port(p); // per the author the port heads towards the forwarding state here, depending on the situation
-
spin_unlock_bh(&br->lock);
-
-
br_ifinfo_notify(RTM_NEWLINK, p);
-
-
dev_set_mtu(br->dev, br_min_mtu(br)); // set the bridge device's MTU to br_min_mtu(br)
-
-
kobject_uevent(&p->kobj, KOBJ_ADD);
-
-
return 0;
-
err2:
-
br_fdb_delete_by_port(br, p, 1);
-
err1:
-
kobject_put(&p->kobj);
-
p = NULL; /* kobject_put frees */
-
err0:
-
dev_set_promiscuity(dev, -1);
-
put_back:
-
dev_put(dev);
-
kfree(p);
-
return err;
-
}
图为桥和端口和fdb entry以及netdevice间的关系.
这里说一下port timers:
-
/*
 * br_stp_port_timer_init - set up the three per-port STP timers.
 *
 * Binds message_age_timer, forward_delay_timer and hold_timer of port @p to
 * their expiry handlers; each handler receives @p (cast to unsigned long) as
 * its argument. The timers are only set up here, not started.
 */
void br_stp_port_timer_init(struct net_bridge_port *p)
-
{
-
setup_timer(&p->message_age_timer, br_message_age_timer_expired,
-
(unsigned long) p);
-
-
setup_timer(&p->forward_delay_timer, br_forward_delay_timer_expired,
-
(unsigned long) p);
-
-
setup_timer(&p->hold_timer, br_hold_timer_expired,
-
(unsigned long) p);
-
}
message_age_timer: 由于bpdu携带的信息生存期是有限的,该定时器用于强制执行这个生存期(仅针对收到的bpdu):
消息生存期 > 最大值 :丢弃
消息生存期 < 最大值 : 启动Message Age定时器.(时间为差值)
forward_delay_timer:负责状态转移,机制如图
hold_timer:限制端口bpdu的发送速率
我们这里看一下br_make_forwarding函数:
-
/*
 * br_make_forwarding - move a blocking port towards the forwarding state.
 * Called under bridge lock.
 *
 * Does nothing unless @p is in BR_STATE_BLOCKING. With a zero forward_delay
 * the port goes straight to BR_STATE_FORWARDING, topology-change detection
 * is run and the forward-delay timer is deleted. Otherwise the port enters
 * BR_STATE_LISTENING when kernel STP is enabled, or BR_STATE_LEARNING when
 * it is not, and the forward-delay timer is armed for forward_delay jiffies.
 */
-
static void br_make_forwarding(struct net_bridge_port *p)
-
{
-
struct net_bridge *br = p->br;
-
-
if (p->state != BR_STATE_BLOCKING)
-
return;
-
-
if (br->forward_delay == 0) {
-
p->state = BR_STATE_FORWARDING;
-
br_topology_change_detection(br);
-
del_timer(&p->forward_delay_timer);
-
}
-
else if (p->br->stp_enabled == BR_KERNEL_STP) // not enabled by default
-
p->state = BR_STATE_LISTENING;
-
else
-
p->state = BR_STATE_LEARNING; // the normal case
-
-
br_log_state(p);
-
-
/* non-zero delay: the timer handler performs the next state transition */
if (br->forward_delay != 0)
-
mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
-
}
这里br->forward_delay肯定不为0,见桥创建函数:
-
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
而stp默认是没有开启的,所以最后的状态是BR_STATE_LEARNING,也就是说,刚把端口加入桥的时候,在port timer(forward_delay_timer)到期之前,端口处于学习状态;而定时器到期后会触发:
-
/*
 * br_forward_delay_timer_expired - expiry handler of a port's forward_delay_timer.
 *
 * @arg is the struct net_bridge_port pointer that was passed to setup_timer().
 * Under br->lock: a LISTENING port becomes LEARNING and the timer is re-armed
 * for another forward_delay; a LEARNING port becomes FORWARDING, and
 * topology-change detection runs if br_is_designated_for_some_port(br) is
 * true. The resulting state is logged with br_log_state().
 */
static void br_forward_delay_timer_expired(unsigned long arg)
-
{
-
struct net_bridge_port *p = (struct net_bridge_port *) arg;
-
struct net_bridge *br = p->br;
-
-
pr_debug("%s: %d(%s) forward delay timer\n",
-
br->dev->name, p->port_no, p->dev->name);
-
spin_lock(&br->lock);
-
if (p->state == BR_STATE_LISTENING) {
-
p->state = BR_STATE_LEARNING;
-
mod_timer(&p->forward_delay_timer,
-
jiffies + br->forward_delay);
-
} else if (p->state == BR_STATE_LEARNING) {
-
p->state = BR_STATE_FORWARDING;
-
if (br_is_designated_for_some_port(br))
-
br_topology_change_detection(br);
-
}
-
br_log_state(p);
-
spin_unlock(&br->lock);
-
}
它把端口状态设置为了转发态.
关于桥下的环路问题,即生成树协议,这里不做讨论。有兴趣的可以参考《深入理解linux网络内幕》第十五章生成树协议,以及相关的协议标准(如IEEE 802.1D)。
新版生成树协议:
RSTP(802.1w)
MSTP(802.1s),针对多个vlan的
当然这里说的还是比较粗略,只是作为一个深入学习的引子.
阅读(8420) | 评论(0) | 转发(0) |