netif_receive_skb (dev.c) skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);//
if (!skb)//如果skb==NULL 就goto out;
goto out; /* * Called via br_handle_frame_hook. * Return NULL if skb is handled * note: already called with rcu_read_lock (preempt_disabled) */ /*//通过查看系统中是否配置了网桥或网桥模块,来执行不同的handle_bridge 函数。 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) /* These hooks defined here for ATM */ struct net_bridge; br_handle_frame_hook 在net/core/dev.c 中调用。 如果没有定义网桥或网桥模块,handle_bridge 只是一个直接返回skb 的空宏,不做任何处理。 如果定义了网桥或网桥模块,则定义三个HOOK 函数。 br_fdb_get_hook 函数 br_fdb_put_hook 函数 br_handle_frame_hook 函数 */ //三个HOOK 函数的原型。
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,unsigned char *addr); void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly; /* * If bridge module is loaded call bridging hook. * returns NULL if packet was consumed. */ struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff *skb) __read_mostly; static inline struct sk_buff *handle_bridge(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { struct net_bridge_port *port; ////如果是回环数据或者skb->dev->br_port :接收该数据包的端口是网桥端口组的
一员,如果接收当前数据包的接口不是网桥的某一物理端口,则其值为NULL,数据包不做任何修改就返回。 if (skb->pkt_type == PACKET_LOOPBACK || (port = rcu_dereference(skb->dev->br_port)) == NULL)//如果这个设备的br_port 为
空,说明它不属于网桥 return skb; if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } //定义了网桥处理函数时,这段代码将数据包进行转向,转向后的处理函数是钩子函数
br_handle_frame_hook //br_handle_frame_hook 是网桥处理的主函数HOOK。
return br_handle_frame_hook(port, skb); } #else #define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) #endif */ 网络包在网桥中的处理 br_handle_frame(br_input.c) /* * Called via br_handle_frame_hook. * Return NULL if skb is handled * note: already called with rcu_read_lock (preempt_disabled) */ //p 是接收该数据包的网桥的端口
struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; // //源地址不能是广播地址或者0 地址
if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; // //见后面分析:更新cam 表
if (unlikely(is_link_local(dest))) { /* Pause frames shouldn't be passed up by driver anyway */ if (skb->protocol == htons(ETH_P_PAUSE)) goto drop; /* Process STP BPDU's through normal netif_receive_skb() path */ if (p->br->stp_enabled != BR_NO_STP) { if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, NULL, br_handle_local_finish)) return NULL; else return skb; } } //察看端口所在的网桥的状态
// /*众所周知,网桥之所以是网桥,比HUB 更智能,是因为它有一个MAC-PORT 的表,
//这样转发数据就不用广播,而查表定端口就可以了。每次收到一个包,网桥都会学习其来源MAC,
//添加进这个表。这个表在其它资料上叫CAM 表,在本文的代码中对应转发数据库(forwarding database,即fdb)。
//如果端口的状态是LEARNING 或FORWARDING(学习或转发),
//则学习该包的源地址eth_hdr(skb)->h_source, 将其添加到 CAM 表中*/
switch (p->state) { case BR_STATE_FORWARDING: if (br_should_route_hook) { if (br_should_route_hook(&skb)) return skb; dest = eth_hdr(skb)->h_dest; } /* fall through */ case BR_STATE_LEARNING://学习源MAC 地址
if (!compare_ether_addr(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish); break; default: drop: kfree_skb(skb); } return NULL; } br_handle_frame_finish(br_input.c) /* note: already called with rcu_read_lock (preempt_disabled) */ int br_handle_frame_finish(struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); struct net_bridge *br; struct net_bridge_fdb_entry *dst; int passedup = 0; if (!p || p->state == BR_STATE_DISABLED)//当前网卡设备对应的网桥端口为空就
丢弃 goto drop; /* insert into forwarding database after filtering to avoid spoofing */ // //更新cam 表,防止spoof 数据包
br = p->br;//网卡所在的网桥设备
br_fdb_update(br, p, eth_hdr(skb)->h_source);//更新CAM 表
if (p->state == BR_STATE_LEARNING) goto drop; // /** 如果网桥的虚拟网卡处于混杂模式,那么每个接收到的数据包都需要克隆一份
//* 送到AF_PACKET 协议处理体(netif_receive_skb)。*/
if (br->dev->flags & IFF_PROMISC) { struct sk_buff *skb2; skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 != NULL) { passedup = 1; br_pass_frame_up(br, skb2);//传递到上层协议,用来对网桥对应的虚拟网
卡进行分析。 } } // /** 目的MAC 为广播或多播,则需要向本机的上层协议栈传送这个数据包,这里
//* 有一个标志变量passedup,用于表示是否传送过了,如果已传送过,那就算了*/
if (is_multicast_ether_addr(dest)) { br->statistics.multicast++; br_flood_forward(br, skb, !passedup); if (!passedup) br_pass_frame_up(br, skb); goto out; } // /*查询CAM 表*/
dst = __br_fdb_get(br, dest); //用户层常需要用到一个虚拟的地址来管理网桥,如果目的地址对应的表项非空,且为本地地址,则
交由上层函数处理*/ if (dst != NULL && dst->is_local) { if (!passedup) br_pass_frame_up(br, skb); else kfree_skb(skb); goto out; } // /* 转发 */如果找到端口,就直接转发过去。
if (dst != NULL) { br_forward(dst->dst, skb); goto out; } // /*如果表里边查不到,那么发送到网桥的所有的接口……*/
br_flood_forward(br, skb, 0); out: return 0; drop: kfree_skb(skb); goto out; } br_forward(转发到对应的接口)(br_forward.c) /* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) { if (should_deliver(to, skb)) {//判断netfilter 规则
__br_forward(to, skb); return; } kfree_skb(skb); } __br_forward(br_forward.c) static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) { struct net_device *indev; indev = skb->dev;//来源设备
skb->dev = to->dev;//更改一下传递的设备,目的设备
skb_forward_csum(skb); //将网络包,从来源设备indev,传递到目的设备skb->dev 中
NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, br_forward_finish); } br_forward_finish(br_forward.c) int br_forward_finish(struct sk_buff *skb) { return NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, br_dev_queue_push_xmit); } br_dev_queue_push_xmit int br_dev_queue_push_xmit(struct sk_buff *skb)//skb->dev 表示的是目的设备
{ /* drop mtu oversized packets except gso */如果包的mtu 值大于设备的mtu 值,就抛弃 它。 if (packet_length(skb) > skb‐>dev‐>mtu && !skb_is_gso(skb)) kfree_skb(skb); else { /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ if (nf_bridge_maybe_copy_header(skb)) kfree_skb(skb); else { skb_push(skb, ETH_HLEN); dev_queue_xmit(skb); } } return 0; } skb_push(src/include/linux/skbuff.h) /** * skb_push ‐ add data to the start of a buffer * @skb: buffer to use * @len: amount of data to add * * This function extends the used data area of the buffer at the buffer * start. If this would exceed the total buffer headroom the kernel will * panic. A pointer to the first byte of the extra data is returned. */ static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) { skb‐>data ‐= len; skb‐>len += len; if (unlikely(skb‐>data<skb‐>head)) skb_under_panic(skb, len, current_text_addr()); return skb‐>data; } dev_queue_xmit(src/net/core/dev.c) /** * dev_queue_xmit ‐ transmit a buffer * @skb: buffer to transmit * * Queue a buffer for transmission to a network device. The caller must * have set the device and priority and built the buffer before calling * this function. The function can be called from an interrupt. * * A negative errno code is returned on a failure. A success does not * guarantee the frame will be transmitted as it may be dropped due * to congestion or traffic shaping. * * ‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐‐ * I notice this method can also return errors from the queue disciplines, * including NET_XMIT_DROP, which is a positive value. So, errors can also * be positive. * * Regardless of the return value, the skb is consumed, so it is currently * difficult to retry a send to this method. (You can bump the ref count * before sending to hold a reference for retry if you are careful.) * * When calling this method, interrupts MUST be enabled. 
This is because * the BH enable code must have IRQs enabled so that it will not deadlock. * --BLG */ int dev_queue_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev;//包要传送的目的设备
struct Qdisc *q; int rc = ‐ENOMEM; /* GSO will handle the following emulations directly. */ if (netif_needs_gso(dev, skb)) goto gso; if (skb_shinfo(skb)‐>frag_list && !(dev‐>features & NETIF_F_FRAGLIST) && __skb_linearize(skb)) goto out_kfree_skb; /* Fragmented skb is linearized if device does not support SG, * or if at least one of fragments is in highmem and device * does not support DMA from it. */ if (skb_shinfo(skb)‐>nr_frags && (!(dev‐>features & NETIF_F_SG) || illegal_highdma(dev, skb)) && __skb_linearize(skb)) goto out_kfree_skb; /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. */ if (skb‐>ip_summed == CHECKSUM_PARTIAL) { skb_set_transport_header(skb, skb‐>csum_start ‐ skb_headroom(skb)); if (!(dev‐>features & NETIF_F_GEN_CSUM) && (!(dev‐>features & NETIF_F_IP_CSUM) || skb‐>protocol != htons(ETH_P_IP))) if (skb_checksum_help(skb)) goto out_kfree_skb; } gso: spin_lock_prefetch(&dev‐>queue_lock); /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ rcu_read_lock_bh(); /* Updates of qdisc are serialized by queue_lock. * The struct Qdisc which is pointed to by qdisc is now a * rcu structure ‐ it may be accessed without acquiring * a lock (but the structure may be stale.) The freeing of the * qdisc will be deferred until it's known that there are no * more references to it. * * If the qdisc has an enqueue function, we still need to * hold the queue_lock before calling it, since queue_lock * also serializes access to the device queue. */ q = rcu_dereference(dev‐>qdisc); #ifdef CONFIG_NET_CLS_ACT skb‐>tc_verd = SET_TC_AT(skb‐>tc_verd,AT_EGRESS); #endif if (q‐>enqueue) { /* Grab device queue */ spin_lock(&dev‐>queue_lock); q = dev‐>qdisc; if (q‐>enqueue) { rc = q‐>enqueue(skb, q); qdisc_run(dev); spin_unlock(&dev‐>queue_lock); rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; goto out; } spin_unlock(&dev‐>queue_lock); } /* The device has no queue. 
Common case for software devices: loopback, all the sorts of tunnels... Really, it is unlikely that netif_tx_lock protection is necessary here. (f.e. loopback and IP tunnels are clean ignoring statistics counters.) However, it is possible, that they rely on protection made by us here. Check this and shot the lock. It is not prone from deadlocks. Either shot noqueue qdisc, it is even simpler 8) */ if (dev‐>flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ if (dev‐>xmit_lock_owner != cpu) { HARD_TX_LOCK(dev, cpu); if (!netif_queue_stopped(dev)) { rc = 0; if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); goto out; } } HARD_TX_UNLOCK(dev); if (net_ratelimit()) printk(KERN_CRIT "Virtual device %s asks to " "queue packet!\n", dev‐>name); } else { /* Recursion is detected! It is possible, * unfortunately */ if (net_ratelimit()) printk(KERN_CRIT "Dead loop on virtual device " "%s, fix it urgently!\n", dev‐>name); } } rc = ‐ENETDOWN; rcu_read_unlock_bh(); out_kfree_skb: kfree_skb(skb); return rc; out: rcu_read_unlock_bh(); return rc; } dev_hard_start_xmit(设备的发送函数) int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { if (likely(!skb‐>next)) { if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); if (netif_needs_gso(dev, skb)) { if (unlikely(dev_gso_segment(skb))) goto out_kfree_skb; if (skb‐>next) goto gso; } return dev‐>hard_start_xmit(skb, dev); } gso: do { struct sk_buff *nskb = skb‐>next; int rc; skb‐>next = nskb‐>next; nskb‐>next = NULL; rc = dev‐>hard_start_xmit(nskb, dev); if (unlikely(rc)) { nskb‐>next = skb‐>next; skb‐>next = nskb; return rc; } if (unlikely(netif_queue_stopped(dev) && skb‐>next)) return NETDEV_TX_BUSY; } while (skb‐>next); skb‐>destructor = DEV_GSO_CB(skb)‐>destructor; out_kfree_skb: kfree_skb(skb); return 0; } br_flood_forward(转发到所有其他的接口)(br_forward.c) /* called under bridge lock */ void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone) { br_flood(br, skb, clone, 
__br_forward); } __br_forward(br_forward.c)(见前面的源码) br_flood(br_forward.c) /* called under bridge lock */ static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)) { struct net_bridge_port *p; struct net_bridge_port *prev; if (clone) { struct sk_buff *skb2; if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { br->statistics.tx_dropped++; return; } skb = skb2; } prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) {//对于网桥链表中的每一个端口(网
卡设备) if (should_deliver(p, skb)) {//应当接着传递,根据netfilter 规则
if (prev != NULL) { struct sk_buff *skb2; if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { br->statistics.tx_dropped++; kfree_skb(skb); return; } __packet_hook(prev, skb2);//将克隆来的网络包传递到设备prev 中。
} prev = p;//依次传递
} } if (prev != NULL) { __packet_hook(prev, skb); return; } kfree_skb(skb); }
|