Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1232130
  • 博文数量: 177
  • 博客积分: 1528
  • 博客等级: 上尉
  • 技术积分: 1891
  • 用 户 组: 普通用户
  • 注册时间: 2010-12-15 18:03
文章分类

全部博文(177)

文章存档

2020年(1)

2018年(19)

2017年(4)

2016年(21)

2015年(40)

2014年(13)

2013年(26)

2012年(16)

2011年(37)

我的朋友

分类: LINUX

2011-04-06 12:00:20

1. 前言  
 
本文简要介绍数据包在进入桥网卡后在Linux网络协议栈的处理流程,并描述netfilter的hook点的挂接处理情况,具体各部分的详细处理待后续文章中说明。  
 
以下内核代码版本为2.6.19.2.  
 
2. 函数处理流程  
bridge入口点handle_bridge()   
  1. /* net/core/dev.c */
  2. int netif_receive_skb(struct sk_buff *skb)
  3. {
  4.     //......
  5.     if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
  6.          goto out;
  7.     //......
  8.   }
 
bridge基本挂接点处理函数:br_handle_frame_hook()   
    
  1. static __inline__ int handle_bridge(struct sk_buff **pskb,
  2.         struct packet_type **pt_prev, int *ret,
  3.         struct net_device *orig_dev)
  4. {
  5.     struct net_bridge_port *port;
  6.     if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
  7.             (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
  8.         return 0;
  9.     if (*pt_prev) {
  10.         *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
  11.         *pt_prev = NULL;
  12.     }
  13.     
  14.     return br_handle_frame_hook(port, pskb);
  15. }
 
br_handle_frame_hook()的实际实现:   
  1. /* net/bridge/br.c */
  2. static int __init br_init(void)
  3. {
  4.     //......
  5.     br_handle_frame_hook = br_handle_frame;
  6.     //......
  7.   }
 
br_handle_frame: PF_BRIDGE的prerouting点  
  1. /* net/bridge/br_input.c */
  2. int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
  3. {
  4.      struct sk_buff *skb = *pskb;
  5.      const unsigned char *dest = eth_hdr(skb)->h_dest;
  6.      if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
  7.          goto err;
  8.      if (unlikely(is_link_local(dest))) {
  9.          // 自身包进入PF_BRIDGE的INPUT点, 一般处理的包数不多
  10.          skb->pkt_type = PACKET_HOST;
  11.          // 正常是返回1的, 然后就返回1, 表示桥模块全权处理该包了

  12.          return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
  13.                   NULL, br_handle_local_finish) != 0;
  14.      }
  15.      if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) {
  16.          // br_should_route_hook函数一般没定义
  17.           if (br_should_route_hook) {
  18.             if (br_should_route_hook(pskb))
  19.                 return 0;
  20.             skb = *pskb;
  21.             dest = eth_hdr(skb)->h_dest;
  22.          }
  23.          if (!compare_ether_addr(p->br->dev->dev_addr, dest))
  24.              skb->pkt_type = PACKET_HOST;
  25.          // PF_BRIDGE的prerouting处理结束后进入br_handle_frame_finish

  26.          NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
  27.          br_handle_frame_finish);
  28.          // 处理后始终返回1, 表示不再进行其他协议族处理,该数据包已经完全由bridge处理完毕

  29.          return 1;
  30.      }
  31. err:
  32.     kfree_skb(skb);
  33.     // 处理后始终返回1, 表示不再进行其他协议族处理,该数据包已经完全由bridge处理完毕

  34.     return 1;
  35. }
通过br_handle_frame_finish进入bridge的转发:  
  1. /* note: already called with rcu_read_lock (preempt_disabled) */
  2. int br_handle_frame_finish(struct sk_buff *skb)
  3. {
  4.      const unsigned char *dest = eth_hdr(skb)->h_dest;
  5.      struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
  6.      struct net_bridge *br;
  7.      struct net_bridge_fdb_entry *dst;
  8.      int passedup = 0;
  9.      if (!p || p->state == BR_STATE_DISABLED)
  10.          goto drop;
  11.      /* insert into forwarding database after filtering to avoid spoofing */
  12.      br = p->br;
  13.      br_fdb_update(br, p, eth_hdr(skb)->h_source);
  14.      if (p->state == BR_STATE_LEARNING)
  15.          goto drop;
  16.      if (br->dev->flags & IFF_PROMISC) {
  17.          struct sk_buff *skb2;
  18.          skb2 = skb_clone(skb, GFP_ATOMIC);
  19.          if (skb2 != NULL) {
  20.              passedup = 1;
  21.              br_pass_frame_up(br, skb2);
  22.          }
  23.      }
  24.      if (is_multicast_ether_addr(dest)) {
  25.          // 多播转发,也是调用广播处理

  26.          br->statistics.multicast++;
  27.          br_flood_forward(br, skb, !passedup);
  28.          if (!passedup)
  29.              br_pass_frame_up(br, skb);
  30.          goto out;
  31.      }
  32.      // 根据目的MAC找目的出口

  33.      dst = __br_fdb_get(br, dest);
  34.      if (dst != NULL && dst->is_local) {
  35.          if (!passedup)
  36.              br_pass_frame_up(br, skb);
  37.          else
  38.              kfree_skb(skb);
  39.          goto out;
  40.      }
  41.      if (dst != NULL) {
  42.          // 单播转发
  43.            br_forward(dst->dst, skb);
  44.           goto out;
  45.      }
  46.      // 转发表中未找到目的MAC, 按广播方式向各端口泛洪转发

  47.      br_flood_forward(br, skb, 0);
  48. out:
  49.      return 0;
  50. drop:
  51.      kfree_skb(skb);
  52.      goto out;
  53. }
广播/多播转发: br_flood_forward/br_flood   
  1. /* called under bridge lock */
  2. void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone)
  3. {
  4.  br_flood(br, skb, clone, __br_forward);
  5. }
  6. /* called under bridge lock */
  7. static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone,
  8.  void (*__packet_hook)(const struct net_bridge_port *p,
  9.          struct sk_buff *skb))
  10. {
  11.  struct net_bridge_port *p;
  12.  struct net_bridge_port *prev;
  13.  if (clone) {
  14.   struct sk_buff *skb2;
  15.   if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
  16.    br->statistics.tx_dropped++;
  17.    return;
  18.   }
  19.   skb = skb2;
  20.  }
  21.  prev = NULL;
  22.  list_for_each_entry_rcu(p, &br->port_list, list) {
  23.   if (should_deliver(p, skb)) {
  24.    if (prev != NULL) {
  25.     struct sk_buff *skb2;
  26.     if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
  27.      br->statistics.tx_dropped++;
  28.      kfree_skb(skb);
  29.      return;
  30.     }
  31. // 这里实际是__br_forward

  32.     __packet_hook(prev, skb2);
  33.    }
  34.    prev = p;
  35.   }
  36.  }
  37.  if (prev != NULL) {
  38. // 这里实际是__br_forward

  39.   __packet_hook(prev, skb);
  40.   return;
  41.  }
  42.  kfree_skb(skb);
  43. }
   
单播转发: br_forward  
  1. /* net/bridge/br_forward.c */
  2. /* called with rcu_read_lock */
  3. void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
  4. {
  5.  if (should_deliver(to, skb)) {
  6. // 也是调用__br_forward

  7.   __br_forward(to, skb);
  8.   return;
  9.  }
  10.  kfree_skb(skb);
  11. }
FORWARD点:   
  1. static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
  2. {
  3.  struct net_device *indev;
  4.  indev = skb->dev;
  5.  skb->dev = to->dev;
  6.  skb->ip_summed = CHECKSUM_NONE;
  7. // 进入PF_BRIDGE的forward hook, 结束后进入br_forward_finish()

  8.  NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
  9.    br_forward_finish);
  10. }
 
POSTROUTING点:  
  1. // 从FORWARD点处理后直接进入POSTROUTING点处理

  2. int br_forward_finish(struct sk_buff *skb)
  3. {
  4. // 进入PF_BRIDGE的postrouting hook, 结束后进入br_dev_queue_push_xmit()

  5.  return NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
  6.          br_dev_queue_push_xmit);
  7. }
数据包发出:  
  1. int br_dev_queue_push_xmit(struct sk_buff *skb)
  2. {
  3.  /* drop mtu oversized packets except gso */
  4.  if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
  5.   kfree_skb(skb);
  6.  else {
  7.   /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
  8.   if (nf_bridge_maybe_copy_header(skb))
  9.    kfree_skb(skb);
  10.   else {
  11.    skb_push(skb, ETH_HLEN);
  12. // 此处调用dev设备的hard_start_xmit()函数

  13.    dev_queue_xmit(skb);
  14.   }
  15.  }
  16.  return 0;
  17. }
桥网卡设备的hard_start_xmit()函数定义为:   
  1. /* net/bridge/br_device.c */
  2. void br_dev_setup(struct net_device *dev)
  3. {
  4. //......

  5.  dev->hard_start_xmit = br_dev_xmit;
  6. //......

  7. }
  8. /* net device transmit always called with no BH (preempt_disabled) */
  9. int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
  10. {
  11.  struct net_bridge *br = netdev_priv(dev);
  12.  const unsigned char *dest = skb->data;
  13.  struct net_bridge_fdb_entry *dst;
  14.  br->statistics.tx_packets++;
  15.  br->statistics.tx_bytes += skb->len;
  16.  skb->mac.raw = skb->data;
  17.  skb_pull(skb, ETH_HLEN);
  18.  if (dest[0] & 1)
  19. // 多播/广播发送(目的MAC首字节最低位为1)

  20.   br_flood_deliver(br, skb, 0);
  21.  else if ((dst = __br_fdb_get(br, dest)) != NULL)
  22. // 单播发送

  23.   br_deliver(dst->dst, skb);
  24.  else
  25. // 转发表中未找到目的MAC, 按广播方式向各端口泛洪发送

  26.   br_flood_deliver(br, skb, 0);
  27. // 这些发送函数最终都会调用__br_deliver()函数

  28.  return 0;
  29. }
  30.   
  31. /* net/bridge/br_forward.c */
  32. static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
  33. {
  34.  skb->dev = to->dev;
  35. // 此处是PF_BRIDGE的OUTPUT点

  36.  NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
  37.    br_forward_finish);
  38. }
 
总结: PF_BRIDGE中的各个hook点和PF_INET不同, 可用下面的图表示:  
 
  PREROUTING --+--FORWARD-----POSTROUTING------+----OUTPUT  
               |                               |  
               |                               |  
              INPUT  
   
3. PF_BRIDGE的hook点  
 
在net/bridge/br_netfilter.c中定义了以下hook点,注意这些hook点主要是PF_BRIDGE协议族的。   
  1. /* net/bridge/br_netfilter.c */
  2. /* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
  3.  * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
  4.  * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
  5.  * ip_refrag() can return NF_STOLEN. */
  6. static struct nf_hook_ops br_nf_ops[] = {
  7. // PF_BRIDGE的挂接点

  8. // PREROUTING点

  9.  { .hook = br_nf_pre_routing,
  10.    .owner = THIS_MODULE,
  11.    .pf = PF_BRIDGE,
  12.    .hooknum = NF_BR_PRE_ROUTING,
  13.    .priority = NF_BR_PRI_BRNF, },
  14. // INPUT点

  15.  { .hook = br_nf_local_in,
  16.    .owner = THIS_MODULE,
  17.    .pf = PF_BRIDGE,
  18.    .hooknum = NF_BR_LOCAL_IN,
  19.    .priority = NF_BR_PRI_BRNF, },
  20. // FORWARD点

  21.  { .hook = br_nf_forward_ip,
  22.    .owner = THIS_MODULE,
  23.    .pf = PF_BRIDGE,
  24.    .hooknum = NF_BR_FORWARD,
  25.    .priority = NF_BR_PRI_BRNF - 1, },
  26. // FORWARD点

  27.  { .hook = br_nf_forward_arp,
  28.    .owner = THIS_MODULE,
  29.    .pf = PF_BRIDGE,
  30.    .hooknum = NF_BR_FORWARD,
  31.    .priority = NF_BR_PRI_BRNF, },
  32. // OUTPUT点

  33.  { .hook = br_nf_local_out,
  34.    .owner = THIS_MODULE,
  35.    .pf = PF_BRIDGE,
  36.    .hooknum = NF_BR_LOCAL_OUT,
  37.    .priority = NF_BR_PRI_FIRST, },
  38. // POSTROUTING点

  39.  { .hook = br_nf_post_routing,
  40.    .owner = THIS_MODULE,
  41.    .pf = PF_BRIDGE,
  42.    .hooknum = NF_BR_POST_ROUTING,
  43.    .priority = NF_BR_PRI_LAST, },
  44. // 后面是PF_INET/PF_INET6的挂接点, 其实也没进行什么数据包操作,

  45. // 就是自身的输入输出包不通过桥处理,要短路掉

  46.  { .hook = ip_sabotage_in,
  47.    .owner = THIS_MODULE,
  48.    .pf = PF_INET,
  49.    .hooknum = NF_IP_PRE_ROUTING,
  50.    .priority = NF_IP_PRI_FIRST, },
  51.  { .hook = ip_sabotage_in,
  52.    .owner = THIS_MODULE,
  53.    .pf = PF_INET6,
  54.    .hooknum = NF_IP6_PRE_ROUTING,
  55.    .priority = NF_IP6_PRI_FIRST, },
  56.  { .hook = ip_sabotage_out,
  57.    .owner = THIS_MODULE,
  58.    .pf = PF_INET,
  59.    .hooknum = NF_IP_FORWARD,
  60.    .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, },
  61.  { .hook = ip_sabotage_out,
  62.    .owner = THIS_MODULE,
  63.    .pf = PF_INET6,
  64.    .hooknum = NF_IP6_FORWARD,
  65.    .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, },
  66.  { .hook = ip_sabotage_out,
  67.    .owner = THIS_MODULE,
  68.    .pf = PF_INET,
  69.    .hooknum = NF_IP_LOCAL_OUT,
  70.    .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
  71.  { .hook = ip_sabotage_out,
  72.    .owner = THIS_MODULE,
  73.    .pf = PF_INET6,
  74.    .hooknum = NF_IP6_LOCAL_OUT,
  75.    .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
  76.  { .hook = ip_sabotage_out,
  77.    .owner = THIS_MODULE,
  78.    .pf = PF_INET,
  79.    .hooknum = NF_IP_POST_ROUTING,
  80.    .priority = NF_IP_PRI_FIRST, },
  81.  { .hook = ip_sabotage_out,
  82.    .owner = THIS_MODULE,
  83.    .pf = PF_INET6,
  84.    .hooknum = NF_IP6_POST_ROUTING,
  85.    .priority = NF_IP6_PRI_FIRST, },
  86. };
  87.   
  88. // PF_BRIDGE的PREROUTING点处理函数

  89. static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
  90.           const struct net_device *in,
  91.           const struct net_device *out,
  92.           int (*okfn)(struct sk_buff *))
  93. {
  94. ......
  95. // 此处继续调用PF_INET族的PREROUTING点的hook处理

  96.  NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
  97.   br_nf_pre_routing_finish);
  98.  return NF_STOLEN;
  99. inhdr_error:
  100. // IP_INC_STATS_BH(IpInHdrErrors);

  101. out:
  102.  return NF_DROP;
  103. }
  104.   
  105. // PF_BRIDGE的FORWARD点处理

  106. static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
  107.          const struct net_device *in,
  108.          const struct net_device *out,
  109.          int (*okfn)(struct sk_buff *))
  110. {
  111. ......
  112. // 此处继续调用PF_INET/PF_INET6族的FORWARD点的hook处理

  113.  NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), parent,
  114.   br_nf_forward_finish);
  115.  return NF_STOLEN;
  116. }
  117. // PF_BRIDGE的OUTPUT点处理

  118. static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
  119.         const struct net_device *in,
  120.         const struct net_device *out,
  121.         int (*okfn)(struct sk_buff *))
  122. {
  123. ......
  124.  /* IP forwarded traffic has a physindev, locally
  125.   * generated traffic hasn't. */
  126.  if (realindev != NULL) {
  127.   if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) {
  128.    struct net_device *parent = bridge_parent(realindev);
  129.    if (parent)
  130.     realindev = parent;
  131.   }
  132. // 此处继续调用PF_INET/PF_INET6族的FORWARD点的hook处理, 不过优先权值要在

  133. // NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1以上
  134.   NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev,
  135.           realoutdev, br_nf_local_out_finish,
  136.           NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
  137.  } else {
  138. // 此处继续调用PF_INET/PF_INET6族的OUTPUT(LOCAL_OUT)点的hook处理, 不过优先权值要在

  139. // NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1以上

  140.   NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev,
  141.           realoutdev, br_nf_local_out_finish,
  142.           NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
  143.  }
  144. out:
  145.  return NF_STOLEN;
  146. }
  147.   
  148. // PF_BRIDGE的POSTROUTING点

  149. static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
  150.            const struct net_device *in,
  151.            const struct net_device *out,
  152.            int (*okfn)(struct sk_buff *))
  153. {
  154. ......
  155. // 此处继续调用PF_INET/PF_INET6族的POSTROUTING点的hook处理

  156.  NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev,
  157.   br_nf_dev_queue_xmit);
  158.  return NF_STOLEN;
  159. #ifdef CONFIG_NETFILTER_DEBUG
  160. print_error:
  161.  if (skb->dev != NULL) {
  162.   printk("[%s]", skb->dev->name);
  163.   if (realoutdev)
  164.    printk("[%s]", realoutdev->name);
  165.  }
  166.  printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
  167.         skb->data);
  168.  dump_stack();
  169.  return NF_ACCEPT;
  170. #endif
  171. }
 
由此可见, PF_INET的各个hook点也被PF_BRIDGE的各个hook点调用,因此可以在桥网卡中进行过滤或NAT等操作。  
 
4. 结论  
 
BRIDGE的数据处理流程是一个独立的处理过程, 如果处理正常的话就不再返回到其他协议处理。  
在桥的处理层次也和IP协议一样,可以挂接多个PF_BRIDGE的挂接点,这些挂接点中又调用了PF_INET族的挂接点,从而实现了桥下的过滤、NAT等功能。 

阅读(5810) | 评论(0) | 转发(1) |
给主人留下些什么吧!~~