Chinaunix首页 | 论坛 | 博客
  • 博客访问: 475400
  • 博文数量: 223
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 2145
  • 用 户 组: 普通用户
  • 注册时间: 2014-03-01 10:23
个人简介

该坚持的时候坚持,该妥协的时候妥协,该放弃的时候放弃

文章分类

全部博文(223)

文章存档

2017年(56)

2016年(118)

2015年(3)

2014年(46)

我的朋友

分类: 嵌入式

2016-11-23 23:17:02

一、网络结构
当PC有两个网卡,W1是192.168.1.10=====连接的路由器IP:192.168.1.1===PC2:168.1.1.10
W2是10.0.0.30====连接的路由器:10.0.0.1=====PC3
PC想要发送数据时,在路由表中查找目的地址168.1.1.10,得到下一跳网关:192.168.1.1(即数据经网卡W1发出)

二、UDP包数据流程
入口:首先是socket的文件对应的file_operations
  1. /*
  2.  *    Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
  3.  *    in the operation structures but are done directly via the socketcall() multiplexor.
  4.  */

  5. static const struct file_operations socket_file_ops = {
  6.     .owner =    THIS_MODULE,
  7.     .llseek =    no_llseek,
  8.     .aio_read =    sock_aio_read,                    //读入口
  9.     .aio_write =    sock_aio_write,                  //写入口
  10.     .poll =        sock_poll,
  11.     .unlocked_ioctl = sock_ioctl,
  12. #ifdef CONFIG_COMPAT
  13.     .compat_ioctl = compat_sock_ioctl,
  14. #endif
  15.     .mmap =        sock_mmap,
  16.     .open =        sock_no_open,    /* special open code to disallow open via /proc */
  17.     .release =    sock_close,
  18.     .fasync =    sock_fasync,
  19.     .sendpage =    sock_sendpage,
  20.     .splice_write = generic_splice_sendpage,
  21.     .splice_read =    sock_splice_read,
  22. };
sock_aio_write:
  1. static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
  2.              unsigned long nr_segs, loff_t pos)
  3. {
  4.     struct sock_iocb siocb, *x;

  5.     if (pos != 0)
  6.         return -ESPIPE;

  7.     x = alloc_sock_iocb(iocb, &siocb);
  8.     if (!x)
  9.         return -ENOMEM;

  10.     return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);         //调用do_sock_write
  11. }
do_sock_write:
  1. static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
  2.             struct file *file, const struct iovec *iov,
  3.             unsigned long nr_segs)
  4. {
  5.     struct socket *sock = file->private_data;
  6.     size_t size = 0;
  7.     int i;

  8.     for (i = 0; i < nr_segs; i++)
  9.         size += iov[i].iov_len;

  10.     msg->msg_name = NULL;
  11.     msg->msg_namelen = 0;
  12.     msg->msg_control = NULL;
  13.     msg->msg_controllen = 0;
  14.     msg->msg_iov = (struct iovec *)iov;
  15.     msg->msg_iovlen = nr_segs;
  16.     msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
  17.     if (sock->type == SOCK_SEQPACKET)
  18.         msg->msg_flags |= MSG_EOR;

  19.     return __sock_sendmsg(iocb, sock, msg, size);                             //调用__sock_sendmsg
  20. }
__sock_sendmsg:
  1. static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
  2.                  struct msghdr *msg, size_t size)
  3. {
  4.     struct sock_iocb *si = kiocb_to_siocb(iocb);
  5.     int err;

  6.     si->sock = sock;
  7.     si->scm = NULL;
  8.     si->msg = msg;
  9.     si->size = size;

  10.     err = security_socket_sendmsg(sock, msg, size);
  11.     if (err)
  12.         return err;

  13.     return sock->ops->sendmsg(iocb, sock, msg, size);                  //调用sock->ops->sendmsg函数指针;ops是struct proto_ops,即各协议族提供给套接字层的操作接口
  14. }
到这里是SCI和协议无关层的部分。

协议栈入口:udp_sendmsg(对于AF_INET的UDP套接字,sock->ops->sendmsg指向inet_sendmsg,它再通过sk->sk_prot->sendmsg调用到udp_sendmsg)
  1. int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
  2.         size_t len)
  3. {
  4.     struct inet_sock *inet = inet_sk(sk);
  5.     struct udp_sock *up = udp_sk(sk);
  6.     int ulen = len;
  7.     struct ipcm_cookie ipc;
  8.     struct rtable *rt = NULL;
  9.     int free = 0;
  10.     int connected = 0;
  11.     __be32 daddr, faddr, saddr;
  12.     __be16 dport;
  13.     u8 tos;
  14.     int err, is_udplite = IS_UDPLITE(sk);
  15.     int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
  16.     int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);

  17.     if (len > 0xFFFF)
  18.         return -EMSGSIZE;

  19.     .........................

  20.         security_sk_classify_flow(sk, &fl);
  21.         err = ip_route_output_flow(net, &rt, &fl, sk, 1);                                   //udp处理的函数(选择路由)
  22.         if (err) {
  23.             if (err == -ENETUNREACH)
  24.                 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
  25.             goto out;
  26.         }

  27.         err = -EACCES;
  28.         if ((rt->rt_flags & RTCF_BROADCAST) &&
  29.          !sock_flag(sk, SOCK_BROADCAST))
  30.             goto out;
  31.         if (connected)
  32.             sk_dst_set(sk, dst_clone(&rt->u.dst));
  33.     }

  34.     ..........................

  35. do_append_data:
  36.     up->len += ulen;
  37.     getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
  38.     err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
  39.             sizeof(struct udphdr), &ipc, &rt,
  40.             corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
  41.     if (err)
  42.         udp_flush_pending_frames(sk);                                                       
  43.     else if (!corkreq)
  44.         err = udp_push_pending_frames(sk);                                                      //用于数据传输,该函数为数据包添加UDP包头,校验数据,将数据发送出去。
  45.     else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
  46.         up->pending = 0;
  47.     release_sock(sk);

  48. out:
  49.     ip_rt_put(rt);
  50.     if (free)
  51.         kfree(ipc.opt);
  52.     if (!err)
  53.         return len;
  54.     /*
  55.      * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
  56.      * ENOBUFS might not be good (it's not tunable per se), but otherwise
  57.      * we don't have a good statistic (IpOutDiscards but it can be too many
  58.      * things). We could add another new stat but at least for now that
  59.      * seems like overkill.
  60.      */
  61.     if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
  62.         UDP_INC_STATS_USER(sock_net(sk),
  63.                 UDP_MIB_SNDBUFERRORS, is_udplite);
  64.     }
  65.     return err;

  66. do_confirm:
  67.     dst_confirm(&rt->u.dst);
  68.     if (!(msg->msg_flags&MSG_PROBE) || len)
  69.         goto back_from_confirm;
  70.     err = 0;
  71.     goto out;
  72. }
udp_push_pending_frames:
  1. /*
  2.  * Push out all pending data as one UDP datagram. Socket is locked.
  3.  */
  4. static int udp_push_pending_frames(struct sock *sk)
  5. {
  6.     struct udp_sock *up = udp_sk(sk);
  7.     struct inet_sock *inet = inet_sk(sk);
  8.     struct flowi *fl = &inet->cork.fl;
  9.     struct sk_buff *skb;
  10.     struct udphdr *uh;
  11.     int err = 0;
  12.     int is_udplite = IS_UDPLITE(sk);
  13.     __wsum csum = 0;

  14.     /* Grab the skbuff where UDP header space exists. */
  15.     if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
  16.         goto out;

  17.     /*
  18.      * Create a UDP header
  19.      */
  20.     uh = udp_hdr(skb);
  21.     uh->source = fl->fl_ip_sport;
  22.     uh->dest = fl->fl_ip_dport;
  23.     uh->len = htons(up->len);
  24.     uh->check = 0;

  25.     if (is_udplite)                  /* UDP-Lite */
  26.         csum = udplite_csum_outgoing(sk, skb);

  27.     else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */

  28.         skb->ip_summed = CHECKSUM_NONE;
  29.         goto send;

  30.     } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */

  31.         udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
  32.         goto send;

  33.     } else                         /* `normal' UDP */
  34.         csum = udp_csum_outgoing(sk, skb);

  35.     /* add protocol-dependent pseudo-header */
  36.     uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
  37.                  sk->sk_protocol, csum );
  38.     if (uh->check == 0)
  39.         uh->check = CSUM_MANGLED_0;

  40. send:
  41.     err = ip_push_pending_frames(sk);                                                                   //同时也是ip处理函数的入口
  42. out:
  43.     up->len = 0;
  44.     up->pending = 0;
  45.     if (!err)
  46.         UDP_INC_STATS_USER(sock_net(sk),
  47.                 UDP_MIB_OUTDATAGRAMS, is_udplite);
  48.     return err;
  49. }

三、IP协议关系

  该图是《Understanding Linux Network Internals》的原图

* ip包的接收函数调用流程
* ip包接收函数调用流程(kernel-2.6.27)
ip_rcv()
   ->ip_rcv_finish()
      ->ip_route_input()
      ->dst_input()
         ->ip_local_deliver()      //本地数据包,调用传输层协议处理函数处理
         ->ip_forward()             // 非本地包,转发
         ->ip_mr_input()           //多播数据包转发

ip_push_pending_frames:
  1. /*
  2.  *    Combined all pending IP fragments on the socket as one IP datagram
  3.  *    and push them out.
  4.  */
  5. int ip_push_pending_frames(struct sock *sk)
  6. {
  7.     ..........


  8.     /* Netfilter gets whole the not fragmented skb. */
  9.     err = ip_local_out(skb);                                                                           //这里调用了ip_local_out
  10.     if (err) {
  11.         if (err > 0)
  12.             err = inet->recverr ? net_xmit_errno(err) : 0;
  13.         if (err)
  14.             goto error;
  15.     }

  16. out:
  17.     ip_cork_release(inet);
  18.     return err;

  19. error:
  20.     IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
  21.     goto out;
  22. }
ip_local_out:
  1. int ip_local_out(struct sk_buff *skb)
  2. {
  3.     int err;

  4.     err = __ip_local_out(skb);                                        //继续调用__ip_local_out
  5.     if (likely(err == 1))
  6.         err = dst_output(skb);                                        //这里调用dst_output

  7.     return err;
  8. }
dst_output:
  1. static inline int dst_output(struct sk_buff *skb)
  2. {
  3.     return skb->dst->output(skb);                                            //调用skb->dst(路由缓存项)中的output函数指针;IPv4单播时为ip_output,经过POST_ROUTING钩子后进入ip_finish_output
  4. }
ip_finish_output:
  1. static int ip_finish_output(struct sk_buff *skb)
  2. {
  3. #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
  4.     /* Policy lookup after SNAT yielded a new policy */
  5.     if (skb->dst->xfrm != NULL) {
  6.         IPCB(skb)->flags |= IPSKB_REROUTED;
  7.         return dst_output(skb);
  8.     }
  9. #endif
  10.     if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
  11.         return ip_fragment(skb, ip_finish_output2);
  12.     else
  13.         return ip_finish_output2(skb);                                          //调用ip_finish_output2
  14. }
ip_finish_output2:
  1. static inline int ip_finish_output2(struct sk_buff *skb)
  2. {
  3.     struct dst_entry *dst = skb->dst;
  4.     struct rtable *rt = (struct rtable *)dst;
  5.     struct net_device *dev = dst->dev;
  6.     unsigned int hh_len = LL_RESERVED_SPACE(dev);

  7.     if (rt->rt_type == RTN_MULTICAST)
  8.         IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTMCASTPKTS);
  9.     else if (rt->rt_type == RTN_BROADCAST)
  10.         IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTBCASTPKTS);

  11.     /* Be paranoid, rather than too clever. */
  12.     if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
  13.         struct sk_buff *skb2;

  14.         skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
  15.         if (skb2 == NULL) {
  16.             kfree_skb(skb);
  17.             return -ENOMEM;
  18.         }
  19.         if (skb->sk)
  20.             skb_set_owner_w(skb2, skb->sk);
  21.         kfree_skb(skb);
  22.         skb = skb2;
  23.     }

  24.     if (dst->hh)                                                            //判断是否已有缓存的二层(链路层)头部hh_cache
  25.         return neigh_hh_output(dst->hh, skb);
  26.     else if (dst->neighbour)                                                //如果没有缓存的头部,则交给邻居子系统处理
  27.         return dst->neighbour->output(skb);                                 //neighbour->output其实对应arp_generic_ops下面的neigh_resolve_output

  28.     if (net_ratelimit())
  29.         printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
  30.     kfree_skb(skb);
  31.     return -EINVAL;
  32. }
arp_generic_ops:
  1. static struct neigh_ops arp_generic_ops = {
  2.     .family =        AF_INET,
  3.     .solicit =        arp_solicit,
  4.     .error_report =        arp_error_report,
  5.     .output =        neigh_resolve_output,                                    //neigh_resolve_output在这里
  6.     .connected_output =    neigh_connected_output,
  7.     .hh_output =        dev_queue_xmit,
  8.     .queue_xmit =        dev_queue_xmit,
  9. };
neigh_resolve_output:
  1. /* Slow and careful. */

  2. int neigh_resolve_output(struct sk_buff *skb)
  3. {
  4.     struct dst_entry *dst = skb->dst;
  5.     struct neighbour *neigh;
  6.     int rc = 0;

  7.     if (!dst || !(neigh = dst->neighbour))
  8.         goto discard;

  9.     __skb_pull(skb, skb_network_offset(skb));

  10.     if (!neigh_event_send(neigh, skb)) {
  11.         int err;
  12.         struct net_device *dev = neigh->dev;
  13.         if (dev->header_ops->cache && !dst->hh) {
  14.             write_lock_bh(&neigh->lock);
  15.             if (!dst->hh)
  16.                 neigh_hh_init(neigh, dst, dst->ops->protocol);
  17.             err = dev_hard_header(skb, dev, ntohs(skb->protocol),
  18.                      neigh->ha, NULL, skb->len);
  19.             write_unlock_bh(&neigh->lock);
  20.         } else {
  21.             read_lock_bh(&neigh->lock);
  22.             err = dev_hard_header(skb, dev, ntohs(skb->protocol),
  23.                      neigh->ha, NULL, skb->len);
  24.             read_unlock_bh(&neigh->lock);
  25.         }
  26.         if (err >= 0)
  27.             rc = neigh->ops->queue_xmit(skb);                                    //这里调用queue_xmit函数
  28.         else
  29.             goto out_kfree_skb;
  30.     }
  31. out:
  32.     return rc;
  33. discard:
  34.     NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
  35.          dst, dst ? dst->neighbour : NULL);
  36. out_kfree_skb:
  37.     rc = -EINVAL;
  38.     kfree_skb(skb);
  39.     goto out;
  40. }
到此协议栈就结束了。

设备无关接口:dev_queue_xmit:

  1. int dev_queue_xmit(struct sk_buff *skb)
  2. {
  3.     struct net_device *dev = skb->dev;
  4.     struct netdev_queue *txq;
  5.     struct Qdisc *q;
  6.     int rc = -ENOMEM;

  7.     .............

  8.     if (dev->flags & IFF_UP) {
  9.         int cpu = smp_processor_id(); /* ok because BHs are off */

  10.         if (txq->xmit_lock_owner != cpu) {

  11.             HARD_TX_LOCK(dev, txq, cpu);

  12.             if (!netif_tx_queue_stopped(txq)) {
  13.                 rc = 0;
  14.                 if (!dev_hard_start_xmit(skb, dev, txq)) {                                 //这里调用dev_hard_start_xmit
  15.                     HARD_TX_UNLOCK(dev, txq);
  16.                     goto out;
  17.                 }
  18.             }
  19.             HARD_TX_UNLOCK(dev, txq);
  20.             if (net_ratelimit())
  21.                 printk(KERN_CRIT "Virtual device %s asks to "
  22.                  "queue packet!\n", dev->name);
  23.         } else {
  24.             /* Recursion is detected! It is possible,
  25.              * unfortunately */
  26.             if (net_ratelimit())
  27.                 printk(KERN_CRIT "Dead loop on virtual device "
  28.                  "%s, fix it urgently!\n", dev->name);
  29.         }
  30.     }

  31.     rc = -ENETDOWN;
  32.     rcu_read_unlock_bh();

  33. out_kfree_skb:
  34.     kfree_skb(skb);
  35.     return rc;
  36. out:
  37.     rcu_read_unlock_bh();
  38.     return rc;
  39. }
dev_hard_start_xmit:
  1. int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
  2.             struct netdev_queue *txq)
  3. {
  4.     const struct net_device_ops *ops = dev->netdev_ops;                          //取得该设备的net_device_ops操作函数集
  5.     int rc;

  6.     if (likely(!skb->next)) {
  7.         if (!list_empty(&ptype_all))
  8.             dev_queue_xmit_nit(skb, dev);

  9.         if (netif_needs_gso(dev, skb)) {
  10.             if (unlikely(dev_gso_segment(skb)))
  11.                 goto out_kfree_skb;
  12.             if (skb->next)
  13.                 goto gso;
  14.         }

  15.         rc = ops->ndo_start_xmit(skb, dev);                                                 //调用了ndo_start_xmit函数指针,调用了驱动函数
  16.         /*
  17.          * TODO: if skb_orphan() was called by
  18.          * dev->hard_start_xmit() (for example, the unmodified
  19.          * igb driver does that; bnx2 doesn't), then
  20.          * skb_tx_software_timestamp() will be unable to send
  21.          * back the time stamp.
  22.          *
  23.          * How can this be prevented? Always create another
  24.          * reference to the socket before calling
  25.          * dev->hard_start_xmit()? Prevent that skb_orphan()
  26.          * does anything in dev->hard_start_xmit() by clearing
  27.          * the skb destructor before the call and restoring it
  28.          * afterwards, then doing the skb_orphan() ourselves?
  29.          */
  30.         return rc;
  31.     }

  32. gso:
  33.     do {
  34.         struct sk_buff *nskb = skb->next;

  35.         skb->next = nskb->next;
  36.         nskb->next = NULL;
  37.         rc = ops->ndo_start_xmit(nskb, dev);
  38.         if (unlikely(rc)) {
  39.             nskb->next = skb->next;
  40.             skb->next = nskb;
  41.             return rc;
  42.         }
  43.         if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
  44.             return NETDEV_TX_BUSY;
  45.     } while (skb->next);

  46.     skb->destructor = DEV_GSO_CB(skb)->destructor;

  47. out_kfree_skb:
  48.     kfree_skb(skb);
  49.     return 0;
  50. }

用户程序通过recvmsg接收网络包,
驱动则使用中断:①接收:从硬件中读取数据放到skb结构中,然后调用netif_rx()交给上层;

netif_rx会触发接收软中断(NET_RX_SOFTIRQ),由设备无关接口层的net_rx_action来处理(dev.c);该软中断处理函数是在net_dev_init中注册的:
  1. /*
  2.  * This is called single threaded during boot, so no need
  3.  * to take the rtnl semaphore.
  4.  */
  5. static int __init net_dev_init(void)
  6. {
  7.     int i, rc = -ENOMEM;

  8.     BUG_ON(!dev_boot_phase);

  9.     if (dev_proc_init())
  10.         goto out;

  11.     if (netdev_kobject_init())
  12.         goto out;

  13.     INIT_LIST_HEAD(&ptype_all);
  14.     for (i = 0; i < PTYPE_HASH_SIZE; i++)
  15.         INIT_LIST_HEAD(&ptype_base[i]);

  16.     if (register_pernet_subsys(&netdev_net_ops))
  17.         goto out;

  18.     /*
  19.      *    Initialise the packet receive queues.
  20.      */

  21.     for_each_possible_cpu(i) {
  22.         struct softnet_data *queue;

  23.         queue = &per_cpu(softnet_data, i);
  24.         skb_queue_head_init(&queue->input_pkt_queue);
  25.         queue->completion_queue = NULL;
  26.         INIT_LIST_HEAD(&queue->poll_list);

  27.         queue->backlog.poll = process_backlog;
  28.         queue->backlog.weight = weight_p;
  29.         queue->backlog.gro_list = NULL;
  30.         queue->backlog.gro_count = 0;
  31.     }

  32.     dev_boot_phase = 0;

  33.     /* The loopback device is special if any other network devices
  34.      * is present in a network namespace the loopback device must
  35.      * be present. Since we now dynamically allocate and free the
  36.      * loopback device ensure this invariant is maintained by
  37.      * keeping the loopback device as the first device on the
  38.      * list of network devices. Ensuring the loopback devices
  39.      * is the first device that appears and the last network device
  40.      * that disappears.
  41.      */
  42.     if (register_pernet_device(&loopback_net_ops))
  43.         goto out;

  44.     if (register_pernet_device(&default_device_ops))
  45.         goto out;

  46.     open_softirq(NET_TX_SOFTIRQ, net_tx_action);                                       //注册发送软中断的处理函数net_tx_action
  47.     open_softirq(NET_RX_SOFTIRQ, net_rx_action);                                       //注册接收软中断的处理函数net_rx_action(这里只是注册,并不是调用)

  48.     hotcpu_notifier(dev_cpu_callback, 0);
  49.     dst_init();
  50.     dev_mcast_init();
  51.     rc = 0;
  52. out:
  53.     return rc;
  54. }
net_rx_action通过各队列的poll函数(如上面注册的process_backlog)最终调用netif_receive_skb:
  1. int netif_receive_skb(struct sk_buff *skb)
  2. {
  3.     struct packet_type *ptype, *pt_prev;
  4.     struct net_device *orig_dev;
  5.     struct net_device *null_or_orig;
  6.     int ret = NET_RX_DROP;
  7.     __be16 type;

  8.     if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
  9.         return NET_RX_SUCCESS;

  10.     /* if we've gotten here through NAPI, check netpoll */
  11.     if (netpoll_receive_skb(skb))
  12.         return NET_RX_DROP;

  13.     if (!skb->tstamp.tv64)
  14.         net_timestamp(skb);

  15.     if (!skb->iif)
  16.         skb->iif = skb->dev->ifindex;

  17.     null_or_orig = NULL;
  18.     orig_dev = skb->dev;
  19.     if (orig_dev->master) {
  20.         if (skb_bond_should_drop(skb))
  21.             null_or_orig = orig_dev; /* deliver only exact match */
  22.         else
  23.             skb->dev = orig_dev->master;
  24.     }

  25.     __get_cpu_var(netdev_rx_stat).total++;

  26.     skb_reset_network_header(skb);
  27.     skb_reset_transport_header(skb);
  28.     skb->mac_len = skb->network_header - skb->mac_header;

  29.     pt_prev = NULL;

  30.     rcu_read_lock();

  31. #ifdef CONFIG_NET_CLS_ACT
  32.     if (skb->tc_verd & TC_NCLS) {
  33.         skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
  34.         goto ncls;
  35.     }
  36. #endif

  37.     list_for_each_entry_rcu(ptype, &ptype_all, list) {
  38.         if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||                                  //ptype_all链表上的处理函数不区分协议类型,这里只按接收设备进行匹配
  39.          ptype->dev == orig_dev) {
  40.             if (pt_prev)
  41.                 ret = deliver_skb(skb, pt_prev, orig_dev);                                           //调用deliver_skb
  42.             pt_prev = ptype;
  43.         }
  44.     }

  45. #ifdef CONFIG_NET_CLS_ACT
  46.     skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
  47.     if (!skb)
  48.         goto out;
  49. ncls:
  50. #endif

  51.     skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
  52.     if (!skb)
  53.         goto out;
  54.     skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
  55.     if (!skb)
  56.         goto out;

  57.     skb_orphan(skb);

  58.     type = skb->protocol;
  59.     list_for_each_entry_rcu(ptype,
  60.             &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
  61.         if (ptype->type == type &&
  62.          (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
  63.          ptype->dev == orig_dev)) {
  64.             if (pt_prev)
  65.                 ret = deliver_skb(skb, pt_prev, orig_dev);
  66.             pt_prev = ptype;
  67.         }
  68.     }

  69.     if (pt_prev) {
  70.         ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
  71.     } else {
  72.         kfree_skb(skb);
  73.         /* Jamal, now you will not able to escape explaining
  74.          * me how you were going to use this. :-)
  75.          */
  76.         ret = NET_RX_DROP;
  77.     }

  78. out:
  79.     rcu_read_unlock();
  80.     return ret;
  81. }
deliver_skb:
  1. static inline int deliver_skb(struct sk_buff *skb,
  2.              struct packet_type *pt_prev,
  3.              struct net_device *orig_dev)
  4. {
  5.     atomic_inc(&skb->users);
  6.     return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);                              //调用func函数
  7. }
如果是ip包,会交给网络层处理,ip协议栈的入口函数(ip_rcv)
  1. /*
  2.  *     Main IP Receive routine.
  3.  */
  4. int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
  5. {
  6.     struct iphdr *iph;
  7.     u32 len;

  8.     /* When the interface is in promisc. mode, drop all the crap
  9.      * that it receives, do not try to analyse it.
  10.      */
  11.     if (skb->pkt_type == PACKET_OTHERHOST)
  12.         goto drop;

  13.     IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES);

  14.     if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
  15.         IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
  16.         goto out;
  17.     }

  18.     if (!pskb_may_pull(skb, sizeof(struct iphdr)))
  19.         goto inhdr_error;

  20.     iph = ip_hdr(skb);

  21.     /*
  22.      *    RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
  23.      *
  24.      *    Is the datagram acceptable?
  25.      *
  26.      *    1.    Length at least the size of an ip header
  27.      *    2.    Version of 4
  28.      *    3.    Checksums correctly. [Speed optimisation for later, skip loopback checksums]
  29.      *    4.    Doesn't have a bogus length
  30.      */

  31.     if (iph->ihl < 5 || iph->version != 4)
  32.         goto inhdr_error;

  33.     if (!pskb_may_pull(skb, iph->ihl*4))
  34.         goto inhdr_error;

  35.     iph = ip_hdr(skb);

  36.     if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
  37.         goto inhdr_error;

  38.     len = ntohs(iph->tot_len);
  39.     if (skb->len < len) {
  40.         IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
  41.         goto drop;
  42.     } else if (len < (iph->ihl*4))
  43.         goto inhdr_error;

  44.     /* Our transport medium may have padded the buffer out. Now we know it
  45.      * is IP we can trim to the true length of the frame.
  46.      * Note this now means skb->len holds ntohs(iph->tot_len).
  47.      */
  48.     if (pskb_trim_rcsum(skb, len)) {
  49.         IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
  50.         goto drop;
  51.     }

  52.     /* Remove any debris in the socket control block */
  53.     memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));

  54.     return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
  55.          ip_rcv_finish);

  56. inhdr_error:
  57.     IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
  58. drop:
  59.     kfree_skb(skb);
  60. out:
  61.     return NET_RX_DROP;
  62. }

阅读(620) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~