Chinaunix首页 | 论坛 | 博客
  • 博客访问: 59605
  • 博文数量: 8
  • 博客积分: 1588
  • 博客等级: 上尉
  • 技术积分: 145
  • 用 户 组: 普通用户
  • 注册时间: 2010-04-06 18:05
文章分类

全部博文(8)

文章存档

2010年(8)

我的朋友

分类: LINUX

2010-06-22 11:38:48

2010/6/22:继续前面的研究,突然发现老版本好处多多,建议有兴趣分析内核的菜鸟(比如我)还是找些老版本的来看,既省时又受益。其实代码这玩意,写了一大堆不过是为了说明一件事情,有时候可能是简单得不得了的事,但为啥写出来又这么复杂?因为既要考虑异常处理,又要通用,还要可调试、可维护……
比如写一个链表操作的功能吧,初学者可能不到100行就能实现插入、删除、遍历的操作了。再看Linux的实现,要考虑通用性(定义了一堆宏),要考虑临界区保护(又加了一堆代码),还有多CPU……唉,烦不胜烦。功能强大了,但阅读困难了。分析者的目的只是想看一下功能的实现,却又不得不在众多分支中分辨哪个与功能相关,哪个与功能无关。还是上帝最霸道,要有光!然后就有了光……
 
今天不想写太多,周二距离周末还是遥遥无期,写一下IP报文的发送/接收处理吧。
先看看接收的,网卡设备在接收到一个报文后,由DMA传送到内存当中,并触发中断。在网卡的中断处理程序中把接收的报文组成sk_buff结构,并调用netif_rx函数,同时把sk_buff的地址作为参数传入。
 

/*
 * Queue a received buffer for later protocol processing.
 *
 * Clears the buffer's socket pointer, sets its "free" flag, appends it to
 * the global "backlog" queue and marks the INET_BH bottom half.
 *
 * Author's note (from the accompanying article): this is called from the
 * network card's interrupt handler, so the packet cannot be processed any
 * further here; the rest of the work is left to inet_bh().  The article
 * contrasts this with 2.6 kernels, where it says the work is done in the
 * per-CPU ksoftirqd kernel thread, whereas this version checks for pending
 * work at the end of every system call.
 */
void
netif_rx(struct sk_buff *skb)
{
  /* Reset the per-buffer fields before queueing. */
  skb->free = 1;
  skb->sk = NULL;

  /* Append the buffer to the "backlog" queue. */
  IS_SKB(skb);
  skb_queue_tail(&backlog, skb);

  /* Nothing queued: no need to mark the bottom half. */
  if (backlog == NULL)
    return;

  /* At least one packet is pending, so mark it for processing. */
  mark_bh(INET_BH);
}

然后是调用inet_bh,这个函数比较长了

/*
 * Network bottom half: drain the "backlog" queue filled by netif_rx().
 *
 * For every queued buffer this strips the link-level header, asks the
 * receiving device for the packet type, and hands the buffer (or a copy of
 * it) to each entry on the ptype_base list whose type matches or is
 * ETH_P_ALL.  Buffers nobody claims are freed.  dev_transmit() is called
 * before, during and after the loop.
 *
 * The in_bh bit keeps the function from being entered while it is already
 * running.  The `tmp` argument is not used.
 */
void
inet_bh(void *tmp)
{
  struct sk_buff *skb;
  struct packet_type *ptype;
  unsigned short type;
  unsigned char flag = 0;
  int nitcount;

  /* Atomically check and mark our BUSY state. */
  if (set_bit(1, (void*)&in_bh))
      return;

  /*
   * Can we send anything now?
   * Author's note: first see whether any packets are waiting to be sent.
   * Doing it here looks odd; presumably no better place was found.  The
   * code in this old version is still fairly crude.
   */
  dev_transmit();
  
  /* Any data left to process?  Take the queued sk_buffs off one at a time. */
  while((skb=skb_dequeue(&backlog))!=NULL)
  {
      nitcount=dev_nit;
    flag=0;
    /* NOTE(review): presumably re-enables interrupts while this packet is
     * processed; the matching cli() is at the bottom of the loop. */
    sti();
       /*
    * Bump the pointer to the next structure.
    * This assumes that the basic 'skb' pointer points to
    * the MAC header, if any (as indicated by its "length"
    * field). Take care now!
    *
    * In other words: point h.raw at the layer-3 header by skipping
    * the MAC header, and shrink the length accordingly.
    */

       skb->h.raw = skb->data + skb->dev->hard_header_len;
       skb->len -= skb->dev->hard_header_len;

       /*
    * Fetch the packet protocol ID. This is also quite ugly, as
    * it depends on the protocol driver (the interface itself) to
    * know what the type is, or where to get it from. The Ethernet
    * interfaces fetch the ID from the two bytes in the Ethernet MAC
    * header (the h_proto field in struct ethhdr), but drivers like
    * SLIP and PLIP have no alternative but to force the type to be
    * IP or something like that. Sigh- FvK
    *
    * Author's note: we need to know which layer-3 protocol the
    * layer-2 frame carries.  This is generic code (dev.c) that does
    * not know what kind of network device it is dealing with, so it
    * goes through the driver's type_trans function pointer.
    */

       type = skb->dev->type_trans(skb, skb->dev);

    /*
     * We got a packet ID. Now loop over the "known protocols"
     * table (which is actually a linked list, but this will
     * change soon if I get my way- FvK), and forward the packet
     * to anyone who wants it.
     */

    /*
     * Walk ptype_base; for an IP packet this ends up calling ip_rcv.
     * Author's note: the list initialisation names several types, but
     * this version only supports IP.
     */
    for (ptype = ptype_base; ptype != NULL; ptype = ptype->next) {
        if (ptype->type == type || ptype->type == NET16(ETH_P_ALL)) {
            struct sk_buff *skb2;

            /* nitcount starts at dev_nit and drops by one per ETH_P_ALL entry. */
            if (ptype->type==NET16(ETH_P_ALL))
                nitcount--;
            if (ptype->copy || nitcount) {    /* copy if we need to    */
                skb2 = alloc_skb(skb->mem_len, GFP_ATOMIC);
                /* No memory for a copy: this handler simply misses the packet. */
                if (skb2 == NULL)
                    continue;
                /* Copy all mem_len bytes starting at the skb itself, then fix
                 * up the fields that must refer to the copy: mem_addr, and
                 * h.raw kept at the same offset from the start of the copy. */
                memcpy(skb2, (const void *) skb, skb->mem_len);
                skb2->mem_addr = skb2;
                skb2->h.raw = (unsigned char *)(
                 (unsigned long) skb2 +
                 (unsigned long) skb->h.raw -
                 (unsigned long) skb
                );
                skb2->free = 1;
            } else {
                skb2 = skb;
            }

            /* This used to be in the 'else' part, but then
             * we don't have this flag set when we get a
             * protocol that *does* require copying... -FvK
             *
             * NOTE(review): if every matching handler receives a
             * copy, the original skb is not freed in this function
             * -- confirm the copy/nitcount setup rules that out.
             */

            flag = 1;

            /* Kick the protocol handler.  For IP this is ip_rcv, which
             * in turn dispatches on the protocol carried by the IP
             * packet (author's note: e.g. tcp_rcv/udp_rcv/icmp_rcv). */
            ptype->func(skb2, skb->dev, ptype);
        }
    }

    /*
     * That's odd. We got an unknown packet. Who's using
     * stuff like Novell or Amoeba on this network??
     */

    if (!flag) {
        DPRINTF((DBG_DEV,
            "INET: unknown packet type 0x%04X (ignored)\n", type));
        skb->sk = NULL;
        kfree_skb(skb, FREE_WRITE);
    }

    /*
     * Again, see if we can transmit anything now.
     * Author's note: this feels like an afterthought -- or was there
     * really no better place to put it?
     */
    dev_transmit();
    /* NOTE(review): presumably disables interrupts again before the next
     * dequeue and before in_bh is cleared below. */
    cli();
  }
  /* Leave the BUSY state, then try to transmit once more. */
  in_bh = 0;
  sti();
  dev_transmit();
}


/* This function receives all incoming IP datagrams. */
int
ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
  struct iphdr *iph = skb->h.iph;//这里在传入是已经设置好了,指向IP报文头
  unsigned char hash;
  unsigned char flag = 0;
  unsigned char opts_p = 0;    /* Set iff the packet has options. */
  struct inet_protocol *ipprot;
  static struct options opt; /* since we don't use these yet, and they
                take up stack space. */

  int brd;
  int is_frag=0;

  DPRINTF((DBG_IP, "<<\n"));

  skb->ip_hdr = iph;        /* Fragments can cause ICMP errors too! */
  /* Is the datagram acceptable? */
  if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0) {
    DPRINTF((DBG_IP, "\nIP: *** datagram error ***\n"));
    DPRINTF((DBG_IP, " SRC = %s ", in_ntoa(iph->saddr)));
    DPRINTF((DBG_IP, " DST = %s (ignored)\n", in_ntoa(iph->daddr)));
    skb->sk = NULL;
    kfree_skb(skb, FREE_WRITE);
    return(0);
  }
  
  if (iph->ihl != 5) {     /* Fast path for the typical optionless IP packet. */
      ip_print(iph);        /* Bogus, only for debugging. */
      memset((char *) &opt, 0, sizeof(opt));
      if (do_options(iph, &opt) != 0)
     return 0;
      opts_p = 1;
  }

  if (iph->frag_off & 0x0020)
      is_frag|=1;
  if (ntohs(iph->frag_off) & 0x1fff)
      is_frag|=2;
      
  /* Do any IP forwarding required. chk_addr() is expensive -- avoid it someday. */
  if ((brd = chk_addr(iph->daddr)) == 0) {//如果不是本地IP,需要转发,转发在上一个小文里已经说过了,调用rt_route查询路由表,获取下一跳的物理接口,并发送出去
#ifdef CONFIG_IP_FORWARD
    ip_forward(skb, dev, is_frag);
#else
    printk("Machine %x tried to use us as a forwarder to %x but we have forwarding disabled!\n",
            iph->saddr,iph->daddr);
#endif            
    skb->sk = NULL;
    kfree_skb(skb, FREE_WRITE);
    return(0);
  }

  /*
   * Reassemble IP fragments.
   */


  if(is_frag)//如果是分片报文,需要重组。这个后续单说一下ip_defrag和ip_fragment这两个函数
  {
#ifdef CONFIG_IP_DEFRAG
        skb=ip_defrag(iph,skb,dev);//在没有接收完一个完整的报文之前,这里返回null
        if(skb==NULL)
        {
            return 0;
        }
        iph=skb->h.iph;
#else
    printk("\nIP: *** datagram fragmentation not yet implemented ***\n");
    printk(" SRC = %s ", in_ntoa(iph->saddr));
    printk(" DST = %s (ignored)\n", in_ntoa(iph->daddr));
    icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
    skb->sk = NULL;
    kfree_skb(skb, FREE_WRITE);
    return(0);
#endif
  }



  if(brd==IS_INVBCAST)
  {
/*    printk("Invalid broadcast address from %x [target %x] (Probably they have a wrong netmask)\n",
        iph->saddr,iph->daddr);*/

      skb->sk=NULL;
      kfree_skb(skb,FREE_WRITE);
      return(0);
  }
  
  /* Point into the IP datagram, just past the header. */

  skb->ip_hdr = iph;
  skb->h.raw += iph->ihl*4;
  hash = iph->protocol & (MAX_INET_PROTOS -1);
  for (ipprot = (struct inet_protocol *)inet_protos[hash];
       ipprot != NULL;
       ipprot=(struct inet_protocol *)ipprot->next)
    {
       struct sk_buff *skb2;

       if (ipprot->protocol != iph->protocol) continue;
       DPRINTF((DBG_IP, "Using protocol = %X:\n", ipprot));
       print_ipprot(ipprot);

       /*
    * See if we need to make a copy of it. This will
    * only be set if more than one protocol wants it.
    * and then not for the last one.
    */

       if (ipprot->copy) {
        skb2 = alloc_skb(skb->mem_len, GFP_ATOMIC);
        if (skb2 == NULL)
            continue;
        memcpy(skb2, skb, skb->mem_len);
        skb2->mem_addr = skb2;
        skb2->ip_hdr = (struct iphdr *)(
                (unsigned long)skb2 +
                (unsigned long) skb->ip_hdr -
                (unsigned long)skb);
        skb2->h.raw = (unsigned char *)(
                (unsigned long)skb2 +
                (unsigned long) skb->h.raw -
                (unsigned long)skb);
        skb2->free=1;
    } else {
        skb2 = skb;
    }
    flag = 1;

       /*
    * Pass on the datagram to each protocol that wants it,
    * based on the datagram protocol. We should really
    * check the protocol handler's return values here...
    */

    //这里调用四层报文接收处理函数处理,如果是tcp报文,调用我们之前说过的tcp_rcv
    ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
            (ntohs(iph->tot_len) - (iph->ihl * 4)),
            iph->saddr, 0, ipprot);

  }

  /*
   * All protocols checked.
   * If this packet was a broadcast, we may *not* reply to it, since that
   * causes (proven, grin) ARP storms and a leakage of memory (i.e. all
   * ICMP reply messages get queued up for transmission...)
   */

  if (!flag) {
    if (brd != IS_BROADCAST)
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
    skb->sk = NULL;
    kfree_skb(skb, FREE_WRITE);
  }

  return(0);
}


阅读(1747) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~