Chinaunix首页 | 论坛 | 博客
  • 博客访问: 362605
  • 博文数量: 64
  • 博客积分: 2975
  • 博客等级: 少校
  • 技术积分: 831
  • 用 户 组: 普通用户
  • 注册时间: 2007-01-14 10:59
文章存档

2014年(2)

2012年(7)

2010年(40)

2009年(5)

2008年(8)

2007年(2)

分类: LINUX

2010-04-09 13:35:48

align(3)

确定各段大小并写出各段


1.经过relax后,各个段的大小可以确定了,接下来的工作就是确定各段大小

/* Excerpt of write_object_file (the article omits the rest of the body):
   once section contents are final, call size_seg on every section of
   stdoutput via bfd_map_over_sections.  */
void
write_object_file (void)
{

  /* The third argument is passed as a null pointer; size_seg declares
     the corresponding parameter ATTRIBUTE_UNUSED.  */
  bfd_map_over_sections (stdoutput, size_seg, (char *) 0);
 
}

对于每个段调用size_seg


2.size_seg
write_object_file->size_seg

static void
size_seg (bfd *abfd, asection *sec, void *xxx ATTRIBUTE_UNUSED)
{
  flagword flags;
  fragS *fragp;
  segment_info_type *seginfo;
  int x;
  valueT size, newsize;

  subseg_change (sec, 0);//切换到该段第0子段

  seginfo = seg_info (sec);//取段信息
  if (seginfo && seginfo->frchainP)//如果有子段
    {
      for (fragp = seginfo->frchainP->frch_root; fragp; fragp = fragp->fr_next)//对于该段中的所有分片,调用cvt_frag_to_fill
    cvt_frag_to_fill (sec, fragp);
   
   
3.cvt_frag_to_fill
write_object_file->size_seg->cvt_frag_to_fill
将分片固定下来,即全部转换成rs_fill类型

static void
cvt_frag_to_fill (segT sec ATTRIBUTE_UNUSED, fragS *fragP)
{
  switch (fragP->fr_type)
    {
    case rs_align:
    case rs_align_code:
    case rs_align_test:
    case rs_org:
    case rs_space:
#ifdef HANDLE_ALIGN
      HANDLE_ALIGN (fragP);
#endif


i386平台下
/* Alignment hook used from cvt_frag_to_fill.  Only frags of type
   rs_align_code are handled: they are passed to i386_align_code together
   with the byte count between the end of this frag's fixed part
   (fr_address + fr_fix) and the address of the next frag.
   NOTE: this expands to a bare `if' statement, so it must be used as a
   complete statement on its own.  */
#define HANDLE_ALIGN(fragP)                        \
if (fragP->fr_type == rs_align_code)                     \
  i386_align_code (fragP, (fragP->fr_next->fr_address            \
               - fragP->fr_address                \
               - fragP->fr_fix));

注意这里fragP->fr_next->fr_address - fragP->fr_address - fragP->fr_fix是在计算本分片可变部分大小


4.i386_align_code
write_object_file->size_seg->cvt_frag_to_fill->i386_align_code

void
i386_align_code (fragS *fragP, int count)
{
  /* Various efficient no-op patterns for aligning code labels.
  各种高效的空操作(no-op)模式,用于对齐代码标号
     Note: Don't try to assemble the instructions in the comments.
     0L and 0w are not legal.  */
  static const char f32_1[] =
    {0x90};                    /* nop            */
  static const char f32_2[] =
    {0x66,0x90};                /* xchg %ax,%ax */
  static const char f32_3[] =
    {0x8d,0x76,0x00};                /* leal 0(%esi),%esi    */
  static const char f32_4[] =
    {0x8d,0x74,0x26,0x00};            /* leal 0(%esi,1),%esi    */
  static const char f32_5[] =
    {0x90,                    /* nop            */
     0x8d,0x74,0x26,0x00};            /* leal 0(%esi,1),%esi    */
  static const char f32_6[] =
    {0x8d,0xb6,0x00,0x00,0x00,0x00};        /* leal 0L(%esi),%esi    */
  static const char f32_7[] =
    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};    /* leal 0L(%esi,1),%esi */
  static const char f32_8[] =
    {0x90,                    /* nop            */
     0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};    /* leal 0L(%esi,1),%esi */
  static const char f32_9[] =
    {0x89,0xf6,                    /* movl %esi,%esi    */
     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};    /* leal 0L(%edi,1),%edi */
  static const char f32_10[] =
    {0x8d,0x76,0x00,                /* leal 0(%esi),%esi    */
     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};    /* leal 0L(%edi,1),%edi */
  static const char f32_11[] =
    {0x8d,0x74,0x26,0x00,            /* leal 0(%esi,1),%esi    */
     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};    /* leal 0L(%edi,1),%edi */
  static const char f32_12[] =
    {0x8d,0xb6,0x00,0x00,0x00,0x00,        /* leal 0L(%esi),%esi    */
     0x8d,0xbf,0x00,0x00,0x00,0x00};        /* leal 0L(%edi),%edi    */
  static const char f32_13[] =
    {0x8d,0xb6,0x00,0x00,0x00,0x00,        /* leal 0L(%esi),%esi    */
     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};    /* leal 0L(%edi,1),%edi */
  static const char f32_14[] =
    {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00,    /* leal 0L(%esi,1),%esi */
     0x8d,0xbc,0x27,0x00,0x00,0x00,0x00};    /* leal 0L(%edi,1),%edi */
  static const char f16_3[] =
    {0x8d,0x74,0x00};                /* lea 0(%esi),%esi    */
  static const char f16_4[] =
    {0x8d,0xb4,0x00,0x00};            /* lea 0w(%si),%si    */
  static const char f16_5[] =
    {0x90,                    /* nop            */
     0x8d,0xb4,0x00,0x00};            /* lea 0w(%si),%si    */
  static const char f16_6[] =
    {0x89,0xf6,                    /* mov %si,%si        */
     0x8d,0xbd,0x00,0x00};            /* lea 0w(%di),%di    */
  static const char f16_7[] =
    {0x8d,0x74,0x00,                /* lea 0(%si),%si    */
     0x8d,0xbd,0x00,0x00};            /* lea 0w(%di),%di    */
  static const char f16_8[] =
    {0x8d,0xb4,0x00,0x00,            /* lea 0w(%si),%si    */
     0x8d,0xbd,0x00,0x00};            /* lea 0w(%di),%di    */
  static const char jump_31[] =
    {0xeb,0x1d,0x90,0x90,0x90,0x90,0x90,    /* jmp .+31; lotsa nops    */
     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,
     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,
     0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
  static const char *const f32_patt[] = {
    f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
    f32_9, f32_10, f32_11, f32_12, f32_13, f32_14
  };
  static const char *const f16_patt[] = {
    f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8
  };
  /* nopl (%[re]ax) */
  static const char alt_3[] =
    {0x0f,0x1f,0x00};
  /* nopl 0(%[re]ax) */
  static const char alt_4[] =
    {0x0f,0x1f,0x40,0x00};
  /* nopl 0(%[re]ax,%[re]ax,1) */
  static const char alt_5[] =
    {0x0f,0x1f,0x44,0x00,0x00};
  /* nopw 0(%[re]ax,%[re]ax,1) */
  static const char alt_6[] =
    {0x66,0x0f,0x1f,0x44,0x00,0x00};
  /* nopl 0L(%[re]ax) */
  static const char alt_7[] =
    {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
  /* nopl 0L(%[re]ax,%[re]ax,1) */
  static const char alt_8[] =
    {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
  /* nopw 0L(%[re]ax,%[re]ax,1) */
  static const char alt_9[] =
    {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
  /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
  static const char alt_10[] =
    {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
  /* data16
     nopw %cs:0L(%[re]ax,%[re]ax,1) */
  static const char alt_long_11[] =
    {0x66,
     0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
  /* data16
     data16
     nopw %cs:0L(%[re]ax,%[re]ax,1) */
  static const char alt_long_12[] =
    {0x66,
     0x66,
     0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
  /* data16
     data16
     data16
     nopw %cs:0L(%[re]ax,%[re]ax,1) */
  static const char alt_long_13[] =
    {0x66,
     0x66,
     0x66,
     0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
  /* data16
     data16
     data16
     data16
     nopw %cs:0L(%[re]ax,%[re]ax,1) */
  static const char alt_long_14[] =
    {0x66,
     0x66,
     0x66,
     0x66,
     0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
  /* data16
     data16
     data16
     data16
     data16
     nopw %cs:0L(%[re]ax,%[re]ax,1) */
  static const char alt_long_15[] =
    {0x66,
     0x66,
     0x66,
     0x66,
     0x66,
     0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
  /* nopl 0(%[re]ax,%[re]ax,1)
     nopw 0(%[re]ax,%[re]ax,1) */
  static const char alt_short_11[] =
    {0x0f,0x1f,0x44,0x00,0x00,
     0x66,0x0f,0x1f,0x44,0x00,0x00};
  /* nopw 0(%[re]ax,%[re]ax,1)
     nopw 0(%[re]ax,%[re]ax,1) */
  static const char alt_short_12[] =
    {0x66,0x0f,0x1f,0x44,0x00,0x00,
     0x66,0x0f,0x1f,0x44,0x00,0x00};
  /* nopw 0(%[re]ax,%[re]ax,1)
     nopl 0L(%[re]ax) */
  static const char alt_short_13[] =
    {0x66,0x0f,0x1f,0x44,0x00,0x00,
     0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
  /* nopl 0L(%[re]ax)
     nopl 0L(%[re]ax) */
  static const char alt_short_14[] =
    {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00,
     0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
  /* nopl 0L(%[re]ax)
     nopl 0L(%[re]ax,%[re]ax,1) */
  static const char alt_short_15[] =
    {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00,
     0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
  static const char *const alt_short_patt[] = {
    f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
    alt_9, alt_10, alt_short_11, alt_short_12, alt_short_13,
    alt_short_14, alt_short_15
  };
  static const char *const alt_long_patt[] = {
    f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
    alt_9, alt_10, alt_long_11, alt_long_12, alt_long_13,
    alt_long_14, alt_long_15
  };

这里定义了各种空操作模式,用于不同的cpu.
可以看出f16_patt和f32_patt主要使用lea指令,如果填充模式使用了两个lea指令,则操作数部分使用了不同寄存器,减少寄存器争用
alt_long_patt和alt_short_patt主要使用扩展的nop指令.两者的区别是alt_long_patt用一条nop和大量的0x66前缀,alt_short_patt用两条nop.
alt_long_patt为intel cpu准备,alt_short_patt为amd cpu准备。
至于更详细的优化细节可以查看intel和amd的技术文档。

  /* Only align for at least a positive non-zero boundary. */
  if (count <= 0 || count > MAX_MEM_FOR_RS_ALIGN_CODE)//可变部分大小<=0时不处理;如果超过MAX_MEM_FOR_RS_ALIGN_CODE,则不使用本函数中定义的各种空操作模式来填充,而使用默认的nop(0x90)填充
    return;

  /* We need to decide which NOP sequence to use for 32bit and
     64bit. When -mtune= is used:
       决定使用哪种nop序列
      
     1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
     PROCESSOR_GENERIC32, f32_patt will be used.
     2. For PROCESSOR_PENTIUMPRO, PROCESSOR_PENTIUM4, PROCESSOR_NOCONA,
     PROCESSOR_CORE, PROCESSOR_CORE2, and PROCESSOR_GENERIC64,
     alt_long_patt will be used.
     3. For PROCESSOR_ATHLON, PROCESSOR_K6, PROCESSOR_K8 and
     PROCESSOR_AMDFAM10, alt_short_patt will be used.

     When -mtune= isn't used, alt_long_patt will be used if
     cpu_arch_isa_flags has Cpu686. Otherwise, f32_patt will
     be used.

     When -march= or .arch is used, we can't use anything beyond
     cpu_arch_isa_flags.   */

  if (flag_code == CODE_16BIT)//16位模式
    {
      if (count > 8)//超过8字节
    {
      memcpy (fragP->fr_literal + fragP->fr_fix,
          jump_31, count);//使用jump_31模板
      /* Adjust jump offset.  */
      fragP->fr_literal[fragP->fr_fix + 1] = count - 2;//修改rel8off,减去两字节即jmp rel8off指令长度
    }
      else
    memcpy (fragP->fr_literal + fragP->fr_fix,
        f16_patt[count - 1], count);//使用对应的f16_patt模板
    }
  else
    {
    //下面根据不同的isa(instruction set architecture)来决定使用哪个模板
      const char *const *patt = NULL;

      if (cpu_arch_isa == PROCESSOR_UNKNOWN)//isa未知
    {
      /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
      switch (cpu_arch_tune)
        {
        case PROCESSOR_UNKNOWN:
          /* We use cpu_arch_isa_flags to check if we SHOULD
         optimize for Cpu686.  */
          if ((cpu_arch_isa_flags & Cpu686) != 0)
        patt = alt_long_patt;
          else
        patt = f32_patt;
          break;
        case PROCESSOR_PENTIUMPRO:
        case PROCESSOR_PENTIUM4:
        case PROCESSOR_NOCONA:
        case PROCESSOR_CORE:
        case PROCESSOR_CORE2:
        case PROCESSOR_GENERIC64:
          patt = alt_long_patt;
          break;
        case PROCESSOR_K6:
        case PROCESSOR_ATHLON:
        case PROCESSOR_K8:
        case PROCESSOR_AMDFAM10:
          patt = alt_short_patt;
          break;
        case PROCESSOR_I386:
        case PROCESSOR_I486:
        case PROCESSOR_PENTIUM:
        case PROCESSOR_GENERIC32:
          patt = f32_patt;
          break;
        }
    }
      else
    {
      switch (cpu_arch_tune)//isa已知情况下
        {
        case PROCESSOR_UNKNOWN:
          /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
         PROCESSOR_UNKNOWN.  */
          abort ();
          break;

        case PROCESSOR_I386:
        case PROCESSOR_I486:
        case PROCESSOR_PENTIUM:
        case PROCESSOR_K6:
        case PROCESSOR_ATHLON:
        case PROCESSOR_K8:
        case PROCESSOR_AMDFAM10:
        case PROCESSOR_GENERIC32:
          /* We use cpu_arch_isa_flags to check if we CAN optimize
         for Cpu686.  */
          if ((cpu_arch_isa_flags & Cpu686) != 0)
        patt = alt_short_patt;
          else
        patt = f32_patt;
          break;
        case PROCESSOR_PENTIUMPRO:
        case PROCESSOR_PENTIUM4:
        case PROCESSOR_NOCONA:
        case PROCESSOR_CORE:
        case PROCESSOR_CORE2:
          if ((cpu_arch_isa_flags & Cpu686) != 0)
        patt = alt_long_patt;
          else
        patt = f32_patt;
          break;
        case PROCESSOR_GENERIC64:
          patt = alt_long_patt;
          break;
        }
    }

      if (patt == f32_patt)//使用了f32_patt
    {
      /* If the padding is less than 15 bytes, we use the normal
         ones.  Otherwise, we use a jump instruction and adjust
         its offset.  */
      if (count < 15)
        memcpy (fragP->fr_literal + fragP->fr_fix,
            patt[count - 1], count);
      else
        {
          memcpy (fragP->fr_literal + fragP->fr_fix,
              jump_31, count);
          /* Adjust jump offset.  */
          fragP->fr_literal[fragP->fr_fix + 1] = count - 2;
        }
    }
      else
    {
      /* Maximum length of an instruction is 15 byte.  If the
         padding is greater than 15 bytes and we don't use jump,
         we have to break it into smaller pieces.
         如果填充大于15字节,不使用jmp,而是拆成多个较小的片段来填充  */
      int padding = count;
      while (padding > 15)
        {
          padding -= 15;
          memcpy (fragP->fr_literal + fragP->fr_fix + padding,
              patt [14], 15);
        }

      if (padding)
        memcpy (fragP->fr_literal + fragP->fr_fix,
            patt [padding - 1], padding);
    }
    }
  fragP->fr_var = count;//修改fr_var(填充模式长度)为count
}


5.返回cvt_frag_to_fill
write_object_file->size_seg->cvt_frag_to_fill

      know (fragP->fr_next != NULL);
      fragP->fr_offset = (fragP->fr_next->fr_address
              - fragP->fr_address
              - fragP->fr_fix) / fragP->fr_var;//fr_var是填充模式长度,此处计算填充模式需要被重复的次数
      if (fragP->fr_offset < 0)
    {
      as_bad_where (fragP->fr_file, fragP->fr_line,
            _("attempt to .org/.space backwards? (%ld)"),
            (long) fragP->fr_offset);
      fragP->fr_offset = 0;
    }
      fragP->fr_type = rs_fill;//修改类型为rs_fill
      break;

    case rs_fill:
      break;

6.返回size_seg
write_object_file->size_seg

      for (fragp = seginfo->frchainP->frch_root;
       fragp->fr_next;
       fragp = fragp->fr_next)
    /* Walk to last elt.  找到最后一个分片*/
    ;
      size = fragp->fr_address + fragp->fr_fix;//计算出本段大小
    }
  else
    size = 0;//否则本段大小为0

  flags = bfd_get_section_flags (abfd, sec);

  if (size > 0 && ! seginfo->bss)//不是未初始化段
    flags |= SEC_HAS_CONTENTS;//本段有内容

  flags &= ~SEC_RELOC;//取消reloc属性
  x = bfd_set_section_flags (abfd, sec, flags);
  assert (x);

  newsize = md_section_align (sec, size);//对于elf文件格式是空操作
  x = bfd_set_section_size (abfd, sec, newsize);
  assert (x);

  /* If the size had to be rounded up, add some padding in the last
     non-empty frag.  */
  assert (newsize >= size);
  if (size != newsize)
    {
      fragS *last = seginfo->frchainP->frch_last;
      fragp = seginfo->frchainP->frch_root;
      while (fragp->fr_next != last)
    fragp = fragp->fr_next;
      last->fr_address = size;
      if ((newsize - size) % fragp->fr_var == 0)
    fragp->fr_offset += (newsize - size) / fragp->fr_var;
      else
    /* If we hit this abort, it's likely due to subsegs_finish not
       providing sufficient alignment on the last frag, and the
       machine dependent code using alignment frags with fr_var
       greater than 1.  */
    abort ();
    }

#ifdef tc_frob_section
  tc_frob_section (sec);
#endif
#ifdef obj_frob_section
  obj_frob_section (sec);
#endif
}

7.返回write_object_file

    bfd_map_over_sections (stdoutput, write_contents, (char *) 0);//最后写出各个段

8.write_contents
write_object_file->write_contents

/* Callback for bfd_map_over_sections: write the contents of section SEC
   to stdoutput.  For each frag in the section's frag chain, the fixed
   part (fr_fix bytes starting at fr_literal) is written first, followed
   by the fill pattern (fr_var bytes located right after the fixed part)
   repeated fr_offset times.  Sections without segment info or without
   SEC_HAS_CONTENTS are skipped.  Any write failure is fatal (as_fatal).
   NOTE(review): ABFD is marked ATTRIBUTE_UNUSED but is passed to
   bfd_get_section_flags below; the writes themselves go to stdoutput.  */
static void
write_contents (bfd *abfd ATTRIBUTE_UNUSED,
        asection *sec,
        void *xxx ATTRIBUTE_UNUSED)
{
  segment_info_type *seginfo = seg_info (sec);
  addressT offset = 0;
  fragS *f;

  /* Write out the frags.  */
  if (seginfo == NULL// no segment info for this section
      || !(bfd_get_section_flags (abfd, sec) & SEC_HAS_CONTENTS))// or the section has no contents
    return;// nothing to write

  for (f = seginfo->frchainP->frch_root;
       f;
       f = f->fr_next)// for every frag in the chain
    {
      int x;
      addressT fill_size;
      char *fill_literal;
      offsetT count;

      assert (f->fr_type == rs_fill);// every frag is expected to be rs_fill at this point
      if (f->fr_fix)// the frag has a fixed part
    {
      x = bfd_set_section_contents (stdoutput, sec,
                    f->fr_literal, (file_ptr) offset,
                    (bfd_size_type) f->fr_fix);// write the fixed part
      if (!x)
        as_fatal (_("can't write %s: %s"), stdoutput->filename,
              bfd_errmsg (bfd_get_error ()));
      offset += f->fr_fix;
    }
    // then write the variable (fill) part
      fill_literal = f->fr_literal + f->fr_fix;// start of the fill pattern
      fill_size = f->fr_var;// length of one fill pattern
      count = f->fr_offset;// number of times the pattern is repeated
      assert (count >= 0);
      if (fill_size && count)
    {
      char buf[256];
      if (fill_size > sizeof (buf))// one pattern does not fit in buf
        {
          /* Do it the old way. Can this ever happen?  */
          while (count--)
        {
          x = bfd_set_section_contents (stdoutput, sec,
                        fill_literal,
                        (file_ptr) offset,
                        (bfd_size_type) fill_size);// write one pattern per call
          if (!x)
            as_fatal (_("can't write %s: %s"), stdoutput->filename,
                  bfd_errmsg (bfd_get_error ()));
          offset += fill_size;
        }
        }
      else// the pattern fits in buf (fill_size <= sizeof (buf)): replicate it into buf, then write buf
        {
          /* Build a buffer full of fill objects and output it as
         often as necessary. This saves on the overhead of
         potentially lots of bfd_set_section_contents calls.  */
          int n_per_buf, i;
          if (fill_size == 1)// single-byte pattern
        {
          n_per_buf = sizeof (buf);
          memset (buf, *fill_literal, n_per_buf);// fill the whole buffer with that byte
        }
          else
        {
          char *bufp;
          n_per_buf = sizeof (buf) / fill_size;
          for (i = n_per_buf, bufp = buf; i; i--, bufp += fill_size)
            memcpy (bufp, fill_literal, fill_size);// pack as many whole patterns as fit into buf
        }
          for (; count > 0; count -= n_per_buf)// emit buf until COUNT patterns have been written
        {
          /* The last chunk may hold fewer patterns than a full buffer.  */
          n_per_buf = n_per_buf > count ? count : n_per_buf;
          x = bfd_set_section_contents
            (stdoutput, sec, buf, (file_ptr) offset,
             (bfd_size_type) n_per_buf * fill_size);
          if (!x)
            as_fatal (_("cannot write to output file"));
          offset += n_per_buf * fill_size;
        }
        }
    }
    }
}
 
至此,align涉及到的主要操作全部介绍完了。
阅读(2024) | 评论(0) | 转发(0) |
0

上一篇:align(2)

下一篇:collect2源码分析

给主人留下些什么吧!~~