Chinaunix首页 | 论坛 | 博客
  • 博客访问: 372373
  • 博文数量: 64
  • 博客积分: 2975
  • 博客等级: 少校
  • 技术积分: 831
  • 用 户 组: 普通用户
  • 注册时间: 2007-01-14 10:59
文章存档

2014年(2)

2012年(7)

2010年(40)

2009年(5)

2008年(8)

2007年(2)

分类: LINUX

2010-03-31 17:59:08

align

前面我们分析了.align 4只是记录了对齐的有关信息,并没有进行实质的对齐动作,那么实质的操作又在哪里呢?答案是在汇编的最后写出目标文件阶段。


1.relax阶段

relax的由来?

我们知道,i386的跳转指令有近跳和远跳区分。近跳转都是相对跳转,相对于下一条指令地址。而近跳转的范围有3种,分别是8位有符号数,16位有符号数和32位有符号数。
Mnemonic     Opcode  Description
JMP rel8off  EB cb   Short jump with the target specified by an 8-bit signed displacement.
JMP rel16off E9 cw   Near jump with the target specified by a 16-bit signed displacement.
JMP rel32off E9 cd   Near jump with the target specified by a 32-bit signed displacement.

8位有符号数的范围是 -128~+127
16位有符号数范围是  -32768~+32767
32位有符号数范围是  -2^31~2^31-1


这样在汇编jmp指令时根据模式的不同就有了至少两种选择
16位时,使用jmp rel8off或jmp rel16off
32位时,使用jmp rel8off或jmp rel32off

as的选择是先使用8位,如果不行再使用更多位,尽量缩短编码长度,减少内存占用,也能提高指令执行速度。

考虑如下的情况1:

L1:
....
L2:
jmp L1

如果L1->L2之间的指令长度都是固定的,那么jmp L1的指令选取将根据L2-L1的大小一次性就能决定。


考虑如下的情况2:

L1:
....
L2:
jmp L4
....
L3:
jmp L1
...
L4:


因为jmp L4的存在,导致L1->L3之间的指令长度在一开始不能确定,因而jmp L1指令的编码模式和长度也不能确定,同理jmp L4也存在同样的问题,如果这里还有
.align 4伪指令,也会有影响。

所以前面使用frag_var为.align 4伪指令在本分片中创建了一个可变部分.

这个确定的过程需要推迟,推迟到哪里?就是本文要讨论的relax阶段.其实我觉得叫freeze更合适吧。

2.一些概念

elf目标文件是由许多section(as又叫segment)组成的,
每个section又可分成许多subsection(as又叫subsegment).每个subsegment都有一个唯一编号.
每个subsection又由许多frag分片组成.每个分片有一个固定部分和最多1个可变(var)部分(显然都在分片的结尾).


3.write_object_file

main->write_object_file


  bfd_map_over_sections (stdoutput, chain_frchains_together, (char *) 0);//将所有的子段连在一起
 
  /* We have two segments. If user gave -R flag, then we must put the
     data frags into the text segment. Do this before relaxing so
     we know to take advantage of -R and make shorter addresses.  */
  if (flag_readonly_data_in_text)//要求将数据段并入代码段
    {
      merge_data_into_text ();
    }

  rsi.pass = 0;
  while (1)
    {
#ifndef WORKING_DOT_WORD//定义了,忽略
      /* We need to reset the markers in the broken word list and
     associated frags between calls to relax_segment (via
     relax_seg).  Since the broken word list is global, we do it
     once per round, rather than locally in relax_segment for each
     segment.  */
      struct broken_word *brokp;

      for (brokp = broken_words;
       brokp != (struct broken_word *) NULL;
       brokp = brokp->next_broken_word)
    {
      brokp->added = 0;

      if (brokp->dispfrag != (fragS *) NULL
          && brokp->dispfrag->fr_type == rs_broken_word)
        brokp->dispfrag->fr_subtype = 0;
    }
#endif

      rsi.changed = 0;
      bfd_map_over_sections (stdoutput, relax_seg, &rsi);//对于每个节都调用relax_seg
      rsi.pass++;
      if (!rsi.changed)//如果本次循环所有的节没有发生改动,跳出,否则继续下一轮relax
    break;
    }
   
3.relax_seg   
main->write_object_file->relax_seg   

/* Per-section callback used with bfd_map_over_sections: relax the frag
   chain of SEC and record in the caller's relax_seg_info (passed through
   XXX) whether relax_segment reported a change.  ABFD is unused.  */
static void
relax_seg (bfd *abfd ATTRIBUTE_UNUSED, asection *sec, void *xxx)
{
  struct relax_seg_info *rsi = (struct relax_seg_info *) xxx;
  segment_info_type *si = seg_info (sec);

  /* Nothing to relax when the section has no segment info or no frag
     chain attached to it.  */
  if (!si || !si->frchainP)
    return;

  /* A non-zero result from relax_segment marks this round as changed.  */
  if (relax_segment (si->frchainP->frch_root, sec, rsi->pass))
    rsi->changed = 1;
}

4.先确定每个分片的起始地址
main->write_object_file->relax_seg->relax_segment   

/* Now we have a segment, not a crowd of sub-segments, we can make
   fr_address values.

   Relax the frags.

   After this, all frags in this segment have addresses that are correct
   within the segment. Since segments live in different file addresses,
   these frag addresses may not be the same as final object-file
   addresses.

   SEGMENT_FRAG_ROOT is the first frag of the chain that is walked,
   SEGMENT is passed on to the size-estimation and relax helpers, and
   PASS is the caller's pass counter: while it is below 2 the .org and
   .space error paths request another pass instead of reporting.

   Returns 1 when another pass was requested or when any frag's
   fr_address differs from its recorded last_fr_address, otherwise 0.  */

int
relax_segment (struct frag *segment_frag_root, segT segment, int pass)
{
  unsigned long frag_count;
  struct frag *fragP;
  relax_addressT address;
  int ret;

  /* In case md_estimate_size_before_relax() wants to make fixSs.  */
  subseg_change (segment, 0);// Switch to subsegment 0 in preparation for md_estimate_size_before_relax.

  /* For each frag in segment: count and store  (a 1st guess of)
     fr_address.  */
  address = 0;
  // Compute the start address and size of every frag in this section.
  for (frag_count = 0, fragP = segment_frag_root;
       fragP;
       fragP = fragP->fr_next, frag_count ++)
    {
      fragP->relax_marker = 0;
      fragP->fr_address = address;// Start address of this frag.
      address += fragP->fr_fix;// Size of the fixed part.

      switch (fragP->fr_type)
    {
    case rs_fill:// For the ".fill repeat,size,value" pseudo-op: fr_type=rs_fill, fr_var=size, fr_offset=repeat.
      address += fragP->fr_offset * fragP->fr_var;// Add the size of the variable part.
      break;

    case rs_align:
    case rs_align_code:
    case rs_align_test:
      {
      /*
      For the earlier scenario, .align 4:
      fr_type    -> kind of alignment
      fr_subtype -> max (largest padding allowed; 0 means no limit)
      fr_var     -> length of the fill pattern
      fr_offset  -> alignment
      */
        addressT offset = relax_align (address, (int) fragP->fr_offset);

4.relax_align
main->write_object_file->relax_seg->relax_segment->relax_align   

/* Relax_align.  Advance the location counter to the next address that has
   'alignment' lowest order bits all 0s, and return the size of the
   adjustment made (0 when ADDRESS is already aligned).

   ADDRESS is the current address.  ALIGNMENT is the number of low-order
   bits that must be zero, i.e. the base-2 logarithm of the alignment.  */
static relax_addressT
relax_align (relax_addressT address,    /* Address now.  */
         int alignment    /* Alignment (binary).  */)
{
  relax_addressT mask;
  relax_addressT new_address;

  /* Build the mask in the address type.  Shifting the plain int ~0 (that
     is, -1) left is undefined behaviour in C and is also evaluated at int
     width, which breaks once ALIGNMENT reaches the width of int.
     NOTE(review): this assumes relax_addressT is an unsigned integer
     type -- confirm against its typedef.  */
  mask = ~((relax_addressT) ~0 << alignment);
  new_address = (address + mask) & (~mask);
#ifdef LINKER_RELAXING_SHRINKS_ONLY
  if (linkrelax)
    /* We must provide lots of padding, so the linker can discard it
       when needed.  The linker will not add extra space, ever.  */
    new_address += ((relax_addressT) 1 << alignment);
#endif
  /* Amount by which the address grew because of the alignment.  */
  return (new_address - address);
}

5.返回relax_segment
main->write_object_file->relax_seg->relax_segment   

      /*
      For the earlier scenario, .align 4:
      fr_type    -> kind of alignment
      fr_subtype -> max (largest padding allowed; 0 means no limit)
      fr_var     -> length of the fill pattern
      fr_offset  -> alignment
      */
     
        if (fragP->fr_subtype != 0 && offset > fragP->fr_subtype)// Padding would exceed the maximum.
          offset = 0;// So do not align at all.

        if (offset % fragP->fr_var != 0)// Not a whole multiple of the fill pattern.
          {
        as_bad_where (fragP->fr_file, fragP->fr_line,
                  _("alignment padding (%lu bytes) not a multiple of %ld"),
                  (unsigned long) offset, (long) fragP->fr_var);// Report the error.
        /* Round the padding down to a whole number of fill patterns.  */
        offset -= (offset % fragP->fr_var);
          }

        address += offset;// Add the padding to the running address.
      }
      break;

    case rs_org:
    case rs_space:
      /* Assume .org is nugatory. It will grow with 1st relax.  */
      break;

    case rs_machine_dependent:
      /* If fr_symbol is an expression, this call to
         resolve_symbol_value sets up the correct segment, which will
         likely be needed in md_estimate_size_before_relax.  */
      if (fragP->fr_symbol)// There is a symbol: resolve its value first.
        resolve_symbol_value (fragP->fr_symbol);
     
       // The exact size of the earlier jmp-like instructions is not known yet, but it can be estimated first.
      address += md_estimate_size_before_relax (fragP, segment);
      break;


6.开始relax过程


#ifndef WORKING_DOT_WORD// Ignored in this walkthrough.
      /* Broken words don't concern us yet.  */
    case rs_broken_word:
      break;
#endif

    case rs_leb128:
      /* Initial guess is always 1; doing otherwise can result in
         stable solutions that are larger than the minimum.  */
      address += fragP->fr_offset = 1;
      break;

    case rs_cfa:
      address += eh_frame_estimate_size_before_relax (fragP);
      break;

    case rs_dwarf2dbg:
      address += dwarf2dbg_estimate_size_before_relax (fragP);
      break;

    default:
      BAD_CASE (fragP->fr_type);
      break;
    }
    }

  /* Do relax().  */
  {
    unsigned long max_iterations;

    /* Cumulative address adjustment.  */
    offsetT stretch;

    /* Have we made any adjustment this pass?  We can't just test
       stretch because one piece of code may have grown and another
       shrank.  */
    int stretched;

    /* Most horrible, but gcc may give us some exception data that
       is impossible to assemble, of the form

       .align 4
       .byte 0, 0
       .uleb128 end - start
       start:
       .space 128*128 - 1
       .align 4
       end:

       If the leb128 is two bytes in size, then end-start is 128*128,
       which requires a three byte leb128.  If the leb128 is three
       bytes in size, then end-start is 128*128-1, which requires a
       two byte leb128.  We work around this dilemma by inserting
       an extra 4 bytes of alignment just after the .align.  This
       works because the data after the align is accessed relative to
       the end label.

       This counter is used in a tiny state machine to detect
       whether a leb128 followed by an align is impossible to
       relax.  */
    int rs_leb128_fudge = 0;

    /* We want to prevent going into an infinite loop where one frag grows
       depending upon the location of a symbol which is in turn moved by
       the growing frag.  eg:

     foo = .
     .org foo+16
     foo = .

       So we dictate that this algorithm can be at most O2.  */
    max_iterations = frag_count * frag_count;// Relaxing may need up to frag_count squared iterations.
    /* Check for overflow.  */
    if (max_iterations < frag_count)// The multiplication overflowed.
      max_iterations = frag_count;

    ret = 0;
    do
      {
    stretch = 0;
    stretched = 0;

    for (fragP = segment_frag_root; fragP; fragP = fragP->fr_next)
      {
        offsetT growth = 0;
        addressT was_address;
        offsetT offset;
        symbolS *symbolP;

        fragP->relax_marker ^= 1;
        was_address = fragP->fr_address;// Address before this pass's adjustment.
        address = fragP->fr_address += stretch;// Address after the adjustment.
        symbolP = fragP->fr_symbol;
        offset = fragP->fr_offset;// The meaning of offset depends on the frag type.

        switch (fragP->fr_type)
          {
          case rs_fill:    /* .fill never relaxes: it is already fixed, no relaxing needed.  */
        growth = 0;
        break;

#ifndef WORKING_DOT_WORD// Ignored in this walkthrough.
        /* JF:  This is RMS's idea.  I do *NOT* want to be blamed
           for it I do not want to write it.  I do not want to have
           anything to do with it.  This is not the proper way to
           implement this misfeature.  */
          case rs_broken_word:
        {
          struct broken_word *lie;
          struct broken_word *untruth;

          /* Yes this is ugly (storing the broken_word pointer
             in the symbol slot).  Still, this whole chunk of
             code is ugly, and I don't feel like doing anything
             about it.  Think of it as stubbornness in action.  */
          growth = 0;
          for (lie = (struct broken_word *) (fragP->fr_symbol);
               lie && lie->dispfrag == fragP;
               lie = lie->next_broken_word)
            {

              if (lie->added)
            continue;

              offset = (S_GET_VALUE (lie->add)
                + lie->addnum
                - S_GET_VALUE (lie->sub));
              if (offset <= -32768 || offset >= 32767)
            {
              if (flag_warn_displacement)
                {
                  char buf[50];
                  sprint_value (buf, (addressT) lie->addnum);
                  as_warn_where (fragP->fr_file, fragP->fr_line,
                         _(".word %s-%s+%s didn't fit"),
                         S_GET_NAME (lie->add),
                         S_GET_NAME (lie->sub),
                         buf);
                }
              lie->added = 1;
              if (fragP->fr_subtype == 0)
                {
                  fragP->fr_subtype++;
                  growth += md_short_jump_size;
                }
              for (untruth = lie->next_broken_word;
                   untruth && untruth->dispfrag == lie->dispfrag;
                   untruth = untruth->next_broken_word)
                if ((symbol_get_frag (untruth->add)
                 == symbol_get_frag (lie->add))
                && (S_GET_VALUE (untruth->add)
                    == S_GET_VALUE (lie->add)))
                  {
                untruth->added = 2;
                untruth->use_jump = lie;
                  }
              growth += md_long_jump_size;
            }
            }

          break;
        }        /* case rs_broken_word  */
#endif
          case rs_align:
          case rs_align_code:// The ".align 4" case discussed in the article.
          case rs_align_test:
        {
          addressT oldoff, newoff;
      /*
      For the earlier scenario, .align 4:
      fr_type    -> kind of alignment
      fr_subtype -> max (largest padding allowed; 0 means no limit)
      fr_var     -> length of the fill pattern
      fr_offset  -> alignment
      */
          oldoff = relax_align (was_address + fragP->fr_fix,
                    (int) offset);// Padding before the adjustment.
          newoff = relax_align (address + fragP->fr_fix,
                    (int) offset);// Padding after the adjustment.

          if (fragP->fr_subtype != 0)// A maximum was given.
            {
              if (oldoff > fragP->fr_subtype)// Exceeds the maximum.
            oldoff = 0;
              if (newoff > fragP->fr_subtype)// Exceeds the maximum.
            newoff = 0;
            }

          growth = newoff - oldoff;

          /* If this align happens to follow a leb128 and
             we have determined that the leb128 is bouncing
             in size, then break the cycle by inserting an
             extra alignment.  */
          if (growth < 0// Not considered for now in this walkthrough.
              && (rs_leb128_fudge & 16) != 0
              && (rs_leb128_fudge & 15) >= 2)
            {
              segment_info_type *seginfo = seg_info (segment);
              struct obstack *ob = &seginfo->frchainP->frch_obstack;
              struct frag *newf;

              newf = frag_alloc (ob);
              obstack_blank_fast (ob, fragP->fr_var);
              obstack_finish (ob);
              memcpy (newf, fragP, SIZEOF_STRUCT_FRAG);
              memcpy (newf->fr_literal,
                  fragP->fr_literal + fragP->fr_fix,
                  fragP->fr_var);
              newf->fr_type = rs_fill;
              newf->fr_fix = 0;
              newf->fr_offset = (((offsetT) 1 << fragP->fr_offset)
                     / fragP->fr_var);
              if (newf->fr_offset * newf->fr_var
              != (offsetT) 1 << fragP->fr_offset)
            {
              newf->fr_offset = (offsetT) 1 << fragP->fr_offset;
              newf->fr_var = 1;
            }
              /* Include growth of new frag, because rs_fill
             frags don't normally grow.  */
              growth += newf->fr_offset * newf->fr_var;
              /* The new frag address is newoff.  Adjust this
             for the amount we'll add when we process the
             new frag.  */
              newf->fr_address = newoff - stretch - growth;
              newf->relax_marker ^= 1;
              fragP->fr_next = newf;
#ifdef DEBUG
              as_warn (_("padding added"));
#endif
            }
        }
        break;
       
          case rs_org:
        {
          addressT target = offset;
          addressT after;

          if (symbolP)
            {
              /* Convert from an actual address to an octet offset
             into the section.  Here it is assumed that the
             section's VMA is zero, and can omit subtracting it
             from the symbol's value to get the address offset.  */
              know (S_GET_SEGMENT (symbolP)->vma == 0);
              target += S_GET_VALUE (symbolP) * OCTETS_PER_BYTE;
            }

          know (fragP->fr_next);
          after = fragP->fr_next->fr_address;
          growth = target - after;
          if (growth < 0)
            {
              growth = 0;

              /* Don't error on first few frag relax passes.
             The symbol might be an expression involving
             symbol values from other sections.  If those
             sections have not yet been processed their
             frags will all have zero addresses, so we
             will calculate incorrect values for them.  The
             number of passes we allow before giving an
             error is somewhat arbitrary.  It should be at
             least one, with larger values requiring
             increasingly contrived dependencies between
             frags to trigger a false error.  */
              if (pass < 2)
            {
              /* Force another pass.  */
              ret = 1;
              break;
            }

              /* Growth may be negative, but variable part of frag
             cannot have fewer than 0 chars.  That is, we can't
             .org backwards.  */
              as_bad_where (fragP->fr_file, fragP->fr_line,
                    _("attempt to move .org backwards"));

              /* We've issued an error message.  Change the
             frag to avoid cascading errors.  */
              fragP->fr_type = rs_align;
              fragP->fr_subtype = 0;
              fragP->fr_offset = 0;
              fragP->fr_fix = after - was_address;
              break;
            }

          /* This is an absolute growth factor  */
          growth -= stretch;
          break;
        }

          case rs_space:
        growth = 0;
        if (symbolP)
          {
            offsetT amount;

            amount = S_GET_VALUE (symbolP);
            if (S_GET_SEGMENT (symbolP) != absolute_section
            || S_IS_COMMON (symbolP)
            || ! S_IS_DEFINED (symbolP))
              {
            as_bad_where (fragP->fr_file, fragP->fr_line,
                      _(".space specifies non-absolute value"));
            /* Prevent repeat of this error message.  */
            fragP->fr_symbol = 0;
              }
            else if (amount < 0)
              {
            /* Don't error on first few frag relax passes.
               See rs_org comment for a longer explanation.  */
            if (pass < 2)
              {
                ret = 1;
                break;
              }

            as_warn_where (fragP->fr_file, fragP->fr_line,
                       _(".space or .fill with negative value, ignored"));
            fragP->fr_symbol = 0;
              }
            else
              growth = (was_address + fragP->fr_fix + amount
                - fragP->fr_next->fr_address);
          }
        break;

          case rs_machine_dependent:
#ifdef md_relax_frag
        growth = md_relax_frag (segment, fragP, stretch);
#else
#ifdef TC_GENERIC_RELAX_TABLE
        /* The default way to relax a frag is to look through
           TC_GENERIC_RELAX_TABLE.  */
        growth = relax_frag (segment, fragP, stretch);// Relax frags that hold jmp-like instructions; the strategy is to go from the smallest encoding to larger ones.
#endif /* TC_GENERIC_RELAX_TABLE  */
#endif
        break;
          case rs_leb128:
        {
          valueT value;
          offsetT size;

          value = resolve_symbol_value (fragP->fr_symbol);
          size = sizeof_leb128 (value, fragP->fr_subtype);
          growth = size - fragP->fr_offset;
          fragP->fr_offset = size;
        }
        break;

          case rs_cfa:
        growth = eh_frame_relax_frag (fragP);
        break;

          case rs_dwarf2dbg:
        growth = dwarf2dbg_relax_frag (fragP);
        break;

          default:
        BAD_CASE (fragP->fr_type);
        break;
          }
        if (growth)// This frag changed size.
          {
        stretch += growth;// Accumulate into stretch so later frags are shifted.
        stretched = 1;
        if (fragP->fr_type == rs_leb128)
          rs_leb128_fudge += 16;
        else if (fragP->fr_type == rs_align
             && (rs_leb128_fudge & 16) != 0
             && stretch == 0)
          rs_leb128_fudge += 16;
        else
          rs_leb128_fudge = 0;
          }
      }

    if (stretch == 0
        && (rs_leb128_fudge & 16) == 0
        && (rs_leb128_fudge & -16) != 0)
      rs_leb128_fudge += 1;
    else
      rs_leb128_fudge = 0;
      }
    /* Until nothing further to relax.  */
    while (stretched && -- max_iterations);// Loop until nothing changes or the iteration limit is used up.

    if (stretched)// Still changing after the limit: treated as an infinite loop.
      as_fatal (_("Infinite loop encountered whilst attempting to compute the addresses of symbols in section %s"),
        segment_name (segment));
  }

  /* Report a change if any frag ended up at an address different from
     the one recorded in last_fr_address, and record the new address.  */
  for (fragP = segment_frag_root; fragP; fragP = fragP->fr_next)
    if (fragP->last_fr_address != fragP->fr_address)
      {
    fragP->last_fr_address = fragP->fr_address;// Remember the final address.
    ret = 1;
      }
  return ret;
}       

这样,经过多轮循环,各个分片的大小和起始地址都固定下来了。
阅读(3155) | 评论(0) | 转发(1) |
0

上一篇:align(1)

下一篇:align(3)

给主人留下些什么吧!~~