align
前面我们分析了.align 4只是记录了对齐的有关信息,并没有进行实质的对齐动作,那么实质的操作又在哪里呢?答案是在汇编的最后写出目标文件阶段。
1.relax阶段
relax的由来?
我们知道,i386的跳转指令有近跳转和远跳转之分。近跳转都是相对跳转,相对于下一条指令的地址。近跳转的位移有3种宽度,分别是8位有符号数、16位有符号数和32位有符号数
Mnemonic Opcode Description
JMP rel8off EB cb Short jump with the target specified by an 8-bit signed displacement.
JMP rel16off E9 cw Near jump with the target specified by a 16-bit signed displacement.
JMP rel32off E9 cd Near jump with the target specified by a 32-bit signed displacement.
8位有符号数的范围是 -128~+127
16位有符号数范围是 -32768~+32767
32位有符号数范围是 -2^31~2^31-1
这样在汇编jmp指令时根据模式的不同就有了至少两种选择
16位时,使用jmp rel8off或jmp rel16off
32位时,使用jmp rel8off或jmp rel32off
as的选择是先使用8位,如果不行再使用更多位,尽量缩短编码长度,减少内存占用,也能提高指令执行速度。
考虑如下的情况1:
L1:
....
L2:
jmp L1
如果L1->L2之间的指令长度都是固定的,那么jmp L1的指令选取将根据L2-L1的大小一次性就能决定。
考虑如下的情况2:
L1:
....
L2:
jmp L4
....
L3:
jmp L1
...
L4:
因为jmp L4的存在,导致L1->L3之间的指令长度在一开始不能确定,因而jmp L1指令的编码模式和长度也不能确定,同理jmp L4也存在同样的问题,如果这里还有
.align 4伪指令,也会有影响。
所以前面使用frag_var为.align 4伪指令在本分片中创建了一个可变部分.
这个确定的过程需要推迟,推迟到哪里?就是本文要讨论的relax阶段.其实我觉得叫freeze更合适吧。
2.一些概念
elf目标文件是由许多section(as又叫segment)组成的,
每个section又可分成许多subsection(as又叫subsegment).每个subsegment都有一个唯一编号.
每个subsection又由许多frag分片组成.每个分片有一个固定部分和最多1个可变(var)部分(显然都在分片的结尾).
3.write_object_file
main->write_object_file
bfd_map_over_sections (stdoutput, chain_frchains_together, (char *) 0); // chain all the subsegments of each section together
/* We have two segments. If user gave -R flag, then we must put the
data frags into the text segment. Do this before relaxing so
we know to take advantage of -R and make shorter addresses. */
if (flag_readonly_data_in_text) // the data segment was requested to be merged into the text segment
{
merge_data_into_text ();
}
rsi.pass = 0;
while (1)
{
#ifndef WORKING_DOT_WORD // per the article, WORKING_DOT_WORD is defined here, so this block is skipped
/* We need to reset the markers in the broken word list and
associated frags between calls to relax_segment (via
relax_seg). Since the broken word list is global, we do it
once per round, rather than locally in relax_segment for each
segment. */
struct broken_word *brokp;
for (brokp = broken_words;
brokp != (struct broken_word *) NULL;
brokp = brokp->next_broken_word)
{
brokp->added = 0;
if (brokp->dispfrag != (fragS *) NULL
&& brokp->dispfrag->fr_type == rs_broken_word)
brokp->dispfrag->fr_subtype = 0;
}
#endif
rsi.changed = 0;
bfd_map_over_sections (stdoutput, relax_seg, &rsi); // call relax_seg on every section
rsi.pass++;
if (!rsi.changed) // no section changed during this round: stop; otherwise run another relax round
break;
}
3.relax_seg
main->write_object_file->relax_seg
/* Per-section callback handed to bfd_map_over_sections.

   XXX points to the caller's struct relax_seg_info.  When SEC has
   segment info with a frag chain, relax_segment is run on the root frag
   of that chain with the current pass number; a non-zero result is
   recorded by setting the `changed' flag.  ABFD is unused.  */
static void
relax_seg (bfd *abfd ATTRIBUTE_UNUSED, asection *sec, void *xxx)
{
  segment_info_type *si = seg_info (sec);
  struct relax_seg_info *state = (struct relax_seg_info *) xxx;

  /* Nothing to relax without segment info or without a frag chain.  */
  if (!si)
    return;
  if (!si->frchainP)
    return;

  if (relax_segment (si->frchainP->frch_root, sec, state->pass))
    {
      /* Tell the caller that this section changed.  */
      state->changed = 1;
    }
}
4.先确定每个分片的起始地址
main->write_object_file->relax_seg->relax_segment
/* Now we have a segment, not a crowd of sub-segments, we can make
fr_address values.
Relax the frags.
After this, all frags in this segment have addresses that are correct
within the segment. Since segments live in different file addresses,
these frag addresses may not be the same as final object-file
addresses.
SEGMENT_FRAG_ROOT is the first frag of the chain that is walked,
SEGMENT is the section being relaxed and PASS is the caller's pass
number. The return value is non-zero when another pass is wanted:
either a frag's fr_address differs from its last_fr_address, or a
.org/.space check was deferred because PASS is still below 2. */
int
relax_segment (struct frag *segment_frag_root, segT segment, int pass)
{
unsigned long frag_count;
struct frag *fragP;
relax_addressT address;
int ret;
/* In case md_estimate_size_before_relax() wants to make fixSs. */
subseg_change (segment, 0); // switch to subsegment 0 in preparation for md_estimate_size_before_relax
/* For each frag in segment: count and store (a 1st guess of)
fr_address. */
address = 0;
// First walk: give every frag of this section an initial start address and size.
for (frag_count = 0, fragP = segment_frag_root;
fragP;
fragP = fragP->fr_next, frag_count ++)
{
fragP->relax_marker = 0;
fragP->fr_address = address; // start address of this frag
address += fragP->fr_fix; // size of the fixed part
switch (fragP->fr_type)
{
case rs_fill: // for the ".fill repeat,size,value" directive: fr_type=rs_fill, fr_var=size, fr_offset=repeat (per the article)
address += fragP->fr_offset * fragP->fr_var; // add the size of the variable part
break;
case rs_align:
case rs_align_code:
case rs_align_test:
{
/*
For the ".align 4" scenario discussed earlier (field usage per the article):
fr_type -> kind of alignment
fr_subtype -> max
fr_var -> length of the fill pattern
fr_offset -> alignment
*/
addressT offset = relax_align (address, (int) fragP->fr_offset);
4.relax_align
main->write_object_file->relax_seg->relax_segment->relax_align
/* Relax_align. Advance location counter to next address that has 'alignment'
   lowest order bits all 0s, return size of adjustment made.

   ADDRESS is the current location counter.  ALIGNMENT is the binary
   alignment, i.e. the number of low-order bits that must be zero.
   The result is the number of bytes between ADDRESS and the next
   aligned address; it is 0 when ADDRESS is already aligned.  */
static relax_addressT
relax_align (relax_addressT address,	/* Address now.  */
	     int alignment		/* Alignment (binary).  */)
{
  relax_addressT mask;
  relax_addressT new_address;

  /* Build the mask in relax_addressT rather than in int: left-shifting
     the int value ~0 (i.e. -1) is undefined behaviour in C, and an int
     mask cannot cover the full width of a wider address type.
     NOTE(review): this assumes relax_addressT is an unsigned integer
     type -- confirm against its typedef.  */
  mask = ~((relax_addressT) ~0 << alignment);
  new_address = (address + mask) & (~mask);
#ifdef LINKER_RELAXING_SHRINKS_ONLY /* The article treats this configuration as not applicable.  */
  if (linkrelax)
    /* We must provide lots of padding, so the linker can discard it
       when needed. The linker will not add extra space, ever.  */
    new_address += ((relax_addressT) 1 << alignment);
#endif
  /* Amount by which the alignment extends the frag.  */
  return (new_address - address);
}
5.返回relax_segment
main->write_object_file->relax_seg->relax_segment
/*
For the ".align 4" scenario discussed earlier (field usage per the article):
fr_type -> kind of alignment
fr_subtype -> max
fr_var -> length of the fill pattern
fr_offset -> alignment
*/
if (fragP->fr_subtype != 0 && offset > fragP->fr_subtype) // padding would exceed the maximum length
offset = 0; // so do not align at all
if (offset % fragP->fr_var != 0) // padding is not a whole multiple of the fill pattern
{
as_bad_where (fragP->fr_file, fragP->fr_line,
_("alignment padding (%lu bytes) not a multiple of %ld"),
(unsigned long) offset, (long) fragP->fr_var); // report the error
offset -= (offset % fragP->fr_var);
}
address += offset; // account for the padding in the running address
}
break;
case rs_org:
case rs_space:
/* Assume .org is nugatory. It will grow with 1st relax. */
break;
case rs_machine_dependent:
/* If fr_symbol is an expression, this call to
resolve_symbol_value sets up the correct segment, which will
likely be needed in md_estimate_size_before_relax. */
if (fragP->fr_symbol) // the frag has a symbol: resolve its value first
resolve_symbol_value (fragP->fr_symbol);
// The exact size of e.g. the earlier jmp instructions is not known yet, so start from an estimate.
address += md_estimate_size_before_relax (fragP, segment);
break;
6.开始relax过程
#ifndef WORKING_DOT_WORD // per the article, not compiled in this configuration
/* Broken words don't concern us yet. */
case rs_broken_word:
break;
#endif
case rs_leb128:
/* Initial guess is always 1; doing otherwise can result in
stable solutions that are larger than the minimum. */
address += fragP->fr_offset = 1;
break;
case rs_cfa:
address += eh_frame_estimate_size_before_relax (fragP);
break;
case rs_dwarf2dbg:
address += dwarf2dbg_estimate_size_before_relax (fragP);
break;
default:
BAD_CASE (fragP->fr_type);
break;
}
}
/* Do relax(). */
{
unsigned long max_iterations;
/* Cumulative address adjustment. */
offsetT stretch;
/* Have we made any adjustment this pass? We can't just test
stretch because one piece of code may have grown and another
shrank. */
int stretched;
/* Most horrible, but gcc may give us some exception data that
is impossible to assemble, of the form
.align 4
.byte 0, 0
.uleb128 end - start
start:
.space 128*128 - 1
.align 4
end:
If the leb128 is two bytes in size, then end-start is 128*128,
which requires a three byte leb128. If the leb128 is three
bytes in size, then end-start is 128*128-1, which requires a
two byte leb128. We work around this dilemma by inserting
an extra 4 bytes of alignment just after the .align. This
works because the data after the align is accessed relative to
the end label.
This counter is used in a tiny state machine to detect
whether a leb128 followed by an align is impossible to
relax. */
int rs_leb128_fudge = 0;
/* We want to prevent going into an infinite loop where one frag grows
depending upon the location of a symbol which is in turn moved by
the growing frag. eg:
foo = .
.org foo+16
foo = .
So we dictate that this algorithm can be at most O2. */
max_iterations = frag_count * frag_count; // the relax loop may run up to frag_count squared rounds
/* Check for overflow. */
if (max_iterations < frag_count) // the multiplication overflowed
max_iterations = frag_count;
ret = 0;
do
{
stretch = 0;
stretched = 0;
for (fragP = segment_frag_root; fragP; fragP = fragP->fr_next)
{
offsetT growth = 0;
addressT was_address;
offsetT offset;
symbolS *symbolP;
fragP->relax_marker ^= 1;
was_address = fragP->fr_address; // address before this round's adjustment
address = fragP->fr_address += stretch; // address after shifting by the accumulated stretch
symbolP = fragP->fr_symbol;
offset = fragP->fr_offset; // the meaning of offset depends on the frag type
switch (fragP->fr_type)
{
case rs_fill: /* .fill never relaxes: its size is already fixed. */
growth = 0;
break;
#ifndef WORKING_DOT_WORD // per the article, not compiled in this configuration
/* JF: This is RMS's idea. I do *NOT* want to be blamed
for it I do not want to write it. I do not want to have
anything to do with it. This is not the proper way to
implement this misfeature. */
case rs_broken_word:
{
struct broken_word *lie;
struct broken_word *untruth;
/* Yes this is ugly (storing the broken_word pointer
in the symbol slot). Still, this whole chunk of
code is ugly, and I don't feel like doing anything
about it. Think of it as stubbornness in action. */
growth = 0;
for (lie = (struct broken_word *) (fragP->fr_symbol);
lie && lie->dispfrag == fragP;
lie = lie->next_broken_word)
{
if (lie->added)
continue;
offset = (S_GET_VALUE (lie->add)
+ lie->addnum
- S_GET_VALUE (lie->sub));
if (offset <= -32768 || offset >= 32767)
{
if (flag_warn_displacement)
{
char buf[50];
sprint_value (buf, (addressT) lie->addnum);
as_warn_where (fragP->fr_file, fragP->fr_line,
_(".word %s-%s+%s didn't fit"),
S_GET_NAME (lie->add),
S_GET_NAME (lie->sub),
buf);
}
lie->added = 1;
if (fragP->fr_subtype == 0)
{
fragP->fr_subtype++;
growth += md_short_jump_size;
}
for (untruth = lie->next_broken_word;
untruth && untruth->dispfrag == lie->dispfrag;
untruth = untruth->next_broken_word)
if ((symbol_get_frag (untruth->add)
== symbol_get_frag (lie->add))
&& (S_GET_VALUE (untruth->add)
== S_GET_VALUE (lie->add)))
{
untruth->added = 2;
untruth->use_jump = lie;
}
growth += md_long_jump_size;
}
}
break;
} /* case rs_broken_word */
#endif
case rs_align:
case rs_align_code: // the ".align 4" case followed in the article
case rs_align_test:
{
addressT oldoff, newoff;
/*
For the ".align 4" scenario discussed earlier (field usage per the article):
fr_type -> kind of alignment
fr_subtype -> max
fr_var -> length of the fill pattern
fr_offset -> alignment
*/
oldoff = relax_align (was_address + fragP->fr_fix,
(int) offset); // padding needed before the adjustment
newoff = relax_align (address + fragP->fr_fix,
(int) offset); // padding needed after the adjustment
if (fragP->fr_subtype != 0) // a maximum was given
{
if (oldoff > fragP->fr_subtype) // exceeds the maximum
oldoff = 0;
if (newoff > fragP->fr_subtype) // exceeds the maximum
newoff = 0;
}
growth = newoff - oldoff;
/* If this align happens to follow a leb128 and
we have determined that the leb128 is bouncing
in size, then break the cycle by inserting an
extra alignment. */
if (growth < 0 // the article leaves this leb128 workaround aside
&& (rs_leb128_fudge & 16) != 0
&& (rs_leb128_fudge & 15) >= 2)
{
segment_info_type *seginfo = seg_info (segment);
struct obstack *ob = &seginfo->frchainP->frch_obstack;
struct frag *newf;
newf = frag_alloc (ob);
obstack_blank_fast (ob, fragP->fr_var);
obstack_finish (ob);
memcpy (newf, fragP, SIZEOF_STRUCT_FRAG);
memcpy (newf->fr_literal,
fragP->fr_literal + fragP->fr_fix,
fragP->fr_var);
newf->fr_type = rs_fill;
newf->fr_fix = 0;
newf->fr_offset = (((offsetT) 1 << fragP->fr_offset)
/ fragP->fr_var);
if (newf->fr_offset * newf->fr_var
!= (offsetT) 1 << fragP->fr_offset)
{
newf->fr_offset = (offsetT) 1 << fragP->fr_offset;
newf->fr_var = 1;
}
/* Include growth of new frag, because rs_fill
frags don't normally grow. */
growth += newf->fr_offset * newf->fr_var;
/* The new frag address is newoff. Adjust this
for the amount we'll add when we process the
new frag. */
newf->fr_address = newoff - stretch - growth;
newf->relax_marker ^= 1;
fragP->fr_next = newf;
#ifdef DEBUG
as_warn (_("padding added"));
#endif
}
}
break;
case rs_org:
{
addressT target = offset;
addressT after;
if (symbolP)
{
/* Convert from an actual address to an octet offset
into the section. Here it is assumed that the
section's VMA is zero, and can omit subtracting it
from the symbol's value to get the address offset. */
know (S_GET_SEGMENT (symbolP)->vma == 0);
target += S_GET_VALUE (symbolP) * OCTETS_PER_BYTE;
}
know (fragP->fr_next);
after = fragP->fr_next->fr_address;
growth = target - after;
if (growth < 0)
{
growth = 0;
/* Don't error on first few frag relax passes.
The symbol might be an expression involving
symbol values from other sections. If those
sections have not yet been processed their
frags will all have zero addresses, so we
will calculate incorrect values for them. The
number of passes we allow before giving an
error is somewhat arbitrary. It should be at
least one, with larger values requiring
increasingly contrived dependencies between
frags to trigger a false error. */
if (pass < 2)
{
/* Force another pass. */
ret = 1;
break;
}
/* Growth may be negative, but variable part of frag
cannot have fewer than 0 chars. That is, we can't
.org backwards. */
as_bad_where (fragP->fr_file, fragP->fr_line,
_("attempt to move .org backwards"));
/* We've issued an error message. Change the
frag to avoid cascading errors. */
fragP->fr_type = rs_align;
fragP->fr_subtype = 0;
fragP->fr_offset = 0;
fragP->fr_fix = after - was_address;
break;
}
/* This is an absolute growth factor */
growth -= stretch;
break;
}
case rs_space:
growth = 0;
if (symbolP)
{
offsetT amount;
amount = S_GET_VALUE (symbolP);
if (S_GET_SEGMENT (symbolP) != absolute_section
|| S_IS_COMMON (symbolP)
|| ! S_IS_DEFINED (symbolP))
{
as_bad_where (fragP->fr_file, fragP->fr_line,
_(".space specifies non-absolute value"));
/* Prevent repeat of this error message. */
fragP->fr_symbol = 0;
}
else if (amount < 0)
{
/* Don't error on first few frag relax passes.
See rs_org comment for a longer explanation. */
if (pass < 2)
{
ret = 1;
break;
}
as_warn_where (fragP->fr_file, fragP->fr_line,
_(".space or .fill with negative value, ignored"));
fragP->fr_symbol = 0;
}
else
growth = (was_address + fragP->fr_fix + amount
- fragP->fr_next->fr_address);
}
break;
case rs_machine_dependent:
#ifdef md_relax_frag
growth = md_relax_frag (segment, fragP, stretch);
#else
#ifdef TC_GENERIC_RELAX_TABLE
/* The default way to relax a frag is to look through
TC_GENERIC_RELAX_TABLE. */
growth = relax_frag (segment, fragP, stretch); // relax a frag holding e.g. a jmp; per the article the strategy goes from the smallest encoding upward
#endif /* TC_GENERIC_RELAX_TABLE */
#endif
break;
case rs_leb128:
{
valueT value;
offsetT size;
value = resolve_symbol_value (fragP->fr_symbol);
size = sizeof_leb128 (value, fragP->fr_subtype);
growth = size - fragP->fr_offset;
fragP->fr_offset = size;
}
break;
case rs_cfa:
growth = eh_frame_relax_frag (fragP);
break;
case rs_dwarf2dbg:
growth = dwarf2dbg_relax_frag (fragP);
break;
default:
BAD_CASE (fragP->fr_type);
break;
}
if (growth) // this frag changed size
{
stretch += growth; // accumulate into stretch so the following frags get shifted
stretched = 1;
if (fragP->fr_type == rs_leb128)
rs_leb128_fudge += 16;
else if (fragP->fr_type == rs_align
&& (rs_leb128_fudge & 16) != 0
&& stretch == 0)
rs_leb128_fudge += 16;
else
rs_leb128_fudge = 0;
}
}
if (stretch == 0
&& (rs_leb128_fudge & 16) == 0
&& (rs_leb128_fudge & -16) != 0)
rs_leb128_fudge += 1;
else
rs_leb128_fudge = 0;
}
/* Until nothing further to relax. */
while (stretched && -- max_iterations); // loop until nothing changed or the iteration budget is used up
if (stretched) // still changing when the budget ran out: reported as an infinite loop
as_fatal (_("Infinite loop encountered whilst attempting to compute the addresses of symbols in section %s"),
segment_name (segment));
}
for (fragP = segment_frag_root; fragP; fragP = fragP->fr_next)
if (fragP->last_fr_address != fragP->fr_address)
{
fragP->last_fr_address = fragP->fr_address; // remember the address settled on by this call
ret = 1;
}
return ret;
}
这样,经过多轮循环,各个分片的大小和起始地址都固定下来了。
阅读(3146) | 评论(0) | 转发(1) |