align(3)
确定各段大小并写出各段
1.经过relax后,各个段的大小可以确定了,接下来的工作就是确定各段大小
void
write_object_file (void)
{
bfd_map_over_sections (stdoutput, size_seg, (char *) 0);
}
对于每个段调用size_seg
2.size_seg
write_object_file->size_seg
static void
size_seg (bfd *abfd, asection *sec, void *xxx ATTRIBUTE_UNUSED)
{
flagword flags;
fragS *fragp;
segment_info_type *seginfo;
int x;
valueT size, newsize;
subseg_change (sec, 0);//切换到该段第0子段
seginfo = seg_info (sec);//取段信息
if (seginfo && seginfo->frchainP)//如果有子段
{
for (fragp = seginfo->frchainP->frch_root; fragp; fragp = fragp->fr_next)//对于该段中的所有分片,调用cvt_frag_to_fill
cvt_frag_to_fill (sec, fragp);
3.cvt_frag_to_fill
write_object_file->size_seg->cvt_frag_to_fill
将分片固定下来,即全部转换成rs_fill类型
static void
cvt_frag_to_fill (segT sec ATTRIBUTE_UNUSED, fragS *fragP)
{
switch (fragP->fr_type)
{
case rs_align:
case rs_align_code:
case rs_align_test:
case rs_org:
case rs_space:
#ifdef HANDLE_ALIGN
HANDLE_ALIGN (fragP);
#endif
i386平台下
#define HANDLE_ALIGN(fragP) \
if (fragP->fr_type == rs_align_code) \
i386_align_code (fragP, (fragP->fr_next->fr_address \
- fragP->fr_address \
- fragP->fr_fix));
注意这里fragP->fr_next->fr_address - fragP->fr_address - fragP->fr_fix是在计算本分片可变部分大小
4.386_align_code
write_object_file->size_seg->cvt_frag_to_fill->386_align_code
void
i386_align_code (fragS *fragP, int count)
{
/* Various efficient no-op patterns for aligning code labels.
各种有效的空操作模式用户对齐代码
Note: Don't try to assemble the instructions in the comments.
0L and 0w are not legal. */
static const char f32_1[] =
{0x90}; /* nop */
static const char f32_2[] =
{0x66,0x90}; /* xchg %ax,%ax */
static const char f32_3[] =
{0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
static const char f32_4[] =
{0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
static const char f32_5[] =
{0x90, /* nop */
0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
static const char f32_6[] =
{0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
static const char f32_7[] =
{0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
static const char f32_8[] =
{0x90, /* nop */
0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
static const char f32_9[] =
{0x89,0xf6, /* movl %esi,%esi */
0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
static const char f32_10[] =
{0x8d,0x76,0x00, /* leal 0(%esi),%esi */
0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
static const char f32_11[] =
{0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
static const char f32_12[] =
{0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
static const char f32_13[] =
{0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
static const char f32_14[] =
{0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
static const char f16_3[] =
{0x8d,0x74,0x00}; /* lea 0(%esi),%esi */
static const char f16_4[] =
{0x8d,0xb4,0x00,0x00}; /* lea 0w(%si),%si */
static const char f16_5[] =
{0x90, /* nop */
0x8d,0xb4,0x00,0x00}; /* lea 0w(%si),%si */
static const char f16_6[] =
{0x89,0xf6, /* mov %si,%si */
0x8d,0xbd,0x00,0x00}; /* lea 0w(%di),%di */
static const char f16_7[] =
{0x8d,0x74,0x00, /* lea 0(%si),%si */
0x8d,0xbd,0x00,0x00}; /* lea 0w(%di),%di */
static const char f16_8[] =
{0x8d,0xb4,0x00,0x00, /* lea 0w(%si),%si */
0x8d,0xbd,0x00,0x00}; /* lea 0w(%di),%di */
static const char jump_31[] =
{0xeb,0x1d,0x90,0x90,0x90,0x90,0x90, /* jmp .+31; lotsa nops */
0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,
0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,
0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
static const char *const f32_patt[] = {
f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
f32_9, f32_10, f32_11, f32_12, f32_13, f32_14
};
static const char *const f16_patt[] = {
f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8
};
/* nopl (%[re]ax) */
static const char alt_3[] =
{0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const char alt_4[] =
{0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
static const char alt_5[] =
{0x0f,0x1f,0x44,0x00,0x00};
/* nopw 0(%[re]ax,%[re]ax,1) */
static const char alt_6[] =
{0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const char alt_7[] =
{0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
static const char alt_8[] =
{0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const char alt_9[] =
{0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const char alt_10[] =
{0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16
nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const char alt_long_11[] =
{0x66,
0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16
data16
nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const char alt_long_12[] =
{0x66,
0x66,
0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16
data16
data16
nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const char alt_long_13[] =
{0x66,
0x66,
0x66,
0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16
data16
data16
data16
nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const char alt_long_14[] =
{0x66,
0x66,
0x66,
0x66,
0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16
data16
data16
data16
data16
nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const char alt_long_15[] =
{0x66,
0x66,
0x66,
0x66,
0x66,
0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopl 0(%[re]ax,%[re]ax,1)
nopw 0(%[re]ax,%[re]ax,1) */
static const char alt_short_11[] =
{0x0f,0x1f,0x44,0x00,0x00,
0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopw 0(%[re]ax,%[re]ax,1)
nopw 0(%[re]ax,%[re]ax,1) */
static const char alt_short_12[] =
{0x66,0x0f,0x1f,0x44,0x00,0x00,
0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopw 0(%[re]ax,%[re]ax,1)
nopl 0L(%[re]ax) */
static const char alt_short_13[] =
{0x66,0x0f,0x1f,0x44,0x00,0x00,
0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax)
nopl 0L(%[re]ax) */
static const char alt_short_14[] =
{0x0f,0x1f,0x80,0x00,0x00,0x00,0x00,
0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax)
nopl 0L(%[re]ax,%[re]ax,1) */
static const char alt_short_15[] =
{0x0f,0x1f,0x80,0x00,0x00,0x00,0x00,
0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
static const char *const alt_short_patt[] = {
f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
alt_9, alt_10, alt_short_11, alt_short_12, alt_short_13,
alt_short_14, alt_short_15
};
static const char *const alt_long_patt[] = {
f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
alt_9, alt_10, alt_long_11, alt_long_12, alt_long_13,
alt_long_14, alt_long_15
};
这里定义了各种空操作模式,用于不同的cpu.
可以看出f16_patt和f32_patt主要使用lea指令,如果填充模式使用了两个lea指令,则操作数部分使用了不同寄存器,减少寄存器争用
alt_long_patt和alt_short_patt主要使用扩展的nop指令.两者的区别是alt_long_patt用一条nop和大量的0x66前缀,alt_short_patt用两条nop.
alt_long_patt为intel cpu准备,alt_short_patt为amd cpu准备。
至于更详细的优化细节可以查看intel和amd的技术文档。
/* Only align for at least a positive non-zero boundary. */
if (count <= 0 || count > MAX_MEM_FOR_RS_ALIGN_CODE)//可变部分大小<=0,不处理,如果>=31,则不使用本函数中定义的各种空操作模式来填充,而使用默认的nop(0x90)填空
return;
/* We need to decide which NOP sequence to use for 32bit and
64bit. When -mtune= is used:
决定使用哪种nop序列
1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
PROCESSOR_GENERIC32, f32_patt will be used.
2. For PROCESSOR_PENTIUMPRO, PROCESSOR_PENTIUM4, PROCESSOR_NOCONA,
PROCESSOR_CORE, PROCESSOR_CORE2, and PROCESSOR_GENERIC64,
alt_long_patt will be used.
3. For PROCESSOR_ATHLON, PROCESSOR_K6, PROCESSOR_K8 and
PROCESSOR_AMDFAM10, alt_short_patt will be used.
When -mtune= isn't used, alt_long_patt will be used if
cpu_arch_isa_flags has Cpu686. Otherwise, f32_patt will
be used.
When -march= or .arch is used, we can't use anything beyond
cpu_arch_isa_flags. */
if (flag_code == CODE_16BIT)//16位模式
{
if (count > 8)//超过8字节
{
memcpy (fragP->fr_literal + fragP->fr_fix,
jump_31, count);/使用jump_31模板
/* Adjust jump offset. */
fragP->fr_literal[fragP->fr_fix + 1] = count - 2;//修改rel8off,减去两字节即jmp rel8off指令长度
}
else
memcpy (fragP->fr_literal + fragP->fr_fix,
f16_patt[count - 1], count);//使用对应的f16_patt模板
}
else
{
//下面根据不同的isa(instruction set architecture)来决定使用哪个模板
const char *const *patt = NULL;
if (cpu_arch_isa == PROCESSOR_UNKNOWN)//isa未知
{
/* PROCESSOR_UNKNOWN means that all ISAs may be used. */
switch (cpu_arch_tune)
{
case PROCESSOR_UNKNOWN:
/* We use cpu_arch_isa_flags to check if we SHOULD
optimize for Cpu686. */
if ((cpu_arch_isa_flags & Cpu686) != 0)
patt = alt_long_patt;
else
patt = f32_patt;
break;
case PROCESSOR_PENTIUMPRO:
case PROCESSOR_PENTIUM4:
case PROCESSOR_NOCONA:
case PROCESSOR_CORE:
case PROCESSOR_CORE2:
case PROCESSOR_GENERIC64:
patt = alt_long_patt;
break;
case PROCESSOR_K6:
case PROCESSOR_ATHLON:
case PROCESSOR_K8:
case PROCESSOR_AMDFAM10:
patt = alt_short_patt;
break;
case PROCESSOR_I386:
case PROCESSOR_I486:
case PROCESSOR_PENTIUM:
case PROCESSOR_GENERIC32:
patt = f32_patt;
break;
}
}
else
{
switch (cpu_arch_tune)//isa已知情况下
{
case PROCESSOR_UNKNOWN:
/* When cpu_arch_isa is net, cpu_arch_tune shouldn't be
PROCESSOR_UNKNOWN. */
abort ();
break;
case PROCESSOR_I386:
case PROCESSOR_I486:
case PROCESSOR_PENTIUM:
case PROCESSOR_K6:
case PROCESSOR_ATHLON:
case PROCESSOR_K8:
case PROCESSOR_AMDFAM10:
case PROCESSOR_GENERIC32:
/* We use cpu_arch_isa_flags to check if we CAN optimize
for Cpu686. */
if ((cpu_arch_isa_flags & Cpu686) != 0)
patt = alt_short_patt;
else
patt = f32_patt;
break;
case PROCESSOR_PENTIUMPRO:
case PROCESSOR_PENTIUM4:
case PROCESSOR_NOCONA:
case PROCESSOR_CORE:
case PROCESSOR_CORE2:
if ((cpu_arch_isa_flags & Cpu686) != 0)
patt = alt_long_patt;
else
patt = f32_patt;
break;
case PROCESSOR_GENERIC64:
patt = alt_long_patt;
break;
}
}
if (patt == f32_patt)//使用了f32_patt
{
/* If the padding is less than 15 bytes, we use the normal
ones. Otherwise, we use a jump instruction and adjust
its offset. */
if (count < 15)
memcpy (fragP->fr_literal + fragP->fr_fix,
patt[count - 1], count);
else
{
memcpy (fragP->fr_literal + fragP->fr_fix,
jump_31, count);
/* Adjust jump offset. */
fragP->fr_literal[fragP->fr_fix + 1] = count - 2;
}
}
else
{
/* Maximum length of an instruction is 15 byte. If the
padding is greater than 15 bytes and we don't use jump,
如果填充大于15字节,不使用jmp,而是分片填充
we have to break it into smaller pieces. */
int padding = count;
while (padding > 15)
{
padding -= 15;
memcpy (fragP->fr_literal + fragP->fr_fix + padding,
patt [14], 15);
}
if (padding)
memcpy (fragP->fr_literal + fragP->fr_fix,
patt [padding - 1], padding);
}
}
fragP->fr_var = count;//修改fr_var(填充模式长度)为count
}
5.返回cvt_frag_to_fill
write_object_file->size_seg->cvt_frag_to_fill
know (fragP->fr_next != NULL);
fragP->fr_offset = (fragP->fr_next->fr_address
- fragP->fr_address
- fragP->fr_fix) / fragP->fr_var;//fr_var是填充模式长度,此处计算填充模式需要被重复的次数
if (fragP->fr_offset < 0)
{
as_bad_where (fragP->fr_file, fragP->fr_line,
_("attempt to .org/.space backwards? (%ld)"),
(long) fragP->fr_offset);
fragP->fr_offset = 0;
}
fragP->fr_type = rs_fill;//修改类型为rs_fill
break;
case rs_fill:
break;
6.返回size_seg
write_object_file->size_seg
for (fragp = seginfo->frchainP->frch_root;
fragp->fr_next;
fragp = fragp->fr_next)
/* Walk to last elt. 找到最后一个分片*/
;
size = fragp->fr_address + fragp->fr_fix;//计算出本段大小
}
else
size = 0;//否则本段大小为0
flags = bfd_get_section_flags (abfd, sec);
if (size > 0 && ! seginfo->bss)//不是未初始化段
flags |= SEC_HAS_CONTENTS;//本段有内容
flags &= ~SEC_RELOC;//取消reloc属性
x = bfd_set_section_flags (abfd, sec, flags);
assert (x);
newsize = md_section_align (sec, size);//对于elf文件格式是空操作
x = bfd_set_section_size (abfd, sec, newsize);
assert (x);
/* If the size had to be rounded up, add some padding in the last
non-empty frag. */
assert (newsize >= size);
if (size != newsize)
{
fragS *last = seginfo->frchainP->frch_last;
fragp = seginfo->frchainP->frch_root;
while (fragp->fr_next != last)
fragp = fragp->fr_next;
last->fr_address = size;
if ((newsize - size) % fragp->fr_var == 0)
fragp->fr_offset += (newsize - size) / fragp->fr_var;
else
/* If we hit this abort, it's likely due to subsegs_finish not
providing sufficient alignment on the last frag, and the
machine dependent code using alignment frags with fr_var
greater than 1. */
abort ();
}
#ifdef tc_frob_section
tc_frob_section (sec);
#endif
#ifdef obj_frob_section
obj_frob_section (sec);
#endif
}
7.返回write_object_file
bfd_map_over_sections (stdoutput, write_contents, (char *) 0);//最后写出各个段
8.write_contents
write_object_file->write_contents
static void
write_contents (bfd *abfd ATTRIBUTE_UNUSED,
asection *sec,
void *xxx ATTRIBUTE_UNUSED)
{
segment_info_type *seginfo = seg_info (sec);
addressT offset = 0;
fragS *f;
/* Write out the frags. */
if (seginfo == NULL//没有段信息
|| !(bfd_get_section_flags (abfd, sec) & SEC_HAS_CONTENTS))//或者没有内容
return;//直接返回
for (f = seginfo->frchainP->frch_root;
f;
f = f->fr_next)//对于每一个分片
{
int x;
addressT fill_size;
char *fill_literal;
offsetT count;
assert (f->fr_type == rs_fill);//肯定是rs_fill
if (f->fr_fix)//有固定部分
{
x = bfd_set_section_contents (stdoutput, sec,
f->fr_literal, (file_ptr) offset,
(bfd_size_type) f->fr_fix);//写出
if (!x)
as_fatal (_("can't write %s: %s"), stdoutput->filename,
bfd_errmsg (bfd_get_error ()));
offset += f->fr_fix;
}
//再写可变部分
fill_literal = f->fr_literal + f->fr_fix;//填充模式起始地址
fill_size = f->fr_var;//填充模式长度
count = f->fr_offset;//模式重复填充次数
assert (count >= 0);
if (fill_size && count)
{
char buf[256];
if (fill_size > sizeof (buf))//填充模式长度超过256字节
{
/* Do it the old way. Can this ever happen? */
while (count--)
{
x = bfd_set_section_contents (stdoutput, sec,
fill_literal,
(file_ptr) offset,
(bfd_size_type) fill_size);//重复填充
if (!x)
as_fatal (_("can't write %s: %s"), stdoutput->filename,
bfd_errmsg (bfd_get_error ()));
offset += fill_size;
}
}
else//否则填充模式长度小于256字节,先填充buf,再用buf填充,加快填充速度
{
/* Build a buffer full of fill objects and output it as
often as necessary. This saves on the overhead of
potentially lots of bfd_set_section_contents calls. */
int n_per_buf, i;
if (fill_size == 1)//填充模式长度为1
{
n_per_buf = sizeof (buf);
memset (buf, *fill_literal, n_per_buf);//先在buf里填充
}
else
{
char *bufp;
n_per_buf = sizeof (buf) / fill_size;
for (i = n_per_buf, bufp = buf; i; i--, bufp += fill_size)
memcpy (bufp, fill_literal, fill_size);//先在buf里填充
}
for (; count > 0; count -= n_per_buf)//再使用buf填充
{
n_per_buf = n_per_buf > count ? count : n_per_buf;
x = bfd_set_section_contents
(stdoutput, sec, buf, (file_ptr) offset,
(bfd_size_type) n_per_buf * fill_size);
if (!x)
as_fatal (_("cannot write to output file"));
offset += n_per_buf * fill_size;
}
}
}
}
}
至此,align涉及到的主要操作全部介绍完了。
阅读(2178) | 评论(0) | 转发(0) |