从一段代码看ELF .o文件（ET_REL）-flw2-ChinaUnix博客

xcmhitchangming.blog.chinaunix.net

首页　| 　博文目录　| 　关于我

flw2

博客访问： 265499
博文数量： 52
博客积分： 1379
博客等级：大尉
技术积分： 525
用户组：普通用户
注册时间： 2006-06-18 17:34

文章分类

全部博文（52）

network（6）
线程（10）
编译与链接（11）

yacc（0）

tcc代码笔记（10）
版本管理（2）
以前写的文章（5）
Makefile（3）
轻松一下（4）
linux内核笔记（8）
未分配的博文（3）

文章存档

2011年（48）

2010年（4）

我的朋友

相关博文

从一段代码看ELF .o文件（ET_REL）

分类： LINUX

2011-02-25 10:48:16

网上有好多介绍ELF的文章，基本内容大多来自ELF的标准

Tool Interface Standard (TIS)
Executable and Linking Format (ELF)
Specification
Version 1.2

google elf pdf就能搜索到不少
通过一段代码来学习还是比较容易，下面的代码来自一个实用编译器tcc
主页是 http://bellard.org/tcc/
因为tcc包含编译和链接功能，这段代码属于链接器的部分
有些项目（比如linux kernel）可能会有成千上万个.o，如果每次只改动一个.c文件就要把这成千上万个.o全部重新链接一遍，是非常消耗时间的
所以一般也是会把不同的子目录下的.o合并成一个整体，这样，链接不再需要从头开始
ld -r参数就是由若干个.o 生成一个新的.o

/* load an object file and merge it with current files */
/* XXX: handle correctly stab (debug) info */
/*
 * Reads one relocatable ELF object (ET_REL) from 'fd' and merges its
 * sections, symbols and relocation entries into the sections of 's1'.
 *
 *   s1          - state whose section list (s1->sections) receives the data
 *   fd          - descriptor to read from; the ELF header is read at the
 *                 current position (NOTE(review): callers presumably leave
 *                 fd positioned at file_offset -- confirm)
 *   file_offset - base added to every file offset taken from the ELF
 *                 headers before seeking/loading
 *
 * Returns 0 on success and -1 on failure; every failure is reported
 * through error_noabort() first.
 */
ST_FUNC int tcc_load_object_file(TCCState *s1,
                                 int fd, unsigned long file_offset)
{
    ElfW(Ehdr) ehdr;
    ElfW(Shdr) *shdr, *sh;
    int size, i, j, offset, offseti, nb_syms, sym_index, ret;
    unsigned char *strsec, *strtab;
    int *old_to_new_syms;
    char *sh_name, *name;
    SectionMergeInfo *sm_table, *sm;
    ElfW(Sym) *sym, *symtab;
    ElfW_Rel *rel, *rel_end;
    Section *s;
    int stab_index;
    int stabstr_index;

    stab_index = stabstr_index = 0;

    /* Every ELF file starts with an ELF header, so parsing starts there. */
    if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr))
        goto fail1;
    if (ehdr.e_ident[0] != ELFMAG0 ||
        ehdr.e_ident[1] != ELFMAG1 ||
        ehdr.e_ident[2] != ELFMAG2 ||
        ehdr.e_ident[3] != ELFMAG3)
        goto fail1;

    /* test if object file */
    /* This function only handles relocatable object files. */
    if (ehdr.e_type != ET_REL)
        goto fail1;
    /* test CPU specific stuff */
    /* tcc supports quite a few targets; i386-gen.c for instance has
       #define EM_TCC_TARGET EM_386 */
    if (ehdr.e_ident[5] != ELFDATA2LSB ||
        ehdr.e_machine != EM_TCC_TARGET) {
    fail1:
        error_noabort("invalid object file");
        return -1;
    }

    /*
     * An object file needs no program header table (PHT). It consists of
     * a number of sections: data, code, the symbol table and so on.
     *
     * Every section header has the same fixed size, e.g. on 32 bits:
     *
     *     typedef struct
     *     {
     *         Elf32_Word  sh_name;       // Section name (string tbl index)
     *         Elf32_Word  sh_type;       // Section type
     *         Elf32_Word  sh_flags;      // Section flags
     *         Elf32_Addr  sh_addr;       // Section virtual addr at execution
     *         Elf32_Off   sh_offset;     // Section file offset
     *         Elf32_Word  sh_size;       // Section size in bytes
     *         Elf32_Word  sh_link;       // Link to another section
     *         Elf32_Word  sh_info;       // Additional section information
     *         Elf32_Word  sh_addralign;  // Section alignment
     *         Elf32_Word  sh_entsize;    // Entry size if section holds table
     *     } Elf32_Shdr;
     *
     * e_shnum gives the number of sections. shdr points to an array of
     * such structures, the section header table, which describes every
     * section. The meaning of the fields depends on the kind of file;
     * only ET_REL is considered here.
     */

    /* read sections */
    shdr = load_data(fd, file_offset + ehdr.e_shoff,
                     sizeof(ElfW(Shdr)) * ehdr.e_shnum);
    sm_table = tcc_mallocz(sizeof(SectionMergeInfo) * ehdr.e_shnum);

    /* everything released at the_end must hold a defined value before
       the first jump to fail / bad_object */
    strsec = NULL;
    strtab = NULL;
    symtab = NULL;
    old_to_new_syms = NULL;
    nb_syms = 0;

    /* load section names */
    /* Every section has a name; the names themselves live in a section
       whose position in the section header table is e_shstrndx. The index
       comes from the file, so check it before using it. */
    if (ehdr.e_shstrndx >= ehdr.e_shnum)
        goto bad_object;
    sh = &shdr[ehdr.e_shstrndx];
    strsec = load_data(fd, file_offset + sh->sh_offset, sh->sh_size);

    /* load symtab and strtab */
    /* Index 0 is reserved and stands for "undefined", so start at 1. */
    for(i = 1; i < ehdr.e_shnum; i++) {
        sh = &shdr[i];
        if (sh->sh_type == SHT_SYMTAB) {
            if (symtab) {
                error_noabort("object must contain only one symtab");
                goto fail;
            }
            nb_syms = sh->sh_size / sizeof(ElfW(Sym));
            symtab = load_data(fd, file_offset + sh->sh_offset, sh->sh_size);
            /* symtab_section is created by tcc; it is the in-memory
               representation of the symbol table */
            sm_table[i].s = symtab_section;

            /* now load strtab */
            /* Every symbol has a name ("printf", "fopen", ...). The names
               are stored in a section of their own whose index is the
               sh_link field of the symbol table section. */
            if (sh->sh_link >= ehdr.e_shnum)
                goto bad_object;
            sh = &shdr[sh->sh_link];
            strtab = load_data(fd, file_offset + sh->sh_offset, sh->sh_size);
        }
    }

    /* Linking merges all objects and libraries, relocates them and
       determines the entry symbol (_start by default). The merging of one
       object is what happens here: e.g. the .text section found in most
       objects ends up appended to text_section, the in-memory .text. */
    /* now examine each section and try to merge its content with the
       ones in memory */
    for(i = 1; i < ehdr.e_shnum; i++) {
        /* no need to examine section name strtab */
        if (i == ehdr.e_shstrndx)
            continue;
        sh = &shdr[i];
        sh_name = (char *) strsec + sh->sh_name;
        /* ignore sections types we do not handle */
        if (sh->sh_type != SHT_PROGBITS &&
            sh->sh_type != SHT_RELX &&
#ifdef TCC_ARM_EABI
            sh->sh_type != SHT_ARM_EXIDX &&
#endif
            sh->sh_type != SHT_NOBITS &&
            sh->sh_type != SHT_PREINIT_ARRAY &&
            sh->sh_type != SHT_INIT_ARRAY &&
            sh->sh_type != SHT_FINI_ARRAY &&
            strcmp(sh_name, ".stabstr")
            )
            continue;
        if (sh->sh_addralign < 1)
            sh->sh_addralign = 1;
        /* find corresponding section, if any */
        for(j = 1; j < s1->nb_sections; j++) {
            s = s1->sections[j];
            if (!strcmp(s->name, sh_name)) {
                if (!strncmp(sh_name, ".gnu.linkonce",
                             sizeof(".gnu.linkonce") - 1)) {
                    /* if a 'linkonce' section is already present, we
                       do not add it again. It is a little tricky as
                       symbols can still be defined in
                       it. */
                    /* linkonce covers code needed by several source files
                       but linked only once. C++ function templates, for
                       example, are instantiated per argument type; the
                       compiler cannot know whether another source file
                       produced the same code, so the linker removes the
                       duplicates. */
                    sm_table[i].link_once = 1;
                    goto next;
                } else {
                    goto found;
                }
            }
        }
        /* not found: create new section */
        s = new_section(s1, sh_name, sh->sh_type, sh->sh_flags);
        /* take as much info as possible from the section. sh_link and
           sh_info will be updated later */
        s->sh_addralign = sh->sh_addralign;
        s->sh_entsize = sh->sh_entsize;
        sm_table[i].new_section = 1;
    found:
        if (sh->sh_type != s->sh_type) {
            error_noabort("invalid section type");
            goto fail;
        }

        /* align start of section */
        offset = s->data_offset;

        /* the stab sections are appended without alignment; only their
           index is remembered for the string offset fix-up below */
        if (0 == strcmp(sh_name, ".stab")) {
            stab_index = i;
            goto no_align;
        }
        if (0 == strcmp(sh_name, ".stabstr")) {
            stabstr_index = i;
            goto no_align;
        }

        size = sh->sh_addralign - 1;
        offset = (offset + size) & ~size;
        if (sh->sh_addralign > s->sh_addralign)
            s->sh_addralign = sh->sh_addralign;
        s->data_offset = offset;
    no_align:
        sm_table[i].offset = offset;
        sm_table[i].s = s;
        /* concatenate sections */
        size = sh->sh_size;
        if (sh->sh_type != SHT_NOBITS) {
            unsigned char *ptr;
            /* a failed seek or a short read would leave garbage in the
               merged section, so treat both as a broken object file */
            if (lseek(fd, file_offset + sh->sh_offset, SEEK_SET) < 0)
                goto bad_object;
            ptr = section_ptr_add(s, size);
            if (read(fd, ptr, size) != size)
                goto bad_object;
        } else {
            /* SHT_NOBITS has no file content: only reserve the space */
            s->data_offset += size;
        }
    next: ;
    }

    /* //gr relocate stab strings */
    if (stab_index && stabstr_index) {
        Stab_Sym *a, *b;
        unsigned o;
        s = sm_table[stab_index].s;
        a = (Stab_Sym *)(s->data + sm_table[stab_index].offset);
        b = (Stab_Sym *)(s->data + s->data_offset);
        o = sm_table[stabstr_index].offset;
        /* shift the string index of every stab entry just added by the
           offset at which this object's .stabstr was appended */
        while (a < b)
            a->n_strx += o, a++;
    }

    /* second short pass to update sh_link and sh_info fields of new
       sections */
    for(i = 1; i < ehdr.e_shnum; i++) {
        s = sm_table[i].s;
        if (!s || !sm_table[i].new_section)
            continue;
        sh = &shdr[i];
        if (sh->sh_link >= ehdr.e_shnum)
            goto bad_object;
        if (sh->sh_link > 0)
            s->link = sm_table[sh->sh_link].s;
        if (sh->sh_type == SHT_RELX) {
            /* the target must be a valid section that was actually
               merged: sm_table[].s stays NULL for sections skipped above,
               and dereferencing it would crash */
            if (sh->sh_info >= ehdr.e_shnum || !sm_table[sh->sh_info].s)
                goto bad_object;
            s->sh_info = sm_table[sh->sh_info].s->sh_num;
            /* update backward link */
            /* the target section needs relocation and s is its
               relocation section */
            s1->sections[s->sh_info]->reloc = s;
        }
    }

    /* resolve symbols */
    /* Symbol indices change once the tables are merged; old_to_new_syms
       maps an index in this object to the index in symtab_section. */
    old_to_new_syms = tcc_mallocz(nb_syms * sizeof(int));
    sym = symtab + 1;
    for(i = 1; i < nb_syms; i++, sym++) {
        if (sym->st_shndx != SHN_UNDEF &&
            sym->st_shndx < SHN_LORESERVE) {
            if (sym->st_shndx >= ehdr.e_shnum)
                goto bad_object;
            sm = &sm_table[sym->st_shndx];
            if (sm->link_once) {
                /* if a symbol is in a link once section, we use the
                   already defined symbol. It is very important to get
                   correct relocations */
                if (ELFW(ST_BIND)(sym->st_info) != STB_LOCAL) {
                    name = (char *) strtab + sym->st_name;
                    sym_index = find_elf_sym(symtab_section, name);
                    if (sym_index)
                        old_to_new_syms[i] = sym_index;
                }
                continue;
            }
            /* if no corresponding section added, no need to add symbol */
            if (!sm->s)
                continue;
            /* convert section number */
            sym->st_shndx = sm->s->sh_num;
            /* offset value */
            sym->st_value += sm->offset;
        }
        /* add symbol */
        name = (char *) strtab + sym->st_name;
        sym_index = add_elf_sym(symtab_section, sym->st_value, sym->st_size,
                                sym->st_info, sym->st_other,
                                sym->st_shndx, name);
        old_to_new_syms[i] = sym_index;
    }

    /* third pass to patch relocation entries */
    for(i = 1; i < ehdr.e_shnum; i++) {
        s = sm_table[i].s;
        if (!s)
            continue;
        sh = &shdr[i];
        offset = sm_table[i].offset;
        switch(s->sh_type) {
        case SHT_RELX:
            if (sh->sh_info >= ehdr.e_shnum)
                goto bad_object;
            /* take relocation offset information */
            offseti = sm_table[sh->sh_info].offset;
            rel_end = (ElfW_Rel *)(s->data + s->data_offset);
            /* only the entries appended from this object are patched:
               they start at 'offset' inside the merged section */
            for(rel = (ElfW_Rel *)(s->data + offset);
                rel < rel_end;
                rel++) {
                int type;
                unsigned sym_index;
                /* convert symbol index */
                type = ELFW(R_TYPE)(rel->r_info);
                sym_index = ELFW(R_SYM)(rel->r_info);
                /* NOTE: only one symtab assumed */
                if (sym_index >= nb_syms)
                    goto invalid_reloc;
                sym_index = old_to_new_syms[sym_index];
                /* ignore link_once in rel section. */
                /* The flag has to be read from the section these
                   relocations apply to (sh_info), not from whatever
                   section the last symbol of the previous pass was in. */
                if (!sym_index && !sm_table[sh->sh_info].link_once
#ifdef TCC_TARGET_ARM
                    && type != R_ARM_V4BX
#endif
                   ) {
                invalid_reloc:
                    error_noabort("Invalid relocation entry [%2d] '%s' @ %.8x",
                        i, strsec + sh->sh_name, rel->r_offset);
                    goto fail;
                }
                rel->r_info = ELFW(R_INFO)(sym_index, type);
                /* offset the relocation offset */
                rel->r_offset += offseti;
            }
            break;
        default:
            break;
        }
    }

    ret = 0;
    goto the_end;

 bad_object:
    error_noabort("invalid object file");
 fail:
    ret = -1;
 the_end:
    /* the buffers are released unconditionally; some are still NULL on
       the error paths */
    tcc_free(symtab);
    tcc_free(strtab);
    tcc_free(old_to_new_syms);
    tcc_free(sm_table);
    tcc_free(strsec);
    tcc_free(shdr);
    return ret;
}

阅读(3654) | 评论(0) | 转发(0) |

上一篇：Linus又骂人stupid

下一篇：tcc 一个预处理bug

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6