Chinaunix首页 | 论坛 | 博客
  • 博客访问: 264560
  • 博文数量: 52
  • 博客积分: 1379
  • 博客等级: 大尉
  • 技术积分: 525
  • 用 户 组: 普通用户
  • 注册时间: 2006-06-18 17:34
文章分类

全部博文(52)

文章存档

2011年(48)

2010年(4)

分类: LINUX

2011-02-25 10:48:16

网上有好多介绍ELF的文章,基本内容大多来自ELF的标准

Tool Interface Standard (TIS)
Executable and Linking Format (ELF)
Specification
Version 1.2

google elf pdf就能搜索到不少
通过一段代码来学习还是比较容易,下面的代码来自一个实用编译器tcc
主页是 http://bellard.org/tcc/
因为tcc包含编译和链接功能,这段代码属于链接器的部分
有些项目(比如linux kernel)可能会有成千上万个.o,如果每改动一个.c文件都要重新链接这成千上万个.o,是非常消耗时间的
所以一般会先把各个子目录下的.o分别合并成一个.o,这样只需重新合并受影响的子目录,链接不必完全从头开始
ld -r参数就是由若干个.o 生成一个新的.o


  1. /* load an object file and merge it with current files */
  2. /* XXX: handle correctly stab (debug) info */
  3. ST_FUNC int tcc_load_object_file(TCCState *s1,
  4.                                 int fd, unsigned long file_offset)
  5. {// -----
  6.     ElfW(Ehdr) ehdr;
  7.     ElfW(Shdr) *shdr, *sh;
  8.     int size, i, j, offset, offseti, nb_syms, sym_index, ret;
  9.     unsigned char *strsec, *strtab;
  10.     int *old_to_new_syms;
  11.     char *sh_name, *name;
  12.     SectionMergeInfo *sm_table, *sm;
  13.     ElfW(Sym) *sym, *symtab;
  14.     ElfW_Rel *rel, *rel_end;
  15.     Section *s;

  16.     int stab_index;
  17.     int stabstr_index;

  18.     stab_index = stabstr_index = 0;

     //每个ELF文件都以一个elf头开始,对elf文件的分析起始于头部

  1.     if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr))
  2.         goto fail1;
  3.     //
  4.     if (ehdr.e_ident[0] != ELFMAG0 ||
  5.         ehdr.e_ident[1] != ELFMAG1 ||
  6.         ehdr.e_ident[2] != ELFMAG2 ||
  7.         ehdr.e_ident[3] != ELFMAG3)
  8.         goto fail1;
     该函数只分析可重定位目标文件


  1.     /* test if object file */
  2.     if (ehdr.e_type != ET_REL)
  3.         goto fail1;
  4.     
  5.    
  6.     /* test CPU specific stuff */
  7.     if (ehdr.e_ident[5] != ELFDATA2LSB ||
  8.         ehdr.e_machine != EM_TCC_TARGET) {
  9.     tcc支持不少平台, EM_TCC_TARGET由各个平台的代码生成器分别定义
  10.     比如在i386-gen.c中 #define EM_TCC_TARGET EM_386

  11.     fail1:
  12.         error_noabort("invalid object file");
  13.         return -1;
  14.     }

    
     目标文件(可重定位文件)不需要PHT(Program Header Table, 程序头表), 它包含若干节, 比如数据、代码、符号表等等
    
  1. 节头表中每个表项(节头)的大小是固定的, 比如32位上

typedef struct
{
  Elf32_Word    sh_name;                /* Section name (string tbl index) */
  Elf32_Word    sh_type;                /* Section type */
  Elf32_Word    sh_flags;               /* Section flags */
  Elf32_Addr    sh_addr;                /* Section virtual addr at execution */
  Elf32_Off     sh_offset;              /* Section file offset */
  Elf32_Word    sh_size;                /* Section size in bytes */
  Elf32_Word    sh_link;                /* Link to another section */
  Elf32_Word    sh_info;                /* Additional section information */
  Elf32_Word    sh_addralign;           /* Section alignment */
  Elf32_Word    sh_entsize;             /* Entry size if section holds table */
} Elf32_Shdr;


e_shnum指定有多少节
shdr指向一个Elf32_Shdr结构体数组, 即节头表,用来描述各个节的信息
Elf32_Shdr各个字段对于不同的节有不同的解释,这里只分析ET_REL(可重定位目标文件)的情况

  1.     /* read sections */
  2.     shdr = load_data(fd, file_offset + ehdr.e_shoff,
  3.                      sizeof(ElfW(Shdr)) * ehdr.e_shnum);

  1.     sm_table = tcc_mallocz(sizeof(SectionMergeInfo) * ehdr.e_shnum);
  2.    
  3.     每节都有名字, 所有的节名字也构成一个节,它在节头表中的位置由e_shstrndx给出

  4.     /* load section names */
  5.     sh = &shdr[ehdr.e_shstrndx];
  6.     strsec = load_data(fd, file_offset + sh->sh_offset, sh->sh_size);

  7.     /* load symtab and strtab */
  8.     old_to_new_syms = NULL;
  9.     symtab = NULL;
  10.     strtab = NULL;
  11.     nb_syms = 0;

  12.     索引0是保留的,用来表示未定义

  13.     for(i = 1; i < ehdr.e_shnum; i++) {
  14.         sh = &shdr[i];
  15.         if (sh->sh_type == SHT_SYMTAB) {
  16.             if (symtab) {
  17.                 error_noabort("object must contain only one symtab");
  18.             fail:
  19.                 ret = -1;
  20.                 goto the_end;
  21.             }
  22.             nb_syms = sh->sh_size / sizeof(ElfW(Sym));
  23.             symtab = load_data(fd, file_offset + sh->sh_offset, sh->sh_size);
  24.             symtab_section由tcc建立,是符号表的内存表示
  25.             sm_table[i].s = symtab_section;
  26.             
  27.             符号表中的每个符号都有名字,比如"printf", "fopen"等, 这些名字字符串集中存放在另一个节(字符串表)中
  28.             该字符串表的节号由符号表节头的sh_link字段给出
  29.             /* now load strtab */
  30.             sh = &shdr[sh->sh_link];
  31.             strtab = load_data(fd, file_offset + sh->sh_offset, sh->sh_size);
  32.         }
  33.     }
  34.     
  35.     链接器负责把所有obj, lib合并并重定位, 并确定入口符号(默认是_start); 其中合并obj的功能在这里完成
  36.     比如大多数obj中都有的.text节,最后都会合并到text_section,它是.text节在内存中的表示
  37.     /* now examine each section and try to merge its content with the
  38.        ones in memory */
  39.     for(i = 1; i < ehdr.e_shnum; i++) {
  40.         /* no need to examine section name strtab */
  41.         if (i == ehdr.e_shstrndx)
  42.             continue;
  43.         sh = &shdr[i];
  44.         sh_name = strsec + sh->sh_name;
  45.         /* ignore sections types we do not handle */
  46.         if (sh->sh_type != SHT_PROGBITS &&
  47.             sh->sh_type != SHT_RELX &&
  48. #ifdef TCC_ARM_EABI
  49.             sh->sh_type != SHT_ARM_EXIDX &&
  50. #endif
  51.             sh->sh_type != SHT_NOBITS &&
  52.             sh->sh_type != SHT_PREINIT_ARRAY &&
  53.             sh->sh_type != SHT_INIT_ARRAY &&
  54.             sh->sh_type != SHT_FINI_ARRAY &&
  55.             strcmp(sh_name, ".stabstr")
  56.             )
  57.             continue;
  58.         if (sh->sh_addralign < 1)
  59.             sh->sh_addralign = 1;
  60.         /* find corresponding section, if any */
  61.         for(j = 1; j < s1->nb_sections;j++) {
  62.             s = s1->sections[j];
  63.             if (!strcmp(s->name, sh_name)) {
  64.                 if (!strncmp(sh_name, ".gnu.linkonce",
  65.                              sizeof(".gnu.linkonce") - 1)) {
  66.                     /* if a 'linkonce' section is already present, we
  67.                        do not add it again. It is a little tricky as
  68.                        symbols can still be defined in
  69.                        it. */
  70.                     linkonce用于多个源文件都需要同一段代码、但最终只应链接一份的情况
  71.                     比如C++的函数模板需要根据参数的类型生成不同的代码,每个用到它的源文件都要生成一份定义,编译器无法判断
  72.                     别的源文件是否会生成同样的代码,所以由链接器负责删除重复的,也就是linkonce
  73.                     sm_table[i].link_once = 1;
  74.                     goto next;
  75.                 } else {
  76.                     goto found;
  77.                 }
  78.             }
  79.         }
  80.         /* not found: create new section */
  81.         s = new_section(s1, sh_name, sh->sh_type, sh->sh_flags);
  82.         /* take as much info as possible from the section. sh_link and
  83.            sh_info will be updated later */
  84.         s->sh_addralign = sh->sh_addralign;
  85.         s->sh_entsize = sh->sh_entsize;
  86.         sm_table[i].new_section = 1;
  87.     found:
  88.         if (sh->sh_type != s->sh_type) {
  89.             error_noabort("invalid section type");
  90.             goto fail;
  91.         }

  92.         /* align start of section */
  93.         offset = s->data_offset;

  94.         if (0 == strcmp(sh_name, ".stab")) {
  95.             stab_index = i;
  96.             goto no_align;
  97.         }
  98.         if (0 == strcmp(sh_name, ".stabstr")) {
  99.             stabstr_index = i;
  100.             goto no_align;
  101.         }

  102.         size = sh->sh_addralign - 1;
  103.         offset = (offset + size) & ~size;
  104.         if (sh->sh_addralign > s->sh_addralign)
  105.             s->sh_addralign = sh->sh_addralign;
  106.         s->data_offset = offset;
  107.     no_align:
  108.         sm_table[i].offset = offset;
  109.         sm_table[i].s = s;
  110.         /* concatenate sections */
  111.         size = sh->sh_size;
  112.         if (sh->sh_type != SHT_NOBITS) {
  113.             unsigned char *ptr;
  114.             lseek(fd, file_offset + sh->sh_offset, SEEK_SET);
  115.             ptr = section_ptr_add(s, size);
  116.             read(fd, ptr, size);
  117.         } else {
  118.             s->data_offset += size;
  119.         }
  120.     next: ;
  121.     }

  122.     /* //gr relocate stab strings */
  123.     if (stab_index && stabstr_index) {
  124.         Stab_Sym *a, *b;
  125.         unsigned o;
  126.         s = sm_table[stab_index].s;
  127.         a = (Stab_Sym *)(s->data + sm_table[stab_index].offset);
  128.         b = (Stab_Sym *)(s->data + s->data_offset);
  129.         o = sm_table[stabstr_index].offset;
  130.         while (a < b)
  131.             a->n_strx += o, a++;
  132.     }

  133.     /* second short pass to update sh_link and sh_info fields of new
  134.        sections */
  135.     for(i = 1; i < ehdr.e_shnum; i++) {
  136.         s = sm_table[i].s;
  137.         if (!s || !sm_table[i].new_section)
  138.             continue;
  139.         sh = &shdr[i];
  140.         if (sh->sh_link > 0)
  141.             s->link = sm_table[sh->sh_link].s;
  142.         if (sh->sh_type == SHT_RELX) {
  143.             s->sh_info = sm_table[sh->sh_info].s->sh_num;
  144.             /* update backward link */
  145.             sh_info指向的节需要被重定位, 它对应的重定位节就是s
  146.             s1->sections[s->sh_info]->reloc = s;
  147.         }
  148.     }
  149.     sm = sm_table;

  150.     /* resolve symbols */
  151.     old_to_new_syms = tcc_mallocz(nb_syms * sizeof(int));
  152.     合并之后符号的索引就变了

  153.     sym = symtab + 1;
  154.     for(i = 1; i < nb_syms; i++, sym++) {
  155.         if (sym->st_shndx != SHN_UNDEF &&
  156.             sym->st_shndx < SHN_LORESERVE) {
  157.             sm = &sm_table[sym->st_shndx];
  158.             if (sm->link_once) {
  159.                 /* if a symbol is in a link once section, we use the
  160.                    already defined symbol. It is very important to get
  161.                    correct relocations */
  162.                 if (ELFW(ST_BIND)(sym->st_info) != STB_LOCAL) {
  163.                     name = strtab + sym->st_name;
  164.                     sym_index = find_elf_sym(symtab_section, name);
  165.                     if (sym_index)
  166.                         old_to_new_syms[i] = sym_index;
  167.                 }
  168.                 continue;
  169.             }
  170.             /* if no corresponding section added, no need to add symbol */
  171.             if (!sm->s)
  172.                 continue;
  173.             /* convert section number */
  174.             sym->st_shndx = sm->s->sh_num;
  175.             /* offset value */
  176.             sym->st_value += sm->offset;
  177.         }
  178.         /* add symbol */
  179.         name = strtab + sym->st_name;
  180.         sym_index = add_elf_sym(symtab_section, sym->st_value, sym->st_size,
  181.                                 sym->st_info, sym->st_other,
  182.                                 sym->st_shndx, name);
  183.         old_to_new_syms[i] = sym_index;
  184.     }

  185.     /* third pass to patch relocation entries */
  186.     for(i = 1; i < ehdr.e_shnum; i++) {
  187.         s = sm_table[i].s;
  188.         if (!s)
  189.             continue;
  190.         sh = &shdr[i];
  191.         offset = sm_table[i].offset;
  192.         switch(s->sh_type) {
  193.         case SHT_RELX:
  194.             /* take relocation offset information */
  195.             offseti = sm_table[sh->sh_info].offset;
  196.             rel_end = (ElfW_Rel *)(s->data + s->data_offset);
  197.             for(rel = (ElfW_Rel *)(s->data + offset);
  198.                 rel < rel_end;
  199.                 rel++) {
  200.                 int type;
  201.                 unsigned sym_index;
  202.                 /* convert symbol index */
  203.                 type = ELFW(R_TYPE)(rel->r_info);
  204.                 sym_index = ELFW(R_SYM)(rel->r_info);
  205.                 /* NOTE: only one symtab assumed */
  206.                 if (sym_index >= nb_syms)
  207.                     goto invalid_reloc;
  208.                 sym_index = old_to_new_syms[sym_index];
  209.                 /* ignore link_once in rel section. */
  210.                 if (!sym_index && !sm->link_once
  211. #ifdef TCC_TARGET_ARM
  212.                     && type != R_ARM_V4BX
  213. #endif
  214.                    ) {
  215.                 invalid_reloc:
  216.                     error_noabort("Invalid relocation entry [%2d] '%s' @ %.8x",
  217.                         i, strsec + sh->sh_name, rel->r_offset);
  218.                     goto fail;
  219.                 }
  220.                 rel->r_info = ELFW(R_INFO)(sym_index, type);
  221.                 /* offset the relocation offset */
  222.                 rel->r_offset += offseti;
  223.             }
  224.             break;
  225.         default:
  226.             break;
  227.         }
  228.     }
  229.     
  230.     ret = 0;
  231.  the_end:
  232.     tcc_free(symtab);
  233.     tcc_free(strtab);
  234.     tcc_free(old_to_new_syms);
  235.     tcc_free(sm_table);
  236.     tcc_free(strsec);
  237.     tcc_free(shdr);
  238.     return ret;
  239. }



阅读(3633) | 评论(0) | 转发(0) |
0

上一篇:Linus又骂人stupid

下一篇:tcc 一个预处理bug

给主人留下些什么吧!~~