Search code examples
cstringheadersymbolself

Symbol table has missing items in c


i wanted to write a code that does store symbol table and string table from the raw .elf file. My code here accomplishes that but it has missing addresses and names:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

// ELF header structure
typedef struct {
    uint8_t e_ident[16];
    uint16_t e_type;
    uint16_t e_machine;
    uint32_t e_version;
    uint64_t e_entry;
    uint64_t e_phoff;
    uint64_t e_shoff;
    uint32_t e_flags;
    uint16_t e_ehsize;
    uint16_t e_phentsize;
    uint16_t e_phnum;
    uint16_t e_shentsize;
    uint16_t e_shnum;
    uint16_t e_shstrndx;
} Elf64_Ehdr;

// Section header structure
typedef struct {
    uint32_t sh_name;
    uint32_t sh_type;
    uint64_t sh_flags;
    uint64_t sh_addr;
    uint64_t sh_offset;
    uint64_t sh_size;
    uint32_t sh_link;
    uint32_t sh_info;
    uint64_t sh_addralign;
    uint64_t sh_entsize;
} Elf64_Shdr;

// Symbol table entry structure
typedef struct {
    uint32_t st_name;
    uint8_t st_info;
    uint8_t st_other;
    uint16_t st_shndx;
    uint32_t st_value; // normally it was uint64_t
    uint64_t st_size;
    uint32_t st_name2; // Extended name field
} Elf64_Sym;

Elf64_Sym* symbol_table;
char* string_table;
uint64_t stringSize;
size_t num_symbols;

struct SymbolEntry {
    char name[256];
    unsigned long address;
};

// Comparison function for sorting symbols based on their addresses
int compare_symbols(const void* a, const void* b) {
    const Elf64_Sym* symbol_a = (const Elf64_Sym*)a;
    const Elf64_Sym* symbol_b = (const Elf64_Sym*)b;

    if (symbol_a->st_value < symbol_b->st_value) return -1;
    if (symbol_a->st_value > symbol_b->st_value) return 1;
    return 0;
}

int findTables(){
    
    FILE* file = fopen("trace.elf", "rb");
    if (!file) {
        printf("Error opening file\n");
        return 1;
    }

    // Read the ELF header
    Elf64_Ehdr elf_header;
    if (fread(&elf_header, sizeof(Elf64_Ehdr), 1, file) != 1) {
        printf("Error reading ELF header\n");
        fclose(file);
        return 1;
    }

    // Get the section header table offset from the ELF header
    fseek(file, elf_header.e_shoff, SEEK_SET);

    // Read the section header table
    Elf64_Shdr section_headers[elf_header.e_shnum];
    if (fread(section_headers, sizeof(Elf64_Shdr), elf_header.e_shnum, file) != elf_header.e_shnum) {
        printf("Error reading section headers\n");
        fclose(file);
        return 1;
    }

    // Find the symbol table and the associated string table (for symbol names)
    Elf64_Shdr* symbol_table_header = NULL;
    Elf64_Shdr* string_table_header = NULL;
    for (int i = 0; i < elf_header.e_shnum; i++) {
        if (section_headers[i].sh_type == 0x2) { // SHT_SYMTAB
            symbol_table_header = &section_headers[i];
        } else if (section_headers[i].sh_type == 0x3) { // SHT_STRTAB
            string_table_header = &section_headers[i];
        }

        if (symbol_table_header && string_table_header) {
            break; // Both tables found, exit the loop
        }
    }

    if (!symbol_table_header || !string_table_header) {
        printf("Symbol table or string table not found\n");
        fclose(file);
        return 1;
    }

    // Calculate the number of symbols in the symbol table
    num_symbols = symbol_table_header->sh_size / sizeof(Elf64_Sym);

    // Read the symbol table
    symbol_table = (Elf64_Sym*)malloc(symbol_table_header->sh_size);
    if (!symbol_table) {
        printf("Error allocating memory for symbol table\n");
        fclose(file);
        return 1;
    }
    fseek(file, symbol_table_header->sh_offset, SEEK_SET);
    if (fread(symbol_table, sizeof(Elf64_Sym), num_symbols, file) != num_symbols) {
        printf("Error reading symbol table\n");
        free(symbol_table);
        fclose(file);
        return 1;
    }

    // Read the string table
    string_table = (char*)malloc(string_table_header->sh_size);
    if (!string_table) {
        printf("Error allocating memory for string table\n");
        free(symbol_table);
        fclose(file);
        return 1;
    }
    fseek(file, string_table_header->sh_offset, SEEK_SET);
    if (fread(string_table, 1, string_table_header->sh_size, file) != string_table_header->sh_size) {
        printf("Error reading string table\n");
        free(symbol_table);
        free(string_table);
        fclose(file);
        return 1;
    }
    stringSize = string_table_header->sh_size;
    fclose(file);
}

void printTables(){
    printf("Address\t\tName\n");
    printf("-------\t\t----\n");

    // Print all symbols with their addresses and names
    for (size_t j = 0; j < num_symbols; j++) {
        // Check if the symbol name is valid (inside the string table)
        if (symbol_table[j].st_name < stringSize) {
            char* symbol_name = &string_table[symbol_table[j].st_name];
            printf("0x%08x\t%s\n", symbol_table[j].st_value, symbol_name);
        }
    }
}

void freeSpaces(){
    free(symbol_table);
    free(string_table);
}

int main() {

    if(findTables() == 1){
        printf("Exiting...");
        return 0;
    }
    // Sort the symbol table based on addresses
    qsort(symbol_table, num_symbols, sizeof(Elf64_Sym), compare_symbols);

    printTables();
    
    freeSpaces();

    return 0;
}

Here's some of the output of the code for trace.elf:

0x004016d0      __do_global_dtors_aux
0x00401ac1      trace_return_values
0x00401b0c      foo
0x00401b36      main
0x00401b50      handle_zhaoxin
0x00401fb0      get_common_indices.constprop.0
0x00402df0      __libc_start_main_impl

but when use the "nm -n trace.elf" command and get the symbol table that way the same section is:

00000000004016d0 t __do_global_dtors_aux
0000000000401710 t frame_dummy
0000000000401745 T get_address
0000000000401ac1 T trace_return_values
0000000000401b0c T foo
0000000000401b21 T bar
0000000000401b36 T main
0000000000401b50 t handle_zhaoxin
0000000000401d20 t handle_amd
0000000000401ed0 t call_fini
0000000000401f10 t __libc_start_call_main

as you can see some addresses and names is missing in the first output. especially "bar". Is my code wrong or there are other problems?


Solution

  • symbol table and string table from the raw .elf file.

    You should be aware that an ELF file with .e_type of ET_DYN or ET_EXEC may have two symbol tables -- the "regular" and the "dynamic" one:

    readelf -WS /tmp/a.out | egrep 'symtab|dynsym'
      [ 7] .dynsym           DYNSYM          0000000000000410 000410 000120 18   A  8   1  8
      [30] .symtab           SYMTAB          0000000000000000 003080 000468 18     31  21  8
    

    and up to three SYMTAB sections, e.g.

     readelf -WS /tmp/a.out | egrep 'STRTAB'
      [ 8] .dynstr           STRTAB          0000000000000530 000530 000139 00   A  0   0  1
      [31] .strtab           STRTAB          0000000000000000 0034e8 00035a 00      0   0  1
      [32] .shstrtab         STRTAB          0000000000000000 003842 00012c 00      0   0  1
    

    Your code is wrong in that it picks up the first STRTAB section it finds, which may not the the one SYMTAB is actually referencing.

    For example, for the executable above your code will pick .dynstr instead of .strtab (which is what you intended).

    After you locate the SYMTAB section, you must look at symbol_table_header->sh_link to find the correct STRTAB section that your SYMTAB references (the sh_link is the 9th column above; you can see that .dynsym links to section 8, and .symtab to section 31).


    P.S. Unless your program is being built on a system without /usr/include/elf.h, it's a really bad idea™ to do this:

    // ELF header structure
    typedef struct {
        uint8_t e_ident[16];
        uint16_t e_type;
        uint16_t e_machine;
    ...
    

    You should use the system-provided <elf.h> instead.

    And if you are building this on a system without <elf.h>, it is still a very bad idea to do this:

     if (section_headers[i].sh_type == 0x2) { // SHT_SYMTAB
    

    Instead, write code like this:

    #ifndef SHT_SYMTAB
    #define SHT_SYMTAB 2
    #endif
    ...
    
     if (section_headers[i].sh_type == SHT_SYMTAB) {
    ...