I wanted to write code that reads the symbol table and the string table from a raw .elf file. My code below does that, but some addresses and names are missing from its output:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
// ELF header structure: the fixed-size record at the very start of a 64-bit ELF file.
typedef struct {
    uint8_t  e_ident[16];   // identification bytes
    uint16_t e_type;        // object file type
    uint16_t e_machine;     // target architecture
    uint32_t e_version;     // object file version
    uint64_t e_entry;       // entry point address
    uint64_t e_phoff;       // file offset of the program header table
    uint64_t e_shoff;       // file offset of the section header table
    uint32_t e_flags;       // processor-specific flags
    uint16_t e_ehsize;      // size of this header
    uint16_t e_phentsize;   // size of one program header entry
    uint16_t e_phnum;       // number of program header entries
    uint16_t e_shentsize;   // size of one section header entry
    uint16_t e_shnum;       // number of section header entries
    uint16_t e_shstrndx;    // index of the section-name string table
} Elf64_Ehdr;
// Section header structure: one entry of the 64-bit ELF section header table.
typedef struct {
    uint32_t sh_name;       // offset of the section name in the section-name string table
    uint32_t sh_type;       // section type (e.g. 2 = symbol table, 3 = string table)
    uint64_t sh_flags;      // section attribute flags
    uint64_t sh_addr;       // address of the section in memory
    uint64_t sh_offset;     // file offset of the section contents
    uint64_t sh_size;       // size of the section in bytes
    uint32_t sh_link;       // index of an associated section
    uint32_t sh_info;       // extra, type-dependent information
    uint64_t sh_addralign;  // required alignment
    uint64_t sh_entsize;    // entry size for sections holding fixed-size records
} Elf64_Shdr;
// Symbol table entry structure.
// This must mirror the on-disk ELF64 record exactly (24 bytes): the table is
// read straight into an array of these, so any extra or narrowed field shifts
// every following entry and yields garbage or missing symbols.
typedef struct {
    uint32_t st_name;   // offset of the symbol name in the associated string table
    uint8_t  st_info;   // symbol type and binding
    uint8_t  st_other;  // symbol visibility
    uint16_t st_shndx;  // index of the section the symbol belongs to
    uint64_t st_value;  // symbol value (address); 64 bits wide, print with a 64-bit conversion
    uint64_t st_size;   // size of the associated object
} Elf64_Sym;
// Tables shared by findTables(), printTables(), freeSpaces() and main().
Elf64_Sym *symbol_table;  // heap array of symbol entries
size_t num_symbols;       // number of entries in symbol_table
char *string_table;       // heap buffer holding the raw string table bytes
uint64_t stringSize;      // size of string_table in bytes
// A symbol reduced to a fixed-size name buffer plus its address.
struct SymbolEntry {
    char name[256];         // symbol name storage
    unsigned long address;  // symbol address
};
// qsort() callback: orders two Elf64_Sym records by ascending st_value.
// Returns -1, 0 or 1 when the first symbol's value is lower than, equal to,
// or higher than the second's.
int compare_symbols(const void* a, const void* b) {
    const Elf64_Sym *lhs = a;
    const Elf64_Sym *rhs = b;

    // (greater) - (less) collapses the three-way comparison into one expression.
    return (lhs->st_value > rhs->st_value) - (lhs->st_value < rhs->st_value);
}
int findTables(){
FILE* file = fopen("trace.elf", "rb");
if (!file) {
printf("Error opening file\n");
return 1;
}
// Read the ELF header
Elf64_Ehdr elf_header;
if (fread(&elf_header, sizeof(Elf64_Ehdr), 1, file) != 1) {
printf("Error reading ELF header\n");
fclose(file);
return 1;
}
// Get the section header table offset from the ELF header
fseek(file, elf_header.e_shoff, SEEK_SET);
// Read the section header table
Elf64_Shdr section_headers[elf_header.e_shnum];
if (fread(section_headers, sizeof(Elf64_Shdr), elf_header.e_shnum, file) != elf_header.e_shnum) {
printf("Error reading section headers\n");
fclose(file);
return 1;
}
// Find the symbol table and the associated string table (for symbol names)
Elf64_Shdr* symbol_table_header = NULL;
Elf64_Shdr* string_table_header = NULL;
for (int i = 0; i < elf_header.e_shnum; i++) {
if (section_headers[i].sh_type == 0x2) { // SHT_SYMTAB
symbol_table_header = §ion_headers[i];
} else if (section_headers[i].sh_type == 0x3) { // SHT_STRTAB
string_table_header = §ion_headers[i];
}
if (symbol_table_header && string_table_header) {
break; // Both tables found, exit the loop
}
}
if (!symbol_table_header || !string_table_header) {
printf("Symbol table or string table not found\n");
fclose(file);
return 1;
}
// Calculate the number of symbols in the symbol table
num_symbols = symbol_table_header->sh_size / sizeof(Elf64_Sym);
// Read the symbol table
symbol_table = (Elf64_Sym*)malloc(symbol_table_header->sh_size);
if (!symbol_table) {
printf("Error allocating memory for symbol table\n");
fclose(file);
return 1;
}
fseek(file, symbol_table_header->sh_offset, SEEK_SET);
if (fread(symbol_table, sizeof(Elf64_Sym), num_symbols, file) != num_symbols) {
printf("Error reading symbol table\n");
free(symbol_table);
fclose(file);
return 1;
}
// Read the string table
string_table = (char*)malloc(string_table_header->sh_size);
if (!string_table) {
printf("Error allocating memory for string table\n");
free(symbol_table);
fclose(file);
return 1;
}
fseek(file, string_table_header->sh_offset, SEEK_SET);
if (fread(string_table, 1, string_table_header->sh_size, file) != string_table_header->sh_size) {
printf("Error reading string table\n");
free(symbol_table);
free(string_table);
fclose(file);
return 1;
}
stringSize = string_table_header->sh_size;
fclose(file);
}
void printTables(){
printf("Address\t\tName\n");
printf("-------\t\t----\n");
// Print all symbols with their addresses and names
for (size_t j = 0; j < num_symbols; j++) {
// Check if the symbol name is valid (inside the string table)
if (symbol_table[j].st_name < stringSize) {
char* symbol_name = &string_table[symbol_table[j].st_name];
printf("0x%08x\t%s\n", symbol_table[j].st_value, symbol_name);
}
}
}
void freeSpaces(){
free(symbol_table);
free(string_table);
}
/*
 * Entry point: load the tables, sort the symbols by address, print them and
 * release the memory. Returns EXIT_FAILURE when the tables cannot be loaded,
 * EXIT_SUCCESS otherwise.
 */
int main(void) {
    if (findTables() != 0) {
        printf("Exiting...");
        return EXIT_FAILURE;  // report the failure to the caller/shell
    }
    // Sort the symbol table based on addresses
    qsort(symbol_table, num_symbols, sizeof(Elf64_Sym), compare_symbols);
    printTables();
    freeSpaces();
    return EXIT_SUCCESS;
}
Here's some of the output of the code for trace.elf:
0x004016d0 __do_global_dtors_aux
0x00401ac1 trace_return_values
0x00401b0c foo
0x00401b36 main
0x00401b50 handle_zhaoxin
0x00401fb0 get_common_indices.constprop.0
0x00402df0 __libc_start_main_impl
But when I use the "nm -n trace.elf" command to get the symbol table, the same section is:
00000000004016d0 t __do_global_dtors_aux
0000000000401710 t frame_dummy
0000000000401745 T get_address
0000000000401ac1 T trace_return_values
0000000000401b0c T foo
0000000000401b21 T bar
0000000000401b36 T main
0000000000401b50 t handle_zhaoxin
0000000000401d20 t handle_amd
0000000000401ed0 t call_fini
0000000000401f10 t __libc_start_call_main
As you can see, some addresses and names are missing from the first output, notably "bar". Is my code wrong, or is there some other problem?
symbol table and string table from the raw .elf file.
You should be aware that an ELF file with .e_type
of ET_DYN
or ET_EXEC
may have two symbol tables -- the "regular" and the "dynamic" one:
readelf -WS /tmp/a.out | egrep 'symtab|dynsym'
[ 7] .dynsym DYNSYM 0000000000000410 000410 000120 18 A 8 1 8
[30] .symtab SYMTAB 0000000000000000 003080 000468 18 31 21 8
and up to three STRTAB
sections, e.g.
readelf -WS /tmp/a.out | egrep 'STRTAB'
[ 8] .dynstr STRTAB 0000000000000530 000530 000139 00 A 0 0 1
[31] .strtab STRTAB 0000000000000000 0034e8 00035a 00 0 0 1
[32] .shstrtab STRTAB 0000000000000000 003842 00012c 00 0 0 1
Your code is wrong in that it picks up the first STRTAB
section it finds, which may not be the one SYMTAB
is actually referencing.
For example, for the executable above your code will pick .dynstr
instead of .strtab
(which is what you intended).
After you locate the SYMTAB
section, you must look at symbol_table_header->sh_link
to find the correct STRTAB
section that your SYMTAB
references (the sh_link
is the 9th column above; you can see that .dynsym
links to section 8, and .symtab
to section 31).
P.S. Unless your program is being built on a system without /usr/include/elf.h
, it's a really bad idea™ to do this:
// ELF header structure
typedef struct {
uint8_t e_ident[16];
uint16_t e_type;
uint16_t e_machine;
...
You should use the system-provided <elf.h>
instead.
And if you are building this on a system without <elf.h>
, it is still a very bad idea to do this:
if (section_headers[i].sh_type == 0x2) { // SHT_SYMTAB
Instead, write code like this:
#ifndef SHT_SYMTAB
#define SHT_SYMTAB 2
#endif
...
if (section_headers[i].sh_type == SHT_SYMTAB) {
...