Search code examples
assemblyreverse-engineeringelfdisassemblyobjdump

Can I combine all the sections that "objdump -S -d elf-file" generates into a file that can be re-assembled?


The ELF file is statically linked

and currently the objdump's output is something like:

Disassembly of section: .init:

xxxxxx

Disassembly of section: .plt:

xxxxxx

Disassembly of section: .text:

xxxxxx

basically what I want to achieve is

"elf-file -(disassemble by objdump)-> assemble file --(re-compile)--> same functionality"

I don't need the re-compiled binary to have the same binary content as the original one; the same functionality is enough.

After a quick search, the answer basically seems to be no: people argue that the disassembly loses information such as symbols. But I think that with static linking I can get rid of this issue...

Thank you!


Solution

  • objdump -S -d elf-file is usually not sufficient, as it lacks the .data section.

    But it seems that objdump -S -D elf-file is sufficient.

    To try this, I wrote a small x86-64 assembly file that uses extern printf, assembled it with YASM without debug symbols and linked with GCC.

    [bits 64]
    
    ; yasm -f elf64 -m amd64 1st_generation.asm -o 1st_generation.o; gcc -o 1st_generation 1st_generation.o
    ;
    ; 1st-generation test program (Intel syntax, YASM, x86-64 ELF).
    ; main calls the external printf with msg as format string and the value
    ; stored at num as argument, then issues a raw system call instead of
    ; returning to its caller.
    
    section .text
    global main
    extern printf
    
    main:
        push    rbp
        mov     rbp,rsp
        ; 1st printf argument: address of the format string.
        lea     rdi,[msg]
        ; 2nd printf argument.
        ; NOTE(review): this is a 64-bit load, but num is declared with dd
        ; (4 bytes), so the upper 32 bits come from the bytes stored after num.
        mov     rsi,[num]
        ; al = 0: no vector-register arguments are passed to the variadic printf.
        xor     eax,eax
        call    printf
        ; System call number 60 (exit on x86-64 Linux).
        mov     eax,60
        ; NOTE(review): the x86-64 syscall convention takes its first argument
        ; in rdi, not ebx, so this instruction does not select the exit status;
        ; confirm whether "xor edi,edi" was intended.
        xor     ebx,ebx
        syscall
    
    section .data
    
    ; printf format string: text, newline (0xa) and terminating 0.
    msg db 'abcdef = %d', 0xa, 0
    ; 32-bit value printed through %d.
    num dd 1337
    
    ; Extra string that main never references.
    testmsg1:
    db "test 01", 0x0a, 0
    
    

    Then I disassembled it with objdump -S -D -M intel elf-file >objdump_output.txt. -M intel produces the disassembly in Intel format. AT&T would work too, but I prefer Intel format for its clarity.

    Then I wrote a small gawk program, objdump_to_asm, to convert the disassembly produced by objdump -S -D -M intel elf-file >objdump_output.txt into a format suitable for YASM. It assumes x86-64 code and main as the entry point. It can easily be adapted to other environments (x86 is trivial, others may need more work). Usage: ./objdump_to_asm objdump_output.txt. Interestingly, the 1st generation executable has a size of 6598 bytes, whereas the 2nd generation executable has a size of only 6496 bytes. The 3rd generation assembly code is identical to the 2nd generation assembly code.

    Here's the code:

    #!/usr/bin/gawk -f
    #
    # objdump_to_asm -- convert "objdump -S -D -M intel" output into YASM source.
    #
    # gawk is required: the script relies on gensub(), which is a gawk
    # extension and is missing from other awk implementations.
    #
    # The BEGIN rule fills the lookup tables and column offsets that the main
    # rule reads, then consumes the first two input lines and emits the
    # "[bits 64]" directive when the input is marked as elf64-x86-64.
    BEGIN {
        # Prefix of the header line that objdump prints in front of each section.
        disassembly_of_section_string = "Disassembly of section ";
    
        # Sections that are recognised only so that current_section gets
        # updated; the main rule emits nothing for them.  The order is
        # significant because the main rule takes the first entry that matches.
        number_of_sections_to_discard = split( \
            ".interp .note.ABI-tag .note.gnu.build-id .dynsym .dynstr " \
            ".hash .gnu.hash .gnu.version .gnu.version_r .rela.dyn " \
            ".rela.init .eh_frame .dynamic .got .got.plt .jcr " \
            ".init_array .comment .note.gnu.gold-version",
            sections_to_discard, " ");
    
        # Sections the main rule looks into (.bss is tracked but has no handler).
        number_of_sections_to_handle = split(".plt .text .data .bss", sections_to_handle, " ");
    
        # Labels of .text blocks that are not carried over.  The table is not
        # consulted by the main rule; the last entry, <_start>:, was marked
        # "!!!" by the original author.
        number_of_blocks_to_discard_in_text = split( \
            "<call_gmon_start>: <deregister_tm_clones>: <register_tm_clones>: " \
            "<__do_global_dtors_aux>: <frame_dummy>: <__libc_csu_fini>: " \
            "<__libc_csu_init>: <_start>:",
            blocks_to_discard_in_text, " ");
    
        # .text blocks whose instructions are copied to the output.
        number_of_blocks_to_handle_in_text = split("main", blocks_to_handle_in_text, " ");
    
        # .data label after which the raw bytes are copied to the output.
        number_of_blocks_to_handle_in_data = split("__dso_handle", blocks_to_handle_in_data, " ");
    
        # External symbols that are called through the PLT.
        number_of_externs_to_handle = split("printf", externs_to_handle, " ");
    
        # 1-based columns of an objdump line: where the hex bytes begin and
        # where the mnemonic begins.
        hexdump_start_byte = 11;
        disassembly_start_byte = 33;
    
        current_section = "";
    
        # Consume the first two input lines; the second one is expected to
        # carry objdump's "file format ..." banner.
        getline;
        getline;
    
        if (index($0, "file format elf64-x86-64") > 0)
        {
            print "[bits 64]";
        }
        else
        {
            # Conversion still proceeds, but the user is told that the input
            # does not look like the format this script was written for.
            print "objdump_to_asm: warning: input is not marked as elf64-x86-64; no [bits 64] directive emitted" > "/dev/stderr";
        }
    }
    # Main rule, run for every input line that the BEGIN rule did not consume.
    #
    #   1. A "Disassembly of section <name>" header updates current_section.
    #   2. In .plt, a "<name@plt>:" label of a known extern emits "extern name".
    #   3. In .text, a "<name>:" label of a known block emits the block's
    #      instructions in a form YASM accepts.
    #   4. In .data, a "<name>:" label of a known block emits the hex bytes
    #      that follow it as "db" lines.
    #
    # All names are compared as literal text (index() / ==), not as regular
    # expressions, so the "." in section names matches only a dot.
    {
        # Section headers: remember the section and step to the next line so
        # that the header itself is not processed any further.
        section_index = find_listed_name(sections_to_handle, number_of_sections_to_handle, disassembly_of_section_string, "");
        if (section_index > 0)
        {
            current_section = sections_to_handle[section_index];
            getline;
        }
    
        section_index = find_listed_name(sections_to_discard, number_of_sections_to_discard, disassembly_of_section_string, "");
        if (section_index > 0)
        {
            current_section = sections_to_discard[section_index];
            getline;
        }
    
        if (current_section == ".plt")
        {
            extern_index = find_listed_name(externs_to_handle, number_of_externs_to_handle, "<", "@plt>:");
            if (extern_index > 0)
            {
                print "extern " externs_to_handle[extern_index];
                getline;
            }
        }
    
        if (current_section == ".text")
        {
            block_index = find_listed_name(blocks_to_handle_in_text, number_of_blocks_to_handle_in_text, "<", ">:");
            if (block_index > 0)
            {
                emit_text_block(blocks_to_handle_in_text[block_index]);
            }
        }
    
        if (current_section == ".data")
        {
            block_index = find_listed_name(blocks_to_handle_in_data, number_of_blocks_to_handle_in_data, "<", ">:");
            if (block_index > 0)
            {
                emit_data_block();
            }
        }
    }
    
    # Return the index of the first names[1..count] entry for which the current
    # line contains the literal text  prefix names[k] suffix , or 0 if none does.
    function find_listed_name(names, count, prefix, suffix,    k) {
        for (k = 1; k <= count; k++)
        {
            if (index($0, prefix names[k] suffix) > 0)
            {
                return k;
            }
        }
        return 0;
    }
    
    # Emit one .text block.  The current line is the block's "<name>:" label;
    # the following lines up to the first empty line (or end of input) are
    # converted and printed.
    function emit_text_block(block_name,    line, j) {
        print "section .text";
        print "global " block_name;
        print block_name ":";
    
        if ((getline) <= 0)
        {
            return;    # input ended right after the label
        }
    
        while (length($0) > 0)
        {
            # Drop the address and hex-byte columns, then adapt the operand
            # syntax: remove "PTR " and turn "ds:<addr>" into "[<addr>]".
            line = substr($0, disassembly_start_byte);
            line = gensub(/PTR /, "", "g", line);
            line = gensub(/(ds:)([a-z0-9]*)/, "[\\2]", "g", line);
    
            # Replace "<address> <name@plt>" by the bare extern name.  The
            # address is printed in hexadecimal, so a-f must be accepted too.
            for (j = 1; j <= number_of_externs_to_handle; j++)
            {
                if (index(line, "<" externs_to_handle[j] "@plt>") > 0)
                {
                    line = gensub("[0-9a-f]* <" externs_to_handle[j] "@plt>", externs_to_handle[j], "g", line);
                    break;
                }
            }
    
            # Lines that start with "data32" are not printed.
            if (index(line, "data32") != 1)
            {
                print line;
            }
    
            # getline leaves $0 untouched at end of input, so its result has to
            # be checked or this loop would never terminate.
            if ((getline) <= 0)
            {
                break;
            }
        }
    }
    
    # Emit one .data block.  The current line is the block's "<name>:" label.
    # The line directly after the label is read but not converted; every later
    # line up to the first empty line (or end of input) becomes a "db" line
    # built from its hex-byte column.
    function emit_data_block(    hexdump_only) {
        print "section .data";
    
        if ((getline) <= 0)
        {
            return;    # input ended right after the label
        }
    
        while (length($0) > 0)
        {
            if ((getline) <= 0)
            {
                break;    # end of input: $0 would otherwise stay non-empty forever
            }
    
            hexdump_only = substr($0, hexdump_start_byte, (disassembly_start_byte - hexdump_start_byte));
            hexdump_only = gensub(/([[:alnum:]]+)/, "0x\\1", "g", hexdump_only);
            # Two passes: one global substitution cannot use the same byte as
            # the right side of one pair and the left side of the next.
            hexdump_only = gensub(/(0x[[:alnum:]]+)( )(0x[[:alnum:]]+)/, "\\1, \\3", "g", hexdump_only);
            hexdump_only = gensub(/(0x[[:alnum:]]+)( )(0x[[:alnum:]]+)/, "\\1, \\3", "g", hexdump_only);
            if (index(hexdump_only, "0x") > 0)
            {
                print "db " hexdump_only;
            }
        }
    }
    
    

    Executing ./objdump_to_asm objdump_output.txt >2nd_generation.asm produces the following assembly file. It assembles with YASM and links with GCC. The assembled and linked executable is not identical to the original: it is 6496 bytes, whereas the original executable has a size of 6568 bytes.

    [bits 64]
    ; NOTE: the ';' comments in this listing are reader annotations; the
    ; converter script does not emit any comments.
    extern printf
    section .text
    global main
    main:
    push   rbp
    mov    rbp,rsp
    ; The operands below are absolute addresses, not labels.
    ; NOTE(review): presumably these are the addresses msg and num had in the
    ; disassembled executable, so the relinked .data would have to end up at
    ; the same place for them to stay valid - confirm.
    lea    rdi,[0x401958]
    
    ; 0x401965 = 0x401958 + 13; 13 is the size of the format string
    ; ('abcdef = %d' + 0x0a + 0) in the 1st-generation listing.
    mov    rsi,QWORD [0x401965]
    
    xor    eax,eax
    call   printf
    mov    eax,0x3c
    xor    ebx,ebx
    syscall 
    
    section .data
    ; Raw bytes, one db line per objdump line.  ASCII reading:
    ; 'a'
    db 0x61                     
    ; 'b'
    db 0x62                     
    ; "cdef"
    db 0x63, 0x64, 0x65, 0x66           
    ; " = %d" followed by a newline
    db 0x20, 0x3d, 0x20, 0x25, 0x64, 0x0a       
    ; 0x00 ends the format string; 0x39 is the low byte of 1337 (0x00000539)
    db 0x00, 0x39                   
    ; remaining three bytes of 1337 (little-endian), then "te"
    db 0x05, 0x00, 0x00, 0x74, 0x65         
    ; "st"
    db 0x73, 0x74                   
    ; " 0"
    db 0x20, 0x30                   
    ; "1" followed by a newline
    db 0x31, 0x0a                   
    ; 0x00 ends "test 01\n"; the final 0x00 has no counterpart in the
    ; 1st-generation listing (NOTE(review): possibly padding - confirm)
    db 0x00, 0x00