Search code examples
pythonreverse-engineeringida

[IDA Pro][Python Script] :: How to export the binary pattern of a function from its start address to its end?


I want to export a function's binary pattern, from its start address to its end address, with a Python script in IDA Pro. For example, for sub_454345 I want to first identify the address at which the function starts and the address at which it ends, and then export the whole binary pattern of sub_454345.

Output example :

sub_454345
Start Address : 0x401000
End Address : 0xD5B000
Binary Pattern : 55 8B EC 51 51 56 57 8B 7D 08 8D 45

Solution

  • This is far from being error-proof but here's a possible start:

    from __future__ import print_function
    
    def dump_instruction_opcodes(start, end):
        """Return the original bytes in the half-open range [start, end).

        Args:
            start: address of the first byte to read.
            end: address one past the last byte to read.

        Returns:
            A list holding one ``idaapi.get_original_byte`` result per
            address, in ascending address order. The list is empty when
            ``end <= start``.
        """
        # Iterate over offsets instead of absolute addresses: Python 2's
        # xrange() only accepts arguments that fit in a C long, so it raises
        # OverflowError for high addresses, while the length of the range
        # stays small for the per-instruction calls made by main(). Using
        # range() also keeps this helper usable under Python 3.
        return [idaapi.get_original_byte(start + offset)
                for offset in range(end - start)]
    
    def main():
        """Print the opcode bytes of each instruction in the current function.

        The function is the one containing the address returned by
        ``ScreenEA()``. Its name and start/end addresses are printed first,
        followed by one line per code item giving the item's address and its
        bytes in hex. Nothing is dumped when there is no function at that
        address or when the function is reported as not returning.
        """
        # get cursor position
        screen_ea = ScreenEA()

        func = idaapi.get_func(screen_ea)
        if not func:
            print("No function at address {:#x}".format(screen_ea))
            return
        # does_return is a method of func_t and has to be called: the bare
        # attribute is a bound method, which is always truthy, so testing it
        # without the call would never take this branch.
        if not func.does_return():
            print("Function doesn't have an exit point...")
            return

        func_start = func.startEA
        func_end = func.endEA
        func_name = GetFunctionName(func_start)

        print("Function '{}' starts at {:#x} and ends at {:#x}".format(func_name, func_start, func_end))

        # traverse code or data in function boundaries
        for head in Heads(func_start, func_end):
            # we just want code, not data
            if not isCode(GetFlags(head)):
                continue

            next_head = NextHead(head, func_end)
            # if last instruction, set end to func_end
            if next_head == BADADDR:
                next_head = func_end

            opcodes = dump_instruction_opcodes(head, next_head)
            printable_opcodes = ' '.join("{:02x}".format(op) for op in opcodes)
            print("Current inst: {:#x} [{}]".format(head, printable_opcodes))

        print("[*] Done!")
    
    # Script entry point: call main() only when this file is executed as the
    # top-level script (module name "__main__"), not when it is imported.
    if __name__ == "__main__":
        main()
    

    Example

    Input:

    .text:0040F120 sub_40F120 proc near         ; CODE XREF: sub_40A8E0+2Dp
    .text:0040F120
    .text:0040F120 arg_0           = dword ptr  8
    .text:0040F120
    .text:0040F120                 push    ebp
    .text:0040F121                 mov     ebp, esp
    .text:0040F123                 push    esi
    .text:0040F124                 mov     esi, [ebp+arg_0]
    .text:0040F127                 test    esi, esi
    .text:0040F129                 jnz     short loc_40F133
    .text:0040F12B
    .text:0040F12B loc_40F12B:                             ; CODE XREF: 40F120+1Ej
    .text:0040F12B                                         ; 40F120+28j
    .text:0040F12B                 mov     eax, 0FFFFFFFEh
    ...
    

    Output:

    Function 'sub_40F120' starts at 0x40f120 and ends at 0x40f180
    Current inst: 0x40f120 [55]
    Current inst: 0x40f121 [8b ec]
    Current inst: 0x40f123 [56]
    Current inst: 0x40f124 [8b 75 08]
    Current inst: 0x40f127 [85 f6]
    Current inst: 0x40f129 [75 08]
    Current inst: 0x40f12b [b8 fe ff ff ff]
    ...
    

    The above script will work most of the time, but there can be multiple problems, including:

    • The function has thunk(s) [you'll need to check whether each instruction is a flow instruction and whether its target lies within the function bounds]
    • One of the exit points disrupts the CFG (syscall, int, etc.) [you'll need to trace back...]
    • Function has inner calls