I am reading Smashing the Stack for Fun and Profit (in particular, this post refers to the "Buffer Overflows" section). The article is written for a 32-bit machine however I am working on a 64-bit for which I take account in my examples. One particular example is causing some issues that I cannot explain. example3.c has the functionality of overwriting the return address to skip an instruction in the main function. Here is my code:
#include <stdio.h>
void function(int a, int b, int c)
{
char buf1[5];
char buf2[10];
int *retptr;
retptr = (void*)(buf2 + 40);
(*retptr) += 8;
}
int main(void)
{
int x;
x = 0;
function(1,2,3);
x = 1;
printf("%d\n", x);
return 0;
}
I compile this program with gcc v4.8.2 with the following command:
gcc example3.c -o example3
Note that by default the gcc compiler appears to implement some stack
protection such as address space layout randomisation and stack canaries. I have taken into account these safety measures in my calculation of the ret
pointer value. Here
is the corresponding assembly produced by
gcc example3.c -S -fverbose-asm -o stack-protection.s
:
.file "example3.c"
# GNU C (Ubuntu 4.8.2-19ubuntu1) version 4.8.2 (x86_64-linux-gnu)
# compiled by GNU C version 4.8.2, GMP version 5.1.3, MPFR version 3.1.2-p3, MPC version 1.0.1
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -imultiarch x86_64-linux-gnu example3.c -mtune=generic
# -march=x86-64 -auxbase-strip verbose-stack-pro.s -fverbose-asm
# -fstack-protector -Wformat -Wformat-security
# options enabled: -faggressive-loop-optimizations
# -fasynchronous-unwind-tables -fauto-inc-dec -fbranch-count-reg -fcommon
# -fdelete-null-pointer-checks -fdwarf2-cfi-asm -fearly-inlining
# -feliminate-unused-debug-types -ffunction-cse -fgcse-lm -fgnu-runtime
# -fident -finline-atomics -fira-hoist-pressure -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -fmath-errno -fmerge-debug-strings
# -fmove-loop-invariants -fpeephole -fprefetch-loop-arrays
# -freg-struct-return -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fshow-column
# -fsigned-zeros -fsplit-ivs-in-unroller -fstack-protector
# -fstrict-volatile-bitfields -fsync-libcalls -ftrapping-math
# -ftree-coalesce-vars -ftree-cselim -ftree-forwprop -ftree-loop-if-convert
# -ftree-loop-im -ftree-loop-ivcanon -ftree-loop-optimize
# -ftree-parallelize-loops= -ftree-phiprop -ftree-pta -ftree-reassoc
# -ftree-scev-cprop -ftree-slp-vectorize -ftree-vect-loop-version
# -funit-at-a-time -funwind-tables -fverbose-asm -fzero-initialized-in-bss
# -m128bit-long-double -m64 -m80387 -maccumulate-outgoing-args
# -malign-stringops -mfancy-math-387 -mfp-ret-in-387 -mfxsr -mglibc
# -mieee-fp -mlong-double-80 -mmmx -mno-sse4 -mpush-args -mred-zone -msse
# -msse2 -mtls-direct-seg-refs
.text
.globl function
.type function, @function
function:
.LFB0:
.cfi_startproc
pushq %rbp #
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp #,
.cfi_def_cfa_register 6
subq $64, %rsp #,
movl %edi, -52(%rbp) # a, a
movl %esi, -56(%rbp) # b, b
movl %edx, -60(%rbp) # c, c
movq %fs:40, %rax #, tmp65
movq %rax, -8(%rbp) # tmp65, D.2197
xorl %eax, %eax # tmp65
leaq -32(%rbp), %rax #, tmp61
addq $40, %rax #, tmp62
movq %rax, -40(%rbp) # tmp62, ret
movq -40(%rbp), %rax # ret, tmp63
movl (%rax), %eax # *ret_1, D.2195
leal 8(%rax), %edx #, D.2195
movq -40(%rbp), %rax # ret, tmp64
movl %edx, (%rax) # D.2195, *ret_1
movq -8(%rbp), %rax # D.2197, tmp66
xorq %fs:40, %rax #, tmp66
je .L2 #,
call __stack_chk_fail #
.L2:
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size function, .-function
.section .rodata
.LC0:
.string "%d\n"
.text
.globl main
.type main, @function
main:
.LFB1:
.cfi_startproc
pushq %rbp #
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp #,
.cfi_def_cfa_register 6
subq $16, %rsp #,
movl $0, -4(%rbp) #, x
movl $3, %edx #,
movl $2, %esi #,
movl $1, %edi #,
call function #
movl $1, -4(%rbp) #, x
movl -4(%rbp), %eax # x, tmp61
movl %eax, %esi # tmp61,
movl $.LC0, %edi #,
movl $0, %eax #,
call printf #
movl $0, %eax #, D.2200
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1:
.size main, .-main
.ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
.section .note.GNU-stack,"",@progbits
Executing example3 has the desired effect of skipping the second assignment to x
and the program outputs 0
.
However, if instead I compile using the -fno-stack-protector
option:
gcc -fno-stack-protector example3.c -S -fverbose-asm -o no-stack-protection.s
I receive the following assembly file:
.file "example3.c"
# GNU C (Ubuntu 4.8.2-19ubuntu1) version 4.8.2 (x86_64-linux-gnu)
# compiled by GNU C version 4.8.2, GMP version 5.1.3, MPFR version 3.1.2-p3, MPC version 1.0.1
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -imultiarch x86_64-linux-gnu example3.c -mtune=generic
# -march=x86-64 -auxbase-strip verbose-no-stack-pro.s -fno-stack-protector
# -fverbose-asm -Wformat -Wformat-security
# options enabled: -faggressive-loop-optimizations
# -fasynchronous-unwind-tables -fauto-inc-dec -fbranch-count-reg -fcommon
# -fdelete-null-pointer-checks -fdwarf2-cfi-asm -fearly-inlining
# -feliminate-unused-debug-types -ffunction-cse -fgcse-lm -fgnu-runtime
# -fident -finline-atomics -fira-hoist-pressure -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -fmath-errno -fmerge-debug-strings
# -fmove-loop-invariants -fpeephole -fprefetch-loop-arrays
# -freg-struct-return -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fshow-column
# -fsigned-zeros -fsplit-ivs-in-unroller -fstrict-volatile-bitfields
# -fsync-libcalls -ftrapping-math -ftree-coalesce-vars -ftree-cselim
# -ftree-forwprop -ftree-loop-if-convert -ftree-loop-im -ftree-loop-ivcanon
# -ftree-loop-optimize -ftree-parallelize-loops= -ftree-phiprop -ftree-pta
# -ftree-reassoc -ftree-scev-cprop -ftree-slp-vectorize
# -ftree-vect-loop-version -funit-at-a-time -funwind-tables -fverbose-asm
# -fzero-initialized-in-bss -m128bit-long-double -m64 -m80387
# -maccumulate-outgoing-args -malign-stringops -mfancy-math-387
# -mfp-ret-in-387 -mfxsr -mglibc -mieee-fp -mlong-double-80 -mmmx -mno-sse4
# -mpush-args -mred-zone -msse -msse2 -mtls-direct-seg-refs
.text
.globl function
.type function, @function
function:
.LFB0:
.cfi_startproc
pushq %rbp #
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp #,
.cfi_def_cfa_register 6
movl %edi, -36(%rbp) # a, a
movl %esi, -40(%rbp) # b, b
movl %edx, -44(%rbp) # c, c
leaq -32(%rbp), %rax #, tmp61
addq $40, %rax #, tmp62
movq %rax, -8(%rbp) # tmp62, ret
movq -8(%rbp), %rax # ret, tmp63
movl (%rax), %eax # *ret_1, D.2195
leal 8(%rax), %edx #, D.2195
movq -8(%rbp), %rax # ret, tmp64
movl %edx, (%rax) # D.2195, *ret_1
popq %rbp #
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size function, .-function
.section .rodata
.LC0:
.string "%d\n"
.text
.globl main
.type main, @function
main:
.LFB1:
.cfi_startproc
pushq %rbp #
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp #,
.cfi_def_cfa_register 6
subq $16, %rsp #,
movl $0, -4(%rbp) #, x
movl $3, %edx #,
movl $2, %esi #,
movl $1, %edi #,
call function #
movl $1, -4(%rbp) #, x
movl -4(%rbp), %eax # x, tmp61
movl %eax, %esi # tmp61,
movl $.LC0, %edi #,
movl $0, %eax #,
call printf #
movl $0, %eax #, D.2196
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1:
.size main, .-main
.ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
.section .note.GNU-stack,"",@progbits
and the corresponding executable does not produce the desired value of 0 but a random value which I cannot reconcile with the assembly file.
My mental picture of the stack frame in the -fno-stack-protector
case is (sfp = saved frame pointer, ret = return address):
low memory address buf2 (16 bytes) buf1 (8 bytes) retptr (8 bytes) sfp (8 bytes) ret high memory address
<--- [ ][ ][ ][ ][ ] ...
top of stack bottom of stack
My Question:
Am I miscalculating the position of the return address in the unprotected case?
Am I miscalculating the position of the return address in the unprotected case?
That part is correct, at least as long as the address fits in an int. The correct type for retptr
would be long
with x86-64 asm, so that the pointer can hold a 64 bit address.
You could double check that by running the following program:
#include <stdio.h>
void function(int a, int b, int c)
{
char buf1[5];
char buf2[10];
int *retptr;
retptr = (void*)(buf2 + 40);
printf("retptr points to: %p\n", (long*)(long)*retptr);
(*retptr) += 8;
}
int main(void)
{
int x;
printf("ret address is %p\n", &&label);
x = 0;
function(1,2,3);
label:
x = 1;
printf("%d\n", x);
return 0;
}
By running this, you should be able to confirm that the address right after the function
in is the one also held by retptr
.
I believe that the reason you're not getting the expected 0
, lies in this line:
(*retptr) += 8;
On my 64 bit system, x = 1
is compiled as:
40058a: c7 45 fc 01 00 00 00 movl $0x1,-0x4(%rbp)
400591: 8b 45 fc mov -0x4(%rbp),%eax
400594: 89 c6 mov %eax,%esi
The first line loads 1
in x
and the two other lines pass the value of x as an argument to printf()
. Notice how that's 7 bytes, and not 8. If you change the increment to 7, you should see 0
, as you expected.
Effectively, by adding 8, the ret
instruction has setup the instruction pointer to point to 45
, rather than 8b
.
That code then becomes:
45 fc rex.RB cld
89 c6 mov %eax,%esi
I'm not entirely sure what happens at that point, and I suspect that depends on the CPU model. Mine appears to skip the instructions until mov %eax,%esi
, and so printf
displays the value of whatever %eax
was. If you look at the disassembly of function()
, it turns out that %rax
is used to store the value of retptr
, and that's the seemingly random value that gets printed.