I have a simple program that initializes a C-style string and then a character variable. I then use strcpy
to overflow the string's buffer, which I would expect to overwrite the memory of the character variable x
(assuming it is stored in adjacent memory).
// str is sized from its initializer: 5 characters + '\0' = 6 bytes.
char str[] = "Testt";
char x = 'X';
// print address and value of str
printf("%p: ", &str);
printf("%s\n", str);
// print value of x
printf("%c\n", x);
// cause buffer overflow (intentional): "Hello world" needs 12 bytes
// (11 characters + '\0') but str holds only 6, so this is undefined behavior.
strcpy(str, "Hello world");
// print address and value of str
printf("%p: ", &str);
printf("%s\n", str);
// print value of x (the address print below is disabled in this variant,
// so the address of x is never taken anywhere in this code)
// printf("%p: ", &x);
printf("%c\n", x);
return 0;
When run, this code produces output that looks like
0061FF29: Testt
X
0061FF29: Hello world
w
This shows that the buffer overflow did occur, and that it caused the value of the x
variable to change from 'X' to 'w'.
However, if I uncomment the // printf("%p: ", &x);
line (third from last) so that the address of x is printed, the buffer overflow no longer causes the x
variable to be overwritten.
For clarity here is that code (notice the change on the third to last line)
// str is sized from its initializer: 5 characters + '\0' = 6 bytes.
char str[] = "Testt";
char x = 'X';
// print address and value of str
printf("%p: ", &str);
printf("%s\n", str);
// print value of x
printf("%c\n", x);
// cause buffer overflow (intentional): "Hello world" needs 12 bytes
// (11 characters + '\0') but str holds only 6, so this is undefined behavior.
strcpy(str, "Hello world");
// print address and value of str
printf("%p: ", &str);
printf("%s\n", str);
// print address and value of x
// (taking &x here is the only difference from the first variant)
printf("%p: ", &x);
printf("%c\n", x);
return 0;
This causes the output to be:
0061FF2A: Testt
X
0061FF2A: Hello world
0061FF29: X
So in this situation, the buffer overflow did not overwrite the x
variable.
Why does simply printing the memory address of the x
variable have this effect on the buffer overflow situation?
Edit: added the generated assembly for the two situations. The generated assembly for the first case (no printf of &x):
# Compiler output for hello.c, first variant (the printf of &x is commented out).
# 32-bit x86, AT&T syntax. Locals are addressed relative to %esp:
#   str = bytes 25(%esp)..30(%esp), x = byte 31(%esp)  (x directly follows str).
.file "hello.c"
.def ___main; .scl 2; .type 32; .endef
.section .rdata,"dr"
# Format strings; \12 is octal for '\n'.
LC0:
.ascii "%p: \0"
LC1:
.ascii "%c\12\0"
.text
.globl _main
.def _main; .scl 2; .type 32; .endef
_main:
LFB17:
.cfi_startproc
# Prologue: save %ebp, round %esp down to a multiple of 16, reserve 32 bytes.
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
andl $-16, %esp
subl $32, %esp
# Not in the C source; inserted by the compiler.
# NOTE(review): presumably MinGW runtime initialisation - confirm.
call ___main
# char str[] = "Testt";
#   1953719636 = 0x74736554 = 'T','e','s','t' (little-endian) -> str[0..3]
#   116        = 0x0074     = 't','\0'                        -> str[4..5]
movl $1953719636, 25(%esp)
movw $116, 29(%esp)
# char x = 'X';  (88 = 'X'), stored in the byte right after str.
movb $88, 31(%esp)
# printf("%p: ", &str);  arguments are stored at (%esp) and 4(%esp).
leal 25(%esp), %eax
movl %eax, 4(%esp)
movl $LC0, (%esp)
call _printf
# printf("%s\n", str);  emitted as puts(str).
leal 25(%esp), %eax
movl %eax, (%esp)
call _puts
# printf("%c\n", x);  x is loaded sign-extended from 31(%esp).
movsbl 31(%esp), %eax
movl %eax, 4(%esp)
movl $LC1, (%esp)
call _printf
# strcpy(str, "Hello world");  inlined as three 4-byte stores (12 bytes total):
#   1819043144 = 0x6C6C6548 = 'H','e','l','l'   -> 25(%esp)..28(%esp)
#   1870078063 = 0x6F77206F = 'o',' ','w','o'   -> 29(%esp)..32(%esp)
#   6581362    = 0x00646C72 = 'r','l','d','\0'  -> 33(%esp)..36(%esp)
# The 'w' lands on 31(%esp), i.e. on x; bytes 32..36 lie beyond the
# 32 bytes reserved by the subl above.
leal 25(%esp), %eax
movl $1819043144, (%eax)
movl $1870078063, 4(%eax)
movl $6581362, 8(%eax)
# printf("%p: ", &str);
leal 25(%esp), %eax
movl %eax, 4(%esp)
movl $LC0, (%esp)
call _printf
# printf("%s\n", str);  emitted as puts(str).
leal 25(%esp), %eax
movl %eax, (%esp)
call _puts
# printf("%c\n", x);  reads 31(%esp), which now holds 'w'.
movsbl 31(%esp), %eax
movl %eax, 4(%esp)
movl $LC1, (%esp)
call _printf
# return 0;
movl $0, %eax
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
LFE17:
.ident "GCC: (MinGW.org GCC-6.3.0-1) 6.3.0"
# Symbol declarations for the two library functions called above.
.def _printf; .scl 2; .type 32; .endef
.def _puts; .scl 2; .type 32; .endef
and for the second situation
# Compiler output for hello.c, second variant (the address of x is printed).
# 32-bit x86, AT&T syntax. Locals are addressed relative to %esp:
#   x = byte 25(%esp), str = bytes 26(%esp)..31(%esp)  (x directly precedes str).
.file "hello.c"
.def ___main; .scl 2; .type 32; .endef
.section .rdata,"dr"
# Format strings; \12 is octal for '\n'.
LC0:
.ascii "%p: \0"
LC1:
.ascii "%c\12\0"
.text
.globl _main
.def _main; .scl 2; .type 32; .endef
_main:
LFB17:
.cfi_startproc
# Prologue: save %ebp, round %esp down to a multiple of 16, reserve 32 bytes.
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
andl $-16, %esp
subl $32, %esp
# Not in the C source; inserted by the compiler.
# NOTE(review): presumably MinGW runtime initialisation - confirm.
call ___main
# char str[] = "Testt";
#   1953719636 = 0x74736554 = 'T','e','s','t' (little-endian) -> str[0..3]
#   116        = 0x0074     = 't','\0'                        -> str[4..5]
movl $1953719636, 26(%esp)
movw $116, 30(%esp)
# char x = 'X';  (88 = 'X'), stored in the byte right before str.
movb $88, 25(%esp)
# printf("%p: ", &str);  arguments are stored at (%esp) and 4(%esp).
leal 26(%esp), %eax
movl %eax, 4(%esp)
movl $LC0, (%esp)
call _printf
# printf("%s\n", str);  emitted as puts(str).
leal 26(%esp), %eax
movl %eax, (%esp)
call _puts
# printf("%c\n", x);  x is loaded from 25(%esp), then sign-extended.
movzbl 25(%esp), %eax
movsbl %al, %eax
movl %eax, 4(%esp)
movl $LC1, (%esp)
call _printf
# strcpy(str, "Hello world");  inlined as three 4-byte stores (12 bytes total):
#   1819043144 = 0x6C6C6548 = 'H','e','l','l'   -> 26(%esp)..29(%esp)
#   1870078063 = 0x6F77206F = 'o',' ','w','o'   -> 30(%esp)..33(%esp)
#   6581362    = 0x00646C72 = 'r','l','d','\0'  -> 34(%esp)..37(%esp)
# All stores go to addresses above 25(%esp), so x is not touched; bytes
# 32..37 lie beyond the 32 bytes reserved by the subl above.
leal 26(%esp), %eax
movl $1819043144, (%eax)
movl $1870078063, 4(%eax)
movl $6581362, 8(%eax)
# printf("%p: ", &str);
leal 26(%esp), %eax
movl %eax, 4(%esp)
movl $LC0, (%esp)
call _printf
# printf("%s\n", str);  emitted as puts(str).
leal 26(%esp), %eax
movl %eax, (%esp)
call _puts
# printf("%p: ", &x);  &x is 25(%esp).
leal 25(%esp), %eax
movl %eax, 4(%esp)
movl $LC0, (%esp)
call _printf
# printf("%c\n", x);  25(%esp) still holds 'X'.
movzbl 25(%esp), %eax
movsbl %al, %eax
movl %eax, 4(%esp)
movl $LC1, (%esp)
call _printf
# return 0;
movl $0, %eax
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
LFE17:
.ident "GCC: (MinGW.org GCC-6.3.0-1) 6.3.0"
# Symbol declarations for the two library functions called above.
.def _printf; .scl 2; .type 32; .endef
.def _puts; .scl 2; .type 32; .endef
First, let's look at why the buffer overflow did not overwrite x in the second example (the overflow itself still happens; it just does not reach x).
Looking at your output:
0061FF2A: Testt
X
0061FF2A: Hello world
0061FF29: X
We can see that str
starts at a higher address (0061FF2A) than x
(0061FF29), so x sits immediately before str on the stack.
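The string "Hello world"
(11 characters plus the null terminator, 12 bytes in total) occupies memory addresses 0061FF2A
through 0061FF35, so 0061FF36 is the first byte past the end of the copied string.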
The stack looks something like
0061FF29 0061FF2A 0061FF36
| | |
----------------------------
| X | H e l l o w o r l d |
----------------------------
In this case it doesn't matter how far past the end of str
we write because x
comes before str
on the stack.
Next, let's look at why the buffer overflow did overwrite x in the first example.
We can't see the addresses of each variable directly in your output; however, we can see their locations on the stack in the assembly.
movl $1953719636, 25(%esp)
movw $116, 29(%esp)
movb $88, 31(%esp)
The x
variable is definitely at 31(%esp)
as we see the decimal ASCII value for 'X'
being placed there.
It is not too big of a leap to assume that the 5 character string "Testt"
is being stored at 25(%esp)
as the distance between 25(%esp)
and 31(%esp)
is just enough to store 5 characters and a null terminator.
So we know str
is at 25(%esp)
and x
is at 31(%esp)
. The stack should look something like:
esp +25 +31
| | |
----------------------
| | T e s t t | X |
----------------------
Now we can easily see that str
comes before x
and it is clear to see why writing past the end of str
would cause x
to be overwritten.
Now the main question: why was x overwritten in the first case but not in the second?
For some reason the compiler decided to place x
after str
in the first example and x
before str
in the second example.
As was pointed out in the comments, the exact location of local variables on the stack is not defined by C. The compiler can decide the order it wants things stored in and may change that order from program to program for non-obvious reasons.
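Essentially, the exact location and ordering of local variables on the stack is not specified by C, and writing past the end of str is undefined behavior, so whether the overflow happens to overwrite x depends entirely on the layout the compiler chose for that particular build. That is why the overwrite is visible in one case but not the other.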