#include <stdio.h>
#include <pthread.h>
// Thread start routine from the question. `arg` is ignored and the function
// ends without a return statement.
void* function(void* arg){
// 4096 x 4096 ints in automatic storage; the array is never read or written.
int picture[4096][4096];
}
// Starts one thread running function() with default attributes (NULL) and
// waits for it to finish.
int main(){
int N=10, S=10; // not used anywhere in this program
pthread_t pids[10]; // only pids[0] is used
pthread_create(&pids[0], NULL, function, NULL); // return value is not checked
pthread_join(pids[0], NULL); // NULL: the thread's return value is not collected
return 0;
}
I compiled the above code with: gcc test.c -pthread.
On running the executable, it crashes, displaying: Segmentation fault.
But if I remove the int picture[4096][4096]; definition, it doesn't crash.
What could be the reason for this?
The crashing program is:
#include <stdio.h>
#include <pthread.h>
// Thread start routine of the crashing program. `arg` is ignored, the local
// array below is never read or written, and the function ends without a
// return statement.
void *function(void *arg)
{
int picture[4096][4096]; // 4096*4096*sizeof(int) = 67108864 bytes = 64 MB
}
// Creates a single thread running function() with default attributes (NULL)
// and waits for it to terminate.
int main()
{
pthread_t pids[10]; // only pids[0] is used
pthread_create(&pids[0],NULL, function, NULL); // return value is not checked
pthread_join(pids[0],NULL); // NULL: the thread's return value is not collected
return 0;
}
The program crashes at execution time:
$ gcc p.c -lpthread
$ ./a.out
Segmentation fault (core dumped)
The default stack size for a thread in GLIBC/pthread is 8 MB (the value of the RLIMIT_STACK soft limit, which is queried with prlimit64() in the trace below). At thread creation time, the thread descriptor, also called Task Control Block (TCB), is stored at the high-address end of the stack area, and a red zone (a 4 KB guard page without read/write permission) is set at the low-address end. The stack grows from high to low addresses, i.e. towards the guard page.
Result of the program under the control of strace:
$ strace -f ./a.out
[...]
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7fee8d4dc000
mprotect(0x7fee8d4dd000, 8388608, PROT_READ|PROT_WRITE) = 0
brk(NULL) = 0x556cf1b72000
brk(0x556cf1b93000) = 0x556cf1b93000
clone(child_stack=0x7fee8dcdbfb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTIDstrace: Process 3338 attached
, parent_tid=[3338], tls=0x7fee8dcdc700, child_tidptr=0x7fee8dcdc9d0) = 3338
[pid 3338] set_robust_list(0x7fee8dcdc9e0, 24 <unfinished ...>
[pid 3337] futex(0x7fee8dcdc9d0, FUTEX_WAIT, 3338, NULL <unfinished ...>
[pid 3338] <... set_robust_list resumed>) = 0
[pid 3338] --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_ACCERR, si_addr=0x7fee8d4dcef0} ---
[pid 3337] <... futex resumed>) = ?
[pid 3338] +++ killed by SIGSEGV (core dumped) +++
+++ killed by SIGSEGV (core dumped) +++
Segmentation fault (core dumped)
In the preceding:
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7fee8d4dc000
mprotect(0x7fee8d4dd000, 8388608, PROT_READ|PROT_WRITE) = 0
clone(child_stack=0x7fee8dcdbfb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[3338], tls=0x7fee8dcdc700, child_tidptr=0x7fee8dcdc9d0) = 3338
Hence the following memory space layout:
+ +--------------------+ 0x7fee8d4dc000
| | |
4 KB | | RED ZONE |
(PROT_NONE)| | (guard page) |
+ +--------------------+ 0x7fee8d4dd000
| | |
| | |
| | ^ |
8192 KB | | | |
(PROT_READ/WRITE) | Stack |
| | | |
| | | |
| +--------------------+ 0x7fee8dcdbfb0
| | |
| | TCB + TLS |
| | |
+ +--------------------+ 0x7fee8dcdd000
The thread entry point defines a table of 4096x4096x4 bytes, which is equal to 64 MB. This is too much for the 8 MB stack area. However, the function only defines this huge local table and never reads from or writes to it, so we could expect no crash at all.
The strace logs show that the crash occurs upon access to address 0x7fee8d4dcef0, which lies just below the lowest address of the stack area (that is, above the stack area as drawn in the layout), inside the allocated memory zone:
[pid 3338] --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_ACCERR, si_addr=0x7fee8d4dcef0} ---
It is actually in the guard page:
+ +--------------------+ 0x7fee8d4dc000
| | |
4 KB | | RED ZONE <--------- Trap @ si_addr=0x7fee8d4dcef0
(PROT_NONE)| | | si_code=SEGV_ACCERR
+ +--------------------+ 0x7fee8d4dd000
| | |
| | |
| | ^ |
8192 KB | | | |
(PROT_READ/WRITE) | Stack |
| | | |
| | | |
| +--------------------+ 0x7fee8dcdbfb0
| | |
| | TCB + TLS |
| | |
+ +--------------------+ 0x7fee8dcdd000
The core dump analysis under gdb provides the following location for the crash:
$ gdb a.out core
[...]
(gdb) where
#0 0x00005594eb9461a0 in function (arg=<error reading variable: Cannot access memory at address 0x7fe95459ded8>) at p.c:56
#1 0x00007fe95879d609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#2 0x00007fe9586c4293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) disas /m
Dump of assembler code for function function:
56 void* function(void* arg){
0x00005594eb946189 <+0>: endbr64
0x00005594eb94618d <+4>: push %rbp
0x00005594eb94618e <+5>: mov %rsp,%rbp
0x00005594eb946191 <+8>: lea -0x4000000(%rsp),%r11
0x00005594eb946199 <+16>: sub $0x1000,%rsp
=> 0x00005594eb9461a0 <+23>: orq $0x0,(%rsp)
0x00005594eb9461a5 <+28>: cmp %r11,%rsp
0x00005594eb9461a8 <+31>: jne 0x5594eb946199 <function+16>
0x00005594eb9461aa <+33>: sub $0x20,%rsp
0x00005594eb9461ae <+37>: mov %rdi,-0x4000018(%rbp)
0x00005594eb9461b5 <+44>: mov %fs:0x28,%rax
0x00005594eb9461be <+53>: mov %rax,-0x8(%rbp)
0x00005594eb9461c2 <+57>: xor %eax,%eax
57 int picture[4096][4096];
58 }
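The above disassembly code of the thread entry point shows that gcc generates stack accesses every 4 KB (the memory page size). This is the stack probing emitted by GCC's stack clash protection (-fstack-clash-protection, which some distributions enable by default): a large frame is touched page by page so that it cannot silently jump over the guard page. The code first sets the R11 register to the address of the beginning of the local table (0x4000000 is 4096x4096xsizeof(int) = 67108864 bytes):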
0x00005594eb946191 <+8>: lea -0x4000000(%rsp),%r11
Then, it loops "oring" the content of the stack with 0 every 4096 bytes (0x1000):
0x00005594eb946199 <+16>: sub $0x1000,%rsp
=> 0x00005594eb9461a0 <+23>: orq $0x0,(%rsp)
0x00005594eb9461a5 <+28>: cmp %r11,%rsp
0x00005594eb9461a8 <+31>: jne 0x5594eb946199 <function+16>
Hence the crash: at some point, the orq instruction touches the guard page of the stack!
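N.B.: when the program is compiled with optimization, the unused table is eliminated and the crash no longer occurs: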
$ gcc p.c -lpthread -O2
$ ./a.out
The optimized disassembly code of function() is a simple "return":
$ objdump -S a.out
[...]
00000000000011f0 <function>:
11f0: f3 0f 1e fa endbr64
11f4: c3 retq
11f5: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
11fc: 00 00 00
11ff: 90 nop
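As seen above, the GLIBC/pthread library allocates a default stack of 8 MB. But it also provides the ability to use a stack allocated by the user, or simply to define the stack size, with the following steps: initialize a thread attribute object with pthread_attr_init(), set the desired stack size in it with pthread_attr_setstacksize(), and pass the attribute object to pthread_create().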
Here is an enhanced version of the program which defines a stack of 65 MB for the thread:
#include <stdio.h>
#include <pthread.h>
/*
 * Thread start routine for the large-stack demonstration.
 *
 * Reserves a 64 MB array in its own stack frame and returns without ever
 * reading or writing it; the array exists only to force a huge frame.
 *
 * arg: ignored.
 * Returns NULL. The value is returned explicitly because the pthread runtime
 * stores the start routine's result for pthread_join(), so falling off the
 * end of this non-void function would be undefined behaviour.
 */
void *function(void *arg)
{
  int picture[4096][4096]; // 4096*4096*sizeof(int) = 67108864 bytes = 64 MB

  (void)arg;     /* unused by design */
  (void)picture; /* intentionally never accessed */
  return NULL;
}
/*
 * Runs function() on a thread created with a 65 MB stack, so that its 64 MB
 * local array fits, then waits for that thread to terminate.
 *
 * Returns 0 on success, or 1 if any pthread call fails; the failing call and
 * its error number are reported on stderr.
 */
int main(void)
{
  /* 64 MB for the thread's local table plus 1 MB of headroom. */
  const size_t stack_size = (size_t)65 * 1024 * 1024;
  pthread_attr_t attr;
  pthread_t tid;
  int rc;

  /* pthread functions return the error number directly; errno is not set. */
  rc = pthread_attr_init(&attr);
  if (rc != 0) {
    fprintf(stderr, "pthread_attr_init: error %d\n", rc);
    return 1;
  }

  rc = pthread_attr_setstacksize(&attr, stack_size);
  if (rc != 0) {
    fprintf(stderr, "pthread_attr_setstacksize: error %d\n", rc);
    pthread_attr_destroy(&attr);
    return 1;
  }

  rc = pthread_create(&tid, &attr, function, NULL);
  /* The attribute object is only needed while creating the thread. */
  pthread_attr_destroy(&attr);
  if (rc != 0) {
    /* tid is indeterminate here, so it must not be joined. */
    fprintf(stderr, "pthread_create: error %d\n", rc);
    return 1;
  }

  rc = pthread_join(tid, NULL);
  if (rc != 0) {
    fprintf(stderr, "pthread_join: error %d\n", rc);
    return 1;
  }

  return 0;
}
Build and execution:
$ gcc p2.c -lpthread
$ ./a.out
There is no crash. With strace, we can verify the behavior:
$ strace ./a.out
[...]
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
mmap(NULL, 68161536, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7fe55afd3000
mprotect(0x7fe55afd4000, 68157440, PROT_READ|PROT_WRITE) = 0
brk(NULL) = 0x55b9d7ade000
brk(0x55b9d7aff000) = 0x55b9d7aff000
clone(child_stack=0x7fe55f0d2fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tid=[5199], tls=0x7fe55f0d3700, child_tidptr=0x7fe55f0d39d0) = 5199
futex(0x7fe55f0d39d0, FUTEX_WAIT, 5199, NULL) = 0
munmap(0x7fe55afd3000, 68161536) = 0
exit_group(0) = ?
+++ exited with 0 +++
We can see in the above traces:
mmap(NULL, 68161536, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7fe55afd3000
mprotect(0x7fe55afd4000, 68157440, PROT_READ|PROT_WRITE) = 0
Hence the new memory space layout:
+ +--------------------+ 0x7fe55afd3000
| | |
4 KB | | RED ZONE |
(PROT_NONE)| | |
+ +--------------------+ 0x7fe55afd4000
| | |
| | |
| | ^ |
66560 KB | | | |
(PROT_READ/WRITE) | Stack |
| | | |
| | | |
| +--------------------+ 0x7fe55f0d2fb0
| | |
| | TCB + TLS |
| | |
+ +--------------------+ 0x7fe55f0d4000
From a simple program ending in a strange crash, we took the opportunity to study the thread's stack layout in the GLIBC/pthread library, as well as its protection mechanism against stack overflows and the stack size configuration.
However, from a program design point of view, we should never allocate such huge variables on the stack. In the current program, the table should be dynamically allocated or defined as a global variable (possibly in Thread Local Storage), for example. But that is another story...