Search code examples
cgocgoucontext

Golang+CGO using ucontext crash(on-purpose) crash with SIGSEGV or SIGTRAP while using different stacks


I'm currently writing a Golang + CGO program, and will use posix ucontext in CGO. Since all my core logic will be in the bind function of ucontext, we should catch up all the bad code. And I test it by accessing to the null pointer, which give me totally different behaviors that all depending on the stack location which ucontext used. Here are more details with simplified example.

If I allocate the ucontext stack on the thread's stack, it will trigger SIGSEGV. But if I allocate it on the heap, it will first trigger SIGSEGV, and then SIGTRAP while calling morestack_noctxt before calling into runtime.sigpanic. How can I fix this or how can I just get SIGSEGV? Why it need morestack?

All below are details, any suggestions or comments will be appreciated. Thanks!

Crash(with malloc stack):

fatal: morestack on g0
SIGTRAP: trace trap
PC=0x45f342 m=0 sigcode=128
signal arrived during cgo execution

goroutine 1 [syscall]:
runtime.cgocall(0x464870, 0xc000067f60)
    /usr/local/go/src/runtime/cgocall.go:157 +0x5c fp=0xc000067f38 sp=0xc000067f00 pc=0x40465c
main._Cfunc_core_logic()
    _cgo_gotypes.go:39 +0x45 fp=0xc000067f60 sp=0xc000067f38 pc=0x4646e5
main.coreLogic()
    /container_share/works/badstack/main.go:46 +0x17 fp=0xc000067f70 sp=0xc000067f60 pc=0x464737
main.main()
    /container_share/works/badstack/main.go:51 +0x17 fp=0xc000067f80 sp=0xc000067f70 pc=0x464777
runtime.main()
    /usr/local/go/src/runtime/proc.go:250 +0x1d3 fp=0xc000067fe0 sp=0xc000067f80 pc=0x436913
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000067fe8 sp=0xc000067fe0 pc=0x45f4a1

goroutine 2 [force gc (idle)]:
runtime.gopark(0x47a860, 0x6cb4d0, 0x11, 0x14, 0x1)
    /usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000054f88 sp=0xc000054f58 pc=0x436dbd
runtime.goparkunlock(0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000054fb8 sp=0xc000054f88 pc=0x436e4a
runtime.forcegchelper()
    /usr/local/go/src/runtime/proc.go:305 +0xb0 fp=0xc000054fe0 sp=0xc000054fb8 pc=0x436b90
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000054fe8 sp=0xc000054fe0 pc=0x45f4a1
created by runtime.init.6
    /usr/local/go/src/runtime/proc.go:293 +0x25

goroutine 3 [GC sweep wait]:
runtime.gopark(0x47a860, 0x6cb640, 0xc, 0x14, 0x1)
    /usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000055758 sp=0xc000055728 pc=0x436dbd
runtime.goparkunlock(0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000055788 sp=0xc000055758 pc=0x436e4a
runtime.bgsweep(0x0?)
    /usr/local/go/src/runtime/mgcsweep.go:278 +0x98 fp=0xc0000557c8 sp=0xc000055788 pc=0x421998
runtime.gcenable.func1()
    /usr/local/go/src/runtime/mgc.go:178 +0x26 fp=0xc0000557e0 sp=0xc0000557c8 pc=0x415f66
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc0000557e8 sp=0xc0000557e0 pc=0x45f4a1
created by runtime.gcenable
    /usr/local/go/src/runtime/mgc.go:178 +0x6b

goroutine 4 [GC scavenge wait]:
runtime.gopark(0x47a860, 0x6cb6c0, 0xd, 0x14, 0x2)
    /usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000055f48 sp=0xc000055f18 pc=0x436dbd
runtime.goparkunlock(0x47ca80?, 0x1?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000055f78 sp=0xc000055f48 pc=0x436e4a
runtime.(*scavengerState).park(0x6cb6c0)
    /usr/local/go/src/runtime/mgcscavenge.go:400 +0x4b fp=0xc000055fa0 sp=0xc000055f78 pc=0x41f44b
runtime.bgscavenge(0x0?)
    /usr/local/go/src/runtime/mgcscavenge.go:628 +0x45 fp=0xc000055fc8 sp=0xc000055fa0 pc=0x41fa25
runtime.gcenable.func2()
    /usr/local/go/src/runtime/mgc.go:179 +0x26 fp=0xc000055fe0 sp=0xc000055fc8 pc=0x415f06
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000055fe8 sp=0xc000055fe0 pc=0x45f4a1
created by runtime.gcenable
    /usr/local/go/src/runtime/mgc.go:179 +0xaa

rax    0x17
rbx    0x476413
rcx    0x460c95
rdx    0x17
rdi    0x2
rsi    0x476413
rbp    0x7f18906b3ff0
rsp    0x7f18906b3fd8
r8     0xffffffff
r9     0x0
r10    0x8
r11    0x246
r12    0xc000067c70
r13    0x0
r14    0x6cb760
r15    0x0
rip    0x45f342
rflags 0x206
cs     0x33
fs     0x0
gs     0x0

Crash(with thread's stack):

fatal error: unexpected signal during runtime execution
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x4647a0]

runtime stack:
runtime.throw({0x479118?, 0xffffffffffffffff?})
    /usr/local/go/src/runtime/panic.go:1047 +0x5d fp=0x7fff293551f0 sp=0x7fff293551c0 pc=0x43417d
runtime.sigpanic()
    /usr/local/go/src/runtime/signal_unix.go:825 +0x285 fp=0x7fff29355220 sp=0x7fff293551f0 pc=0x4495a5

goroutine 1 [syscall]:
runtime.cgocall(0x464890, 0xc000067f60)
    /usr/local/go/src/runtime/cgocall.go:157 +0x5c fp=0xc000067f38 sp=0xc000067f00 pc=0x40465c
main._Cfunc_core_logic()
    _cgo_gotypes.go:39 +0x45 fp=0xc000067f60 sp=0xc000067f38 pc=0x4646e5
main.coreLogic()
    /container_share/works/badstack/main.go:46 +0x17 fp=0xc000067f70 sp=0xc000067f60 pc=0x464737
main.main()
    /container_share/works/badstack/main.go:51 +0x17 fp=0xc000067f80 sp=0xc000067f70 pc=0x464777
runtime.main()
    /usr/local/go/src/runtime/proc.go:250 +0x1d3 fp=0xc000067fe0 sp=0xc000067f80 pc=0x436913
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000067fe8 sp=0xc000067fe0 pc=0x45f4a1

goroutine 2 [force gc (idle)]:
runtime.gopark(0x47a880, 0x6cb4d0, 0x11, 0x14, 0x1)
    /usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000054f88 sp=0xc000054f58 pc=0x436dbd
runtime.goparkunlock(0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000054fb8 sp=0xc000054f88 pc=0x436e4a
runtime.forcegchelper()
    /usr/local/go/src/runtime/proc.go:305 +0xb0 fp=0xc000054fe0 sp=0xc000054fb8 pc=0x436b90
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000054fe8 sp=0xc000054fe0 pc=0x45f4a1
created by runtime.init.6
    /usr/local/go/src/runtime/proc.go:293 +0x25

goroutine 3 [GC sweep wait]:
runtime.gopark(0x47a880, 0x6cb640, 0xc, 0x14, 0x1)
    /usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000055758 sp=0xc000055728 pc=0x436dbd
runtime.goparkunlock(0x0?, 0x0?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000055788 sp=0xc000055758 pc=0x436e4a
runtime.bgsweep(0x0?)
    /usr/local/go/src/runtime/mgcsweep.go:278 +0x98 fp=0xc0000557c8 sp=0xc000055788 pc=0x421998
runtime.gcenable.func1()
    /usr/local/go/src/runtime/mgc.go:178 +0x26 fp=0xc0000557e0 sp=0xc0000557c8 pc=0x415f66
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc0000557e8 sp=0xc0000557e0 pc=0x45f4a1
created by runtime.gcenable
    /usr/local/go/src/runtime/mgc.go:178 +0x6b

goroutine 4 [GC scavenge wait]:
runtime.gopark(0x47a880, 0x6cb6c0, 0xd, 0x14, 0x2)
    /usr/local/go/src/runtime/proc.go:381 +0xfd fp=0xc000055f48 sp=0xc000055f18 pc=0x436dbd
runtime.goparkunlock(0x47caa0?, 0x1?, 0x0?, 0x0?)
    /usr/local/go/src/runtime/proc.go:387 +0x2a fp=0xc000055f78 sp=0xc000055f48 pc=0x436e4a
runtime.(*scavengerState).park(0x6cb6c0)
    /usr/local/go/src/runtime/mgcscavenge.go:400 +0x4b fp=0xc000055fa0 sp=0xc000055f78 pc=0x41f44b
runtime.bgscavenge(0x0?)
    /usr/local/go/src/runtime/mgcscavenge.go:628 +0x45 fp=0xc000055fc8 sp=0xc000055fa0 pc=0x41fa25
runtime.gcenable.func2()
    /usr/local/go/src/runtime/mgc.go:179 +0x26 fp=0xc000055fe0 sp=0xc000055fc8 pc=0x415f06
runtime.goexit()
    /usr/local/go/src/runtime/asm_amd64.s:1598 +0x1 fp=0xc000055fe8 sp=0xc000055fe0 pc=0x45f4a1
created by runtime.gcenable
    /usr/local/go/src/runtime/mgc.go:179 +0xaa

GDB(with malloc stack):

This will call into runtime.morestack_noctxt, and end up with badstack since it's on g0's stack.

(gdb) b runtime.sigpanic
Breakpoint 1 at 0x449320: file /usr/local/go/src/runtime/signal_unix.go, line 822.
(gdb) r
Starting program: /container_share/works/badstack/main
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
[New Thread 0x7fffd05d3700 (LWP 213229)]
[New Thread 0x7fffcfdd2700 (LWP 213230)]
[New Thread 0x7fffcf5d1700 (LWP 213231)]
[New Thread 0x7fffcedd0700 (LWP 213232)]
[New Thread 0x7fffce58f700 (LWP 213233)]
[New Thread 0x7fffcdd8e700 (LWP 213234)]

Thread 1 "main" received signal SIGSEGV, Segmentation fault.
0x00000000004647a0 in core () at /container_share/works/badstack/main.go:18
18    *ptr = 1024;
(gdb) c
Continuing.

Thread 1 "main" hit Breakpoint 1, runtime.sigpanic () at /usr/local/go/src/runtime/signal_unix.go:822
822 func sigpanic() {
(gdb) p $rsp
$1 = (void *) 0x7fffcd58cfe8
(gdb) x/x $r14+0x10
0x6cb770 <runtime.g0+16>:   0xff7fed70
(gdb) c
Continuing.
fatal: morestack on g0

GDB(with thread's stack):

This seems all working as expected.

(gdb) b runtime.sigpanic
Breakpoint 1 at 0x449320: file /usr/local/go/src/runtime/signal_unix.go, line 822.
(gdb) r
Starting program: /container_share/works/badstack/main
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
[New Thread 0x7fffd05d3700 (LWP 214288)]
[New Thread 0x7fffcfdd2700 (LWP 214289)]
[New Thread 0x7fffcf5d1700 (LWP 214290)]
[New Thread 0x7fffcedd0700 (LWP 214291)]
[New Thread 0x7fffce5cf700 (LWP 214292)]

Thread 1 "main" received signal SIGSEGV, Segmentation fault.
0x00000000004647a0 in core () at /container_share/works/badstack/main.go:18
18    *ptr = 1024;
(gdb) c
Continuing.

Thread 1 "main" hit Breakpoint 1, runtime.sigpanic () at /usr/local/go/src/runtime/signal_unix.go:822
822 func sigpanic() {
(gdb) p $rsp
$1 = (void *) 0x7fffffffd8e8
(gdb) x/x $r14+0x10
0x6cb770 <runtime.g0+16>:   0xff7fed70
(gdb) c
Continuing.
fatal error: unexpected signal during runtime execution
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x4647a0]

ENV:

❯ clang -v                                                                     
clang version 16.0.6 (Red Hat 16.0.6-2.module_el8+588+6f71ce7b)

❯ gcc -v                                                                       
gcc version 8.4.1 20200928 (Red Hat 8.4.1-1) (GCC)

❯ uname -a
Linux 6cc94b77abd7 6.4.16-orbstack-00103-g02b40eb69695 #1 SMP Wed Sep 13 10:13:30 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux

Reproducer:

compile with: CC=clang CXX=clang++ CFLAGS="-g -O0" go build -gcflags="all=-N -l" main.go

package main

/*
#include <stdio.h>
#include <stddef.h>
#include <ucontext.h>
#include <stdlib.h>

static ucontext_t uctx_main, uctx_core;

void core()
{
  // core logic


  // trigger crash
  int* ptr = NULL;
  *ptr = 1024;
}

void core_logic()
{
  size_t size = 1024 * 1024;
  char stack[size]; // SIGSEGV
  //void* stack = malloc(size); // SIGTRAP

  if (getcontext(&uctx_core) == -1)
    printf("failed to getcontext");

  uctx_core.uc_stack.ss_sp = stack;
  uctx_core.uc_stack.ss_size = size;
  uctx_core.uc_link = &uctx_main;

  makecontext(&uctx_core, core, 0);

  if (swapcontext(&uctx_main, &uctx_core) == -1)
    printf("failed to swapcontext");

  printf("back\n");
}
*/
// #cgo CFLAGS: -g -O0
import "C"

func coreLogic() {
  C.core_logic()
}

func main() {
  // Call the C function from Go
  coreLogic()
}

Here is my guess, but seems not correct: when I used the heap stack, it was treat as it has stack overflow, and should more stack, but end up find it's g0, then fatal. But it seems the goroutine's stack is much more lower address than thread's stack?

Updated on 2023-09-24:

with a pure C program, it will get a SIGSEGV whatever stack I use.

#include <stdio.h>
#include <stddef.h>
#include <ucontext.h>
#include <stdlib.h>

static ucontext_t uctx_main, uctx_core;

void core()
{
  // core logic


  // trigger crash
  int* ptr = NULL;
  *ptr = 1024;
}

void core_logic()
{
  size_t size = 100 * 1024 * 1024;
  //char stack[size]; // SIGSEGV
  void* stack = malloc(size); // SIGTRAP

  if (getcontext(&uctx_core) == -1)
    printf("failed to getcontext");

  uctx_core.uc_stack.ss_sp = stack;
  uctx_core.uc_stack.ss_size = size;
  uctx_core.uc_link = &uctx_main;

  makecontext(&uctx_core, core, 0);

  if (swapcontext(&uctx_main, &uctx_core) == -1)
    printf("failed to swapcontext");

  printf("back\n");
}

void coreLogic() {
  core_logic();
}

int main() {
  coreLogic();
  return 0;
}

Solution

  • Finally I figure this out with the help of Go Team members by opening an issue in Golang's github repo, and in case you need.

    TL;DR: this is a bug which started since Go1.21, and should be fixed in Go1.22. And you may still facing this problem even you are using Go1.20 because of another bug. Maybe I will post more details later if anyone need it or you can check more in the issue I mentioned before.