Search code examples
assemblygccoptimizationx86-64ld

GCC creates wrong relative jump table when encountering switch statement with multiple numerically consecutive options


When building an OS kernel, I can't use the standard library or PIC. In this setup, the switch statement as optimized by GCC causes a fatal error. It looks like GCC uses a fast jump lookup table stored in .rodata when it encounters a switch with a large number of consecutive-valued cases, but this table doesn't seem to be usable in the kernel: ld didn't set the table up correctly (GDB confirms that).

Attempting the indirect jump with an incorrect address (something like 0x8?fffffff???????) crashes the entire system.

Because the conversion from ELF to a flat binary cannot guarantee that the entry point stays at the front of .text, I use a tiny "jmpq $kinit" stub placed in its own section to make the kernel entry easier for the bootloader to find.

void _entry kinit(uint32_t _mbtp)
{
    ...
    struct mbt_tag_header *p = (void *)(uintptr_t)_mbtp;
    ...
    p++; // skip the head part
    while (p->type != 0)
    {
        switch (p->type)
        {
        case MBT_BASIC_MEM:
            break;
            ...(more than 3 options)
        default:
            break;
        }
        p = (struct mbt_tag_header *)((char *)p + p->size);
        p = ((uintptr_t)p % 8) ? (struct mbt_tag_header *)((char *)p + 8 - ((uintptr_t)p % 8)) : p;
    }
    ...
    while (1)
        ;
}

gcc generates the following assembly code:

Disassembly of section .text:

init.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <kinit>:
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   48 81 ec a0 00 00 00    sub    $0xa0,%rsp
   b:   89 bd 6c ff ff ff       mov    %edi,-0x94(%rbp)
  11:   48 c7 85 70 ff ff ff    movq   $0x7830,-0x90(%rbp)
  18:   30 78 00 00 
  1c:   48 c7 85 78 ff ff ff    movq   $0x0,-0x88(%rbp)
  23:   00 00 00 00 
  27:   48 c7 45 80 00 00 00    movq   $0x0,-0x80(%rbp)
  2e:   00 
  2f:   48 c7 45 88 00 00 00    movq   $0x0,-0x78(%rbp)
  36:   00 
  37:   48 c7 45 90 00 00 00    movq   $0x0,-0x70(%rbp)
  3e:   00 
  3f:   48 c7 45 98 00 00 00    movq   $0x0,-0x68(%rbp)
  46:   00 
  47:   48 c7 45 a0 00 00 00    movq   $0x0,-0x60(%rbp)
  4e:   00 
  4f:   48 c7 45 a8 00 00 00    movq   $0x0,-0x58(%rbp)
  56:   00 
  57:   48 c7 45 b0 00 00 00    movq   $0x0,-0x50(%rbp)
  5e:   00 
  5f:   48 c7 45 b8 00 00 00    movq   $0x0,-0x48(%rbp)
  66:   00 
  67:   48 c7 45 c0 00 00 00    movq   $0x0,-0x40(%rbp)
  6e:   00 
  6f:   48 c7 45 c8 00 00 00    movq   $0x0,-0x38(%rbp)
  76:   00 
  77:   48 c7 45 d0 00 00 00    movq   $0x0,-0x30(%rbp)
  7e:   00 
  7f:   48 c7 45 d8 00 00 00    movq   $0x0,-0x28(%rbp)
  86:   00 
  87:   48 c7 45 e0 00 00 00    movq   $0x0,-0x20(%rbp)
  8e:   00 
  8f:   c7 45 e8 00 00 00 00    movl   $0x0,-0x18(%rbp)
  96:   c6 45 ec 00             movb   $0x0,-0x14(%rbp)
  9a:   8b 85 6c ff ff ff       mov    -0x94(%rbp),%eax
  a0:   48 89 45 f8             mov    %rax,-0x8(%rbp)
  a4:   48 b8 00 00 00 00 00    movabs $0x0,%rax
  ab:   00 00 00 
  ae:   ff d0                   callq  *%rax
  b0:   48 bf 00 00 00 00 00    movabs $0x0,%rdi
  b7:   00 00 00 
  ba:   b8 00 00 00 00          mov    $0x0,%eax
  bf:   48 ba 00 00 00 00 00    movabs $0x0,%rdx
  c6:   00 00 00 
  c9:   ff d2                   callq  *%rdx
  cb:   48 83 45 f8 08          addq   $0x8,-0x8(%rbp)
  d0:   e9 a7 01 00 00          jmpq   27c <kinit+0x27c>
  d5:   48 8b 45 f8             mov    -0x8(%rbp),%rax
  d9:   8b 00                   mov    (%rax),%eax
  db:   83 f8 0f                cmp    $0xf,%eax
  de:   0f 87 5e 01 00 00       ja     242 <kinit+0x242>
  e4:   89 c0                   mov    %eax,%eax
  e6:   48 8d 14 c5 00 00 00    lea    0x0(,%rax,8),%rdx
  ed:   00 
  ee:   48 b8 00 00 00 00 00    movabs $0x0,%rax
  f5:   00 00 00 
  f8:   48 01 d0                add    %rdx,%rax
  fb:   48 8b 00                mov    (%rax),%rax
  fe:   ff e0                   jmpq   *%rax
 100:   48 bf 00 00 00 00 00    movabs $0x0,%rdi
 107:   00 00 00 
 10a:   b8 00 00 00 00          mov    $0x0,%eax
 10f:   48 ba 00 00 00 00 00    movabs $0x0,%rdx
 116:   00 00 00 
 119:   ff d2                   callq  *%rdx
 11b:   48 8b 45 f8             mov    -0x8(%rbp),%rax
 11f:   48 83 c0 0c             add    $0xc,%rax
 123:   48 89 c7                mov    %rax,%rdi
 126:   b8 00 00 00 00          mov    $0x0,%eax
 12b:   48 ba 00 00 00 00 00    movabs $0x0,%rdx
 132:   00 00 00 
 135:   ff d2                   callq  *%rdx
 137:   48 bf 00 00 00 00 00    movabs $0x0,%rdi
 13e:   00 00 00 
 141:   b8 00 00 00 00          mov    $0x0,%eax
 146:   48 ba 00 00 00 00 00    movabs $0x0,%rdx
 14d:   00 00 00 
 150:   ff d2                   callq  *%rdx
 152:   e9 ec 00 00 00          jmpq   243 <kinit+0x243>
 157:   48 bf 00 00 00 00 00    movabs $0x0,%rdi
 15e:   00 00 00 
 161:   b8 00 00 00 00          mov    $0x0,%eax
 166:   48 ba 00 00 00 00 00    movabs $0x0,%rdx
 16d:   00 00 00 
 170:   ff d2                   callq  *%rdx
 172:   48 8b 45 f8             mov    -0x8(%rbp),%rax
 176:   48 83 c0 0c             add    $0xc,%rax
 17a:   48 89 c7                mov    %rax,%rdi
 17d:   b8 00 00 00 00          mov    $0x0,%eax
 182:   48 ba 00 00 00 00 00    movabs $0x0,%rdx
 189:   00 00 00 
 18c:   ff d2                   callq  *%rdx
 18e:   48 bf 00 00 00 00 00    movabs $0x0,%rdi
 195:   00 00 00 
 198:   b8 00 00 00 00          mov    $0x0,%eax
 19d:   48 ba 00 00 00 00 00    movabs $0x0,%rdx
 1a4:   00 00 00 
 1a7:   ff d2                   callq  *%rdx
 1a9:   e9 95 00 00 00          jmpq   243 <kinit+0x243>
 1ae:   48 bf 00 00 00 00 00    movabs $0x0,%rdi
 1b5:   00 00 00 
 1b8:   b8 00 00 00 00          mov    $0x0,%eax
 1bd:   48 ba 00 00 00 00 00    movabs $0x0,%rdx
 1c4:   00 00 00 
 1c7:   ff d2                   callq  *%rdx
 1c9:   48 8b 45 f8             mov    -0x8(%rbp),%rax
 1cd:   48 8b 40 08             mov    0x8(%rax),%rax
 1d1:   89 c2                   mov    %eax,%edx
 1d3:   48 8d 85 70 ff ff ff    lea    -0x90(%rbp),%rax
 1da:   89 d6                   mov    %edx,%esi
 1dc:   48 89 c7                mov    %rax,%rdi
 1df:   48 b8 00 00 00 00 00    movabs $0x0,%rax
 1e6:   00 00 00 
 1e9:   ff d0                   callq  *%rax
 1eb:   48 89 c7                mov    %rax,%rdi
 1ee:   b8 00 00 00 00          mov    $0x0,%eax
 1f3:   48 ba 00 00 00 00 00    movabs $0x0,%rdx
 1fa:   00 00 00 
 1fd:   ff d2                   callq  *%rdx
 1ff:   48 8b 45 f8             mov    -0x8(%rbp),%rax
 203:   48 8b 40 08             mov    0x8(%rax),%rax
 207:   48 89 c2                mov    %rax,%rdx
 20a:   48 b8 00 00 00 00 00    movabs $0x0,%rax
 211:   00 00 00 
 214:   48 89 50 08             mov    %rdx,0x8(%rax)
 218:   48 8b 45 f8             mov    -0x8(%rbp),%rax
 21c:   8b 40 14                mov    0x14(%rax),%eax
 21f:   48 ba 00 00 00 00 00    movabs $0x0,%rdx
 226:   00 00 00 
 229:   89 42 10                mov    %eax,0x10(%rdx)
 22c:   48 8b 45 f8             mov    -0x8(%rbp),%rax
 230:   8b 40 18                mov    0x18(%rax),%eax
 233:   48 ba 00 00 00 00 00    movabs $0x0,%rdx
 23a:   00 00 00 
 23d:   89 42 14                mov    %eax,0x14(%rdx)
 240:   eb 01                   jmp    243 <kinit+0x243>
 242:   90                      nop
 243:   48 8b 45 f8             mov    -0x8(%rbp),%rax
 247:   8b 40 04                mov    0x4(%rax),%eax
 24a:   89 c0                   mov    %eax,%eax
 24c:   48 01 45 f8             add    %rax,-0x8(%rbp)
 250:   48 8b 45 f8             mov    -0x8(%rbp),%rax
 254:   83 e0 07                and    $0x7,%eax
 257:   48 85 c0                test   %rax,%rax
 25a:   74 18                   je     274 <kinit+0x274>
 25c:   48 8b 45 f8             mov    -0x8(%rbp),%rax
 260:   83 e0 07                and    $0x7,%eax
 263:   ba 08 00 00 00          mov    $0x8,%edx
 268:   48 29 c2                sub    %rax,%rdx
 26b:   48 8b 45 f8             mov    -0x8(%rbp),%rax
 26f:   48 01 d0                add    %rdx,%rax
 272:   eb 04                   jmp    278 <kinit+0x278>
 274:   48 8b 45 f8             mov    -0x8(%rbp),%rax
 278:   48 89 45 f8             mov    %rax,-0x8(%rbp)
 27c:   48 8b 45 f8             mov    -0x8(%rbp),%rax
 280:   8b 00                   mov    (%rax),%eax
 282:   85 c0                   test   %eax,%eax
 284:   0f 85 4b fe ff ff       jne    d5 <kinit+0xd5>
 28a:   48 b8 00 00 00 00 00    movabs $0x0,%rax
 291:   00 00 00 
 294:   ff d0                   callq  *%rax
 296:   48 b8 00 00 00 00 00    movabs $0x0,%rax
 29d:   00 00 00 
 2a0:   ff d0                   callq  *%rax
 2a2:   eb fe                   jmp    2a2 <kinit+0x2a2>

Linked version (built with -g so that it is easier to read):

fffffff00020003e <kinit>:
#include <device/con/con.h>
#include <nst/printk.h>
#include <nst/types.h>
extern char* num2str32(char*,uint32_t);
void _entry kinit(uint32_t _mbtp)
{
fffffff00020003e:   55                      push   %rbp
fffffff00020003f:   48 89 e5                mov    %rsp,%rbp
fffffff000200042:   48 81 ec a0 00 00 00    sub    $0xa0,%rsp
fffffff000200049:   89 bd 6c ff ff ff       mov    %edi,-0x94(%rbp)
    char buf[125] = "0x";
...
    struct mbt_tag_header *p = (void *)(uintptr_t)_mbtp;
fffffff0002000d8:   8b 85 6c ff ff ff       mov    -0x94(%rbp),%eax
fffffff0002000de:   48 89 45 f8             mov    %rax,-0x8(%rbp)
    ...(printk function calls)
    p++; // skip the head part
fffffff000200109:   48 83 45 f8 08          addq   $0x8,-0x8(%rbp)
    while (p->type != 0)
fffffff00020010e:   e9 a7 01 00 00          jmpq   fffffff0002002ba <kinit+0x27c>
    {
        switch (p->type)
fffffff000200113:   48 8b 45 f8             mov    -0x8(%rbp),%rax
fffffff000200117:   8b 00                   mov    (%rax),%eax
fffffff000200119:   83 f8 0f                cmp    $0xf,%eax
fffffff00020011c:   0f 87 5e 01 00 00       ja     fffffff000200280 <kinit+0x242>
fffffff000200122:   89 c0                   mov    %eax,%eax
fffffff000200124:   48 8d 14 c5 00 00 00    lea    0x0(,%rax,8),%rdx
fffffff00020012b:   00 
fffffff00020012c:   48 b8 68 2e 20 00 f0    movabs $0xfffffff000202e68,%rax
fffffff000200133:   ff ff ff 
fffffff000200136:   48 01 d0                add    %rdx,%rax
fffffff000200139:   48 8b 00                mov    (%rax),%rax
fffffff00020013c:   ff e0                   jmpq   *%rax
        {
        case MBT_BASIC_MEM:
            break;
        case MBT_CMD:
           ...(as well)
        default:
            break;
fffffff000200280:   90                      nop
        }
        p = (struct mbt_tag_header *)((char *)p + p->size);
fffffff000200281:   48 8b 45 f8             mov    -0x8(%rbp),%rax
fffffff000200285:   8b 40 04                mov    0x4(%rax),%eax
fffffff000200288:   89 c0                   mov    %eax,%eax
fffffff00020028a:   48 01 45 f8             add    %rax,-0x8(%rbp)
        p = ((uintptr_t)p % 8) ? (struct mbt_tag_header *)((char *)p + 8 - ((uintptr_t)p % 8)) : p;
fffffff00020028e:   48 8b 45 f8             mov    -0x8(%rbp),%rax
fffffff000200292:   83 e0 07                and    $0x7,%eax
fffffff000200295:   48 85 c0                test   %rax,%rax
fffffff000200298:   74 18                   je     fffffff0002002b2 <kinit+0x274>
fffffff00020029a:   48 8b 45 f8             mov    -0x8(%rbp),%rax
fffffff00020029e:   83 e0 07                and    $0x7,%eax
fffffff0002002a1:   ba 08 00 00 00          mov    $0x8,%edx
fffffff0002002a6:   48 29 c2                sub    %rax,%rdx
fffffff0002002a9:   48 8b 45 f8             mov    -0x8(%rbp),%rax
fffffff0002002ad:   48 01 d0                add    %rdx,%rax
fffffff0002002b0:   eb 04                   jmp    fffffff0002002b6 <kinit+0x278>
fffffff0002002b2:   48 8b 45 f8             mov    -0x8(%rbp),%rax
fffffff0002002b6:   48 89 45 f8             mov    %rax,-0x8(%rbp)
    while (p->type != 0)
fffffff0002002ba:   48 8b 45 f8             mov    -0x8(%rbp),%rax
fffffff0002002be:   8b 00                   mov    (%rax),%eax
fffffff0002002c0:   85 c0                   test   %eax,%eax
fffffff0002002c2:   0f 85 4b fe ff ff       jne    fffffff000200113 <kinit+0xd5>
    }
    ...
    while (1)
fffffff0002002e0:   eb fe                   jmp    fffffff0002002e0 <kinit+0x2a2>

GCC -S output

    .file   "init.c"
    .text
    .section    .rodata
.LC0:
    .string "nST kernel\r\n"
.LC1:
    .string "CMD line:"
.LC2:
    .string "\r\n"
.LC3:
    .string "loaded by:"
.LC4:
    .string "frame buffer @"
    .text
    .globl  kinit
    .type   kinit, @function
kinit:
.LFB3:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    pushq   %r15
    pushq   %rbx
    subq    $160, %rsp
    .cfi_offset 15, -24
    .cfi_offset 3, -32
.L16:
    leaq    .L16(%rip), %rbx
    movabsq $_GLOBAL_OFFSET_TABLE_-.L16, %r11
    addq    %r11, %rbx
    movl    %edi, -164(%rbp)
    movq    $30768, -160(%rbp)
    ...
    movl    -164(%rbp), %eax
    movq    %rax, -24(%rbp)
    movq    %rbx, %r15
    movabsq $printk_init@PLTOFF, %rax
    addq    %rbx, %rax
    call    *%rax
    movabsq $.LC0@GOTOFF, %rax
    leaq    (%rbx,%rax), %rdi
    movq    %rbx, %r15
    movl    $0, %eax
    movabsq $printk@PLTOFF, %rdx
    addq    %rbx, %rdx
    call    *%rdx
    addq    $8, -24(%rbp)
    jmp .L2
.L13:
    movq    -24(%rbp), %rax
    movl    (%rax), %eax
    cmpl    $15, %eax
    ja  .L15
    movl    %eax, %eax
    leaq    0(,%rax,8), %rdx
    leaq    .L5(%rip), %rax
    movq    (%rdx,%rax), %rax
    leaq    .L5(%rip), %rdx
    addq    %rdx, %rax
    jmp *%rax
    .section    .rodata
    .align 8
    .align 4
.L5:
    .quad   .L15-.L5
    .quad   .L9-.L5
    .quad   .L8-.L5
    .quad   .L15-.L5
    .quad   .L15-.L5
    .quad   .L15-.L5
    .quad   .L15-.L5
    .quad   .L15-.L5
    .quad   .L6-.L5
    .quad   .L15-.L5
    .quad   .L15-.L5
    .quad   .L15-.L5
    .quad   .L15-.L5
    .quad   .L15-.L5
    .quad   .L15-.L5
    .quad   .L15-.L5
    .text
.L9:
    movabsq $.LC1@GOTOFF, %rax
    leaq    (%rbx,%rax), %rdi
    movq    %rbx, %r15
    ...
    jmp .L10
.L8:
    movabsq $.LC3@GOTOFF, %rax
    leaq    (%rbx,%rax), %rdi
    movq    %rbx, %r15
    movl    $0, %eax
    movabsq $printk@PLTOFF, %rdx
    addq    %rbx, %rdx
    call    *%rdx
    movq    -24(%rbp), %rax
    addq    $12, %rax
    movq    %rax, %rdi
    movq    %rbx, %r15
    movl    $0, %eax
    movabsq $printk@PLTOFF, %rdx
    addq    %rbx, %rdx
    call    *%rdx
    movabsq $.LC2@GOTOFF, %rax
    leaq    (%rbx,%rax), %rdi
    movq    %rbx, %r15
    movl    $0, %eax
    movabsq $printk@PLTOFF, %rdx
    addq    %rbx, %rdx
    call    *%rdx
    jmp .L10
.L6:
    movabsq $.LC4@GOTOFF, %rax
    leaq    (%rbx,%rax), %rdi
    movq    %rbx, %r15
    movl    $0, %eax
    ...
    jmp .L10
.L15:
    nop
.L10:
    movq    -24(%rbp), %rax
    movl    4(%rax), %eax
    movl    %eax, %eax
    addq    %rax, -24(%rbp)
    movq    -24(%rbp), %rax
    andl    $7, %eax
    testq   %rax, %rax
    je  .L11
    movq    -24(%rbp), %rax
    andl    $7, %eax
    movl    $8, %edx
    subq    %rax, %rdx
    movq    -24(%rbp), %rax
    addq    %rdx, %rax
    jmp .L12
.L11:
    movq    -24(%rbp), %rax
.L12:
    movq    %rax, -24(%rbp)
.L2:
    movq    -24(%rbp), %rax
    movl    (%rax), %eax
    testl   %eax, %eax
    jne .L13
    movq    %rbx, %r15
    movabsq $display_init@PLTOFF, %rax
    addq    %rbx, %rax
    call    *%rax
    movq    %rbx, %r15
    movabsq $console_init@PLTOFF, %rax
    addq    %rbx, %rax
    call    *%rax
.L14:
    jmp .L14
    .cfi_endproc
.LFE3:
    .size   kinit, .-kinit
    .ident  "GCC: (Debian 10.2.1-6) 10.2.1 20210110"
    .section    .note.GNU-stack,"",@progbits

This is the .rodata section:

Contents of section .rodata:
 fffffff000202e20 00460c00 00000000 450c0000 00000000  .F......E.......
 fffffff000202e30 6e535420 6b65726e 656c0d0a 00434d44  nST kernel...CMD
 fffffff000202e40 206c696e 653a000d 0a006c6f 61646564   line:....loaded
 fffffff000202e50 2062793a 00667261 6d652062 75666665   by:.frame buffe
 fffffff000202e60 72204000 00000000 80022000 f0ffffff  r @....... .....
 fffffff000202e70 3e012000 f0ffffff 95012000 f0ffffff  >. ....... .....
 fffffff000202e80 80022000 f0ffffff 80022000 f0ffffff  .. ....... .....
 fffffff000202e90 80022000 f0ffffff 80022000 f0ffffff  .. ....... .....
 fffffff000202ea0 80022000 f0ffffff ec012000 f0ffffff  .. ....... .....
 fffffff000202eb0 80022000 f0ffffff 80022000 f0ffffff  .. ....... .....
 fffffff000202ec0 80022000 f0ffffff 80022000 f0ffffff  .. ....... .....
 fffffff000202ed0 80022000 f0ffffff 80022000 f0ffffff  .. ....... .....
 fffffff000202ee0 80022000 f0ffffff c70c2000 f0ffffff  .. ....... .....
 fffffff000202ef0 da0c2000 f0ffffff ed0c2000 f0ffffff  .. ....... .....
 fffffff000202f00 000d2000 f0ffffff 130d2000 f0ffffff  .. ....... .....
 fffffff000202f10 260d2000 f0ffffff 390d2000 f0ffffff  &. .....9. .....
 fffffff000202f20 4c0d2000 f0ffffff 5f0d2000 f0ffffff  L. ....._. .....
 fffffff000202f30 6f0d2000 f0ffffff 7f0d2000 f0ffffff  o. ....... .....
 fffffff000202f40 8f0d2000 f0ffffff 9f0d2000 f0ffffff  .. ....... .....
 fffffff000202f50 af0d2000 f0ffffff bf0d2000 f0ffffff  .. ....... .....
 fffffff000202f60 cf0d2000 f0ffffff              

The kernel linker script:

ENTRY(kinit)
SECTIONS
{

    . = 0xfffffff000000000;
    . += 2M;
    .init :
    {
        *(.init)
    }
    .text :
    {
        __kernel_start = .;
        *(.text)
    }
    .data :
    {
        *(.data)
    }
    .rodata :
    {
        *(.rodata)
    }
    .bss :
    {
        *(.bss)
    }

}
GCC command line: -c -Wall -mcmodel=large -nostdinc -nostdlib -I ~/nst/include -g -fno-pic

Solution

  • That's because:

    With -mcmodel=large, the compiler assumes that all sizes and distances might need a full 64 bits, so it addresses things through the GOT (with -fno-pic, it instead creates a local offset table in .rodata). This will not work if the GOT is broken.

    Solution:

    1. Fix up the GOT/PLT tables

    Add the following to your ld linker script:

        .got :
        {
            *(.got)
            *(.igot)
        }
        .got.plt :
        {
            *(.got.plt)
            *(.igot.plt)
        }
    
    2. Turn off jump tables (not recommended)

    https://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html

    -fno-jump-tables

    Do not use jump tables for switch statements even where it would be more efficient than other code generation strategies. This option is of use in conjunction with -fpic or -fPIC for building code that forms part of a dynamic linker and cannot reference the address of a jump table. On some targets, jump tables do not require a GOT and this option is not needed.