Search code examples
Tags: c++, assembly, x86-64, relative-addressing

Deciphering RIP-relative LEA instructions as part of a switch statement


What is the compiler doing at the beginning of the switch statement (see the snippet below) to compute the address in %rax, so that `notrack jmpq *%rax` jumps to the correct case?

Are the constants 0xe07 and 0xdfb padding?

lea    0xe07(%rip),%rax 
lea    0xdfb(%rip),%rdx

Snippet

10 [1]      switch (c) {
    0x5555555551ec  <+   35>        0f be 45 fc              movsbl -0x4(%rbp),%eax
    0x5555555551f0  <+   39>        83 e8 2d                 sub    $0x2d,%eax
    0x5555555551f3  <+   42>        83 f8 32                 cmp    $0x32,%eax
    0x5555555551f6  <+   45>        0f 87 18 01 00 00        ja     0x555555555314 <aa2i(char)+331>
    0x5555555551fc  <+   51>        89 c0                    mov    %eax,%eax
    0x5555555551fe  <+   53>        48 8d 14 85 00 00 00 00  lea    0x0(,%rax,4),%rdx
    0x555555555206  <+   61>        48 8d 05 07 0e 00 00     lea    0xe07(%rip),%rax        # 0x555555556014
    0x55555555520d  <+   68>        8b 04 02                 mov    (%rdx,%rax,1),%eax
    0x555555555210  <+   71>        48 98                    cltq
    0x555555555212  <+   73>        48 8d 15 fb 0d 00 00     lea    0xdfb(%rip),%rdx        # 0x555555556014
    0x555555555219  <+   80>        48 01 d0                 add    %rdx,%rax
    0x55555555521c  <+   83>        3e ff e0                 notrack jmpq *%rax

cpp code

#include <QCoreApplication>

const int ANY = 20;  // internal code for X (any amino acid)
const int GAP = 21;  // internal code for a gap

/**
 * Map a one-letter amino-acid character to its internal numeric code.
 *
 * Lower-case ASCII letters are folded to upper case first. The twenty
 * standard residues map to 0..19 in the order
 *     A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V
 * X, J and O map to ANY; U, B and Z map to the codes of C, D and E;
 * '-', '.' and '_' map to GAP.
 *
 * Returns -1 for characters in the range 0..32 (white space and control
 * characters) and -2 for every other unrecognised character.
 */
char aa2i(char c) {
    // Fold 'a'..'z' onto 'A'..'Z'.
    if (!(c < 'a' || c > 'z')) {
        c -= 'a' - 'A';
    }

    // Labels that share a result are grouped on one line.
    switch (c) {
        case 'A':                     return 0;
        case 'R':                     return 1;
        case 'N':                     return 2;
        case 'D': case 'B':           return 3;   // B: D (or N)
        case 'C': case 'U':           return 4;   // U: selenocysteine -> cysteine
        case 'Q':                     return 5;
        case 'E': case 'Z':           return 6;   // Z: E (or Q)
        case 'G':                     return 7;
        case 'H':                     return 8;
        case 'I':                     return 9;
        case 'L':                     return 10;
        case 'K':                     return 11;
        case 'M':                     return 12;
        case 'F':                     return 13;
        case 'P':                     return 14;
        case 'S':                     return 15;
        case 'T':                     return 16;
        case 'W':                     return 17;
        case 'Y':                     return 18;
        case 'V':                     return 19;
        case 'X': case 'J': case 'O': return ANY;
        case '-': case '.': case '_': return GAP;
        default:
            break;
    }

    // Not a known symbol: white space / control characters give -1, the rest -2.
    return (c >= 0 && c <= 32) ? -1 : -2;
}

// Minimal driver: calls aa2i once and discards the result.
int main(int argc, char *argv[])
{
    (void)aa2i('R');
    return 0;
}

Disassembly:

            6 [1]   char aa2i(char c) {
    0x5555555551c9                  f3 0f 1e fa              endbr64
    0x5555555551cd  <+    4>        55                       push   %rbp
    0x5555555551ce  <+    5>        48 89 e5                 mov    %rsp,%rbp
    0x5555555551d1  <+    8>        89 f8                    mov    %edi,%eax
    0x5555555551d3  <+   10>        88 45 fc                 mov    %al,-0x4(%rbp)
            9 [1]       if (c >= 'a' && c <= 'z') c += 'A' - 'a';
    0x5555555551d6  <+   13>        80 7d fc 60              cmpb   $0x60,-0x4(%rbp)
    0x5555555551da  <+   17>        7e 10                    jle    0x5555555551ec <aa2i(char)+35>
    0x5555555551dc  <+   19>        80 7d fc 7a              cmpb   $0x7a,-0x4(%rbp)
    0x5555555551e0  <+   23>        7f 0a                    jg     0x5555555551ec <aa2i(char)+35>
    0x5555555551e2  <+   25>        0f b6 45 fc              movzbl -0x4(%rbp),%eax
    0x5555555551e6  <+   29>        83 e8 20                 sub    $0x20,%eax
    0x5555555551e9  <+   32>        88 45 fc                 mov    %al,-0x4(%rbp)
            10 [1]      switch (c) {
    0x5555555551ec  <+   35>        0f be 45 fc              movsbl -0x4(%rbp),%eax
    0x5555555551f0  <+   39>        83 e8 2d                 sub    $0x2d,%eax
    0x5555555551f3  <+   42>        83 f8 32                 cmp    $0x32,%eax
    0x5555555551f6  <+   45>        0f 87 18 01 00 00        ja     0x555555555314 <aa2i(char)+331>
    0x5555555551fc  <+   51>        89 c0                    mov    %eax,%eax
    0x5555555551fe  <+   53>        48 8d 14 85 00 00 00 00  lea    0x0(,%rax,4),%rdx
    0x555555555206  <+   61>        48 8d 05 07 0e 00 00     lea    0xe07(%rip),%rax        # 0x555555556014
    0x55555555520d  <+   68>        8b 04 02                 mov    (%rdx,%rax,1),%eax
    0x555555555210  <+   71>        48 98                    cltq
    0x555555555212  <+   73>        48 8d 15 fb 0d 00 00     lea    0xdfb(%rip),%rdx        # 0x555555556014
    0x555555555219  <+   80>        48 01 d0                 add    %rdx,%rax
    0x55555555521c  <+   83>        3e ff e0                 notrack jmpq *%rax
            12 [1]              return 0;
    0x55555555521f  <+   86>        b8 00 00 00 00           mov    $0x0,%eax
    0x555555555224  <+   91>        e9 03 01 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            14 [1]              return 1;
    0x555555555229  <+   96>        b8 01 00 00 00           mov    $0x1,%eax
    0x55555555522e  <+  101>        e9 f9 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            16 [1]              return 2;
    0x555555555233  <+  106>        b8 02 00 00 00           mov    $0x2,%eax
    0x555555555238  <+  111>        e9 ef 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            18 [1]              return 3;
    0x55555555523d  <+  116>        b8 03 00 00 00           mov    $0x3,%eax
    0x555555555242  <+  121>        e9 e5 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            20 [1]              return 4;
    0x555555555247  <+  126>        b8 04 00 00 00           mov    $0x4,%eax
    0x55555555524c  <+  131>        e9 db 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            22 [1]              return 5;
    0x555555555251  <+  136>        b8 05 00 00 00           mov    $0x5,%eax
    0x555555555256  <+  141>        e9 d1 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            24 [1]              return 6;
    0x55555555525b  <+  146>        b8 06 00 00 00           mov    $0x6,%eax
    0x555555555260  <+  151>        e9 c7 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            26 [1]              return 7;
    0x555555555265  <+  156>        b8 07 00 00 00           mov    $0x7,%eax
    0x55555555526a  <+  161>        e9 bd 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            28 [1]              return 8;
    0x55555555526f  <+  166>        b8 08 00 00 00           mov    $0x8,%eax
    0x555555555274  <+  171>        e9 b3 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            30 [1]              return 9;
    0x555555555279  <+  176>        b8 09 00 00 00           mov    $0x9,%eax
    0x55555555527e  <+  181>        e9 a9 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            32 [1]              return 10;
    0x555555555283  <+  186>        b8 0a 00 00 00           mov    $0xa,%eax
    0x555555555288  <+  191>        e9 9f 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            34 [1]              return 11;
    0x55555555528d  <+  196>        b8 0b 00 00 00           mov    $0xb,%eax
    0x555555555292  <+  201>        e9 95 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            36 [1]              return 12;
    0x555555555297  <+  206>        b8 0c 00 00 00           mov    $0xc,%eax
    0x55555555529c  <+  211>        e9 8b 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            38 [1]              return 13;
    0x5555555552a1  <+  216>        b8 0d 00 00 00           mov    $0xd,%eax
    0x5555555552a6  <+  221>        e9 81 00 00 00           jmpq   0x55555555532c <aa2i(char)+355>
            40 [1]              return 14;
    0x5555555552ab  <+  226>        b8 0e 00 00 00           mov    $0xe,%eax
    0x5555555552b0  <+  231>        eb 7a                    jmp    0x55555555532c <aa2i(char)+355>
            42 [1]              return 15;
    0x5555555552b2  <+  233>        b8 0f 00 00 00           mov    $0xf,%eax
    0x5555555552b7  <+  238>        eb 73                    jmp    0x55555555532c <aa2i(char)+355>
            44 [1]              return 16;
    0x5555555552b9  <+  240>        b8 10 00 00 00           mov    $0x10,%eax
    0x5555555552be  <+  245>        eb 6c                    jmp    0x55555555532c <aa2i(char)+355>
            46 [1]              return 17;
    0x5555555552c0  <+  247>        b8 11 00 00 00           mov    $0x11,%eax
    0x5555555552c5  <+  252>        eb 65                    jmp    0x55555555532c <aa2i(char)+355>
            48 [1]              return 18;
    0x5555555552c7  <+  254>        b8 12 00 00 00           mov    $0x12,%eax
    0x5555555552cc  <+  259>        eb 5e                    jmp    0x55555555532c <aa2i(char)+355>
            50 [1]              return 19;
    0x5555555552ce  <+  261>        b8 13 00 00 00           mov    $0x13,%eax
    0x5555555552d3  <+  266>        eb 57                    jmp    0x55555555532c <aa2i(char)+355>
            52 [1]              return ANY;
    0x5555555552d5  <+  268>        b8 14 00 00 00           mov    $0x14,%eax
    0x5555555552da  <+  273>        eb 50                    jmp    0x55555555532c <aa2i(char)+355>
            54 [1]              return ANY;
    0x5555555552dc  <+  275>        b8 14 00 00 00           mov    $0x14,%eax
    0x5555555552e1  <+  280>        eb 49                    jmp    0x55555555532c <aa2i(char)+355>
            56 [1]              return ANY;
    0x5555555552e3  <+  282>        b8 14 00 00 00           mov    $0x14,%eax
    0x5555555552e8  <+  287>        eb 42                    jmp    0x55555555532c <aa2i(char)+355>
            58 [1]              return 4;  //Selenocystein -> Cystein
    0x5555555552ea  <+  289>        b8 04 00 00 00           mov    $0x4,%eax
    0x5555555552ef  <+  294>        eb 3b                    jmp    0x55555555532c <aa2i(char)+355>
            60 [1]              return 3;  //D (or N)
    0x5555555552f1  <+  296>        b8 03 00 00 00           mov    $0x3,%eax
    0x5555555552f6  <+  301>        eb 34                    jmp    0x55555555532c <aa2i(char)+355>
            62 [1]              return 6;  //E (or Q)
    0x5555555552f8  <+  303>        b8 06 00 00 00           mov    $0x6,%eax
    0x5555555552fd  <+  308>        eb 2d                    jmp    0x55555555532c <aa2i(char)+355>
            64 [1]              return GAP;
    0x5555555552ff  <+  310>        b8 15 00 00 00           mov    $0x15,%eax
    0x555555555304  <+  315>        eb 26                    jmp    0x55555555532c <aa2i(char)+355>
            66 [1]              return GAP;
    0x555555555306  <+  317>        b8 15 00 00 00           mov    $0x15,%eax
    0x55555555530b  <+  322>        eb 1f                    jmp    0x55555555532c <aa2i(char)+355>
            68 [1]              return GAP;
    0x55555555530d  <+  324>        b8 15 00 00 00           mov    $0x15,%eax
    0x555555555312  <+  329>        eb 18                    jmp    0x55555555532c <aa2i(char)+355>
            70 [1]      if (c >= 0 && c <= 32) return -1; // white space and control characters
    0x555555555314  <+  331>        80 7d fc 00              cmpb   $0x0,-0x4(%rbp)
    0x555555555318  <+  335>        78 0d                    js     0x555555555327 <aa2i(char)+350>
    0x55555555531a  <+  337>        80 7d fc 20              cmpb   $0x20,-0x4(%rbp)
    0x55555555531e  <+  341>        7f 07                    jg     0x555555555327 <aa2i(char)+350>
    0x555555555320  <+  343>        b8 ff ff ff ff           mov    $0xffffffff,%eax
    0x555555555325  <+  348>        eb 05                    jmp    0x55555555532c <aa2i(char)+355>
            71 [1]      return -2;
    0x555555555327  <+  350>        b8 fe ff ff ff           mov    $0xfffffffe,%eax
            72 [1]  }
    0x55555555532c  <+  355>        5d                       pop    %rbp
    0x55555555532d  <+  356>        c3                       retq

Solution

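  • This is so-called RIP-relative addressing (described in the Intel SDM, Volume 2, Section 2.2.1.6). The displacement (0xe07 or 0xdfb) is not padding: it is added to the address of the next instruction to form the effective address. Here both instructions compute the address of the jump table, which lies at 0x555555556014, past the end of the function's code. The code loads the 32-bit table entry selected by `c - 0x2d`, sign-extends it (`cltq`), adds the table's base address to it, and jumps to the result.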

    In the first case the next instruction is at 0x55555555520d, so the result is 0x55555555520d + 0xe07 = 0x555555556014. In the second case the next instruction is at 0x555555555219, so the result is 0x555555555219 + 0xdfb = 0x555555556014.