How does Clang compute indices of GEP instructions in virtual tables when compiling virtual inheritance classes in C++?

I'm trying to understand the way Clang compiles virtual inheritance classes in C++. Here is my code:

// test.cpp
#include <stdio.h>

int global_obj;
int *global_ptr = &global_obj;

// Polymorphic base class: its only member is the virtual function f,
// which returns the int that i points to.
class A {
  public:
    virtual int f(int *i) { return *i; }
};

// B inherits virtually from A and declares no members of its own.
class B: virtual public A {                  // A is a virtual base class of B

};

// Allocates a B on the heap, calls the inherited virtual function f through
// the B pointer with the address of global_obj, and returns f's result.
// argc and argv are unused, and the B object is never deleted.
int main(int argc, char **argv)
{
  int *ptr = &global_obj;

  B *pb = new B;
  int a = pb->f(ptr);    // virtual call made through a pointer to the derived class

  return a;
}

My compilation command is:

clang -O0 -Xclang -disable-llvm-passes -Xclang -disable-O0-optnone -c -emit-llvm test.cpp
opt -mem2reg test.bc

Below is the resulting LLVM IR (the textual form of the bitcode), where _ZN1BC1Ev and _ZN1AC2Ev are the compiled constructors of classes B and A, respectively.

%class.B = type { %class.A }
%class.A = type { i32 (...)** }

@global_obj = global i32 0, align 4
@global_ptr = global i32* @global_obj, align 8
@_ZTV1B = linkonce_odr unnamed_addr constant { [5 x i8*] } { [5 x i8*] [i8* null, i8* null, i8* null, i8* null, i8* bitcast (i32 (%class.A*, i32*)* @_ZN1A1fEPi to i8*)] }, align 8
@_ZTT1B = linkonce_odr unnamed_addr constant [2 x i8*] [i8* bitcast (i8** getelementptr inbounds ({ [5 x i8*] }, { [5 x i8*] }* @_ZTV1B, i32 0, inrange i32 0, i32 4) to i8*), i8* bitcast (i8** getelementptr inbounds ({ [5 x i8*] }, { [5 x i8*] }* @_ZTV1B, i32 0, inrange i32 0, i32 4) to i8*)], align 8
@_ZTV1A = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%class.A*, i32*)* @_ZN1A1fEPi to i8*)] }, align 8

; Function Attrs: noinline norecurse ssp uwtable
define i32 @main(i32, i8**) #0 {
  %3 = call i8* @_Znwm(i64 8) #3
  %4 = bitcast i8* %3 to %class.B*
  call void @_ZN1BC1Ev(%class.B* %4) #4
  %5 = bitcast %class.B* %4 to i8**
  %6 = load i8*, i8** %5, align 8
  %7 = getelementptr i8, i8* %6, i64 -32
  %8 = bitcast i8* %7 to i64*
  %9 = load i64, i64* %8, align 8
  %10 = bitcast %class.B* %4 to i8*
  %11 = getelementptr inbounds i8, i8* %10, i64 %9
  %12 = bitcast i8* %11 to %class.A*
  %13 = bitcast %class.A* %12 to i32 (%class.A*, i32*)***
  %14 = load i32 (%class.A*, i32*)**, i32 (%class.A*, i32*)*** %13, align 8
  %15 = getelementptr inbounds i32 (%class.A*, i32*)*, i32 (%class.A*, i32*)** %14, i64 0
  %16 = load i32 (%class.A*, i32*)*, i32 (%class.A*, i32*)** %15, align 8
  %17 = call i32 %16(%class.A* %12, i32* @global_obj)
  ret i32 %17
}

; Function Attrs: nobuiltin
declare noalias i8* @_Znwm(i64) #1

; Function Attrs: noinline nounwind ssp uwtable
define linkonce_odr void @_ZN1BC1Ev(%class.B*) unnamed_addr #2 align 2 {
  %2 = bitcast %class.B* %0 to %class.A*
  call void @_ZN1AC2Ev(%class.A* %2) #4
  %3 = bitcast %class.B* %0 to i32 (...)***
  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*] }, { [5 x i8*] }* @_ZTV1B, i32 0, inrange i32 0, i32 4) to i32 (...)**), i32 (...)*** %3, align 8
  %4 = bitcast %class.B* %0 to i32 (...)***
  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*] }, { [5 x i8*] }* @_ZTV1B, i32 0, inrange i32 0, i32 4) to i32 (...)**), i32 (...)*** %4, align 8
  ret void
}

; Function Attrs: noinline nounwind ssp uwtable
define linkonce_odr void @_ZN1AC2Ev(%class.A*) unnamed_addr #2 align 2 {
  %2 = bitcast %class.A* %0 to i32 (...)***
  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %2, align 8
  ret void
}

; Function Attrs: noinline nounwind ssp uwtable
define linkonce_odr i32 @_ZN1A1fEPi(%class.A*, i32*) unnamed_addr #2 align 2 {
  %3 = load i32, i32* %1, align 4
  ret i32 %3
}

I understand that Clang emits virtual tables (@_ZTV1A and @_ZTV1B) for the classes A and B.

But when delving deeper into the compilation of the main function, I don't really understand why Clang introduces a GEP of index -32 in the main function.

Also, what is the value of the index %9 in the next GEP, and why can't it be determined at compile time?

  %7 = getelementptr i8, i8* %6, i64 -32
  %8 = bitcast i8* %7 to i64*
  %9 = load i64, i64* %8, align 8
  %10 = bitcast %class.B* %4 to i8*
  %11 = getelementptr inbounds i8, i8* %10, i64 %9

Does anyone know why Clang does so?

Thank you very much for reading my very long question!

Solution

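The location of a virtually inherited base class cannot, in general, be determined at compile time: a B* may actually point to the B subobject of a more-derived class that places the virtual base A at a different offset. The lookup is therefore deferred until run time. The virtual base offset (vbase offset) is stored in the vtable, so your code first loads the vtable pointer: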

  %5 = bitcast %class.B* %4 to i8**
  %6 = load i8*, i8** %5, align 8

and then loads the vbase offset from its fixed location, 32 bytes before the address the vtable pointer refers to (vptr - 32):

  %7 = getelementptr i8, i8* %6, i64 -32
  %8 = bitcast i8* %7 to i64*
  %9 = load i64, i64* %8, align 8

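This offset is added to the object's address to obtain a pointer to the A base-class subobject (in this example the stored offset is 0, the first entry of @_ZTV1B, because A sits at the very start of B):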

  %10 = bitcast %class.B* %4 to i8*
  %11 = getelementptr inbounds i8, i8* %10, i64 %9
  %12 = bitcast i8* %11 to %class.A*

and then loads the pointer to the virtual method from the base class's vtable:

  %13 = bitcast %class.A* %12 to i32 (%class.A*, i32*)***
  %14 = load i32 (%class.A*, i32*)**, i32 (%class.A*, i32*)*** %13, align 8
  %15 = getelementptr inbounds i32 (%class.A*, i32*)*, i32 (%class.A*, i32*)** %14, i64 0
  %16 = load i32 (%class.A*, i32*)*, i32 (%class.A*, i32*)** %15, align 8

and finally calls it:

  %17 = call i32 %16(%class.A* %12, i32* @global_obj)

You can find more details on how vtables are organized in the Itanium C++ ABI specification (but beware that it's not for the faint-hearted).