Search code examples
c++ compiler-construction llvm

LLVM pass for generating object code is giving segmentation fault


I'm writing a C-subset compiler using the LLVM C++ API, and I use a pass to generate object code as well. However, the object code generation step segfaults whenever the source file contains an if-else block. The code I'm using is below.

Compiler code for generating the if-else block

// Emits LLVM IR for an if/else statement: a conditional branch on the
// generated condition into an "if" block or an "else" block, each of which
// is then branched to a shared "ifelsemerge" block where the builder is left
// positioned. Returns nullptr on every path shown here (both when the
// condition fails to generate and after the blocks have been emitted).
llvm::Value *Conditional::generateCode(CodeKit &kit) {
    // Generate the condition first; give up if it produced no value.
    auto *cond = condition->generateCode(kit);
    if (cond == nullptr) {
        return nullptr;
    }
    // Function that owns the block the builder is currently inserting into.
    llvm::Function *func = kit.builder.GetInsertBlock()->getParent();
    // Only the "if" block is attached to func at creation time; the "else"
    // and merge blocks are created detached and appended to func further down.
    auto *ifBlock = llvm::BasicBlock::Create(kit.context, "if", func);
    auto *elseBlock = llvm::BasicBlock::Create(kit.context, "else");
    auto *mergeBlock = llvm::BasicBlock::Create(kit.context, "ifelsemerge");

    kit.builder.CreateCondBr(cond, ifBlock, elseBlock);
    kit.builder.SetInsertPoint(ifBlock);
    if (ifstmt != nullptr) {
        // The branch body gets its own symbol-table scope.
        kit.symbolTable.enterScope();
        ifstmt->generateCode(kit);
        kit.symbolTable.exitScope();
    }
    // NOTE(review): this branch is emitted unconditionally, so when the body
    // already ended the current block with a terminator (e.g. a return), a
    // second terminator is appended after it.
    kit.builder.CreateBr(mergeBlock);
    // Re-read the builder's current block; the value is not used again in the
    // code shown.
    ifBlock = kit.builder.GetInsertBlock();

    // Attach the "else" block to the function and emit its body the same way.
    func->getBasicBlockList().push_back(elseBlock);
    kit.builder.SetInsertPoint(elseBlock);
    if (elsestmt != nullptr) {
        kit.symbolTable.enterScope();
        elsestmt->generateCode(kit);
        kit.symbolTable.exitScope();
    }
    // NOTE(review): same unconditional branch as for the "if" block above.
    kit.builder.CreateBr(mergeBlock);
    elseBlock = kit.builder.GetInsertBlock();

    // Attach the merge block and leave the builder positioned in it. Nothing
    // is inserted into it here, so it stays empty unless later code adds to it.
    func->getBasicBlockList().push_back(mergeBlock);
    kit.builder.SetInsertPoint(mergeBlock);
    return nullptr;

Code for generating object code from the LLVM module.

// Prints the module's textual IR to stdout, writes it as bitcode to
// "output.bc", and then compiles it to a native object file "output.o" for
// the host's default target triple.
//
// Every failure (an output file cannot be opened, the target is not
// registered, no target machine can be created, or the target cannot emit
// object files) is reported on llvm::errs() and aborts the remaining steps,
// instead of dereferencing a null pointer or writing to a broken stream.
//
// NOTE: the module is not verified here. Malformed IR (for example a basic
// block containing two terminators) can crash inside pass.run().
void emitCode(CodeKit &kit) {
    kit.module.print(llvm::outs(), nullptr);

    std::error_code ec;

    const auto irFile = "output.bc";
    llvm::raw_fd_ostream irFileStream(irFile, ec, llvm::sys::fs::OF_None);
    if (ec) {
        llvm::errs() << "Could not open file " << irFile << ": "
                     << ec.message() << "\n";
        return;
    }
    llvm::WriteBitcodeToFile(kit.module, irFileStream);
    irFileStream.flush();

    // Targets must be registered before the registry lookup below can succeed.
    llvm::InitializeAllTargetInfos();
    llvm::InitializeAllTargets();
    llvm::InitializeAllTargetMCs();
    llvm::InitializeAllAsmParsers();
    llvm::InitializeAllAsmPrinters();

    const auto targetTriple = llvm::sys::getDefaultTargetTriple();
    std::string error;
    const auto *target = llvm::TargetRegistry::lookupTarget(targetTriple, error);
    if (target == nullptr) {
        // lookupTarget fills `error` with the reason when it returns null.
        llvm::errs() << "Could not find target for " << targetTriple << ": "
                     << error << "\n";
        return;
    }

    const auto cpu = "generic";
    const auto features = "";
    llvm::TargetOptions opt;
    const auto rm = llvm::Optional<llvm::Reloc::Model>();
    // createTargetMachine hands ownership to the caller; hold it in a
    // unique_ptr so it is released on every return path.
    std::unique_ptr<llvm::TargetMachine> targetMachine(
        target->createTargetMachine(targetTriple, cpu, features, opt, rm));
    if (targetMachine == nullptr) {
        llvm::errs() << "Could not create a target machine for "
                     << targetTriple << "\n";
        return;
    }

    kit.module.setDataLayout(targetMachine->createDataLayout());
    kit.module.setTargetTriple(targetTriple);

    const auto objectFile = "output.o";
    llvm::raw_fd_ostream objectFileStream(objectFile, ec,
                                          llvm::sys::fs::OF_None);
    if (ec) {
        llvm::errs() << "Could not open file " << objectFile << ": "
                     << ec.message() << "\n";
        return;
    }

    llvm::legacy::PassManager pass;
    const auto fileType = llvm::CGFT_ObjectFile;
    // addPassesToEmitFile returns true when the target cannot emit this
    // file type.
    if (targetMachine->addPassesToEmitFile(pass, objectFileStream, nullptr,
                                           fileType)) {
        llvm::errs() << "Target machine cannot emit an object file\n";
        return;
    }
    pass.run(kit.module);
    objectFileStream.flush();
}

Source file on which I'm testing at the moment

/* Recursive factorial: returns 1 when n <= 0, otherwise n * factorial(n - 1).
   Both arms of the if/else end in a return statement. */
int factorial(int n) {
    if (n <= 0)
        return 1;
    else
        return n * factorial(n - 1);
}

Generated IR code

; ModuleID = 'bootleg c compiler'
source_filename = "bootleg c compiler"

; IR emitted by the compiler for factorial(int n).
define i32 @factorial(i32 %n) {
entry:
  ; The parameter is spilled to a stack slot and reloaded before the compare.
  %n1 = alloca i32
  store i32 %n, i32* %n1
  %n2 = load i32, i32* %n1
  %"Less or Equal" = icmp sle i32 %n2, 0
  br i1 %"Less or Equal", label %if, label %else

if:                                               ; preds = %entry
  ret i32 1
  ; NOTE: second terminator in this block; the 'ret' above already ended it.
  br label %ifelsemerge

else:                                             ; preds = %entry
  %n3 = load i32, i32* %n1
  %n4 = load i32, i32* %n1
  %Subtract = sub i32 %n4, 1
  %call = call i32 @factorial(i32 %Subtract)
  %Multiply = mul i32 %n3, %call
  ret i32 %Multiply
  ; NOTE: second terminator in this block; the 'ret' above already ended it.
  br label %ifelsemerge

; NOTE: this block contains no instructions and therefore no terminator.
ifelsemerge:                                      ; preds = %else, %if
}

GDB stack trace of the segfault

#0  0x00007ffff4386bc0 in llvm::Instruction::getNumSuccessors() const () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#1  0x00007ffff4fd643c in llvm::BranchProbabilityInfo::computePostDominatedByUnreachable(llvm::Function const&, llvm::PostDominatorTree*) ()
   from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#2  0x00007ffff4fdb175 in llvm::BranchProbabilityInfo::calculate(llvm::Function const&, llvm::LoopInfo const&, llvm::TargetLibraryInfo const*) ()
   from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#3  0x00007ffff4fdb9c4 in llvm::BranchProbabilityInfoWrapperPass::runOnFunction(llvm::Function&) () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#4  0x00007ffff43a7d76 in llvm::FPPassManager::runOnFunction(llvm::Function&) () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#5  0x00007ffff43a7ff3 in llvm::FPPassManager::runOnModule(llvm::Module&) () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#6  0x00007ffff43a84a0 in llvm::legacy::PassManagerImpl::run(llvm::Module&) () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#7  0x0000555555565b73 in emitCode (kit=...) at c.ast.cpp:88
#8  0x0000555555579957 in generateCode (ast=...) at cc.cpp:25
#9  0x0000555555579a83 in main (argc=2, argv=0x7fffffffdfe8) at cc.cpp:41

Solution

  •   ret i32 1
      br label %ifelsemerge
    

    is invalid IR: a BasicBlock must contain exactly one terminator instruction (e.g. br or ret), and it must be the last instruction in the block. You can avoid this by checking for a terminator instruction before inserting the branch:

        // Check the block the builder is currently positioned in rather than
        // the original ifBlock: generating a nested statement (such as another
        // conditional) may have moved the insert point to a different block.
        // getTerminator() returns nullptr when the block is empty or does not
        // end in a terminator instruction.
        if (kit.builder.GetInsertBlock()->getTerminator() == nullptr) {
            kit.builder.CreateBr(mergeBlock);
        }
        // ...
        // Same check for the else branch, again on the builder's current block.
        if (kit.builder.GetInsertBlock()->getTerminator() == nullptr) {
            kit.builder.CreateBr(mergeBlock);
        }
    

    Note that empty blocks are also invalid, so if both branches of the conditional exit the function, you probably shouldn't generate mergeBlock at all. AFAIK, if nothing branches to an empty block, it's fine to leave it in.


    Usually you can catch these kinds of bugs in your front-end by adding a verification pass or using llvm::verifyFunction or llvm::verifyModule. Also running llc with the generated IR code will give a more detailed error message.