I'm writing a C-subset compiler using the LLVM C++ API, and using a pass for generating object code as well. But the object code generation step is giving segfault when the source files include an if-else block. Below are the codes I'm using
Compiler code for generating the if-else block
llvm::Value *Conditional::generateCode(CodeKit &kit) {
auto *cond = condition->generateCode(kit);
if (cond == nullptr) {
return nullptr;
}
llvm::Function *func = kit.builder.GetInsertBlock()->getParent();
auto *ifBlock = llvm::BasicBlock::Create(kit.context, "if", func);
auto *elseBlock = llvm::BasicBlock::Create(kit.context, "else");
auto *mergeBlock = llvm::BasicBlock::Create(kit.context, "ifelsemerge");
kit.builder.CreateCondBr(cond, ifBlock, elseBlock);
kit.builder.SetInsertPoint(ifBlock);
if (ifstmt != nullptr) {
kit.symbolTable.enterScope();
ifstmt->generateCode(kit);
kit.symbolTable.exitScope();
}
kit.builder.CreateBr(mergeBlock);
ifBlock = kit.builder.GetInsertBlock();
func->getBasicBlockList().push_back(elseBlock);
kit.builder.SetInsertPoint(elseBlock);
if (elsestmt != nullptr) {
kit.symbolTable.enterScope();
elsestmt->generateCode(kit);
kit.symbolTable.exitScope();
}
kit.builder.CreateBr(mergeBlock);
elseBlock = kit.builder.GetInsertBlock();
func->getBasicBlockList().push_back(mergeBlock);
kit.builder.SetInsertPoint(mergeBlock);
return nullptr;
Code for generating object code from the llvm module.
void emitCode(CodeKit &kit) {
kit.module.print(llvm::outs(), nullptr);
auto irFile = "output.bc";
error_code ec;
llvm::raw_fd_ostream irFileStream(irFile, ec, llvm::sys::fs::F_None);
llvm::WriteBitcodeToFile(kit.module, irFileStream);
irFileStream.flush();
auto targetTriple = llvm::sys::getDefaultTargetTriple();
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargets();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllAsmParsers();
llvm::InitializeAllAsmPrinters();
string error;
auto target = llvm::TargetRegistry::lookupTarget(targetTriple, error);
auto cpu = "generic";
auto features = "";
llvm::TargetOptions opt;
auto rm = llvm::Optional<llvm::Reloc::Model>();
auto targetMachine =
target->createTargetMachine(targetTriple, cpu, features, opt, rm);
kit.module.setDataLayout(targetMachine->createDataLayout());
kit.module.setTargetTriple(targetTriple);
auto objectFile = "output.o";
llvm::raw_fd_ostream objectFileStream(objectFile, ec,
llvm::sys::fs::OF_None);
llvm::legacy::PassManager pass;
auto fileType = llvm::CGFT_ObjectFile;
targetMachine->addPassesToEmitFile(pass, objectFileStream, nullptr,
fileType);
pass.run(kit.module);
objectFileStream.flush();
}
Source file on which I'm testing at the moment
int factorial(int n) {
if (n <= 0)
return 1;
else
return n * factorial(n - 1);
}
Generated IR code
; ModuleID = 'bootleg c compiler'
source_filename = "bootleg c compiler"
define i32 @factorial(i32 %n) {
entry:
%n1 = alloca i32
store i32 %n, i32* %n1
%n2 = load i32, i32* %n1
%"Less or Equal" = icmp sle i32 %n2, 0
br i1 %"Less or Equal", label %if, label %else
if: ; preds = %entry
ret i32 1
br label %ifelsemerge
else: ; preds = %entry
%n3 = load i32, i32* %n1
%n4 = load i32, i32* %n1
%Subtract = sub i32 %n4, 1
%call = call i32 @factorial(i32 %Subtract)
%Multiply = mul i32 %n3, %call
ret i32 %Multiply
br label %ifelsemerge
ifelsemerge: ; preds = %else, %if
}
GDB stack trace of the segfault
#0 0x00007ffff4386bc0 in llvm::Instruction::getNumSuccessors() const () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#1 0x00007ffff4fd643c in llvm::BranchProbabilityInfo::computePostDominatedByUnreachable(llvm::Function const&, llvm::PostDominatorTree*) ()
from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#2 0x00007ffff4fdb175 in llvm::BranchProbabilityInfo::calculate(llvm::Function const&, llvm::LoopInfo const&, llvm::TargetLibraryInfo const*) ()
from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#3 0x00007ffff4fdb9c4 in llvm::BranchProbabilityInfoWrapperPass::runOnFunction(llvm::Function&) () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#4 0x00007ffff43a7d76 in llvm::FPPassManager::runOnFunction(llvm::Function&) () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#5 0x00007ffff43a7ff3 in llvm::FPPassManager::runOnModule(llvm::Module&) () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#6 0x00007ffff43a84a0 in llvm::legacy::PassManagerImpl::run(llvm::Module&) () from /lib/x86_64-linux-gnu/libLLVM-10.so.1
#7 0x0000555555565b73 in emitCode (kit=...) at c.ast.cpp:88
#8 0x0000555555579957 in generateCode (ast=...) at cc.cpp:25
#9 0x0000555555579a83 in main (argc=2, argv=0x7fffffffdfe8) at cc.cpp:41
ret i32 1
br label %ifelsemerge
is invalid IR, you must have exactly one terminator instruction (e.g. br
or ret
) in a BasicBlock
. You can avoid this by checking for a terminator instruction before inserting the branch,
if (ifBlock->size() == 0 || !ifBlock->back().isTerminator()) {
kit.builder.CreateBr(mergeBlock);
}
// ...
if (elseBlock->size() == 0 || !elseBlock->back().isTerminator()) {
kit.builder.CreateBr(mergeBlock);
}
Note that empty blocks are also invalid so in case both branches of the conditional exit the function, you probably shouldn't generate mergeBlock
. AFAIK if you have no branches to an empty block it's fine to leave it in.
Usually you can catch these kinds of bugs in your front-end by adding a verification pass or using llvm::verifyFunction
or llvm::verifyModule
. Also running llc
with the generated IR code will give a more detailed error message.