I'm trying to implement a simple JIT compiler using LLVM, following along with the tutorial (http://releases.llvm.org/4.0.1/docs/tutorial/BuildingAJIT1.html), and I'm running into a segfault. I've rewritten my code in the form a minimal (albeit still kinda long) example. The example loops through the integers 0 through 9 and for each one attempts to compile a function that prints that integer, add it to a module, execute the function, and then remove the module from the JIT. This is to simulate an interactive session in which a user inputs commands such as print 0
, print 1
, etc.
#include <array>
#include <cstdint>
#include <iostream>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/JITSymbol.h>
#include <llvm/ExecutionEngine/Orc/CompileUtils.h>
#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
#include <llvm/ExecutionEngine/Orc/LambdaResolver.h>
#include <llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/ExecutionEngine/RuntimeDyld.h>
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/GlobalValue.h>
#include <llvm/IR/GlobalVariable.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Mangler.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Type.h>
#include <llvm/IR/Value.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Support/DynamicLibrary.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Scalar/GVN.h>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
int main() {
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
llvm::InitializeNativeTargetAsmParser();
auto machine = llvm::EngineBuilder().selectTarget();
llvm::orc::ObjectLinkingLayer<> linking_layer;
llvm::orc::IRCompileLayer<llvm::orc::ObjectLinkingLayer<>> compile_layer(
linking_layer, llvm::orc::SimpleCompiler(*machine)
);
llvm::LLVMContext context;
llvm::IRBuilder<> builder(context);
auto layout = machine->createDataLayout();
auto module = std::make_unique<llvm::Module>("module", context);
auto manager = std::make_unique<llvm::legacy::FunctionPassManager>(
module.get()
);
for (
auto p : {
llvm::createInstructionCombiningPass(),
llvm::createReassociatePass(), llvm::createGVNPass(),
llvm::createCFGSimplificationPass()
}
) manager->add(p);
module->setDataLayout(layout);
llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
auto index = llvm::ConstantInt::get(context, llvm::APInt(8, 0));
std::vector<llvm::Constant*> indices = {index, index};
std::string func_name = "func";
for (auto i = 0; i < 10; ++i) {
auto format_str = new llvm::GlobalVariable(
*module, llvm::ArrayType::get(llvm::Type::getInt8Ty(context), 4),
true, llvm::GlobalValue::PrivateLinkage,
llvm::ConstantDataArray::getString(context, "%i\n"), "format_str"
);
format_str->setAlignment(1);
auto function = llvm::Function::Create(
llvm::FunctionType::get(
llvm::Type::getVoidTy(context), std::vector<llvm::Type*>{},
false
), llvm::Function::ExternalLinkage, func_name, module.get()
);
builder.SetInsertPoint(
llvm::BasicBlock::Create(context, "entry", function)
);
builder.CreateCall(
module->getOrInsertFunction(
"printf", llvm::FunctionType::get(
llvm::IntegerType::getInt32Ty(context),
llvm::PointerType::get(llvm::Type::getInt8Ty(context), 0),
true
)
), std::vector<llvm::Value*>{
llvm::ConstantExpr::getGetElementPtr(
nullptr, format_str, indices
), llvm::ConstantInt::get(context, llvm::APInt(32, i))
}, "call"
);
builder.CreateRetVoid();
std::string message;
llvm::raw_string_ostream message_stream(message);
if (llvm::verifyFunction(*function, &message_stream))
throw std::runtime_error(message_stream.str());
auto handle = compile_layer.addModuleSet(
std::array<std::unique_ptr<llvm::Module>, 1>{std::move(module)},
std::make_unique<llvm::SectionMemoryManager>(),
llvm::orc::createLambdaResolver(
[&](const std::string& name) {
auto symbol = compile_layer.findSymbol(name, false);
return symbol ? symbol : llvm::JITSymbol(nullptr);
}, [](const std::string& name) {
auto address = llvm::RTDyldMemoryManager::
getSymbolAddressInProcess(name);
return address ? llvm::JITSymbol(
address, llvm::JITSymbolFlags::Exported
) : llvm::JITSymbol(nullptr);
}
)
);
std::string mangled_name;
llvm::raw_string_ostream mangled_name_stream(mangled_name);
llvm::Mangler::getNameWithPrefix(
mangled_name_stream, func_name, layout
);
(
reinterpret_cast <void(*)()> (
static_cast <intptr_t> (
compile_layer.findSymbol(
mangled_name_stream.str(), true
).getAddress()
)
)
)();
compile_layer.removeModuleSet(handle);
}
}
The expected output is as follows.
0
1
2
3
4
5
6
7
8
9
Instead I get this.
0
Segmentation fault (core dumped)
According to GDB, the segfault is occuring during the call to llvm::GlobalVariable::GlobalVariable
. Here's the backtrace.
#0 0x00007ffcdb8b6541 in llvm::GlobalVariable::GlobalVariable(llvm::Module&, llvm::Type*, bool, llvm::GlobalValue::LinkageTypes, llvm::Constant*, llvm::Twine const&, llvm::GlobalVariable*, llvm::GlobalValue::ThreadLocalMode, unsigned int, bool) () from /usr/lib/libLLVM-4.0.so
#1 0x000000010000698a in main () at main.cc:83
I'm using LLVM version 4.0.1 and GCC version 7.1.1 and compiling with the following command.
g++ -std=c++17 main.cc -o main -O0 -Wall -Wextra -Wno-unused-function \
-Wno-unused-value -Wno-unused-parameter -Werror -ggdb \
`llvm-config --system-libs --libs core`
I'm hoping some LLVM veteran can find my mistake. Thanks, guys!
module
is initialized before the for
loop:
auto module = std::make_unique<llvm::Module>("module", context);
then in the for
loop:
for(...)
{
auto format_str = new llvm::GlobalVariable(*module, ...);
^~~~~~~
...
std::array<std::unique_ptr<llvm::Module>, 1>{std::move(module)},
^~~~~~~~~~~~~~~~~
}
At first iteration you access the object owned by module
(ok) and then move from it. This will transfer the ownership of the managed object away from module
.
At the second iteration you access the object managed by module
-> crash (because it doesn't have a managed object anymore)