Search code examples
c++ segmentation-fault llvm jit llvm-c++-api

Why is my LLVM JIT implementation segfaulting?


I'm trying to implement a simple JIT compiler using LLVM, following along with the tutorial (http://releases.llvm.org/4.0.1/docs/tutorial/BuildingAJIT1.html), and I'm running into a segfault. I've rewritten my code in the form of a minimal (albeit still kinda long) example. The example loops through the integers 0 through 9 and, for each one, attempts to compile a function that prints that integer, add it to a module, execute the function, and then remove the module from the JIT. This is to simulate an interactive session in which a user inputs commands such as print 0, print 1, etc.

#include <array>
#include <cstdint>
#include <iostream>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/JITSymbol.h>
#include <llvm/ExecutionEngine/Orc/CompileUtils.h>
#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
#include <llvm/ExecutionEngine/Orc/LambdaResolver.h>
#include <llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/ExecutionEngine/RuntimeDyld.h>
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/GlobalValue.h>
#include <llvm/IR/GlobalVariable.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Mangler.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Type.h>
#include <llvm/IR/Value.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Support/DynamicLibrary.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Scalar/GVN.h>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

int main() {

    llvm::InitializeNativeTarget();
    llvm::InitializeNativeTargetAsmPrinter();
    llvm::InitializeNativeTargetAsmParser();

    auto machine = llvm::EngineBuilder().selectTarget();

    llvm::orc::ObjectLinkingLayer<> linking_layer;
    llvm::orc::IRCompileLayer<llvm::orc::ObjectLinkingLayer<>> compile_layer(
        linking_layer, llvm::orc::SimpleCompiler(*machine)
    );

    llvm::LLVMContext context;
    llvm::IRBuilder<> builder(context);
    auto layout = machine->createDataLayout();

    auto module  = std::make_unique<llvm::Module>("module", context);

    auto manager = std::make_unique<llvm::legacy::FunctionPassManager>(
        module.get()
    );
    for (
        auto p : {
            llvm::createInstructionCombiningPass(),
            llvm::createReassociatePass(), llvm::createGVNPass(),
            llvm::createCFGSimplificationPass()
        }
    ) manager->add(p);

    module->setDataLayout(layout);

    llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);

    auto index = llvm::ConstantInt::get(context, llvm::APInt(8, 0));
    std::vector<llvm::Constant*> indices = {index, index};

    std::string func_name = "func";

    for (auto i = 0; i < 10; ++i) {

        auto format_str = new llvm::GlobalVariable(
            *module, llvm::ArrayType::get(llvm::Type::getInt8Ty(context), 4),
            true, llvm::GlobalValue::PrivateLinkage,
            llvm::ConstantDataArray::getString(context, "%i\n"), "format_str"
        );
        format_str->setAlignment(1);

        auto function = llvm::Function::Create(
            llvm::FunctionType::get(
                llvm::Type::getVoidTy(context), std::vector<llvm::Type*>{},
                false
            ), llvm::Function::ExternalLinkage, func_name, module.get()
        );

        builder.SetInsertPoint(
            llvm::BasicBlock::Create(context, "entry", function)
        );

        builder.CreateCall(
            module->getOrInsertFunction(
                "printf", llvm::FunctionType::get(
                    llvm::IntegerType::getInt32Ty(context),
                    llvm::PointerType::get(llvm::Type::getInt8Ty(context), 0),
                    true
                )
            ), std::vector<llvm::Value*>{
                llvm::ConstantExpr::getGetElementPtr(
                    nullptr, format_str, indices
                ), llvm::ConstantInt::get(context, llvm::APInt(32, i))
            }, "call"
        );

        builder.CreateRetVoid();

        std::string message;
        llvm::raw_string_ostream message_stream(message);
        if (llvm::verifyFunction(*function, &message_stream))
            throw std::runtime_error(message_stream.str());

        auto handle = compile_layer.addModuleSet(
            std::array<std::unique_ptr<llvm::Module>, 1>{std::move(module)},
            std::make_unique<llvm::SectionMemoryManager>(),
            llvm::orc::createLambdaResolver(
                [&](const std::string& name) {
                    auto symbol = compile_layer.findSymbol(name, false);
                    return symbol ? symbol : llvm::JITSymbol(nullptr);
                }, [](const std::string& name) {
                    auto address = llvm::RTDyldMemoryManager::
                        getSymbolAddressInProcess(name);
                    return address ? llvm::JITSymbol(
                        address, llvm::JITSymbolFlags::Exported
                    ) : llvm::JITSymbol(nullptr);
                }
            )
        );

        std::string mangled_name;
        llvm::raw_string_ostream mangled_name_stream(mangled_name);
        llvm::Mangler::getNameWithPrefix(
            mangled_name_stream, func_name, layout
        );

        (
            reinterpret_cast <void(*)()> (
                static_cast <intptr_t> (
                    compile_layer.findSymbol(
                        mangled_name_stream.str(), true
                    ).getAddress()
                )
            )
        )();

        compile_layer.removeModuleSet(handle);

    }

}

The expected output is as follows.

0
1
2
3
4
5
6
7
8
9

Instead I get this.

0
Segmentation fault (core dumped)

According to GDB, the segfault is occurring during the call to llvm::GlobalVariable::GlobalVariable. Here's the backtrace.

#0  0x00007ffcdb8b6541 in llvm::GlobalVariable::GlobalVariable(llvm::Module&, llvm::Type*, bool, llvm::GlobalValue::LinkageTypes, llvm::Constant*, llvm::Twine const&, llvm::GlobalVariable*, llvm::GlobalValue::ThreadLocalMode, unsigned int, bool) () from /usr/lib/libLLVM-4.0.so
#1  0x000000010000698a in main () at main.cc:83

I'm using LLVM version 4.0.1 and GCC version 7.1.1 and compiling with the following command.

g++ -std=c++17 main.cc -o main -O0 -Wall -Wextra -Wno-unused-function \
    -Wno-unused-value -Wno-unused-parameter -Werror -ggdb             \
    `llvm-config --system-libs --libs core`

I'm hoping some LLVM veteran can find my mistake. Thanks, guys!


Solution

  • module is initialized before the for loop:

     auto module  = std::make_unique<llvm::Module>("module", context);
    

    then in the for loop:

    for(...)
    {
        auto format_str = new llvm::GlobalVariable(*module, ...);
                                                   ^~~~~~~
        ...
        std::array<std::unique_ptr<llvm::Module>, 1>{std::move(module)},
                                                     ^~~~~~~~~~~~~~~~~
    }
    

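    On the first iteration you access the object owned by module (ok) and then move from it. This transfers ownership of the managed object away from module, leaving module holding a null pointer.

    On the second iteration you dereference module again -> crash, because it no longer has a managed object and *module dereferences null. The fix is to create a new llvm::Module at the top of each loop iteration rather than once before the loop.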