Search code examples
cllvminstrumentationllvm-ir

LLVM Pass to insert an external function call to LLVM bitcode


I am writing an LLVM pass to instrument a C source program. Before each branch instruction, I want to insert a call to an external function like this:

/* Monitor hook: writes the given integer to standard output as "x = <value>". */
void print(int x)
{
    fprintf(stdout, "x = %d\n", x);
}

I want to link this external function to C source code using llvm-link tool and then instrument the code using opt tool.

The pass that I have implemented is like this:

#include "llvm/Pass.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/IRBuilder.h"
#include <vector>

using namespace llvm;

namespace{
    struct ir_instrumentation : public ModulePass{
    static char ID;
    Function *monitor;

    ir_instrumentation() : ModulePass(ID) {}

    virtual bool runOnModule(Module &M)
    {
        std::vector<Type *> args;
        args.push_back(Type::getInt32Ty(M.getContext()));
        ArrayRef<Type*>  argsRef(args);
        FunctionType *FT = FunctionType::get(Type::getVoidTy(M.getContext()), args, false);
        Constant* myFunc = M.getOrInsertFunction("print", FT, NULL);
        minitor = cast<Function>(myFunc);


        for(Module::iterator F = M.begin(), E = M.end(); F!= E; ++F)
        {
            for(Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
            {
                for(BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI)
                {
                    if(isa<BranchInst>(&(*BI)) )
                    {
                        errs() << "found a branch instruction!\n";
                        ArrayRef< Value* > arguments(ConstantInt::get(Type::getInt32Ty(M.getContext()), 5, true));
                        Instruction *newInst = CallInst::Create(monitor, arguments, "");
                        BB->getInstList().insert(BI, newInst); 
                        errs() << "Inserted the function!\n";
                    }

                }
            }
        }

        return true;
    }
};
char ir_instrumentation::ID = 0;
static RegisterPass<ir_instrumentation> X("ir-instrumentation", "LLVM IR Instrumentation Pass");

}

LLVM is configured and built fine with this pass but when I use opt, I get this error :

opt: /llvm/lib/IR/Type.cpp:281:

llvm::FunctionType::FunctionType(llvm::Type*, llvm::ArrayRef<llvm::Type*>, bool):

Assertion `isValidReturnType(Result) && "invalid return type for function"' failed.

I think the problem is something like mismatching between the function type that I have declared and the external function (like the context).

LLVM version: LLVM version 7.0.0svn

Until now I have not solved the problem.

Thanks


Solution

  • I could finally solve this problem and successfully instrument LLVM bitcode. After a lot of trouble with function getOrInsertFunction, I found out it is not really necessary to use this method in my case. I just simply changed my pass to this:

    #include "llvm/Pass.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/IR/Type.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    
    #include "llvm/IR/IRBuilder.h"
    
    #include <vector>
    
    using namespace llvm;
    
    namespace{
    struct ir_instrumentation : public ModulePass{
        static char ID;
        Function *monitor;
    
        ir_instrumentation() : ModulePass(ID) {}
    
        virtual bool runOnModule(Module &M)
        {
            errs() << "====----- Entered Module " << M.getName() << ".\n";
    
            int counter = 0;
    
            for(Module::iterator F = M.begin(), E = M.end(); F!= E; ++F)
            {
                errs() << "Function name: " << F->getName() << ".\n";
                if(F->getName() == "print"){
                    monitor = cast<Function>(F);
                    continue;
                }
    
                for(Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
                {
                    for(BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI)
                    {
                        if(isa<BranchInst>(&(*BI)) )
                        {
                            errs() << "found a brach instruction!\n";
                            ArrayRef< Value* > arguments(ConstantInt::get(Type::getInt32Ty(M.getContext()), counter, true));
                            counter++;
                            Instruction *newInst = CallInst::Create(monitor, arguments, "");
                            BB->getInstList().insert(BI, newInst); 
                            errs() << "Inserted the function!\n";
                        }
    
                    }
                }
            }
    
            return true;
        }
    };
    char ir_instrumentation::ID = 0;
    static RegisterPass<ir_instrumentation> X("ir-instrumentation", "LLVM IR Instrumentation Pass");
    
    }
    

    Since I know the name of the external function, I can simply find it by iterating over all functions in the module and then use it as needed.

    Obviously the problem was caused by calling module->getOrInsertFunction and the function type. My experience says that this method is more useful when you want to insert a new function and declare the prototype of your own function. Using it to get an existing function is challenging (e.g. setting the right prototype, ...)

    Thanks