
Call a Python function from C++ using pybind11


I am trying to call a Python function from a C++ program (one that has its own main()) using pybind11, but I have found very few references on how to do this. Most of the existing documentation covers the reverse direction, i.e. calling C++ from Python.

Is there a complete example showing how to do that? The only reference I found is https://github.com/pybind/pybind11/issues/30, and it contains very little information.


Solution

    1. Project structure
       • CMakeLists.txt
       • calc.py
       • main.cpp
    2. main.cpp

      #include <pybind11/embed.h>
      #include <iostream>
      namespace py = pybind11;
      using namespace py::literals;
      
      
      int main() {
          py::scoped_interpreter guard{};
      
          // append the source dir to sys.path so the Python interpreter can find your custom Python file (calc.py)
          py::module_ sys = py::module_::import("sys");
          py::list path = sys.attr("path");
          path.attr("append")("..");
      
          // import the custom Python class, instantiate it, and call a method on the instance
          py::module_ tokenize = py::module_::import("calc");
          py::type customTokenizerClass = tokenize.attr("CustomTokenizer");
          py::object customTokenizer = customTokenizerClass("/Users/Caleb/Desktop/codes/ptms/bert-base");
          py::object res = customTokenizer.attr("custom_tokenize")("good luck");
      
          // show the result
          py::list input_ids = res.attr("input_ids");
          py::list token_type_ids = res.attr("token_type_ids");
          py::list attention_mask = res.attr("attention_mask");
          py::list offsets = res.attr("offset_mapping");
          std::string message = "input ids is {},\noffsets is {}"_s.format(input_ids, offsets);
          std::cout << message << std::endl;
      }
      
    3. calc.py

      from transformers import BertTokenizerFast
      
      
      class CustomTokenizer(object):
          def __init__(self, vocab_dir):
              self._tokenizer = BertTokenizerFast.from_pretrained(vocab_dir)
      
          def custom_tokenize(self, text):
              return self._tokenizer(text, return_offsets_mapping=True)
      
      
      def build_tokenizer(vocab_dir: str) -> BertTokenizerFast:
          tokenizer = BertTokenizerFast.from_pretrained(vocab_dir)
          return tokenizer
      
      
      def tokenize_text(tokenizer: BertTokenizerFast, text: str) -> dict:
          res = tokenizer(text, return_offsets_mapping=True)
          return dict(res)
      
      
    4. CMakeLists.txt

      cmake_minimum_required(VERSION 3.4)
      project(example)
      set(CMAKE_CXX_STANDARD 11)
      
      # set pybind11 dir
      set(pybind11_DIR /Users/Caleb/Softwares/pybind11)
      find_package(pybind11 REQUIRED)
      
      # link against the Python library of a custom interpreter (under macOS)
      link_libraries(/Users/Caleb/miniforge3/envs/py38/lib/libpython3.8.dylib)
      
      add_executable(example main.cpp)
      target_link_libraries(example PRIVATE pybind11::embed)
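
To build and run: one common workflow is an out-of-source build, e.g. create a build/ directory inside the project, run cmake .. and make from there, and then launch ./example from inside build/. Running from the build directory matters because the path.attr("append")("..") line in main.cpp is what makes ".." (the project root, where calc.py lives) importable.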
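
As a side note, calc.py also defines the module-level helpers build_tokenizer and tokenize_text, which main.cpp above never calls. Below is a minimal sketch (not part of the original answer) of how they could be driven from C++ instead of the class, with Python exceptions surfaced via pybind11's py::error_already_set; the vocab path is the same placeholder used above.

      #include <pybind11/embed.h>
      #include <iostream>
      namespace py = pybind11;

      int main() {
          py::scoped_interpreter guard{};
          try {
              // make calc.py importable, exactly as in main.cpp above
              py::module_ sys = py::module_::import("sys");
              sys.attr("path").attr("append")("..");

              py::module_ calc = py::module_::import("calc");

              // build_tokenizer(vocab_dir) -> BertTokenizerFast
              py::object tokenizer = calc.attr("build_tokenizer")("/Users/Caleb/Desktop/codes/ptms/bert-base");

              // tokenize_text(tokenizer, text) -> dict
              py::dict res = calc.attr("tokenize_text")(tokenizer, "good luck");
              py::print(res);
          } catch (const py::error_already_set &e) {
              // any Python exception (ImportError, a missing vocab dir, ...) ends up here
              std::cerr << "Python error: " << e.what() << std::endl;
              return 1;
          }
          return 0;
      }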