Search code examples
c++cmaketesseractlibcurlcmakelists-options

How to correctly define CMakeLists.txt for Tesseract?


I want to compile and build this simple Tesseract C++ program with CMake:

// https://tesseract-ocr.github.io/tessdoc/Examples_C++.html
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

// Runs OCR on "phototest.tif" with the English model and prints the
// recognized text to stdout.
// Returns 0 on success, 1 if the engine cannot be initialized, the image
// cannot be read, or no text could be produced.
int main()
{
    char *outText;

    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
    // Initialize tesseract-ocr with English, without specifying tessdata path
    if (api->Init(NULL, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        delete api;
        // Returning from main ends the process with status 1, same as exit(1),
        // without depending on <cstdlib>.
        return 1;
    }

    // Open input image with leptonica library
    Pix *image = pixRead("phototest.tif");
    if (image == NULL) {
        // Do not hand a null image to SetImage(); release the engine and stop.
        fprintf(stderr, "Could not read input image.\n");
        api->End();
        delete api;
        return 1;
    }
    api->SetImage(image);

    // Get OCR result
    outText = api->GetUTF8Text();
    int status = 0;
    if (outText != NULL) {
        printf("OCR output:\n%s", outText);
    } else {
        // Passing a null pointer to "%s" is undefined behavior.
        fprintf(stderr, "Could not get OCR text.\n");
        status = 1;
    }

    // Destroy used object and release memory
    api->End();
    delete api;
    delete [] outText;   // deleting a null pointer is a no-op
    pixDestroy(&image);

    return status;
}

With this CMakeLists.txt file:

# Build script for the BasicExample executable (BasicExample.cpp).
cmake_minimum_required(VERSION 3.5)
project(BasicExample)

# Request C++17 for the targets defined below.
set(CMAKE_CXX_STANDARD 17)

# All three dependencies are located through pkg-config; with IMPORTED_TARGET
# each call provides a target named PkgConfig::<prefix>.
find_package(PkgConfig REQUIRED)
pkg_check_modules(tesseract REQUIRED IMPORTED_TARGET tesseract)
pkg_check_modules(leptonica REQUIRED IMPORTED_TARGET lept)
pkg_check_modules(libcurl REQUIRED IMPORTED_TARGET libcurl)

add_executable(BasicExample WIN32 MACOSX_BUNDLE BasicExample.cpp)

# Link items exactly as listed: leptonica, tesseract, then a raw -lcurl flag.
target_link_libraries(BasicExample
    PUBLIC
        PkgConfig::leptonica
        PkgConfig::tesseract
        -lcurl
)

I'm getting these errors:

raphy@raohy:~/tesseract/Examples$ cmake -B builddir
-- The C compiler identification is GNU 12.3.0
-- The CXX compiler identification is GNU 13.2.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /usr/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Found PkgConfig: /usr/bin/pkg-config (found version "1.8.1")
-- Checking for module 'tesseract'
--   Found tesseract, version 5.3.4
-- Checking for module 'lept'
--   Found lept, version 1.82.0
-- Checking for module 'libcurl'
--   Found libcurl, version 8.2.1
-- Configuring done (0.3s)
-- Generating done (0.0s)
-- Build files have been written to: /home/raphy/tesseract/Examples/builddir
raphy@raohy:~/tesseract/Examples$ 
raphy@raohy:~/tesseract/Examples$ cmake --build builddir/
[ 50%] Building CXX object CMakeFiles/BasicExample.dir/BasicExample.cpp.o
[100%] Linking CXX executable BasicExample
/usr/bin/ld: /usr/local/lib/libtesseract.a(baseapi.cpp.o): in function `tesseract::TessBaseAPI::ProcessPagesInternal(char const*, char const*, int, tesseract::TessResultRenderer*)::{lambda(char const*)#1}::operator()(char const*) const':
baseapi.cpp:(.text+0x13): undefined reference to `curl_easy_strerror'
/usr/bin/ld: baseapi.cpp:(.text+0x3b): undefined reference to `curl_easy_cleanup'
/usr/bin/ld: /usr/local/lib/libtesseract.a(baseapi.cpp.o): in function `tesseract::TessBaseAPI::ProcessPagesInternal(char const*, char const*, int, tesseract::TessResultRenderer*)':
baseapi.cpp:(.text+0xad07): undefined reference to `curl_easy_init'
/usr/bin/ld: baseapi.cpp:(.text+0xad48): undefined reference to `curl_easy_setopt'
/usr/bin/ld: baseapi.cpp:(.text+0xad5d): undefined reference to `curl_easy_strerror'
/usr/bin/ld: baseapi.cpp:(.text+0xad89): undefined reference to `curl_easy_cleanup'
/usr/bin/ld: baseapi.cpp:(.text+0xb26f): undefined reference to `curl_easy_setopt'
/usr/bin/ld: baseapi.cpp:(.text+0xb298): undefined reference to `curl_easy_setopt'
/usr/bin/ld: baseapi.cpp:(.text+0xb2c1): undefined reference to `curl_easy_setopt'
/usr/bin/ld: baseapi.cpp:(.text+0xb2fa): undefined reference to `curl_easy_setopt'
/usr/bin/ld: baseapi.cpp:(.text+0xb324): undefined reference to `curl_easy_setopt'
/usr/bin/ld: /usr/local/lib/libtesseract.a(baseapi.cpp.o):baseapi.cpp:(.text+0xb3c9): more undefined references to `curl_easy_setopt' follow
/usr/bin/ld: /usr/local/lib/libtesseract.a(baseapi.cpp.o): in function `tesseract::TessBaseAPI::ProcessPagesInternal(char const*, char const*, int, tesseract::TessResultRenderer*)':
baseapi.cpp:(.text+0xb455): undefined reference to `curl_easy_perform'
/usr/bin/ld: baseapi.cpp:(.text+0xb6b0): undefined reference to `curl_easy_cleanup'
/usr/bin/ld: /usr/local/lib/libtesseract.a(tessdatamanager.cpp.o): in function `tesseract::TessdataManager::LoadArchiveFile(char const*)':
tessdatamanager.cpp:(.text+0x199): undefined reference to `archive_read_new'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x1ad): undefined reference to `archive_read_support_filter_all'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x1b5): undefined reference to `archive_read_support_format_all'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x1c5): undefined reference to `archive_read_open_filename'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x1e6): undefined reference to `archive_entry_pathname'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x1f6): undefined reference to `archive_read_next_header'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x208): undefined reference to `archive_read_free'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x294): undefined reference to `archive_entry_size'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x2e1): undefined reference to `archive_read_data'
collect2: error: ld returned 1 exit status
gmake[2]: *** [CMakeFiles/BasicExample.dir/build.make:99: BasicExample] Error 1
gmake[1]: *** [CMakeFiles/Makefile2:83: CMakeFiles/BasicExample.dir/all] Error 2
gmake: *** [Makefile:91: all] Error 2

But this CMakeLists.txt :

# https://stackoverflow.com/questions/28165518/use-libcurl-undefined-reference-to-curl-easy-init
# Build script for the Url2File executable (url2file.c), which uses libcurl.
# IMPORTED_TARGET in pkg_check_modules() requires CMake 3.6 or newer.
cmake_minimum_required(VERSION 3.6)
project(Url2File)

set(CMAKE_CXX_STANDARD 17)

find_package(PkgConfig REQUIRED)

# Provides the imported target PkgConfig::libcurl, which carries the include
# directories and link flags reported by pkg-config.
pkg_check_modules(libcurl REQUIRED IMPORTED_TARGET libcurl)

add_executable(${PROJECT_NAME} WIN32 MACOSX_BUNDLE url2file.c)

# Link through the imported target instead of a hard-coded -lcurl flag, so the
# library located by pkg-config is the one actually used.
target_link_libraries(${PROJECT_NAME} PUBLIC
    PkgConfig::libcurl
)

for this libcurl usage example:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <curl/curl.h>

/* Write callback: forwards the received buffer to the FILE* passed in
 * `stream` and returns the number of items fwrite() reports as written. */
static size_t write_data(void *ptr, size_t size, size_t nmemb, void *stream)
{
  FILE *out = (FILE *)stream;
  return fwrite(ptr, size, nmemb, out);
}

int main(int argc, char *argv[])
{
  CURL *curl_handle;
  static const char *pagefilename = "page.out";
  FILE *pagefile;

  if(argc < 2) {
    printf("Usage: %s <URL>\n", argv[0]);
    return 1;
  }

  curl_global_init(CURL_GLOBAL_ALL);

  /* init the curl session */
  curl_handle = curl_easy_init();

  /* set URL to get here */
  curl_easy_setopt(curl_handle, CURLOPT_URL, argv[1]);

  /* Switch on full protocol/debug output while testing */
  curl_easy_setopt(curl_handle, CURLOPT_VERBOSE, 1L);

  /* disable progress meter, set to 0L to enable it */
  curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 1L);

  /* send all data to this function  */
  curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data);

  /* open the file */
  pagefile = fopen(pagefilename, "wb");
  if(pagefile) {

    /* write the page body to this file handle */
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, pagefile);

    /* get it! */
    curl_easy_perform(curl_handle);

    /* close the header file */
    fclose(pagefile);
  }

  /* cleanup curl stuff */
  curl_easy_cleanup(curl_handle);

  curl_global_cleanup();

  return 0;
}

works fine :

raphy@raohy:~/curlExample$ cmake --build builddir/
[ 50%] Building C object CMakeFiles/Url2File.dir/url2file.c.o
[100%] Linking C executable Url2File
[100%] Built target Url2File

Update 1)

I've tried leaving only -lcurl in target_link_libraries:

# Experiment: the leptonica and tesseract entries are commented out, so only
# the raw -lcurl flag is linked.
target_link_libraries(BasicExample PUBLIC
    #PkgConfig::leptonica
    #PkgConfig::tesseract
    -lcurl
)

and, actually, the errors about libcurl disappeared:

raphy@raohy:~/tesseract/Examples$ cmake --build builddir/
-- Configuring done (0.0s)
-- Generating done (0.0s)
-- Build files have been written to: /home/raphy/tesseract/Examples/builddir
[ 50%] Building CXX object CMakeFiles/BasicExample.dir/BasicExample.cpp.o
[100%] Linking CXX executable BasicExample
/usr/bin/ld: CMakeFiles/BasicExample.dir/BasicExample.cpp.o: in function `main':
BasicExample.cpp:(.text+0x37): undefined reference to `tesseract::TessBaseAPI::TessBaseAPI()'
/usr/bin/ld: BasicExample.cpp:(.text+0x98): undefined reference to `pixRead'
/usr/bin/ld: BasicExample.cpp:(.text+0xaf): undefined reference to `tesseract::TessBaseAPI::SetImage(Pix*)'
/usr/bin/ld: BasicExample.cpp:(.text+0xbb): undefined reference to `tesseract::TessBaseAPI::GetUTF8Text()'
/usr/bin/ld: BasicExample.cpp:(.text+0xe6): undefined reference to `tesseract::TessBaseAPI::End()'
/usr/bin/ld: BasicExample.cpp:(.text+0x11d): undefined reference to `pixDestroy'
/usr/bin/ld: CMakeFiles/BasicExample.dir/BasicExample.cpp.o: in function `tesseract::TessBaseAPI::Init(char const*, char const*)':
BasicExample.cpp:(.text._ZN9tesseract11TessBaseAPI4InitEPKcS2_[_ZN9tesseract11TessBaseAPI4InitEPKcS2_]+0x43): undefined reference to `tesseract::TessBaseAPI::Init(char const*, char const*, tesseract::OcrEngineMode, char**, int, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const*, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const*, bool)'
collect2: error: ld returned 1 exit status
gmake[2]: *** [CMakeFiles/BasicExample.dir/build.make:97: BasicExample] Error 1
gmake[1]: *** [CMakeFiles/Makefile2:83: CMakeFiles/BasicExample.dir/all] Error 2
gmake: *** [Makefile:91: all] Error 2

But then I got linker errors for tesseract and leptonica instead.

Based on the answers to this post: Use libcurl undefined reference to 'curl_easy_init', -lcurl has to be put at the end of the gcc command. So... how do I link them all correctly?

With this CMakeLists.txt:

# Build script for the BasicExample executable (BasicExample.cpp).
cmake_minimum_required(VERSION 3.5)
project(BasicExample)

# Request C++17 for the targets defined below.
set(CMAKE_CXX_STANDARD 17)

# All three dependencies are located through pkg-config; with IMPORTED_TARGET
# each call provides a target named PkgConfig::<prefix>.
find_package(PkgConfig REQUIRED)
pkg_check_modules(tesseract REQUIRED IMPORTED_TARGET tesseract)
pkg_check_modules(leptonica REQUIRED IMPORTED_TARGET lept)
pkg_check_modules(libcurl REQUIRED IMPORTED_TARGET libcurl)

add_executable(BasicExample WIN32 MACOSX_BUNDLE BasicExample.cpp)

# Link items exactly as listed: leptonica, tesseract, then a raw -lcurl flag.
target_link_libraries(BasicExample
    PUBLIC
        PkgConfig::leptonica
        PkgConfig::tesseract
        -lcurl
)

This is the output of /builddir$ make VERBOSE=1 :

raphy@raohy:~/tesseract/Examples/builddir$ make VERBOSE=1
/usr/local/bin/cmake -S/home/raphy/tesseract/Examples -B/home/raphy/tesseract/Examples/builddir --check-build-system CMakeFiles/Makefile.cmake 0
/usr/local/bin/cmake -E cmake_progress_start /home/raphy/tesseract/Examples/builddir/CMakeFiles /home/raphy/tesseract/Examples/builddir//CMakeFiles/progress.marks
make  -f CMakeFiles/Makefile2 all
make[1]: Entering directory '/home/raphy/tesseract/Examples/builddir'
make  -f CMakeFiles/BasicExample.dir/build.make CMakeFiles/BasicExample.dir/depend
make[2]: Entering directory '/home/raphy/tesseract/Examples/builddir'
cd /home/raphy/tesseract/Examples/builddir && /usr/local/bin/cmake -E cmake_depends "Unix Makefiles" /home/raphy/tesseract/Examples /home/raphy/tesseract/Examples /home/raphy/tesseract/Examples/builddir /home/raphy/tesseract/Examples/builddir /home/raphy/tesseract/Examples/builddir/CMakeFiles/BasicExample.dir/DependInfo.cmake "--color="
Dependencies file "CMakeFiles/BasicExample.dir/BasicExample.cpp.o.d" is newer than depends file "/home/raphy/tesseract/Examples/builddir/CMakeFiles/BasicExample.dir/compiler_depend.internal".
Consolidate compiler generated dependencies of target BasicExample
make[2]: Leaving directory '/home/raphy/tesseract/Examples/builddir'
make  -f CMakeFiles/BasicExample.dir/build.make CMakeFiles/BasicExample.dir/build
make[2]: Entering directory '/home/raphy/tesseract/Examples/builddir'
[ 50%] Linking CXX executable BasicExample
/usr/local/bin/cmake -E cmake_link_script CMakeFiles/BasicExample.dir/link.txt --verbose=1
/usr/bin/c++ CMakeFiles/BasicExample.dir/BasicExample.cpp.o -o BasicExample  -lcurl /usr/lib/x86_64-linux-gnu/liblept.so /usr/local/lib/libtesseract.a
/usr/bin/ld: /usr/local/lib/libtesseract.a(baseapi.cpp.o): in function `tesseract::TessBaseAPI::ProcessPagesInternal(char const*, char const*, int, tesseract::TessResultRenderer*)::{lambda(char const*)#1}::operator()(char const*) const':
baseapi.cpp:(.text+0x13): undefined reference to `curl_easy_strerror'
/usr/bin/ld: baseapi.cpp:(.text+0x3b): undefined reference to `curl_easy_cleanup'
/usr/bin/ld: /usr/local/lib/libtesseract.a(baseapi.cpp.o): in function `tesseract::TessBaseAPI::ProcessPagesInternal(char const*, char const*, int, tesseract::TessResultRenderer*)':
baseapi.cpp:(.text+0xad07): undefined reference to `curl_easy_init'
/usr/bin/ld: baseapi.cpp:(.text+0xad48): undefined reference to `curl_easy_setopt'
/usr/bin/ld: baseapi.cpp:(.text+0xad5d): undefined reference to `curl_easy_strerror'
/usr/bin/ld: baseapi.cpp:(.text+0xad89): undefined reference to `curl_easy_cleanup'
/usr/bin/ld: baseapi.cpp:(.text+0xb26f): undefined reference to `curl_easy_setopt'
/usr/bin/ld: baseapi.cpp:(.text+0xb298): undefined reference to `curl_easy_setopt'
/usr/bin/ld: baseapi.cpp:(.text+0xb2c1): undefined reference to `curl_easy_setopt'
/usr/bin/ld: baseapi.cpp:(.text+0xb2fa): undefined reference to `curl_easy_setopt'
/usr/bin/ld: baseapi.cpp:(.text+0xb324): undefined reference to `curl_easy_setopt'
/usr/bin/ld: /usr/local/lib/libtesseract.a(baseapi.cpp.o):baseapi.cpp:(.text+0xb3c9): more undefined references to `curl_easy_setopt' follow
/usr/bin/ld: /usr/local/lib/libtesseract.a(baseapi.cpp.o): in function `tesseract::TessBaseAPI::ProcessPagesInternal(char const*, char const*, int, tesseract::TessResultRenderer*)':
baseapi.cpp:(.text+0xb455): undefined reference to `curl_easy_perform'
/usr/bin/ld: baseapi.cpp:(.text+0xb6b0): undefined reference to `curl_easy_cleanup'
/usr/bin/ld: /usr/local/lib/libtesseract.a(tessdatamanager.cpp.o): in function `tesseract::TessdataManager::LoadArchiveFile(char const*)':
tessdatamanager.cpp:(.text+0x199): undefined reference to `archive_read_new'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x1ad): undefined reference to `archive_read_support_filter_all'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x1b5): undefined reference to `archive_read_support_format_all'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x1c5): undefined reference to `archive_read_open_filename'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x1e6): undefined reference to `archive_entry_pathname'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x1f6): undefined reference to `archive_read_next_header'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x208): undefined reference to `archive_read_free'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x294): undefined reference to `archive_entry_size'
/usr/bin/ld: tessdatamanager.cpp:(.text+0x2e1): undefined reference to `archive_read_data'
collect2: error: ld returned 1 exit status
make[2]: *** [CMakeFiles/BasicExample.dir/build.make:99: BasicExample] Error 1
make[2]: Leaving directory '/home/raphy/tesseract/Examples/builddir'
make[1]: *** [CMakeFiles/Makefile2:83: CMakeFiles/BasicExample.dir/all] Error 2
make[1]: Leaving directory '/home/raphy/tesseract/Examples/builddir'
make: *** [Makefile:91: all] Error 2
raphy@raohy:~/tesseract/Examples/builddir$ 

What am I missing ? How to make the CMakeLists.txt work for the tesseract C++ code example?


Solution

  • It is the tesseract library that needs curl (and, judging by the undefined `archive_*` references in the link output, libarchive as well). Because PkgConfig::tesseract is an INTERFACE library, you can tell CMake about such a dependency by appending to its INTERFACE_LINK_LIBRARIES property:

    ...
    pkg_check_modules(tesseract REQUIRED IMPORTED_TARGET tesseract)
    # The imported target created by pkg_check_modules() does not carry the
    # dependencies of the static libtesseract.a, so add them manually.
    # The link errors name both curl_* and archive_* symbols, hence both
    # libcurl and libarchive are appended here, after tesseract itself.
    set_property(TARGET PkgConfig::tesseract APPEND PROPERTY INTERFACE_LINK_LIBRARIES curl archive)
    
    target_link_libraries(BasicExample PUBLIC
        PkgConfig::tesseract # With that linkage CMake will automatically add linkage with curl and archive.
    )
    

    The approach above relies on how the FindPkgConfig.cmake module actually creates a target for the IMPORTED_TARGET option. It should therefore work for other static libraries obtained via pkg_check_modules(... IMPORTED_TARGET), but may fail for libraries obtained in other ways.


    Details

    As you can see from the VERBOSE output, the linkage with curl comes before the linkage with tesseract. This is the reason for the "undefined reference" errors: it is the tesseract library that uses curl, and for that dependency to be resolved correctly, curl must come after tesseract on the linker command line.

    Normally, CMake preserves order of libraries specified in target_link_libraries command, so with the line

    # The call as written in the question: -lcurl is the last item listed.
    target_link_libraries(BasicExample PUBLIC
        PkgConfig::leptonica
        PkgConfig::tesseract
        -lcurl
    )
    

    one would expect CMake to place the linkage with curl after all the other libraries.

    Unfortunately, PkgConfig::tesseract is actually an INTERFACE library, which doesn't reference tesseract by itself but contains it in its INTERFACE_LINK_LIBRARIES property. When forming the linker command line, CMake adds the value of that property after all "immediate" operands of target_link_libraries. This is why CMake places curl before tesseract.

    With the suggested approach, both tesseract and curl become part of the single INTERFACE_LINK_LIBRARIES property, so CMake preserves their order when creating the linker command line.