I am trying to build Apache Arrow on Windows offline. As per the instructions on the website, I have downloaded all the dependencies and set the environment variables:
SET ARROW_BOOST_URL=%ARROW_DEPENDENCY_ROOT%boost-1.67.0.tar.gz
SET ARROW_BROTLI_URL=%ARROW_DEPENDENCY_ROOT%brotli-v1.0.7.tar.gz
SET ARROW_CARES_URL=%ARROW_DEPENDENCY_ROOT%cares-1.15.0.tar.gz
SET ARROW_DOUBLE_CONVERSION_URL=%ARROW_DEPENDENCY_ROOT%double-conversion-v3.1.4.tar.gz
SET ARROW_FLATBUFFERS_URL=%ARROW_DEPENDENCY_ROOT%flatbuffers-v1.10.0.tar.gz
SET ARROW_GBENCHMARK_URL=%ARROW_DEPENDENCY_ROOT%gbenchmark-v1.4.1.tar.gz
SET ARROW_GFLAGS_URL=%ARROW_DEPENDENCY_ROOT%gflags-v2.2.0.tar.gz
SET ARROW_GLOG_URL=%ARROW_DEPENDENCY_ROOT%glog-v0.3.5.tar.gz
SET ARROW_GRPC_URL=%ARROW_DEPENDENCY_ROOT%grpc-v1.20.0.tar.gz
SET ARROW_GTEST_URL=%ARROW_DEPENDENCY_ROOT%gtest-1.8.1.tar.gz
SET ARROW_JEMALLOC_URL=%ARROW_DEPENDENCY_ROOT%jemalloc-5.2.0.tar.gz
SET ARROW_LZ4_URL=%ARROW_DEPENDENCY_ROOT%lz4-v1.8.3.tar.gz
SET ARROW_ORC_URL=%ARROW_DEPENDENCY_ROOT%orc-1.5.5.tar.gz
SET ARROW_PROTOBUF_URL=%ARROW_DEPENDENCY_ROOT%protobuf-v3.7.1.tar.gz
SET ARROW_RAPIDJSON_URL=%ARROW_DEPENDENCY_ROOT%rapidjson-2bbd33b33217ff4a73434ebf10cdac41e2ef5e34.tar.gz
SET ARROW_RE2_URL=%ARROW_DEPENDENCY_ROOT%re2-2019-04-01.tar.gz
SET ARROW_SNAPPY_URL=%ARROW_DEPENDENCY_ROOT%snappy-1.1.7.tar.gz
SET ARROW_THRIFT_URL=%ARROW_DEPENDENCY_ROOT%thrift-0.12.0.tar.gz
SET ARROW_URIPARSER_URL=%ARROW_DEPENDENCY_ROOT%uriparser-0.9.2.tar.gz
SET ARROW_ZLIB_URL=%ARROW_DEPENDENCY_ROOT%zlib-1.2.11.tar.gz
SET ARROW_ZSTD_URL=%ARROW_DEPENDENCY_ROOT%zstd-v1.4.0.tar.gz
I am running the build using the following command:
if not defined DevEnvDir (
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\Common7\Tools\VsDevCmd.bat" -arch=amd64 -host_arch=amd64
)
cmake -G %GENERATOR% %CMAKE_ARGS% ^
-DCMAKE_BUILD_TYPE=%CONFIGURATION% ^
-DARROW_BUILD_TESTS=OFF ^
-DARROW_BUILD_EXAMPLES=OFF ^
-DARROW_BUILD_STATIC=ON ^
-DARROW_PARQUET=ON ^
-DPARQUET_BUILD_EXECUTABLES=OFF ^
-DARROW_IPC=OFF ^
-DARROW_BUILD_UTILITIES=OFF ^
-DARROW_HDFS=OFF ^
-DARROW_CXXFLAGS="%ARROW_CXXFLAGS%" ^
-DCMAKE_CXX_FLAGS_RELEASE="/MD %CMAKE_CXX_FLAGS_RELEASE%" ^
-DCMAKE_VERBOSE_MAKEFILE=OFF ^
-DARROW_BOOST_USE_SHARED=OFF ^
-DCMAKE_VERBOSE_MAKEFILE=OFF ^
..
cmake --build . --target install --config %CONFIGURATION%
where my variables are set as follows:
set CMAKE_ARGS=%CMAKE_ARGS% ^
-DARROW_DEPENDENCY_SOURCE=BUNDLED ^
-DBOOST_SOURCE=SYSTEM ^
-DTHRIFT_SOURCE=SYSTEM ^
-DBOOST_DEBUG=OFF ^
-DBOOST_USE_STATIC_LIBS=ON ^
-DBOOST_LIBRARYDIR=%BOOST_LIBRARYDIR% ^
-DBOOST_INCLUDEDIR=%BOOST_INCLUDEDIR% ^
-DRAPIDJSON_ROOT=%RAPIDJSON_ROOTDIR% ^
-DTHRIFT_ROOT=%THRIFT_ROOT%
When I run this, I get the following error complaining that the rapidjson header files cannot be found (even though rapidjson is in the list of dependencies above):
cpp\src\arrow\json\chunker.cc(25): fatal error C1083: Cannot open include file: 'rapidjson/reader.h': No such file or directory
I was able to work around this by manually copying the rapidjson header files next to my source code (ideally I don't want to do this and would rather have the build system pick them up automatically). Even after this step I am getting the following error:
This seems to be happening due to the fact that thrift was not built properly:
Performing download step (verify and extract) for 'thrift_ep'
-- verifying file...
file='t:\src\apache-arrow\cpp\thirdparty\arrow-dependencies\thrift-0.12.0.tar.gz'
-- verifying file... done
-- extracting...
src='T:/src/apache-arrow/cpp/thirdparty/arrow-dependencies/thrift-0.12.0.tar.gz'
dst='T:/src/apache-arrow/cpp/build_Release/thrift_ep-prefix/src/thrift_ep'
-- extracting... [tar xfz]
-- extracting... [analysis]
-- extracting... [rename]
-- extracting... [clean up]
-- extracting... done
No update step for 'thrift_ep'
No patch step for 'thrift_ep'
Performing configure step for 'thrift_ep'
-- The C compiler identification is MSVC 19.16.27031.1
-- The CXX compiler identification is MSVC 19.16.27031.1
-- Check for working C compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe
-- Check for working C compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe -- works
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Detecting C compile features
-- Detecting C compile features - done
-- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe
-- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Parsed Thrift package version: 0.12.0
-- Parsed Thrift version: 0.12.0 (0.2.0)
-- Setting C++11 as the default language level.
-- To specify a different C++ language level, set CMAKE_CXX_STANDARD
BOOST_ROOT:
Boost_LIBRARY_DIR_RELEASE:
BOOST_LIBRARYDIR:
BOOST_INCLUDEDIR:
Boost_USE_STATIC_LIBS: OFF
Boost_DEBUG:
-- libevent NOT found.
-- Could NOT find RUN_HASKELL (missing: RUN_HASKELL)
-- Could NOT find CABAL (missing: CABAL)
-- Looking for arpa/inet.h
-- Looking for arpa/inet.h - not found
-- Looking for fcntl.h
-- Looking for fcntl.h - found
-- Looking for getopt.h
-- Looking for getopt.h - not found
-- Looking for inttypes.h
-- Looking for inttypes.h - found
-- Looking for netdb.h
-- Looking for netdb.h - not found
-- Looking for netinet/in.h
-- Looking for netinet/in.h - not found
-- Looking for signal.h
-- Looking for signal.h - found
-- Looking for stdint.h
-- Looking for stdint.h - found
-- Looking for unistd.h
-- Looking for unistd.h - not found
-- Looking for pthread.h
-- Looking for pthread.h - not found
-- Looking for sys/ioctl.h
-- Looking for sys/ioctl.h - not found
-- Looking for sys/param.h
-- Looking for sys/param.h - not found
-- Looking for sys/resource.h
-- Looking for sys/resource.h - not found
-- Looking for sys/socket.h
-- Looking for sys/socket.h - not found
-- Looking for sys/stat.h
-- Looking for sys/stat.h - found
-- Looking for sys/time.h
-- Looking for sys/time.h - not found
-- Looking for sys/un.h
-- Looking for sys/un.h - not found
-- Looking for poll.h
-- Looking for poll.h - not found
-- Looking for sys/poll.h
-- Looking for sys/poll.h - not found
-- Looking for sys/select.h
-- Looking for sys/select.h - not found
-- Looking for sched.h
-- Looking for sched.h - not found
-- Looking for string.h
-- Looking for string.h - found
-- Looking for strings.h
-- Looking for strings.h - not found
-- Looking for gethostbyname
-- Looking for gethostbyname - not found
-- Looking for gethostbyname_r
-- Looking for gethostbyname_r - not found
-- Looking for strerror_r
-- Looking for strerror_r - not found
-- Looking for sched_get_priority_max
-- Looking for sched_get_priority_max - not found
-- Looking for sched_get_priority_min
-- Looking for sched_get_priority_min - not found
-- Performing Test STRERROR_R_CHAR_P
-- Performing Test STRERROR_R_CHAR_P - Failed
-- Looking for pthread.h
-- Looking for pthread.h - not found
Why were all these files not found? Also, why are the paths to the Boost libraries not set? When building Arrow itself, the path to Boost is resolved automatically.
This is happening while other dependencies like snappy or brotli build fine. Any idea why this is happening? Is there a dependency that I'm missing here on Windows?
Any help is highly appreciated.
First issue with rapidjson:
According to `ThirdpartyToolchain.cmake` from apache-arrow-0.14.1.tar.gz, building rapidjson depends on `ARROW_WITH_RAPIDJSON`, which is automatically set for `ARROW_FLIGHT OR ARROW_IPC`.
So you need `ARROW_WITH_RAPIDJSON=ON` or `ARROW_FLIGHT=ON` or `ARROW_IPC=ON` (note that your command passes `-DARROW_IPC=OFF`).
Second issue with BOOST:
This is only a guess, but I'm using `BOOST_ROOT` + `BOOST_LIBRARYDIR`, and not `BOOST_LIBRARYDIR` + `BOOST_INCLUDEDIR` as I see in your code.
In `ThirdpartyToolchain.cmake`, the `build_thrift` macro contains the following code, which can explain your problems with the thrift compilation:
#Thrift also uses boost. Forward important boost settings if there were ones passed.
if(DEFINED BOOST_ROOT)
set(THRIFT_CMAKE_ARGS ${THRIFT_CMAKE_ARGS} "-DBOOST_ROOT=${BOOST_ROOT}")
endif()