Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-17817: [C++] Let ORC compile on MSVC if it is activated #14208

Merged
merged 19 commits into from
Oct 15, 2022
Merged
6 changes: 5 additions & 1 deletion .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ jobs:
ARROW_HOME: /usr
ARROW_JEMALLOC: OFF
ARROW_MIMALLOC: ON
ARROW_ORC: ON
ARROW_PARQUET: ON
ARROW_USE_GLOG: OFF
ARROW_VERBOSE_THIRDPARTY_BUILD: OFF
Expand Down Expand Up @@ -286,7 +287,10 @@ jobs:
bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build"
- name: Test
shell: bash
run: ci/scripts/cpp_test.sh $(pwd) $(pwd)/build
run: |
# For ORC
export TZDIR=/c/msys64/usr/share/zoneinfo
ci/scripts/cpp_test.sh $(pwd) $(pwd)/build

windows-mingw:
name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} C++
Expand Down
7 changes: 3 additions & 4 deletions ci/appveyor-cpp-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
-DARROW_HDFS=ON ^
-DARROW_JSON=ON ^
-DARROW_MIMALLOC=ON ^
-DARROW_ORC=ON ^
-DARROW_PARQUET=ON ^
-DARROW_S3=%ARROW_S3% ^
-DARROW_SUBSTRAIT=ON ^
Expand All @@ -93,13 +94,11 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
.. || exit /B
cmake --build . --target install --config Release || exit /B

@rem Needed so arrow-python-test.exe works
set OLD_PYTHONHOME=%PYTHONHOME%
set PYTHONHOME=%CONDA_PREFIX%
@rem For ORC C++
set TZDIR=%CONDA_PREFIX%\share\zoneinfo

ctest --output-on-failure || exit /B

set PYTHONHOME=%OLD_PYTHONHOME%
popd

@rem
Expand Down
9 changes: 3 additions & 6 deletions ci/conda_env_cpp.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,19 @@ brotli
bzip2
c-ares
cmake
flatbuffers
gflags
glog
gmock>=1.10.0
google-cloud-cpp>=1.34.0
# 1.45.0 appears to segfault on Windows/AppVeyor
grpc-cpp>=1.27.3,<1.45.0
grpc-cpp
gtest>=1.10.0
libprotobuf
libutf8proc
lz4-c
make
ninja
# Required by google-cloud-cpp, the Conda package is missing the dependency:
# https://github.com/conda-forge/google-cloud-cpp-feedstock/issues/28
nlohmann_json
orc
pkg-config
python
rapidjson
Expand All @@ -46,4 +44,3 @@ thrift-cpp>=0.11.0
xsimd
zlib
zstd
flatbuffers
3 changes: 1 addition & 2 deletions ci/scripts/java_jni_windows_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ install_dir=${build_dir}/cpp-install
: ${ARROW_BUILD_TESTS:=ON}
: ${ARROW_DATASET:=ON}
export ARROW_DATASET
# We can enable this after ARROW-17817 is resolved.
: ${ARROW_ORC:=OFF}
: ${ARROW_ORC:=ON}
export ARROW_ORC
: ${ARROW_PARQUET:=ON}
: ${ARROW_S3:=ON}
Expand Down
2 changes: 0 additions & 2 deletions cpp/cmake_modules/DefineOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ endmacro()

macro(resolve_option_dependencies)
if(MSVC_TOOLCHAIN)
# ARROW-17817: ORC can't be built on Windows.
set(ARROW_ORC OFF)
# Plasma using glog is not fully tested on windows.
set(ARROW_USE_GLOG OFF)
endif()
Expand Down
28 changes: 19 additions & 9 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4286,6 +4286,9 @@ macro(build_orc)
get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY)

get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY)

# Weirdly passing in PROTOBUF_LIBRARY for PROTOC_LIBRARY still results in ORC finding
# the protoc library.
set(ORC_CMAKE_ARGS
Expand All @@ -4305,8 +4308,8 @@ macro(build_orc)
"-DPROTOBUF_INCLUDE_DIR=${ORC_PROTOBUF_INCLUDE_DIR}"
"-DPROTOBUF_LIBRARY=${ORC_PROTOBUF_LIBRARY}"
"-DPROTOC_LIBRARY=${ORC_PROTOBUF_LIBRARY}"
"-DLZ4_HOME=${LZ4_HOME}"
"-DZSTD_HOME=${ZSTD_HOME}")
"-DLZ4_HOME=${ORC_LZ4_ROOT}"
"-DZSTD_HOME=${ORZ_ZSTD_ROOT}")
if(ORC_PROTOBUF_EXECUTABLE)
set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS}
"-DPROTOBUF_EXECUTABLE:FILEPATH=${ORC_PROTOBUF_EXECUTABLE}")
Expand All @@ -4322,20 +4325,27 @@ macro(build_orc)
URL ${ORC_SOURCE_URL}
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
CMAKE_ARGS ${ORC_CMAKE_ARGS} ${EP_LOG_OPTIONS})

add_dependencies(toolchain orc_ep)
CMAKE_ARGS ${ORC_CMAKE_ARGS} ${EP_LOG_OPTIONS}
DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET}
LZ4::lz4
ZLIB::ZLIB)

set(ORC_VENDORED 1)
add_dependencies(orc_ep ZLIB::ZLIB)
add_dependencies(orc_ep LZ4::lz4)
add_dependencies(orc_ep ${Snappy_TARGET})
add_dependencies(orc_ep ${ARROW_PROTOBUF_LIBPROTOBUF})

add_library(orc::liborc STATIC IMPORTED)
set_target_properties(orc::liborc
PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}"
INTERFACE_INCLUDE_DIRECTORIES "${ORC_INCLUDE_DIR}")
target_link_libraries(orc::liborc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET})
if(NOT MSVC)
if(NOT APPLE)
target_link_libraries(orc::liborc INTERFACE Threads::Threads)
endif()
target_link_libraries(orc::liborc INTERFACE ${CMAKE_DL_LIBS})
endif()

add_dependencies(toolchain orc_ep)
add_dependencies(orc::liborc orc_ep)
Expand Down
15 changes: 1 addition & 14 deletions cpp/src/arrow/adapters/orc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,27 +26,14 @@ install(FILES adapter.h options.h
# pkg-config support
arrow_add_pkg_config("arrow-orc")

set(ORC_MIN_TEST_LIBS
GTest::gtest_main
GTest::gtest
${Snappy_TARGET}
LZ4::lz4
ZLIB::ZLIB)

if(ARROW_BUILD_STATIC)
set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_static arrow_static)
else()
set(ARROW_LIBRARIES_FOR_STATIC_TESTS arrow_testing_shared arrow_shared)
endif()

if(APPLE)
set(ORC_MIN_TEST_LIBS ${ORC_MIN_TEST_LIBS} ${CMAKE_DL_LIBS})
elseif(NOT MSVC)
set(ORC_MIN_TEST_LIBS ${ORC_MIN_TEST_LIBS} pthread ${CMAKE_DL_LIBS})
endif()

set(ORC_STATIC_TEST_LINK_LIBS orc::liborc ${ARROW_LIBRARIES_FOR_STATIC_TESTS}
${ORC_MIN_TEST_LIBS})
GTest::gtest_main GTest::gtest)

add_arrow_test(adapter_test
PREFIX
Expand Down
27 changes: 14 additions & 13 deletions cpp/src/arrow/adapters/orc/adapter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,22 @@ namespace arrow {

using internal::checked_pointer_cast;

constexpr int kDefaultSmallMemStreamSize = 16384 * 5; // 80KB
constexpr int kDefaultMemStreamSize = 10 * 1024 * 1024;
constexpr size_t kDefaultSmallMemStreamSize = 16384 * 5; // 80KB
constexpr size_t kDefaultMemStreamSize = 10 * 1024 * 1024;
constexpr int64_t kNanoMax = std::numeric_limits<int64_t>::max();
constexpr int64_t kNanoMin = std::numeric_limits<int64_t>::lowest();
const int64_t kMicroMax = std::floor(kNanoMax / 1000);
const int64_t kMicroMin = std::ceil(kNanoMin / 1000);
const int64_t kMilliMax = std::floor(kMicroMax / 1000);
const int64_t kMilliMin = std::ceil(kMicroMin / 1000);
const int64_t kSecondMax = std::floor(kMilliMax / 1000);
const int64_t kSecondMin = std::ceil(kMilliMin / 1000);
const int64_t kMicroMax = static_cast<int64_t>(std::floor(kNanoMax / 1000));
const int64_t kMicroMin = static_cast<int64_t>(std::ceil(kNanoMin / 1000));
const int64_t kMilliMax = static_cast<int64_t>(std::floor(kMicroMax / 1000));
const int64_t kMilliMin = static_cast<int64_t>(std::ceil(kMicroMin / 1000));
const int64_t kSecondMax = static_cast<int64_t>(std::floor(kMilliMax / 1000));
const int64_t kSecondMin = static_cast<int64_t>(std::ceil(kMilliMin / 1000));

static constexpr random::SeedType kRandomSeed = 0x0ff1ce;

class MemoryOutputStream : public liborc::OutputStream {
public:
explicit MemoryOutputStream(ssize_t capacity)
explicit MemoryOutputStream(size_t capacity)
: data_(capacity), name_("MemoryOutputStream"), length_(0) {}

uint64_t getLength() const override { return length_; }
Expand Down Expand Up @@ -86,12 +86,13 @@ class MemoryOutputStream : public liborc::OutputStream {
std::shared_ptr<Buffer> GenerateFixedDifferenceBuffer(int32_t fixed_length,
int64_t length) {
BufferBuilder builder;
int32_t offsets[length];
std::vector<int32_t> offsets;
offsets.resize(length);
ARROW_EXPECT_OK(builder.Resize(4 * length));
for (int32_t i = 0; i < length; i++) {
offsets[i] = fixed_length * i;
for (int64_t i = 0; i < length; i++) {
offsets[i] = static_cast<int32_t>(fixed_length * i);
}
ARROW_EXPECT_OK(builder.Append(offsets, 4 * length));
ARROW_EXPECT_OK(builder.Append(offsets.data(), 4 * length));
std::shared_ptr<Buffer> buffer;
ARROW_EXPECT_OK(builder.Finish(&buffer));
return buffer;
Expand Down
5 changes: 3 additions & 2 deletions dev/tasks/java-jars/github.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ jobs:
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
REM For ORC
set TZDIR=/c/msys64/usr/share/zoneinfo
bash -c "arrow/ci/scripts/java_jni_windows_build.sh $(pwd)/arrow $(pwd)/arrow/cpp-build $(pwd)/arrow/java-dist"
- name: Compress into single artifact to keep directory structure
shell: bash
Expand Down Expand Up @@ -148,8 +150,7 @@ jobs:
test -f arrow/java-dist/arrow_dataset_jni.dll
test -f arrow/java-dist/libarrow_orc_jni.dylib
test -f arrow/java-dist/libarrow_orc_jni.so
# We can enable this after ARROW-17817 is resolved.
# test -f arrow/java-dist/arrow_orc_jni.dll
test -f arrow/java-dist/arrow_orc_jni.dll
test -f arrow/java-dist/libgandiva_jni.dylib
test -f arrow/java-dist/libgandiva_jni.so
test -f arrow/java-dist/libplasma_java.dylib
Expand Down