Skip to content

Commit

Permalink
Add faiss support in jni (opensearch-project#28)
Browse files Browse the repository at this point in the history
Signed-off-by: Jack Mazanec <jmazane1@nd.edu>
  • Loading branch information
jmazanec15 committed Oct 22, 2021
1 parent d7c55e9 commit 104a6ac
Show file tree
Hide file tree
Showing 32 changed files with 3,969 additions and 1,091 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "jni/external/nmslib"]
path = jni/external/nmslib
url = https://github.com/nmslib/nmslib.git
[submodule "jni/external/faiss"]
path = jni/external/faiss
url = https://github.com/facebookresearch/faiss.git
184 changes: 144 additions & 40 deletions jni/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@

cmake_minimum_required(VERSION 2.8)

project(KNNIndexV2_0_11)
project(KNNPlugin_JNI)

# ---------------------------------- SETUP ----------------------------------
# Target library to be compiled
set(TARGET_LIB OpensearchKNN)

# Corner case. For CMake 2.8, there is no option to specify set(CMAKE_CXX_STANDARD 11). Instead, the flag manually needs
# to be set.
Expand All @@ -36,15 +40,32 @@ else()
set(CMAKE_CXX_STANDARD_REQUIRED True)
endif()

# Target Library to be built
set(KNN_INDEX KNNIndexV2_0_11)
set(KNN_PACKAGE_NAME opensearch-knnlib)
# Platform-dependent settings. JVM_OS_TYPE names the platform subdirectory of the JDK include
# folder and LIB_EXT is the suffix given to the shared library. Only Linux and macOS (Darwin)
# are handled; configuration aborts on anything else.
if (${CMAKE_SYSTEM_NAME} STREQUAL Linux)
    set(JVM_OS_TYPE linux)
    set(LIB_EXT .so)
elseif (${CMAKE_SYSTEM_NAME} STREQUAL Darwin)
    set(CMAKE_MACOSX_RPATH 1)
    set(JVM_OS_TYPE darwin)
    set(LIB_EXT .jnilib)
else ()
    message(FATAL_ERROR "Unable to run on system: ${CMAKE_SYSTEM_NAME}")
endif ()

# Fall back to a default plugin version when KNN_PLUGIN_VERSION is unset (or evaluates to
# false). The value is later used as the CPack package version.
if(NOT KNN_PLUGIN_VERSION)
set(KNN_PLUGIN_VERSION "1.2.0.0")
endif()

# Check if similarity search exists
# Set architecture specific variables
# MACH_ARCH is the architecture tag embedded in the generated package file names, so it must
# always be defined. Both spellings of 64-bit ARM are accepted (Linux reports aarch64, macOS
# reports arm64), as are the common spellings of 64-bit x86. Any other processor falls back to
# the raw CMAKE_SYSTEM_PROCESSOR value instead of leaving the variable undefined.
# The expansion is quoted so an empty CMAKE_SYSTEM_PROCESSOR does not produce a malformed if().
if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(aarch64|arm64)$")
    set(MACH_ARCH arm64)
elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(x86_64|amd64|AMD64)$")
    set(MACH_ARCH x64)
else ()
    set(MACH_ARCH "${CMAKE_SYSTEM_PROCESSOR}")
    message(WARNING "Unrecognized processor '${CMAKE_SYSTEM_PROCESSOR}'; using it verbatim as the package architecture tag.")
endif ()
# ----------------------------------------------------------------------------

# ---------------------------------- NMSLIB ----------------------------------
# Check if nmslib exists
find_path(NMS_REPO_DIR NAMES similarity_search PATHS ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib)

# If not, pull the updated submodule
Expand All @@ -53,67 +74,150 @@ if (NOT EXISTS ${NMS_REPO_DIR})
execute_process(COMMAND git submodule update --init -- external/nmslib WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endif ()

# Add the subdirectory so it is possible to use its targets
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib/similarity_search EXCLUDE_FROM_ALL)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib/similarity_search)
# ---------------------------------------------------------------------------

# ---------------------------------- FAISS ----------------------------------
# Avoid building faiss tests
set(BUILD_TESTING OFF)

# Set OS specific variables
if (${CMAKE_SYSTEM_NAME} STREQUAL Darwin)
set(CMAKE_MACOSX_RPATH 1)
set(JVM_OS_TYPE darwin)
set(LIB_EXT .jnilib)
elseif(${CMAKE_SYSTEM_NAME} STREQUAL Linux)
set(JVM_OS_TYPE linux)
set(LIB_EXT .so)
else()
message( FATAL_ERROR "Unable to run on system: ${CMAKE_SYSTEM_NAME}")
# Pre-seed the FindOpenMP hint variables for Clang/AppleClang on macOS so that
# find_package(OpenMP) picks up a separately installed libomp.
#
# The hints are restricted to Apple platforms: they reference a .dylib, which would break OpenMP
# detection for Clang on Linux. KNN_LIBOMP_ROOT may be set by the caller to point at a different
# libomp installation; it defaults to the previously hard-coded /usr/local/opt/libomp prefix.
if(NOT KNN_LIBOMP_ROOT)
    set(KNN_LIBOMP_ROOT /usr/local/opt/libomp)
endif()

if(APPLE AND CMAKE_C_COMPILER_ID MATCHES "Clang\$")
    # The include path is passed for C as well, matching the CXX flags below.
    set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${KNN_LIBOMP_ROOT}/include")
    set(OpenMP_C_LIB_NAMES "omp")
    set(OpenMP_omp_LIBRARY ${KNN_LIBOMP_ROOT}/lib/libomp.dylib)
endif()

if(APPLE AND CMAKE_CXX_COMPILER_ID MATCHES "Clang\$")
    set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${KNN_LIBOMP_ROOT}/include")
    set(OpenMP_CXX_LIB_NAMES "omp")
    set(OpenMP_omp_LIBRARY ${KNN_LIBOMP_ROOT}/lib/libomp.dylib)
endif()
endif()

# Compile the library
add_library(${KNN_INDEX} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/src/org_opensearch_knn_index_v2011_KNNIndex.cpp)
target_link_libraries(${KNN_INDEX} NonMetricSpaceLib)
target_include_directories(${KNN_INDEX} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include $ENV{JAVA_HOME}/include $ENV{JAVA_HOME}/include/${JVM_OS_TYPE} ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib/similarity_search/include)
# Locate the external packages needed by the build. Each lookup is REQUIRED, so configuration
# stops if one is missing. OpenMP, BLAS and LAPACK are linked into the JNI library and the test
# executable defined in this file.
# NOTE(review): ZLIB is not referenced by any target visible in this file - confirm it is still needed.
find_package(OpenMP REQUIRED)
find_package(ZLIB REQUIRED)
find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)

set_target_properties(${KNN_INDEX} PROPERTIES SUFFIX ${LIB_EXT})
set_target_properties(${KNN_INDEX} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${KNN_INDEX} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/release)
# Check if faiss exists
# Search only inside the bundled submodule checkout. Without NO_DEFAULT_PATH a faiss installation
# found in a default search location would satisfy the lookup, and the submodule would never be
# initialised even though the build uses the sources under external/faiss.
find_path(FAISS_REPO_DIR NAMES faiss PATHS ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss NO_DEFAULT_PATH)

# If not, pull the updated submodule
if (NOT EXISTS "${FAISS_REPO_DIR}")
    message(STATUS "Could not find faiss. Pulling updated submodule.")
    execute_process(COMMAND git submodule update --init -- external/faiss
                    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                    RESULT_VARIABLE FAISS_SUBMODULE_RESULT)
    # Stop here with a clear message rather than failing later on a missing source directory.
    if (NOT FAISS_SUBMODULE_RESULT EQUAL 0)
        message(FATAL_ERROR "Failed to pull the faiss submodule (git exited with: ${FAISS_SUBMODULE_RESULT})")
    endif ()
endif ()

# Switch off the GPU and Python parts of faiss before its build files are processed.
# NOTE(review): presumably these names match options declared by faiss's own CMakeLists -
# confirm against the pinned submodule revision.
set(FAISS_ENABLE_GPU OFF)
set(FAISS_ENABLE_PYTHON OFF)
# EXCLUDE_FROM_ALL keeps faiss targets out of the default build; they are built only when a
# target of this project depends on them.
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/external/faiss EXCLUDE_FROM_ALL)
# ---------------------------------------------------------------------------

# ------------------------------ Lib Compiling ------------------------------
# Shared JNI library: the JNI service entry point, the JNI helper code and one wrapper per
# engine (nmslib and faiss).
add_library(${TARGET_LIB} SHARED
    ${CMAKE_CURRENT_SOURCE_DIR}/src/org_opensearch_knn_index_JNIService.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/jni_util.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/nmslib_wrapper.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/src/faiss_wrapper.cpp)

# Link both engines plus the numeric/OpenMP libraries located by find_package().
target_link_libraries(${TARGET_LIB}
    faiss
    NonMetricSpaceLib
    ${BLAS_LIBRARIES}
    ${LAPACK_LIBRARIES}
    OpenMP::OpenMP_CXX)

# Headers: this project's own, the JDK's JNI headers (generic + platform specific), and both engines.
target_include_directories(${TARGET_LIB} PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/include
    $ENV{JAVA_HOME}/include
    $ENV{JAVA_HOME}/include/${JVM_OS_TYPE}
    ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss
    ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib/similarity_search/include)

# Platform suffix, position independent code, and output into the source tree's release/ folder.
set_target_properties(${TARGET_LIB} PROPERTIES
    SUFFIX ${LIB_EXT}
    POSITION_INDEPENDENT_CODE ON
    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/release)
# ---------------------------------------------------------------------------

# --------------------------------- TESTS -----------------------------------
# Reference - https://crascit.com/2015/07/25/cmake-gtest/
# Fetch googletest at configure time: CMakeLists.txt.in is configured into a helper project in
# the build tree, which is then configured and built in place. Presumably that helper project is
# what populates googletest-src and googletest-build - verify against CMakeLists.txt.in.
# Each step's exit status is checked so that a failed download stops configuration with a clear
# message instead of surfacing later as a missing source directory.
configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt)
execute_process(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
    RESULT_VARIABLE GTEST_CONFIGURE_RESULT
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/googletest-download"
)
if (NOT GTEST_CONFIGURE_RESULT EQUAL 0)
    message(FATAL_ERROR "CMake step for googletest failed: ${GTEST_CONFIGURE_RESULT}")
endif ()
execute_process(COMMAND "${CMAKE_COMMAND}" --build .
    RESULT_VARIABLE GTEST_BUILD_RESULT
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/googletest-download"
)
if (NOT GTEST_BUILD_RESULT EQUAL 0)
    message(FATAL_ERROR "Build step for googletest failed: ${GTEST_BUILD_RESULT}")
endif ()
# Cache override read by googletest's own build files.
# NOTE(review): per the googletest README this only matters for the MSVC runtime - confirm it is needed here.
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)

# Make the gtest/gmock targets available to this project.
add_subdirectory("${CMAKE_BINARY_DIR}/googletest-src"
                 "${CMAKE_BINARY_DIR}/googletest-build"
)
# Unit-test executable covering both engine wrappers.
add_executable(jni_test
    tests/faiss_wrapper_test.cpp
    tests/nmslib_wrapper_test.cpp
    tests/test_util.cpp)

# Link order is kept exactly as before: googletest entry points, both engines, the numeric and
# OpenMP libraries, then the JNI library under test.
target_link_libraries(jni_test
    gtest_main
    gmock_main
    faiss
    NonMetricSpaceLib
    ${BLAS_LIBRARIES}
    ${LAPACK_LIBRARIES}
    OpenMP::OpenMP_CXX
    ${TARGET_LIB})

# Test helpers, project headers, JDK JNI headers, engine headers and googletest/googlemock headers.
target_include_directories(jni_test PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/tests
    ${CMAKE_CURRENT_SOURCE_DIR}/include
    $ENV{JAVA_HOME}/include
    $ENV{JAVA_HOME}/include/${JVM_OS_TYPE}
    ${CMAKE_CURRENT_SOURCE_DIR}/external/faiss
    ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib/similarity_search/include
    ${gtest_SOURCE_DIR}/include
    ${gmock_SOURCE_DIR}/include)

# The test binary is written to bin/ inside the source tree.
set_target_properties(jni_test PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin)

# ---------------------------------------------------------------------------

# -------------------------------- INSTALL ----------------------------------
# Installation rules for shared library
install(TARGETS ${KNN_INDEX}
install(TARGETS ${TARGET_LIB}
LIBRARY DESTINATION lib
COMPONENT library)

# CPack section to build artifacts
if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL aarch64)
set(MACH_ARCH arm64)
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64)
set(MACH_ARCH x64)
endif()

set(KNN_MAINTAINER "OpenSearch Team <opensearch@amazon.com>")
set(OPENSEARCH_DOWNLOAD_URL "https://opensearch.org/downloads.html")
set(CPACK_PACKAGE_NAME ${KNN_PACKAGE_NAME})
set(CPACK_PACKAGE_VERSION ${KNN_PLUGIN_VERSION})
set(CMAKE_INSTALL_PREFIX /usr)
set(CPACK_GENERATOR "RPM;DEB")
SET(CPACK_OUTPUT_FILE_PREFIX packages)
set(CPACK_OUTPUT_FILE_PREFIX packages)
set(CPACK_PACKAGE_RELEASE 1)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "KNN JNI library built off of NMSLIB for OpenSearch.")
set(CPACK_PACKAGE_VENDOR "Amazon")
set(CPACK_PACKAGE_CONTACT "Maintainer: ${KNN_MAINTAINER}")
set(CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-${JVM_OS_TYPE}-${MACH_ARCH}")
set(CPACK_COMPONENTS_GROUPING IGNORE)
get_cmake_property(CPACK_COMPONENTS_ALL COMPONENTS)
list(REMOVE_ITEM CPACK_COMPONENTS_ALL "Unspecified")

# RPM Specific variables
set(CPACK_RPM_PACKAGE_RELEASE ${CPACK_PACKAGE_RELEASE})
set(CPACK_RPM_PACKAGE_URL ${OPENSEARCH_DOWNLOAD_URL})
set(CPACK_RPM_PACKAGE_DESCRIPTION "OpenSearch KNN JNI Library")
# Component variable
set(KNN_PACKAGE_NAME opensearch-knnlib)
set(KNN_PACKAGE_DESCRIPTION "KNN JNI library built off of nmslib and faiss for OpenSearch")

# RPM
set(CPACK_RPM_PACKAGE_LICENSE "ASL-2.0")
set(CPACK_RPM_COMPONENT_INSTALL ON)
set(CPACK_RPM_PACKAGE_URL ${OPENSEARCH_DOWNLOAD_URL})
set(CPACK_RPM_PACKAGE_RELEASE ${CPACK_PACKAGE_RELEASE})

# DEB Specific variables
set(CPACK_RPM_PACKAGE_NAME ${KNN_PACKAGE_NAME})
set(CPACK_RPM_FILE_NAME "${CPACK_RPM_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-${JVM_OS_TYPE}-${MACH_ARCH}.rpm")
set(CPACK_RPM_PACKAGE_DESCRIPTION ${KNN_PACKAGE_DESCRIPTION})
set(CPACK_RPM_PACKAGE_SUMMARY "OpenSearch k-NN JNI Library with nmslib and faiss")

# DEB
set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${OPENSEARCH_DOWNLOAD_URL})
set(CPACK_DEBIAN_PACKAGE_MAINTAINER ${KNN_MAINTAINER})
set(CPACK_DEBIAN_PACKAGE_SOURCE ${CPACK_PACKAGE_NAME})
set(CPACK_DEBIAN_PACKAGE_VERSION ${CPACK_PACKAGE_VERSION})
set(CPACK_DEBIAN_PACKAGE_SECTION "libs")
set(CPACK_DEB_COMPONENT_INSTALL ON)

set(CPACK_DEBIAN_PACKAGE_NAME ${KNN_PACKAGE_NAME})
set(CPACK_DEBIAN_FILE_NAME "${CPACK_DEBIAN_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-${JVM_OS_TYPE}-${MACH_ARCH}.deb")
set(CPACK_DEBIAN_DESCRIPTION ${KNN_PACKAGE_DESCRIPTION})
set(CPACK_DEBIAN_PACKAGE_SOURCE ${CPACK_DEBIAN_PACKAGE_NAME})

include(CPack)
# ---------------------------------------------------------------------------
1 change: 1 addition & 0 deletions jni/external/faiss
Submodule faiss added at 88eabe
55 changes: 55 additions & 0 deletions jni/include/faiss_wrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/

#ifndef OPENSEARCH_KNN_FAISS_WRAPPER_H
#define OPENSEARCH_KNN_FAISS_WRAPPER_H

#include <jni.h>

namespace knn_jni {
    // JNI-facing entry points for the faiss engine.
    //
    // NOTE(review): the visible part of this header only includes <jni.h>, so
    // knn_jni::JNIUtilInterface has to be declared before this point by whoever includes
    // the header - confirm the intended include order.
    namespace faiss_wrapper {
        // Builds an index from the given ids and vectors. The index configuration is taken
        // from the Java map parametersJ. The resulting index is serialized to indexPathJ.
        void CreateIndex(
                knn_jni::JNIUtilInterface * jniUtil,
                JNIEnv * env,
                jintArray idsJ,
                jobjectArray vectorsJ,
                jstring indexPathJ,
                jobject parametersJ);

        // Builds an index from the given ids and vectors, starting from the template index
        // passed in templateIndexJ rather than creating a new index. The resulting index is
        // serialized to indexPathJ.
        void CreateIndexFromTemplate(
                knn_jni::JNIUtilInterface * jniUtil,
                JNIEnv * env,
                jintArray idsJ,
                jobjectArray vectorsJ,
                jstring indexPathJ,
                jbyteArray templateIndexJ);

        // Loads the index stored at indexPathJ into memory.
        //
        // Returns a pointer to the loaded index.
        jlong LoadIndex(
                knn_jni::JNIUtilInterface * jniUtil,
                JNIEnv * env,
                jstring indexPathJ);

        // Runs a query with queryVectorJ against the in-memory index at indexPointerJ.
        //
        // Returns an array of KNNQueryResults.
        jobjectArray QueryIndex(
                knn_jni::JNIUtilInterface * jniUtil,
                JNIEnv * env,
                jlong indexPointerJ,
                jfloatArray queryVectorJ,
                jint kJ);

        // Frees the in-memory index located at indexPointer.
        void Free(jlong indexPointer);

        // Performs initialization operations for the library.
        void InitLibrary();

        // Creates an empty index defined by the values in the Java map parametersJ and trains
        // it with the vector of floats located at trainVectorsPointerJ.
        //
        // Returns the serialized representation of the trained index.
        jbyteArray TrainIndex(
                knn_jni::JNIUtilInterface * jniUtil,
                JNIEnv * env,
                jobject parametersJ,
                jint dimension,
                jlong trainVectorsPointerJ);
    }
}

#endif //OPENSEARCH_KNN_FAISS_WRAPPER_H
Loading

0 comments on commit 104a6ac

Please sign in to comment.