2048 add support for numpy 2 #2050

Open · wants to merge 6 commits into base: master

Changes from all commits
2 changes: 1 addition & 1 deletion .github/actions/set_persistent_storage_env_vars/action.yml
@@ -2,7 +2,7 @@ name: 'Set Persistent storages env variables'
 description: 'Set the necessary variables for Persistent storage tests'
 inputs:
   bucket: {default: 'arcticdb-ci-test-bucket-02', type: string, description: The name of the S3 bucket that we will test against}
-  endpoint: {default: 'https://s3.eu-west-1.amazonaws.com', type: string, description: The address of the S3 endpoint}
+  endpoint: {default: 'http://s3.eu-west-1.amazonaws.com', type: string, description: The address of the S3 endpoint}
   region: {default: 'eu-west-1', type: string, description: The S3 region of the bucket}
   aws_access_key: {required: true, type: string, description: The value for the AWS Access key}
   aws_secret_key: {required: true, type: string, description: The value for the AWS Secret key}
2 changes: 1 addition & 1 deletion .github/actions/setup_deps/action.yml
@@ -40,4 +40,4 @@ runs:
       rm /tmp/sccache.tar.gz
 
       which gcc
-      which g++
\ No newline at end of file
+      which g++
19 changes: 9 additions & 10 deletions .github/workflows/build.yml
@@ -28,7 +28,7 @@ jobs:
     uses: ./.github/workflows/cibw_docker_image.yml
     permissions: {packages: write}
     with:
-      cibuildwheel_ver: "2.12.1"
+      cibuildwheel_ver: "2.21.3"
       force_update: false
 
   common_config:
@@ -155,24 +155,23 @@ jobs:
     strategy:
       fail-fast: false
      matrix:
-        python3: ${{fromJson(vars.LINUX_PYTHON_VERSIONS || '[6, 7, 8, 9, 10, 11]')}}
+        python3: ${{fromJson(vars.LINUX_PYTHON_VERSIONS || '[7, 8, 9, 10, 11, 12]')}}
         include:
           - python_deps_ids: [""]
             matrix_override: ${{fromJson(needs.common_config.outputs.linux_matrix)}}
             pytest_xdist_mode: "--dist worksteal"
-          - python3: 6
-            python_deps_ids: ["", -compat36]
-            matrix_override:
-              - ${{fromJson(needs.common_config.outputs.linux_matrix)[0]}}
-              - python_deps_id: -compat36
-                python_deps: requirements-compatibility-py36.txt
-            pytest_xdist_mode: "" # worksteal Not supported on Python 3.6
           - python3: 8
             python_deps_ids: ["", -compat38]
             matrix_override:
               - ${{fromJson(needs.common_config.outputs.linux_matrix)[0]}}
               - python_deps_id: -compat38
                 python_deps: requirements-compatibility-py38.txt
+          - python3: 11
+            python_deps_ids: ["", -compat311]
+            matrix_override:
+              - ${{fromJson(needs.common_config.outputs.linux_matrix)[0]}}
+              - python_deps_id: -compat311
+                python_deps: requirements-compatibility-py311.txt
     name: 3.${{matrix.python3}} Linux
     uses: ./.github/workflows/build_steps.yml
     secrets: inherit
@@ -203,7 +202,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python3: ${{fromJson(vars.WINDOWS_PYTHON_VERSIONS || '[7, 8, 9, 10, 11]')}}
+        python3: ${{fromJson(vars.WINDOWS_PYTHON_VERSIONS || '[7, 8, 9, 10, 11, 12]')}}
     name: 3.${{matrix.python3}} Windows
     uses: ./.github/workflows/build_steps.yml
     secrets: inherit
6 changes: 6 additions & 0 deletions .github/workflows/persistent_storage.yml
@@ -40,6 +40,12 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v3.3.0
 
+      # The latest version of arcticdb will no longer publish wheels for Python 3.6
+      # So we skip the seed if we are testing the latest version with Python 3.6
+      - name: Skip if unsupported
+        if: inputs.arcticdb_version == 'latest' && inputs.python3 == '6'
+        run: exit 0
+
       - name: Select Python (Linux)
         if: matrix.os == 'linux'
         run: echo /opt/python/${{env.python_impl_name}}*/bin >> $GITHUB_PATH
1 change: 1 addition & 0 deletions .gitignore
@@ -4,6 +4,7 @@ __pycache__/
 .mypy_cache
 .hypothesis/
 *.egg-info/
+python/venvs/
 
 .vscode/
 .vs/
2 changes: 2 additions & 0 deletions build_tooling/requirements-compatibility-py311.txt
@@ -0,0 +1,2 @@
+# Makes sure we are able to use Numpy 1
+numpy<2
[Collaborator comment]
So what matrix of Python (minor) and Pandas/numpy (major) versions are now covered by the CI?

7 changes: 0 additions & 7 deletions build_tooling/requirements-compatibility-py36.txt

This file was deleted.

8 changes: 2 additions & 6 deletions build_tooling/transform_asv_results.py
@@ -37,9 +37,7 @@ def df_to_asv_json(results_df: pd.DataFrame):
     1  basic_functions.BasicFunctions.time_read_batch  [[0.2775141046000044, 0.597266279600126, 0.379...  ...  {'<build>': 515.5997927188873, '<setup_cache b...  2
     """
     new_df = results_df.copy()
-    new_df["date"] = (pd.to_datetime(results_df["date"]).astype(int) // 1000000).astype(
-        object
-    )
+    new_df["date"] = (pd.to_datetime(results_df["date"]).astype(int) // 1000000).astype(object)
     new_df["version"] = results_df["version"].astype(object)
 
     metadata = {
@@ -71,9 +69,7 @@ def asv_json_to_df(full_path: str) -> pd.DataFrame:
 
     results_list = []
     for test_name, test_results in data["results"].items():
-        flattened_data = pd.json_normalize(
-            {"test_name": test_name, "results": str(test_results)}
-        )
+        flattened_data = pd.json_normalize({"test_name": test_name, "results": str(test_results)})
         flattened_data["commit_hash"] = data["commit_hash"]
         flattened_data["env_name"] = data["env_name"]
         flattened_data["date"] = data["date"]
9 changes: 6 additions & 3 deletions cpp/CMake/PythonUtils.cmake
@@ -1,4 +1,3 @@
-
 # Helpers
 function(_python_utils_dump_vars_targets _target _props)
     if(TARGET ${_target})
@@ -102,7 +101,7 @@ endfunction()
 
 # Enhanced FindPython
 if(DEFINED ARCTICDB_FIND_PYTHON_DEV_MODE)
-    message("Using supplied ARCTICDB_FIND_PYTHON_DEV_MODE=${ARCTICDB_FIND_PYTHON_DEV_MODE}.")
+    message(STATUS "Using supplied ARCTICDB_FIND_PYTHON_DEV_MODE=${ARCTICDB_FIND_PYTHON_DEV_MODE}.")
     if("${ARCTICDB_FIND_PYTHON_DEV_MODE}" STREQUAL "pybind11")
         set(ARCTICDB_FIND_PYTHON_DEV_MODE "")
     endif()
@@ -125,13 +124,16 @@ if(ARCTICDB_FIND_PYTHON_DEV_MODE)
     if(NOT Python_EXECUTABLE AND NOT Python_ROOT_DIR AND NOT Python_LIBRARY)
         # FindPython searches the PATH environment last, but that's arguably the only correct place it should look
         find_program(Python_EXECUTABLE NAMES python3 python NAMES_PER_DIR REQUIRED NO_CMAKE_SYSTEM_PATH)
+        set(PYTHON_EXECUTABLE ${Python_EXECUTABLE} CACHE FILEPATH "Python executable found by FindPython")
     else()

[Review conversation marked as resolved by vasil-pashov]

         set(Python_FIND_STRATEGY LOCATION)
     endif()
 
     # Let CMake find Python without telling it the BUILD_PYTHON_VERSION we wanted. This way we know third-party stuff that
     # is not aware of BUILD_PYTHON_VERSION is going to find the same thing
     find_package(Python 3 COMPONENTS Interpreter ${ARCTICDB_FIND_PYTHON_DEV_MODE} REQUIRED)
+    set(PYTHON_INCLUDE_DIRS ${Python_INCLUDE_DIRS})
+
     python_utils_dump_vars_if_enabled("After our FindPython before any third-party:")
 
     if(DEFINED Python_FIND_ABI)
@@ -146,7 +148,7 @@ if(ARCTICDB_FIND_PYTHON_DEV_MODE)
             MAP_IMPORTED_CONFIG_RELWITHDEBINFO ";RELEASE"
         )
     endif()
-
+    set(PYBIND11_FINDPYTHON OFF)
 else()
     set(ARCTICDB_PYTHON_PREFIX PYTHON)
     python_utils_check_include_dirs("supplied")
@@ -158,3 +160,4 @@ else()
 
     set(PYBIND11_FINDPYTHON OFF)
 endif()
+
4 changes: 3 additions & 1 deletion cpp/arcticdb/CMakeLists.txt
@@ -62,7 +62,7 @@ else()
     add_compile_definitions(_LIBCPP_DISABLE_AVAILABILITY)
 
     SET(HIDE_LINKED_SYMBOLS OFF)
 
-
+find_package(pybind11 REQUIRED)
 find_package(PCRE REQUIRED)
 find_package(Libevent REQUIRED)
@@ -377,6 +377,7 @@ set(arcticdb_srcs
     util/lazy.hpp
     util/type_traits.hpp
     util/variant.hpp
+    util/gil_safe_py_none.hpp
     version/de_dup_map.hpp
     version/op_log.hpp
     version/schema_checks.hpp
@@ -509,6 +510,7 @@ set(arcticdb_srcs
     util/type_handler.cpp
     version/key_block.hpp
     version/key_block.cpp
+    util/gil_safe_py_none.cpp
     version/local_versioned_engine.cpp
     version/op_log.cpp
     version/snapshot.cpp
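
The new util/gil_safe_py_none sources registered above are not shown in this diff. A minimal sketch of what the class plausibly looks like, inferred from its call sites (instance() returns a shared handle whose ->ptr() yields Py_None, and PYBIND11_MODULE calls instance() eagerly while the GIL is held); the exact name and layout are assumptions:

    // Hypothetical reconstruction -- not the actual arcticdb header.
    #include <pybind11/pybind11.h>
    #include <memory>

    namespace arcticdb {

    class GilSafePyNone {
    public:
        static std::shared_ptr<pybind11::none> instance() {
            // Constructed exactly once; the eager call in PYBIND11_MODULE
            // (see python_module.cpp below) runs while the importing thread
            // holds the GIL, so the refcount bump in py::none's constructor
            // is safe. Copying the shared_ptr afterwards never touches
            // Python state, so worker threads can use it without the GIL.
            static auto instance_ = std::make_shared<pybind11::none>();
            return instance_;
        }
    };

    } // namespace arcticdb

This would also explain why the call sites below compare against none->ptr() and bump refcounts explicitly with Py_INCREF under a lock, rather than constructing a fresh py::none per thread, which would mutate Py_None's refcount without the GIL.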
4 changes: 3 additions & 1 deletion cpp/arcticdb/entity/native_tensor.hpp
@@ -119,14 +119,16 @@ struct NativeTensor {
     [[nodiscard]] auto expanded_dim() const { return expanded_dim_; }
     template<typename T>
     const T *ptr_cast(size_t pos) const {
         util::check(ptr != nullptr, "Unexpected null ptr in NativeTensor");

[Collaborator comment] Why is this no longer needed?

         const bool dimension_condition = ndim() == 1;
-        const bool strides_condition = strides_[0] % elsize_ == 0;
+        const bool elsize_condition = elsize_ != 0;
+        const bool strides_condition = (elsize_condition) && (strides_[0] % elsize_ == 0);
         util::warn(dimension_condition, "Cannot safely ptr_cast matrices in NativeTensor");
+        util::warn(elsize_condition, "Cannot safely ptr_cast when elsize_ is zero in NativeTensor");
         util::warn(strides_condition,
                    "Cannot safely ptr_cast to type of size {} when strides ({}) is not a multiple of elsize ({}) in NativeTensor with dtype {}",
                    sizeof(T), strides_[0], elsize_, data_type());
 
         int64_t signed_pos = pos;
         if (dimension_condition && elsize_condition && strides_condition) {
             signed_pos *= strides_[0] / elsize_;
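
A side note on the reordered strides_condition: && short-circuits, so in the new form the modulo is never evaluated when elsize_ is zero, whereas the old form computed strides_[0] % elsize_ unconditionally, which is undefined behaviour for a zero divisor. A standalone illustration (names are illustrative, not from the codebase):

    #include <cstdint>

    // Mirrors the fixed check in ptr_cast: the right-hand side of && only
    // runs once elsize != 0 is established, so stride % elsize cannot
    // divide by zero.
    bool stride_is_multiple_of_elsize(std::int64_t stride, std::int64_t elsize) {
        const bool elsize_condition = elsize != 0;
        return elsize_condition && (stride % elsize == 0);
    }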
7 changes: 4 additions & 3 deletions cpp/arcticdb/pipeline/frame_utils.hpp
@@ -20,9 +20,10 @@
 #include <arcticdb/python/python_to_tensor_frame.hpp>
 #include <arcticdb/pipeline/string_pool_utils.hpp>
 #include <arcticdb/util/offset_string.hpp>
-#include <util/flatten_utils.hpp>
 #include <arcticdb/entity/timeseries_descriptor.hpp>
 #include <arcticdb/entity/type_utils.hpp>
+#include <arcticdb/util/flatten_utils.hpp>
+#include <arcticdb/util/gil_safe_py_none.hpp>
 
 namespace arcticdb {
 
@@ -135,7 +136,7 @@ std::optional<convert::StringEncodingError> aggregator_set_data(
         if (!c_style)
             ptr_data = flatten_tensor<PyObject*>(flattened_buffer, rows_to_write, tensor, slice_num, regular_slice_size);
 
-        auto none = py::none{};
+        auto none = GilSafePyNone::instance();
         std::variant<convert::StringEncodingError, convert::PyStringWrapper> wrapper_or_error;
         // GIL will be acquired if there is a string that is not pure ASCII/UTF-8
         // In this case a PyObject will be allocated by convert::py_unicode_to_buffer
@@ -147,7 +148,7 @@ std::optional<convert::StringEncodingError> aggregator_set_data(
         auto out_ptr = reinterpret_cast<entity::position_t*>(column.buffer().data());
         auto& string_pool = agg.segment().string_pool();
         for (size_t s = 0; s < rows_to_write; ++s, ++ptr_data) {
-            if (*ptr_data == none.ptr()) {
+            if (*ptr_data == none->ptr()) {
                 *out_ptr++ = not_a_string();
             } else if(is_py_nan(*ptr_data)){
                 *out_ptr++ = nan_placeholder();
7 changes: 3 additions & 4 deletions cpp/arcticdb/python/python_handlers.cpp
@@ -25,19 +25,18 @@ namespace arcticdb {
 
 
 static inline PyObject** fill_with_none(PyObject** ptr_dest, size_t count, SpinLock& spin_lock) {
-    auto none = py::none();
+    auto none = GilSafePyNone::instance();
     for(auto i = 0U; i < count; ++i)
-        *ptr_dest++ = none.ptr();
+        *ptr_dest++ = none->ptr();
 
     spin_lock.lock();
     for(auto i = 0U; i < count; ++i)
-        none.inc_ref();
+        Py_INCREF(none->ptr());
     spin_lock.unlock();
     return ptr_dest;
 }
 
 static inline PyObject** fill_with_none(ChunkedBuffer& buffer, size_t offset, size_t count, SpinLock& spin_lock) {
-    auto none = py::none();
     auto dest = buffer.ptr_cast<PyObject*>(offset, count * sizeof(PyObject*));
     return fill_with_none(dest, count, spin_lock);
 }
14 changes: 8 additions & 6 deletions cpp/arcticdb/python/python_module.cpp
@@ -30,6 +30,7 @@
 #include <arcticdb/python/python_handlers.hpp>
 #include <arcticdb/util/pybind_mutex.hpp>
 #include <arcticdb/util/storage_lock.hpp>
+#include <arcticdb/util/gil_safe_py_none.hpp>
 
 #include <pybind11/pybind11.h>
 #include <mongocxx/exception/logic_error.hpp>
@@ -213,26 +214,26 @@ void register_error_code_ecosystem(py::module& m, py::exception<arcticdb::Arctic
         try {
             if (p) std::rethrow_exception(p);
         } catch (const mongocxx::v_noabi::logic_error& e){
-            user_input_exception(e.what());
+            py::set_error(user_input_exception, e.what());
         } catch (const UserInputException& e){
-            user_input_exception(e.what());
+            py::set_error(user_input_exception, e.what());
         } catch (const arcticdb::InternalException& e){
-            internal_exception(e.what());
+            py::set_error(internal_exception, e.what());
         } catch (const LMDBMapFullException& e) {
             std::string msg = fmt::format("E5003: LMDB map is full. Close and reopen your LMDB backed Arctic instance with a "
                                           "larger map size. For example to open `/tmp/a/b/` with a map size of 5GB, "
                                           "use `Arctic(\"lmdb:///tmp/a/b?map_size=5GB\")`. Also see the "
                                           "[LMDB documentation](http://www.lmdb.tech/doc/group__mdb.html#gaa2506ec8dab3d969b0e609cd82e619e5). "
                                           "LMDB info: message=[{}]", e.what());
-            lmdb_map_full_exception(msg.c_str());
+            py::set_error(lmdb_map_full_exception, msg.c_str());
         } catch (const StorageException& e) {
-            storage_exception(e.what());
+            py::set_error(storage_exception, e.what());
         } catch (const py::stop_iteration &e){
             // let stop iteration bubble up, since this is how python implements iteration termination
             std::rethrow_exception(p);
         } catch (const std::exception &e) {
             std::string msg = fmt::format("{}({})", arcticdb::get_type_name(typeid(e)), e.what());
-            internal_exception(msg.c_str());
+            py::set_error(internal_exception, msg.c_str());
         }
     });
 
@@ -312,6 +313,7 @@ PYBIND11_MODULE(arcticdb_ext, m) {
     auto programName ="__arcticdb_logger__";
     google::InitGoogleLogging(programName);
     using namespace arcticdb;
+    GilSafePyNone::instance(); // Ensure that the GIL is held when the static py::none gets allocated
 #ifndef WIN32
     // No fork() in Windows, so no need to register the handler
     pthread_atfork(nullptr, nullptr, &SingleThreadMutexHolder::reset_mutex);
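
The translator changes above track a pybind11 API migration: newer pybind11 releases (2.12 and later) replaced the old idiom of raising by calling the registered py::exception object with the free function py::set_error. A minimal sketch of the pattern, with a hypothetical module and exception name:

    #include <pybind11/pybind11.h>
    #include <stdexcept>

    namespace py = pybind11;

    PYBIND11_MODULE(example_module, m) {
        // static: the exception object must outlive the translator lambda
        static py::exception<std::runtime_error> my_error(m, "MyError");
        py::register_exception_translator([](std::exception_ptr p) {
            try {
                if (p) std::rethrow_exception(p);
            } catch (const std::runtime_error& e) {
                // Old idiom, removed in newer pybind11: my_error(e.what());
                py::set_error(my_error, e.what());
            }
        });
    }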
4 changes: 2 additions & 2 deletions cpp/arcticdb/python/python_strings.cpp
@@ -93,10 +93,10 @@ void DynamicStringReducer::reduce(const Column& source_column,
 
 void DynamicStringReducer::finalize() {
     if (row_ != total_rows_) {
-        auto none = py::none{};
+        auto none = GilSafePyNone::instance();
         const auto diff = total_rows_ - row_;
         for (; row_ < total_rows_; ++row_, ++ptr_dest_) {
-            *ptr_dest_ = none.ptr();
+            *ptr_dest_ = none->ptr();
         }
 
         increment_none_refcount(diff, none);