Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to Cython 3.0.0 #5506

Merged
merged 11 commits into from
Aug 7, 2023
2 changes: 1 addition & 1 deletion ci/test_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ if [[ "$(arch)" == "aarch64" ]]; then
fi

# Always install latest dask for testing
python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.10
python -m pip install git+https://github.com/dask/dask.git@2023.7.1 git+https://github.com/dask/distributed.git@2023.7.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.10

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cuml*.whl)[test]
Expand Down
8 changes: 4 additions & 4 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ dependencies:
- cudf==23.10.*
- cupy>=12.0.0
- cxx-compiler
- cython>=0.29,<0.30
- dask-core>=2023.5.1
- cython>=3.0.0
- dask-core==2023.7.1
- dask-cuda==23.10.*
- dask-cudf==23.10.*
- dask-ml
- dask>=2023.5.1
- distributed>=2023.5.1
- dask==2023.7.1
- distributed==2023.7.1
- doxygen=1.8.20
- gcc_linux-64=11.*
- gmock>=1.13.0
Expand Down
8 changes: 4 additions & 4 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ dependencies:
- cudf==23.10.*
- cupy>=12.0.0
- cxx-compiler
- cython>=0.29,<0.30
- dask-core>=2023.5.1
- cython>=3.0.0
- dask-core==2023.7.1
- dask-cuda==23.10.*
- dask-cudf==23.10.*
- dask-ml
- dask>=2023.5.1
- distributed>=2023.5.1
- dask==2023.7.1
- distributed==2023.7.1
- doxygen=1.8.20
- gcc_linux-64=11.*
- gmock>=1.13.0
Expand Down
8 changes: 4 additions & 4 deletions conda/recipes/cuml/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ requirements:
- cuda-python ==12.0.0
{% endif %}
- cudf ={{ minor_version }}
- cython >=0.29,<0.30
- cython >=3.0.0
- libcuml ={{ version }}
- libcumlprims ={{ minor_version }}
- pylibraft ={{ minor_version }}
Expand All @@ -76,9 +76,9 @@ requirements:
- cudf ={{ minor_version }}
- cupy >=12.0.0
- dask-cudf ={{ minor_version }}
- dask >=2023.5.1
- dask-core>=2023.5.1
- distributed >=2023.5.1
- dask ==2023.7.1
- dask-core==2023.7.1
- distributed ==2023.7.1
- joblib >=0.11
- libcuml ={{ version }}
- libcumlprims ={{ minor_version }}
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/umap/knn_graph/algo.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ inline void launcher(const raft::handle_t& handle,
out.knn_indices,
out.knn_dists,
n_neighbors,
true,
true,
static_cast<std::vector<int64_t>*>(nullptr),
params->metric,
params->p);
}
Expand Down
10 changes: 6 additions & 4 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ dependencies:
- output_types: [conda, requirements, pyproject]
packages:
- scikit-build>=0.13.1
- cython>=0.29,<0.30
- cython>=3.0.0
- &treelite treelite==3.2.0
- pylibraft==23.10.*
- rmm==23.10.*
Expand All @@ -175,10 +175,10 @@ dependencies:
- output_types: [conda, requirements, pyproject]
packages:
- cudf==23.10.*
- dask>=2023.5.1
- dask==2023.7.1
- dask-cuda==23.10.*
- dask-cudf==23.10.*
- distributed>=2023.5.1
- distributed==2023.7.1
- joblib>=0.11
- numba>=0.57
# TODO: Is scipy really a hard dependency, or should
Expand All @@ -192,7 +192,7 @@ dependencies:
- cupy>=12.0.0
- output_types: conda
packages:
- dask-core>=2023.5.1
- dask-core==2023.7.1
- output_types: pyproject
packages:
- *treelite_runtime
Expand Down Expand Up @@ -360,9 +360,11 @@ dependencies:
common:
- output_types: [conda, requirements]
packages:
- dask-ml==2023.3.24
- jupyter
- matplotlib
- numpy
- pandas
- *scikit_learn
- seaborn

4 changes: 2 additions & 2 deletions python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ Packages required for multigpu algorithms*:
- ucx-py version matching the cuML version
- dask-cudf version matching the cuML version
- nccl>=2.5
- dask>=2023.5.1
- distributed>=2023.5.1
- dask==2023.7.1
- distributed==2023.7.1

* this can be avoided with `--singlegpu` argument flag.

Expand Down
5 changes: 4 additions & 1 deletion python/cuml/internals/base_return_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ def _get_base_return_type(class_name, attr):
# A NameError is raised if the return type is the same as the
# type being defined (which is incomplete). Check that here and
# return base if the name matches
if attr.__annotations__["return"] == class_name:
# Cython 3 changed to preferring types rather than strings for
# annotations. Strings end up wrapped in an extra layer of quotes,
# which we have to replace here.
if attr.__annotations__["return"].replace("'", "") == class_name:
return "base"
except Exception:
assert False, "Shouldn't get here"
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/internals/logger.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ cdef extern from "cuml/common/logger.hpp" namespace "ML" nogil:
Logger& get()
void setLevel(int level)
void setPattern(const string& pattern)
void setCallback(void(*callback)(int, char*))
void setFlush(void(*flush)())
void setCallback(void(*callback)(int, const char*) except *)
void setFlush(void(*flush)() except *)
bool shouldLogFor(int level) const
int getLevel() const
string getPattern() const
Expand Down
49 changes: 40 additions & 9 deletions python/cuml/manifold/simpl_set.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ from cuml.internals.safe_imports import gpu_only_import
cp = gpu_only_import('cupy')

from cuml.manifold.umap_utils cimport *
from cuml.manifold.umap_utils import GraphHolder, find_ab_params
from cuml.manifold.umap_utils import GraphHolder, find_ab_params, \
metric_parsing

from cuml.internals.input_utils import input_to_cuml_array
from cuml.internals.array import CumlArray
Expand Down Expand Up @@ -82,10 +83,17 @@ def fuzzy_simplicial_set(X,
structure to the detriment of the larger picture.
random_state: numpy RandomState or equivalent
A state capable of being used as a numpy random state.
metric: string or function (optional, default 'euclidean')
unused
metric_kwds: dict (optional, default {})
unused
metric: string (default='euclidean').
Distance metric to use. Supported distances are ['l1', 'cityblock',
'taxicab', 'manhattan', 'euclidean', 'l2', 'sqeuclidean', 'canberra',
'minkowski', 'chebyshev', 'linf', 'cosine', 'correlation', 'hellinger',
'hamming', 'jaccard']
Metrics that take arguments (such as minkowski) can have arguments
passed via the metric_kwds dictionary.
Note: The 'jaccard' distance metric is only supported for sparse
inputs.
metric_kwds: dict (optional, default=None)
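Arguments for the metric. The value of ``p`` is read from this
dictionary (2.0 is used when it is absent or metric_kwds is None).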
knn_indices: array of shape (n_samples, n_neighbors) (optional)
If the k-nearest neighbors of each point has already been calculated
you can pass them in here to save computation time. This should be
Expand Down Expand Up @@ -138,6 +146,14 @@ def fuzzy_simplicial_set(X,
umap_params.deterministic = <bool> deterministic
umap_params.set_op_mix_ratio = <float> set_op_mix_ratio
umap_params.local_connectivity = <float> local_connectivity
try:
umap_params.metric = metric_parsing[metric.lower()]
except KeyError:
raise ValueError(f"Invalid value for metric: {metric}")
if metric_kwds is None:
umap_params.p = <float> 2.0
else:
umap_params.p = <float> metric_kwds.get("p", 2.0)
umap_params.verbosity = <int> verbose

X_m, _, _, _ = \
Expand Down Expand Up @@ -245,10 +261,17 @@ def simplicial_set_embedding(
* A numpy array of initial embedding positions.
random_state: numpy RandomState or equivalent
A state capable of being used as a numpy random state.
metric: string or callable
unused
metric_kwds: dict
unused
metric: string (default='euclidean').
Distance metric to use. Supported distances are ['l1', 'cityblock',
'taxicab', 'manhattan', 'euclidean', 'l2', 'sqeuclidean', 'canberra',
'minkowski', 'chebyshev', 'linf', 'cosine', 'correlation', 'hellinger',
'hamming', 'jaccard']
Metrics that take arguments (such as minkowski) can have arguments
passed via the metric_kwds dictionary.
Note: The 'jaccard' distance metric is only supported for sparse
inputs.
metric_kwds: dict (optional, default=None)
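Arguments for the metric. The value of ``p`` is read from this
dictionary (2.0 is used when it is absent or metric_kwds is None).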
output_metric: function
Function returning the distance between two points in embedding space
and the gradient of the distance wrt the first argument.
Expand Down Expand Up @@ -306,6 +329,14 @@ def simplicial_set_embedding(
umap_params.init = <int> 0
umap_params.random_state = <int> random_state
umap_params.deterministic = <bool> deterministic
try:
umap_params.metric = metric_parsing[metric.lower()]
except KeyError:
raise ValueError(f"Invalid value for metric: {metric}")
if metric_kwds is None:
umap_params.p = <float> 2.0
else:
umap_params.p = <float> metric_kwds.get("p", 2.0)
if output_metric == 'euclidean':
umap_params.target_metric = MetricType.EUCLIDEAN
else: # output_metric == 'categorical'
Expand Down
53 changes: 20 additions & 33 deletions python/cuml/manifold/umap.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ cupyx = gpu_only_import('cupyx')
cuda = gpu_only_import('numba.cuda')

from cuml.manifold.umap_utils cimport *
from cuml.manifold.umap_utils import GraphHolder, find_ab_params
from cuml.manifold.umap_utils import GraphHolder, find_ab_params, \
metric_parsing, DENSE_SUPPORTED_METRICS, SPARSE_SUPPORTED_METRICS

from cuml.common.sparsefuncs import extract_knn_infos
from cuml.internals.safe_imports import gpu_only_import_from
Expand All @@ -47,7 +48,6 @@ from cuml.internals.array import CumlArray
from cuml.internals.array_sparse import SparseCumlArray
from cuml.internals.mixins import CMajorInputTagMixin
from cuml.common.sparse_utils import is_sparse
from cuml.metrics.distance_type cimport DistanceType

from cuml.manifold.simpl_set import fuzzy_simplicial_set # no-cython-lint
from cuml.manifold.simpl_set import simplicial_set_embedding # no-cython-lint
Expand Down Expand Up @@ -152,13 +152,17 @@ class UMAP(UniversalBase,
n_components: int (optional, default 2)
The dimension of the space to embed into. This defaults to 2 to
provide easy visualization, but can reasonably be set to any
metric : string (default='euclidean').
metric: string (default='euclidean').
Distance metric to use. Supported distances are ['l1', 'cityblock',
'taxicab', 'manhattan', 'euclidean', 'l2', 'sqeuclidean', 'canberra',
'minkowski', 'chebyshev', 'linf', 'cosine', 'correlation', 'hellinger',
'hamming', 'jaccard']
Metrics that take arguments (such as minkowski) can have arguments
passed via the metric_kwds dictionary.
Note: The 'jaccard' distance metric is only supported for sparse
inputs.
metric_kwds: dict (optional, default=None)
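Arguments for the metric. The value of ``p`` is read from this
dictionary (2.0 is used when it is absent or metric_kwds is None).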
n_epochs: int (optional, default None)
The number of training epochs to be used in optimizing the
low dimensional embedding. Larger values result in more accurate
Expand Down Expand Up @@ -419,7 +423,7 @@ class UMAP(UniversalBase,
raise ValueError("min_dist should be <= spread")

@staticmethod
def _build_umap_params(cls):
def _build_umap_params(cls, sparse):
cdef UMAPParams* umap_params = new UMAPParams()
umap_params.n_neighbors = <int> cls.n_neighbors
umap_params.n_components = <int> cls.n_components
Expand Down Expand Up @@ -448,37 +452,20 @@ class UMAP(UniversalBase,
umap_params.random_state = <uint64_t> cls.random_state
umap_params.deterministic = <bool> cls.deterministic

# metric
metric_parsing = {
"l2": DistanceType.L2SqrtUnexpanded,
"euclidean": DistanceType.L2SqrtUnexpanded,
"sqeuclidean": DistanceType.L2Unexpanded,
"cityblock": DistanceType.L1,
"l1": DistanceType.L1,
"manhattan": DistanceType.L1,
"taxicab": DistanceType.L1,
"minkowski": DistanceType.LpUnexpanded,
"chebyshev": DistanceType.Linf,
"linf": DistanceType.Linf,
"cosine": DistanceType.CosineExpanded,
"correlation": DistanceType.CorrelationExpanded,
"hellinger": DistanceType.HellingerExpanded,
"hamming": DistanceType.HammingUnexpanded,
"jaccard": DistanceType.JaccardExpanded,
"canberra": DistanceType.Canberra
}

if cls.metric.lower() in metric_parsing:
try:
umap_params.metric = metric_parsing[cls.metric.lower()]
else:
raise ValueError("Invalid value for metric: {}"
.format(cls.metric))

if sparse:
if umap_params.metric not in SPARSE_SUPPORTED_METRICS:
raise NotImplementedError(f"Metric '{cls.metric}' not supported for sparse inputs.")
elif umap_params.metric not in DENSE_SUPPORTED_METRICS:
raise NotImplementedError(f"Metric '{cls.metric}' not supported for dense inputs.")

except KeyError:
raise ValueError(f"Invalid value for metric: {cls.metric}")
if cls.metric_kwds is None:
umap_params.p = <float> 2.0
else:
umap_params.p = <float>cls.metric_kwds.get('p')

umap_params.p = <float> cls.metric_kwds.get("p", 2.0)
cdef uintptr_t callback_ptr = 0
if cls.callback:
callback_ptr = cls.callback.get_native_callback()
Expand Down Expand Up @@ -576,7 +563,7 @@ class UMAP(UniversalBase,
cdef uintptr_t embed_raw = self.embedding_.ptr

cdef UMAPParams* umap_params = \
<UMAPParams*> <size_t> UMAP._build_umap_params(self)
<UMAPParams*> <size_t> UMAP._build_umap_params(self, self.sparse_fit)

cdef uintptr_t y_raw = 0

Expand Down Expand Up @@ -742,7 +729,7 @@ class UMAP(UniversalBase,
cdef uintptr_t embed_ptr = self.embedding_.ptr

cdef UMAPParams* umap_params = \
<UMAPParams*> <size_t> UMAP._build_umap_params(self)
<UMAPParams*> <size_t> UMAP._build_umap_params(self, self.sparse_fit)

if self.sparse_fit:
transform_sparse(handle_[0],
Expand Down
Loading