Commit 5b4606f
Fixed bug caused by default promotion of float32 to float64 by numpy when performing non-in-place multiplication.
jlparkI committed Jul 11, 2024
1 parent 2fe7a89 commit 5b4606f
Showing 7 changed files with 27 additions and 21 deletions.
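
Before the per-file diffs, a minimal sketch (not part of the commit) of the behavior the commit message describes. It assumes NumPy >= 2.0 promotion rules (NEP 50) and uses illustrative names; the scale factor stands in for a hyperparameter stored as float64, as in the kernel code below. Out-of-place multiplication silently promotes the float32 array, while in-place multiplication writes into the existing buffer:

import numpy as np

x = np.ones((2, 3), dtype=np.float32)
scale = np.float64(0.5)      # stand-in for a hyperparameter stored as float64

# Out-of-place: under NEP 50 the float64 scalar wins, so the result
# is promoted to float64 and downstream float32 code paths break.
promoted = x * scale
print(promoted.dtype)        # float64

# In-place: the output buffer is x itself, so the dtype stays float32.
x *= scale
print(x.dtype)               # float32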
Binary file removed .pyproject.toml.swp
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -26,14 +26,15 @@ if (NOT SKBUILD)
endif()



check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
    message(STATUS "CUDA is OK")
    include_directories(${CUDA_INCLUDE_DIRS})
    set(CMAKE_CUDA_STANDARD 17)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
    add_compile_definitions(USE_CUDA)

    # set(CMAKE_CUDA_ARCHITECTURES 52)
    project(${SKBUILD_PROJECT_NAME} LANGUAGES CXX CUDA)
else()
    message(STATUS "No CUDA")
6 changes: 2 additions & 4 deletions pyproject.toml
@@ -1,6 +1,5 @@
 [build-system]
-requires = ["scikit-build-core >=0.4.3", "scipy", "numpy",
-    "scikit-learn"]
+requires = ["scikit-build-core >=0.4.3", "scipy", "numpy", "scikit-learn"]
 build-backend = "scikit_build_core.build"

 [project]
@@ -42,7 +41,6 @@ cmake.version = ">=3.18"
 # set to false, so that even if build-type is set to debug,
 # the symbols will be stripped.
 #cmake.build-type = "Debug"
-#$install.strip = false
-
+#install.strip = false

 cmake.build-type = "Release"
9 changes: 5 additions & 4 deletions xGPR/kernels/basic_kernels/rbf_linear.py
@@ -158,20 +158,21 @@ class of kernels and is therefore ignored.
         Returns:
             xtrans: A cupy or numpy array containing the generated features.
         """
-        xtrans = input_x * self.hyperparams[1]
+        xcopy = input_x.copy()
+        input_x *= self.hyperparams[1]
         if self.device == "cpu":
             output_x = np.zeros((input_x.shape[0], self.num_rffs), np.float64)
             rf_features = np.zeros((input_x.shape[0], self.internal_rffs), np.float64)
-            cpuRBFFeatureGen(xtrans, rf_features, self.radem_diag, self.chi_arr,
+            cpuRBFFeatureGen(input_x, rf_features, self.radem_diag, self.chi_arr,
                     self.num_threads, self.fit_intercept, self.simplex_rffs)
         else:
             output_x = cp.zeros((input_x.shape[0], self.num_rffs), cp.float64)
             rf_features = cp.zeros((input_x.shape[0], self.internal_rffs), cp.float64)
-            cudaRBFFeatureGen(xtrans, rf_features, self.radem_diag, self.chi_arr,
+            cudaRBFFeatureGen(input_x, rf_features, self.radem_diag, self.chi_arr,
                     self.fit_intercept, self.simplex_rffs)

         output_x[:,:self.internal_rffs] = rf_features
-        output_x[:,self.internal_rffs:] = input_x
+        output_x[:,self.internal_rffs:] = xcopy
         return output_x
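
This kernel is the one place where the in-place rewrite needs extra care: the tail of output_x holds the raw input as linear features, so the unscaled values must be copied off before input_x is scaled in place. A toy reduction of that pattern, with hypothetical names standing in for cpuRBFFeatureGen and the real feature map:

import numpy as np

def scale_then_concat(x, scale, num_rffs):
    """Toy version: random-feature block plus a linear block of raw inputs."""
    x_unscaled = x.copy()              # save raw values before mutating x
    x *= scale                         # in-place: x keeps its float32 dtype
    out = np.zeros((x.shape[0], num_rffs + x.shape[1]), np.float64)
    # Stand-in for the random-feature generation on the scaled input.
    out[:, :num_rffs] = np.cos(x @ np.ones((x.shape[1], num_rffs)))
    # The linear block must see the unscaled input, hence the copy.
    out[:, num_rffs:] = x_unscaled
    return out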
10 changes: 5 additions & 5 deletions xGPR/kernels/basic_kernels/sorf_kernel_baseclass.py
@@ -131,14 +131,14 @@ class of kernels and is therefore ignored.
         Returns:
             xtrans: A cupy or numpy array containing the generated features.
         """
-        xtrans = input_x * self.hyperparams[1]
+        input_x *= self.hyperparams[1]
         if self.device == "cpu":
-            output_x = np.zeros((xtrans.shape[0], self.num_rffs), np.float64)
-            cpuRBFFeatureGen(xtrans, output_x, self.radem_diag, self.chi_arr,
+            output_x = np.zeros((input_x.shape[0], self.num_rffs), np.float64)
+            cpuRBFFeatureGen(input_x, output_x, self.radem_diag, self.chi_arr,
                     self.num_threads, self.fit_intercept, self.simplex_rffs)
         else:
-            output_x = cp.zeros((xtrans.shape[0], self.num_rffs), cp.float64)
-            cudaRBFFeatureGen(xtrans, output_x, self.radem_diag, self.chi_arr,
+            output_x = cp.zeros((input_x.shape[0], self.num_rffs), cp.float64)
+            cudaRBFFeatureGen(input_x, output_x, self.radem_diag, self.chi_arr,
                     self.fit_intercept, self.simplex_rffs)
         return output_x

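Note that the in-place form mutates its argument through the shared buffer, which is safe here only because transform_x and gradient_x (see kernel_baseclass.py below) always hand the kernel a fresh copy. A toy illustration of the aliasing hazard, with hypothetical names:

import numpy as np

def scale_in_place(x, scale):
    x *= scale            # writes through to the caller's array
    return x

a = np.ones(3, dtype=np.float32)
scale_in_place(a, np.float32(2.0))
print(a)                  # [2. 2. 2.]: the caller's data changed too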
6 changes: 3 additions & 3 deletions xGPR/kernels/convolution_kernels/conv_kernel_baseclass.py
@@ -152,16 +152,16 @@ def kernel_specific_transform(self, input_x, sequence_length):
         if input_x.shape[2] != self._xdim[2]:
             raise RuntimeError("Unexpected input shape supplied.")

-        x_in = input_x * self.hyperparams[1]
+        input_x *= self.hyperparams[1]

         if self.device == "cpu":
             xtrans = np.zeros((input_x.shape[0], self.num_rffs), np.float64)
-            cpuConv1dFGen(x_in, xtrans, self.radem_diag, self.chi_arr,
+            cpuConv1dFGen(input_x, xtrans, self.radem_diag, self.chi_arr,
                     sequence_length, self.conv_width, self.scaling_type,
                     self.num_threads, self.simplex_rffs)
         else:
             xtrans = cp.zeros((input_x.shape[0], self.num_rffs), cp.float64)
-            cudaConv1dFGen(x_in, xtrans, self.radem_diag, self.chi_arr,
+            cudaConv1dFGen(input_x, xtrans, self.radem_diag, self.chi_arr,
                     sequence_length, self.conv_width, self.scaling_type,
                     self.simplex_rffs)
14 changes: 10 additions & 4 deletions xGPR/kernels/kernel_baseclass.py
@@ -282,10 +282,13 @@ def transform_x(self, input_x, sequence_length = None):
             which is none for most kernels but must be specified
             for convolution kernels), generate random features
             as output."""
+        # This always generates a copy, which means that we
+        # are never working on the input data, only on a copy,
+        # and can therefore modify it with impunity.
         if self.double_precision:
-            xin = input_x.astype(np.float64)
+            xin = input_x.astype(np.float64, copy=True)
         else:
-            xin = input_x.astype(np.float32)
+            xin = input_x.astype(np.float32, copy=True)

         if self.device == "cuda":
             xin = cp.asarray(xin)
@@ -321,10 +324,13 @@ def gradient_x(self, input_x, sequence_length = None):
             which is none for most kernels but must be specified
             for convolution kernels), generate random features
             and gradient as output."""
+        # This always generates a copy, which means that we
+        # are never working on the input data, only on a copy,
+        # and can therefore modify it with impunity.
         if self.double_precision:
-            xin = input_x.astype(np.float64)
+            xin = input_x.astype(np.float64, copy=True)
         else:
-            xin = input_x.astype(np.float32)
+            xin = input_x.astype(np.float32, copy=True)

         if self.device == "cuda":
             xin = cp.asarray(xin)
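For reference, copy=True is already the default for ndarray.astype, so the explicit flag here documents intent rather than changing behavior; the case it guards against is copy=False with a matching dtype, where astype can return the input array itself. A short sketch with illustrative names:

import numpy as np

x32 = np.ones(3, dtype=np.float32)

alias = x32.astype(np.float32, copy=False)  # dtype already matches: no copy
fresh = x32.astype(np.float32, copy=True)   # guaranteed independent buffer

print(alias is x32)   # True: in-place scaling here would corrupt the input
print(fresh is x32)   # False: safe to pass to kernels that mutate in place
fresh *= 2.0
print(x32[0])         # 1.0: the caller's data is untouched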
