Commit 5b4606f
Fixed bug caused by default promotion of float32 to float64 by numpy when performing non-in-place multiplication.
jlparkI committed Jul 11, 2024
1 parent 2fe7a89 commit 5b4606f
Showing 7 changed files with 27 additions and 21 deletions.
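
Before the per-file diffs, a minimal sketch (not part of the commit) of the behavior the commit message describes. It assumes NumPy >= 2.0 promotion rules (NEP 50) and uses illustrative names; the scale factor stands in for a hyperparameter stored as float64, as in the kernel code below. Out-of-place multiplication silently promotes the float32 array, while in-place multiplication writes into the existing buffer:

import numpy as np

x = np.ones((2, 3), dtype=np.float32)
scale = np.float64(0.5)      # stand-in for a hyperparameter stored as float64

# Out-of-place: under NEP 50 the float64 scalar wins, so the result
# is promoted to float64 and downstream float32 code paths break.
promoted = x * scale
print(promoted.dtype)        # float64

# In-place: the output buffer is x itself, so the dtype stays float32.
x *= scale
print(x.dtype)               # float32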
Binary file removed .pyproject.toml.swp
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -26,14 +26,15 @@ if (NOT SKBUILD)
endif()



check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
    message(STATUS "CUDA is OK")
    include_directories(${CUDA_INCLUDE_DIRS})
    set(CMAKE_CUDA_STANDARD 17)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
    add_compile_definitions(USE_CUDA)

    # set(CMAKE_CUDA_ARCHITECTURES 52)
    project(${SKBUILD_PROJECT_NAME} LANGUAGES CXX CUDA)
else()
    message(STATUS "No CUDA")
6 changes: 2 additions & 4 deletions pyproject.toml
@@ -1,6 +1,5 @@
 [build-system]
-requires = ["scikit-build-core >=0.4.3", "scipy", "numpy",
-    "scikit-learn"]
+requires = ["scikit-build-core >=0.4.3", "scipy", "numpy", "scikit-learn"]
 build-backend = "scikit_build_core.build"

 [project]
@@ -42,7 +41,6 @@ cmake.version = ">=3.18"
 # set to false, so that even if build-type is set to debug,
 # the symbols will be stripped.
 #cmake.build-type = "Debug"
-#$install.strip = false
-
+#install.strip = false

 cmake.build-type = "Release"
9 changes: 5 additions & 4 deletions xGPR/kernels/basic_kernels/rbf_linear.py
@@ -158,20 +158,21 @@ class of kernels and is therefore ignored.
         Returns:
             xtrans: A cupy or numpy array containing the generated features.
         """
-        xtrans = input_x * self.hyperparams[1]
+        xcopy = input_x.copy()
+        input_x *= self.hyperparams[1]
         if self.device == "cpu":
             output_x = np.zeros((input_x.shape[0], self.num_rffs), np.float64)
             rf_features = np.zeros((input_x.shape[0], self.internal_rffs), np.float64)
-            cpuRBFFeatureGen(xtrans, rf_features, self.radem_diag, self.chi_arr,
+            cpuRBFFeatureGen(input_x, rf_features, self.radem_diag, self.chi_arr,
                     self.num_threads, self.fit_intercept, self.simplex_rffs)
         else:
             output_x = cp.zeros((input_x.shape[0], self.num_rffs), cp.float64)
             rf_features = cp.zeros((input_x.shape[0], self.internal_rffs), cp.float64)
-            cudaRBFFeatureGen(xtrans, rf_features, self.radem_diag, self.chi_arr,
+            cudaRBFFeatureGen(input_x, rf_features, self.radem_diag, self.chi_arr,
                     self.fit_intercept, self.simplex_rffs)

         output_x[:,:self.internal_rffs] = rf_features
-        output_x[:,self.internal_rffs:] = input_x
+        output_x[:,self.internal_rffs:] = xcopy
         return output_x
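
This kernel is the one place where the in-place rewrite needs extra care: the tail of output_x holds the raw input as linear features, so the unscaled values must be copied off before input_x is scaled in place. A toy reduction of that pattern, with hypothetical names standing in for cpuRBFFeatureGen and the real feature map:

import numpy as np

def scale_then_concat(x, scale, num_rffs):
    """Toy version: random-feature block plus a linear block of raw inputs."""
    x_unscaled = x.copy()              # save raw values before mutating x
    x *= scale                         # in-place: x keeps its float32 dtype
    out = np.zeros((x.shape[0], num_rffs + x.shape[1]), np.float64)
    # Stand-in for the random-feature generation on the scaled input.
    out[:, :num_rffs] = np.cos(x @ np.ones((x.shape[1], num_rffs)))
    # The linear block must see the unscaled input, hence the copy.
    out[:, num_rffs:] = x_unscaled
    return out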
10 changes: 5 additions & 5 deletions xGPR/kernels/basic_kernels/sorf_kernel_baseclass.py
@@ -131,14 +131,14 @@ class of kernels and is therefore ignored.
         Returns:
             xtrans: A cupy or numpy array containing the generated features.
         """
-        xtrans = input_x * self.hyperparams[1]
+        input_x *= self.hyperparams[1]
         if self.device == "cpu":
-            output_x = np.zeros((xtrans.shape[0], self.num_rffs), np.float64)
-            cpuRBFFeatureGen(xtrans, output_x, self.radem_diag, self.chi_arr,
+            output_x = np.zeros((input_x.shape[0], self.num_rffs), np.float64)
+            cpuRBFFeatureGen(input_x, output_x, self.radem_diag, self.chi_arr,
                     self.num_threads, self.fit_intercept, self.simplex_rffs)
         else:
-            output_x = cp.zeros((xtrans.shape[0], self.num_rffs), cp.float64)
-            cudaRBFFeatureGen(xtrans, output_x, self.radem_diag, self.chi_arr,
+            output_x = cp.zeros((input_x.shape[0], self.num_rffs), cp.float64)
+            cudaRBFFeatureGen(input_x, output_x, self.radem_diag, self.chi_arr,
                     self.fit_intercept, self.simplex_rffs)
         return output_x

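Note that the in-place form mutates its argument through the shared buffer, which is safe here only because transform_x and gradient_x (see kernel_baseclass.py below) always hand the kernel a fresh copy. A toy illustration of the aliasing hazard, with hypothetical names:

import numpy as np

def scale_in_place(x, scale):
    x *= scale            # writes through to the caller's array
    return x

a = np.ones(3, dtype=np.float32)
scale_in_place(a, np.float32(2.0))
print(a)                  # [2. 2. 2.]: the caller's data changed too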
6 changes: 3 additions & 3 deletions xGPR/kernels/convolution_kernels/conv_kernel_baseclass.py
@@ -152,16 +152,16 @@ def kernel_specific_transform(self, input_x, sequence_length):
         if input_x.shape[2] != self._xdim[2]:
             raise RuntimeError("Unexpected input shape supplied.")

-        x_in = input_x * self.hyperparams[1]
+        input_x *= self.hyperparams[1]

         if self.device == "cpu":
             xtrans = np.zeros((input_x.shape[0], self.num_rffs), np.float64)
-            cpuConv1dFGen(x_in, xtrans, self.radem_diag, self.chi_arr,
+            cpuConv1dFGen(input_x, xtrans, self.radem_diag, self.chi_arr,
                     sequence_length, self.conv_width, self.scaling_type,
                     self.num_threads, self.simplex_rffs)
         else:
             xtrans = cp.zeros((input_x.shape[0], self.num_rffs), cp.float64)
-            cudaConv1dFGen(x_in, xtrans, self.radem_diag, self.chi_arr,
+            cudaConv1dFGen(input_x, xtrans, self.radem_diag, self.chi_arr,
                     sequence_length, self.conv_width, self.scaling_type,
                     self.simplex_rffs)
14 changes: 10 additions & 4 deletions xGPR/kernels/kernel_baseclass.py
@@ -282,10 +282,13 @@ def transform_x(self, input_x, sequence_length = None):
             which is none for most kernels but must be specified
             for convolution kernels), generate random features
             as output."""
+        # This always generates a copy, which means that we
+        # are never working on the input data, only on a copy,
+        # and can therefore modify it with impunity.
         if self.double_precision:
-            xin = input_x.astype(np.float64)
+            xin = input_x.astype(np.float64, copy=True)
         else:
-            xin = input_x.astype(np.float32)
+            xin = input_x.astype(np.float32, copy=True)

         if self.device == "cuda":
             xin = cp.asarray(xin)
@@ -321,10 +324,13 @@ def gradient_x(self, input_x, sequence_length = None):
             which is none for most kernels but must be specified
             for convolution kernels), generate random features
             and gradient as output."""
+        # This always generates a copy, which means that we
+        # are never working on the input data, only on a copy,
+        # and can therefore modify it with impunity.
         if self.double_precision:
-            xin = input_x.astype(np.float64)
+            xin = input_x.astype(np.float64, copy=True)
         else:
-            xin = input_x.astype(np.float32)
+            xin = input_x.astype(np.float32, copy=True)

         if self.device == "cuda":
             xin = cp.asarray(xin)
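For reference, copy=True is already the default for ndarray.astype, so the explicit flag here documents intent rather than changing behavior; the case it guards against is copy=False with a matching dtype, where astype can return the input array itself. A short sketch with illustrative names:

import numpy as np

x32 = np.ones(3, dtype=np.float32)

alias = x32.astype(np.float32, copy=False)  # dtype already matches: no copy
fresh = x32.astype(np.float32, copy=True)   # guaranteed independent buffer

print(alias is x32)   # True: in-place scaling here would corrupt the input
print(fresh is x32)   # False: safe to pass to kernels that mutate in place
fresh *= 2.0
print(x32[0])         # 1.0: the caller's data is untouched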
