LinearSVM using QN solvers #4268

Merged
merged 55 commits
Nov 17, 2021
Commits
a7e9756
[WIP] Linear SVM via QN solver
achirkin Sep 30, 2021
f9d0b51
Implemented SVRXLoss gradient kernels
achirkin Sep 30, 2021
7454ac5
Merge branch 'branch-21.12' into fea-linear-svm
achirkin Oct 4, 2021
e2a29d3
Allow a stateful functor struct in a __device__ loss function
achirkin Oct 5, 2021
f99cdf5
Minimal SVR/SVC wrappers
achirkin Oct 5, 2021
30c244a
Proper pickling and param constraints for SVC/SVR
achirkin Oct 6, 2021
3589cbc
Refactor LinearSVM to support the standard ML hierarchy api
achirkin Oct 12, 2021
3b09274
[wip] added probabilistic and multiclass (ovr) fit/predict
achirkin Oct 14, 2021
26d9141
Fixed multiclass support bugs - seems to be working correctly now.
achirkin Oct 18, 2021
b6a5bb0
Probabilistic calibration - almost done.
achirkin Oct 19, 2021
1c0cdc6
WIP refactor the lifetime of C++-side model and arrays
achirkin Oct 23, 2021
b699e47
Merge branch 'branch-21.12' into fea-linear-svm
achirkin Oct 23, 2021
922be91
Remove printfs
achirkin Oct 23, 2021
46014d2
Make sure the estimator behaves well under pickling/cloning
achirkin Oct 23, 2021
e6ad562
Disable probability scale for now
achirkin Oct 23, 2021
06a8a77
Some cleanup and documentation
achirkin Oct 23, 2021
04b3826
Split svm/linear into three files
achirkin Oct 25, 2021
2bdba98
Remove unnecessary default in LinearSVC
achirkin Oct 25, 2021
7453d18
Remove an unnecessary step in the loss function
achirkin Nov 1, 2021
298e087
Initialize QN solver with zeros, because more complex preconditioners…
achirkin Nov 1, 2021
2fd4f6c
Remove unnecessary *args
achirkin Nov 1, 2021
2d7aa08
Add LinearSVM tests
achirkin Nov 1, 2021
01cf01c
Introduce an enum to represent QN loss types
achirkin Nov 1, 2021
7ee2c26
Add a comment on glm_base complications
achirkin Nov 1, 2021
15bf1d5
Add argument comments
achirkin Nov 2, 2021
8ec9c83
Add python tests
achirkin Nov 3, 2021
8dd4225
Make base doctests happy with LinearSVM
achirkin Nov 4, 2021
0052419
Add comments regarding the location of default parameter values
achirkin Nov 4, 2021
da40a82
Try to reduce the test time
achirkin Nov 4, 2021
36a747e
Merge branch 'branch-21.12' into fea-linear-svm
achirkin Nov 5, 2021
6793c0e
Use raft helpers in place of raw cuda
achirkin Nov 5, 2021
0e2bcd2
Add some documentation to the C++ code
achirkin Nov 5, 2021
bf36b0f
Test >32 classes
achirkin Nov 5, 2021
ec4d3dc
Run classification training in parallel in OVR setting
achirkin Nov 5, 2021
aef1e7a
Add proper support of probabilistic output
achirkin Nov 6, 2021
b5cab0c
Refactor for the stateless api
achirkin Nov 7, 2021
f161e92
Merge branch 'branch-21.12' into fea-linear-svm
achirkin Nov 7, 2021
92af99e
Remove verbose output from a test
achirkin Nov 7, 2021
bf82e0b
Use OpenMP instead of C++ threads
achirkin Nov 7, 2021
7806c2a
Update glm_svm.cuh
achirkin Nov 7, 2021
41a91e0
Update linear.pyx
achirkin Nov 7, 2021
e250e1e
Use size_t for input dimensions
achirkin Nov 10, 2021
40d02c9
Force glm/QN loss functions to be functors
achirkin Nov 10, 2021
49a8e27
Merge branch 'branch-21.12' into fea-linear-svm
achirkin Nov 15, 2021
53ecbad
Address more cpp comments
achirkin Nov 15, 2021
4b8a5d2
Address more python comments
achirkin Nov 15, 2021
8c8fc0c
Adapt to changes in raft
achirkin Nov 15, 2021
43f4e53
Even more docs and some test fixes
achirkin Nov 15, 2021
8bbfe89
A few more cosmetic changes.
achirkin Nov 16, 2021
3735548
Handling of multi_class and multiclass-strategy
achirkin Nov 16, 2021
3ba8a8f
Terminate sklearn if it takes too long to run in the tests
achirkin Nov 16, 2021
f2d6e65
Log time and force stream sync in tests
achirkin Nov 17, 2021
41b993b
Manage streams and openmp
achirkin Nov 17, 2021
6c34a02
Fix sphinx docs
achirkin Nov 17, 2021
ea0606c
Run sklearn in the current process if process forking is not available.
achirkin Nov 17, 2021
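
Several of the commits above (ec4d3dc, bf82e0b, 41b993b) deal with fitting the one-vs-rest (OVR) binary sub-problems concurrently, one CUDA stream per OpenMP worker. The PR's actual implementation lives in the C++ sources below; the following is only a rough, hypothetical sketch of that pattern, with fit_binary_classifier standing in as a placeholder for one per-class QN solve:

#include <algorithm>
#include <cstddef>
#include <vector>
#include <cuda_runtime.h>
#include <omp.h>

// Placeholder for one class-vs-rest binary solve on the given stream (not the PR's API).
void fit_binary_classifier(std::size_t classIdx, cudaStream_t stream)
{
  (void)classIdx;
  (void)stream;  // a single QN fit for class `classIdx` would be launched on `stream` here
}

void fit_ovr(std::size_t nClasses, int maxWorkers)
{
  // One stream per worker, so independent per-class solves can overlap on the GPU.
  int nWorkers = std::min<int>(maxWorkers, static_cast<int>(nClasses));
  std::vector<cudaStream_t> streams(nWorkers);
  for (auto& s : streams)
    cudaStreamCreate(&s);

#pragma omp parallel for num_threads(nWorkers)
  for (std::ptrdiff_t i = 0; i < static_cast<std::ptrdiff_t>(nClasses); i++) {
    cudaStream_t s = streams[omp_get_thread_num()];
    fit_binary_classifier(static_cast<std::size_t>(i), s);
    cudaStreamSynchronize(s);  // each worker finishes its class before taking the next one
  }

  for (auto& s : streams)
    cudaStreamDestroy(s);
}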
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
@@ -309,6 +309,7 @@ if(BUILD_CUML_CPP_LIBRARY)
src/spectral/spectral.cu
src/svm/svc.cu
src/svm/svr.cu
src/svm/linear.cu
src/svm/ws_util.cu
src/tsa/auto_arima.cu
src/tsa/stationarity.cu
188 changes: 188 additions & 0 deletions cpp/include/cuml/svm/linear.hpp
@@ -0,0 +1,188 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/handle.hpp>

namespace ML {
namespace SVM {

struct LinearSVMParams {
/** The regularization term. */
enum Penalty {
/** Abs. value of the weights: `sum |w|` */
L1,
/** Squared value of the weights: `sum w^2` */
L2
};
/** The loss function. */
enum Loss {
/** `max(1 - y_i x_i w, 0)` */
HINGE,
/** `max(1 - y_i x_i w, 0)^2` */
SQUARED_HINGE,
/** `max(|y_i - x_i w| - epsilon, 0)` */
EPSILON_INSENSITIVE,
/** `max(|y_i - x_i w| - epsilon, 0)^2` */
SQUARED_EPSILON_INSENSITIVE
};

/** The regularization term. */
Penalty penalty = L2;
/** The loss function. */
Loss loss = HINGE;
/** Whether to fit the bias term. */
bool fit_intercept = true;
/** When true, the bias term is treated the same way as other data features.
* Enabling this feature forces an extra copy of the input data X.
*/
bool penalized_intercept = false;
/** Whether to estimate probabilities using Platt scaling (applicable to SVC). */
bool probability = false;
/** Maximum number of iterations for the underlying QN solver. */
int max_iter = 1000;
/**
* Maximum number of linesearch (inner loop) iterations for the underlying QN solver.
*/
int linesearch_max_iter = 100;
/**
* Number of vectors approximating the hessian for the underlying QN solver (l-bfgs).
*/
int lbfgs_memory = 5;
/** Triggers extra output when greater than zero. */
int verbose = 0;
/**
* The constant scaling factor of the main term in the loss function.
* (You can also think of it as the inverse scaling factor of the penalty term.)
*/
double C = 1.0;
/** The threshold on the gradient for the underlying QN solver. */
double grad_tol = 0.0001;
/** The threshold on the function change for the underlying QN solver. */
double change_tol = 0.00001;
/** The epsilon-sensitivity parameter (applicable to the SVM-regression (SVR) loss functions). */
double epsilon = 0.0;
};

template <typename T>
struct LinearSVMModel {
/**
* C-style (row-major) matrix of coefficients of size `(coefRows, coefCols)`
* where
* coefRows = nCols + (params.fit_intercept ? 1 : 0)
* coefCols = nClasses == 2 ? 1 : nClasses
*/
T* w;
/** Sorted, unique values of input array `y`. */
T* classes = nullptr;
/**
* C-style (row-major) matrix of the probabilistic model calibration coefficients.
* It's empty if `LinearSVMParams.probability == false`.
* Otherwise, its size is `(2, coefCols)`
* where
* coefCols = nClasses == 2 ? 1 : nClasses
*/
T* probScale = nullptr;
/** Number of classes (not applicable for regression). */
std::size_t nClasses = 0;
/** Number of rows of `w`, which is the number of data features plus maybe bias. */
std::size_t coefRows;

/** It's 1 for binary classification or regression; nClasses for multiclass. */
inline std::size_t coefCols() const { return nClasses <= 2 ? 1 : nClasses; }

/**
* @brief Allocate and fit the LinearSVM model.
*
* @param [in] handle the cuML handle.
* @param [in] params the model parameters.
* @param [in] X the input data matrix of size (nRows, nCols) in column-major format.
* @param [in] nRows the number of input samples.
* @param [in] nCols the number of feature dimensions.
* @param [in] y the target - a single vector of either real (regression) or
* categorical (classification) values (nRows, ).
* @param [in] sampleWeight the non-negative weights for the training sample (nRows, ).
* @return the trained model (don't forget to call `free` on it after use).
*/
static LinearSVMModel<T> fit(const raft::handle_t& handle,
const LinearSVMParams& params,
const T* X,
const std::size_t nRows,
const std::size_t nCols,
const T* y,
const T* sampleWeight);

/**
* @brief Explicitly allocate the data for the model without training it.
*
* @param [in] handle the cuML handle.
* @param [in] params the model parameters.
* @param [in] nCols the number of feature dimensions.
* @param [in] nClasses the number of classes in the dataset (not applicable for regression).
* @return the allocated (untrained) model (don't forget to call `free` on it after use).
*/
static LinearSVMModel<T> allocate(const raft::handle_t& handle,
const LinearSVMParams& params,
const std::size_t nCols,
const std::size_t nClasses = 0);

/** @brief Free the allocated memory. The model is not usable after the call of this method. */
static void free(const raft::handle_t& handle, LinearSVMModel<T>& model);

/**
* @brief Predict using the trained LinearSVM model.
*
* @param [in] handle the cuML handle.
* @param [in] params the model parameters.
* @param [in] model the trained model.
* @param [in] X the input data matrix of size (nRows, nCols) in column-major format.
* @param [in] nRows the number of input samples.
* @param [in] nCols the number of feature dimensions.
* @param [out] out the predictions (nRows, ).
*/
static void predict(const raft::handle_t& handle,
const LinearSVMParams& params,
const LinearSVMModel<T>& model,
const T* X,
const std::size_t nRows,
const std::size_t nCols,
T* out);

/**
* @brief For SVC, predict the probabilities for each outcome.
*
* @param [in] handle the cuML handle.
* @param [in] params the model parameters.
* @param [in] model the trained model.
* @param [in] X the input data matrix of size (nRows, nCols) in column-major format.
* @param [in] nRows the number of input samples.
* @param [in] nCols the number of feature dimensions.
* @param [in] log whether to output log-probabilities instead of probabilities.
* @param [out] out the estimated probabilities (nRows, nClasses) in row-major format.
*/
static void predictProba(const raft::handle_t& handle,
const LinearSVMParams& params,
const LinearSVMModel<T>& model,
const T* X,
const std::size_t nRows,
const std::size_t nCols,
const bool log,
T* out);
};

} // namespace SVM
} // namespace ML
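
For orientation, here is a minimal usage sketch of the API declared in this header. It is not part of the diff; the handle setup and the device buffers X, y, preds are assumed to exist already:

#include <cstddef>
#include <cuml/svm/linear.hpp>
#include <raft/handle.hpp>

void linear_svc_example(const raft::handle_t& handle,
                        const float* X,  // device, column-major, (nRows, nCols)
                        const float* y,  // device, class labels, (nRows,)
                        std::size_t nRows,
                        std::size_t nCols,
                        float* preds)  // device, (nRows,)
{
  ML::SVM::LinearSVMParams params;
  params.loss          = ML::SVM::LinearSVMParams::SQUARED_HINGE;
  params.penalty       = ML::SVM::LinearSVMParams::L2;
  params.fit_intercept = true;
  params.C             = 1.0;

  // fit() allocates the model's device buffers; free() releases them.
  auto model = ML::SVM::LinearSVMModel<float>::fit(
    handle, params, X, nRows, nCols, y, /* sampleWeight = */ nullptr);
  ML::SVM::LinearSVMModel<float>::predict(handle, params, model, X, nRows, nCols, preds);
  ML::SVM::LinearSVMModel<float>::free(handle, model);
}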
24 changes: 12 additions & 12 deletions cpp/src/glm/glm.cu
@@ -195,7 +195,7 @@ void qnFit(const raft::handle_t& cuml_handle,
w0,
f,
num_iters,
loss_type,
(QN_LOSS_TYPE)loss_type,
cuml_handle.get_stream(),
sample_weight);
}
@@ -241,7 +241,7 @@ void qnFit(const raft::handle_t& cuml_handle,
w0,
f,
num_iters,
loss_type,
(QN_LOSS_TYPE)loss_type,
cuml_handle.get_stream(),
sample_weight);
}
@@ -291,7 +291,7 @@ void qnFitSparse(const raft::handle_t& cuml_handle,
w0,
f,
num_iters,
loss_type,
(QN_LOSS_TYPE)loss_type,
cuml_handle.get_stream(),
sample_weight);
}
@@ -341,7 +341,7 @@ void qnFitSparse(const raft::handle_t& cuml_handle,
w0,
f,
num_iters,
loss_type,
(QN_LOSS_TYPE)loss_type,
cuml_handle.get_stream(),
sample_weight);
}
@@ -365,7 +365,7 @@ void qnDecisionFunction(const raft::handle_t& cuml_handle,
C,
fit_intercept,
params,
loss_type,
(QN_LOSS_TYPE)loss_type,
preds,
cuml_handle.get_stream());
}
@@ -389,7 +389,7 @@ void qnDecisionFunction(const raft::handle_t& cuml_handle,
C,
fit_intercept,
params,
loss_type,
(QN_LOSS_TYPE)loss_type,
scores,
cuml_handle.get_stream());
}
@@ -417,7 +417,7 @@ void qnDecisionFunctionSparse(const raft::handle_t& cuml_handle,
C,
fit_intercept,
params,
loss_type,
(QN_LOSS_TYPE)loss_type,
scores,
cuml_handle.get_stream());
}
@@ -445,7 +445,7 @@ void qnDecisionFunctionSparse(const raft::handle_t& cuml_handle,
C,
fit_intercept,
params,
loss_type,
(QN_LOSS_TYPE)loss_type,
scores,
cuml_handle.get_stream());
}
@@ -469,7 +469,7 @@ void qnPredict(const raft::handle_t& cuml_handle,
C,
fit_intercept,
params,
loss_type,
(QN_LOSS_TYPE)loss_type,
scores,
cuml_handle.get_stream());
}
@@ -493,7 +493,7 @@ void qnPredict(const raft::handle_t& cuml_handle,
C,
fit_intercept,
params,
loss_type,
(QN_LOSS_TYPE)loss_type,
preds,
cuml_handle.get_stream());
}
@@ -521,7 +521,7 @@ void qnPredictSparse(const raft::handle_t& cuml_handle,
C,
fit_intercept,
params,
loss_type,
(QN_LOSS_TYPE)loss_type,
preds,
cuml_handle.get_stream());
}
@@ -549,7 +549,7 @@ void qnPredictSparse(const raft::handle_t& cuml_handle,
C,
fit_intercept,
params,
loss_type,
(QN_LOSS_TYPE)loss_type,
preds,
cuml_handle.get_stream());
}
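
The only change in this file is the repeated cast of the public int loss_type argument to the QN_LOSS_TYPE enum introduced in commit 01cf01c ("Introduce an enum to represent QN loss types"). The enum's actual definition is not shown in this diff; the snippet below is only an illustrative sketch of the int-to-enum dispatch pattern at the C-like API boundary, not the PR's definitions:

// Illustrative sketch only: the real enumerator names and values live in the QN headers.
enum QN_LOSS_TYPE_SKETCH {
  LOSS_LOGISTIC    = 0,
  LOSS_SQUARED_SVC = 1,
  LOSS_UNKNOWN     = 99,
};

inline QN_LOSS_TYPE_SKETCH to_loss_type(int loss_type)
{
  // Map the untyped API argument onto a named loss kind before dispatching
  // to the templated QN machinery; unknown values are rejected explicitly.
  switch (loss_type) {
    case 0: return LOSS_LOGISTIC;
    case 1: return LOSS_SQUARED_SVC;
    default: return LOSS_UNKNOWN;
  }
}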
51 changes: 35 additions & 16 deletions cpp/src/glm/qn/glm_base.cuh
@@ -128,36 +128,55 @@ struct GLMBase : GLMDims {
* 2. loss_val <- sum loss(Z)
*
* Default: elementwise application of loss and its derivative
*
* NB: for this method to work, loss implementations must have two functor fields `lz` and `dlz`.
* These two compute loss value and its derivative w.r.t. `z`.
*/
inline void getLossAndDZ(T* loss_val,
SimpleDenseMat<T>& Z,
const SimpleVec<T>& y,
cudaStream_t stream)
{
// Base impl assumes simple case C = 1
Loss* loss = static_cast<Loss*>(this);

// TODO would be nice to have a kernel that fuses these two steps
// This would be easy if mapThenSumReduce allowed outputting the result of
// map (supporting inplace)
auto lz_copy = static_cast<Loss*>(this)->lz;
auto dlz_copy = static_cast<Loss*>(this)->dlz;
if (this->sample_weights) { // Sample weights are in use
T normalization = 1.0 / this->weights_sum;
auto f_l = [=] __device__(const T y, const T z, const T weight) {
return loss->lz(y, z) * (weight * normalization);
};
raft::linalg::mapThenSumReduce(loss_val, y.len, f_l, stream, y.data, Z.data, sample_weights);

auto f_dl = [=] __device__(const T y, const T z, const T weight) {
return weight * loss->dlz(y, z);
};
raft::linalg::map(Z.data, y.len, f_dl, stream, y.data, Z.data, sample_weights);
raft::linalg::mapThenSumReduce(
loss_val,
y.len,
[lz_copy, normalization] __device__(const T y, const T z, const T weight) {
return lz_copy(y, z) * (weight * normalization);
},
stream,
y.data,
Z.data,
sample_weights);
raft::linalg::map(
Z.data,
y.len,
[dlz_copy] __device__(const T y, const T z, const T weight) {
return weight * dlz_copy(y, z);
},
stream,
y.data,
Z.data,
sample_weights);
} else { // Sample weights are not used
T normalization = 1.0 / y.len;
auto f_l = [=] __device__(const T y, const T z) { return loss->lz(y, z) * normalization; };
raft::linalg::mapThenSumReduce(loss_val, y.len, f_l, stream, y.data, Z.data);

auto f_dl = [=] __device__(const T y, const T z) { return loss->dlz(y, z); };
raft::linalg::binaryOp(Z.data, y.data, Z.data, y.len, f_dl, stream);
raft::linalg::mapThenSumReduce(
loss_val,
y.len,
[lz_copy, normalization] __device__(const T y, const T z) {
return lz_copy(y, z) * normalization;
},
stream,
y.data,
Z.data);
raft::linalg::binaryOp(Z.data, y.data, Z.data, y.len, dlz_copy, stream);
}
}
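
Per the new note above, loss implementations now expose two plain functor members, lz (loss value) and dlz (its derivative w.r.t. z), which getLossAndDZ copies by value into its device lambdas instead of dereferencing `this` on the device. A simplified, illustrative sketch of such a loss type (the real ones additionally derive from GLMBase and carry dimension and weight state):

// Illustrative sketch, not the PR's code: only the lz/dlz contract is shown.
template <typename T>
struct HingeLossSketch {
  // loss value: max(1 - y * z, 0)
  struct Lz {
    inline __device__ T operator()(const T y, const T z) const
    {
      T s = 1 - y * z;
      return s > 0 ? s : T(0);
    }
  } lz;

  // (sub)derivative of the loss w.r.t. z: -y where the hinge is active, 0 otherwise
  struct Dlz {
    inline __device__ T operator()(const T y, const T z) const { return y * z < 1 ? -y : T(0); }
  } dlz;
};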
