From 37d570af6bf9915abce0ef419ae4d962501fa403 Mon Sep 17 00:00:00 2001
From: Ganesh Venkataramana <gvenkatarama@dbcluster.nvidia.com>
Date: Mon, 24 Jun 2019 01:56:05 -0700
Subject: [PATCH] review changes #1

- indented comments properly
- avoided some implicit compiler-issued typecasts
- added the correct API declaration of the implementation to metrics.hpp
- refactored to use deviceAllocators wherever possible
- added `CUDA_CHECK(cudaStreamSynchronize(stream))` at places suspected of needing synchronization
---
 cpp/src/metrics/metrics.hpp             | 137 +++++++++---------
 cpp/src_prims/metrics/mutualInfoScore.h |  24 ++--
 cpp/test/prims/mutualInfoScore.cu       | 180 +++++++++++-------------
 3 files changed, 163 insertions(+), 178 deletions(-)

diff --git a/cpp/src/metrics/metrics.hpp b/cpp/src/metrics/metrics.hpp
index 24dbbb3d6d..2107d9732f 100644
--- a/cpp/src/metrics/metrics.hpp
+++ b/cpp/src/metrics/metrics.hpp
@@ -23,91 +23,90 @@ namespace ML {
 namespace Metrics {
 
 /**
-         * Calculates the "Coefficient of Determination" (R-Squared) score
-         * normalizing the sum of squared errors by the total sum of squares
-         * with single precision.
-         *
-         * This score indicates the proportionate amount of variation in an
-         * expected response variable is explained by the independent variables
-         * in a linear regression model. The larger the R-squared value, the
-         * more variability is explained by the linear regression model.
-         *
-         * @param handle: cumlHandle
-         * @param y: Array of ground-truth response variables
-         * @param y_hat: Array of predicted response variables
-         * @param n: Number of elements in y and y_hat
-         * @return: The R-squared value.
-         */
+* Calculates the "Coefficient of Determination" (R-Squared) score
+* normalizing the sum of squared errors by the total sum of squares
+* with single precision.
+*
+* This score indicates the proportion of the variation in an expected
+* response variable that is explained by the independent variables
+* in a linear regression model. The larger the R-squared value, the
+* more variability is explained by the linear regression model.
+*
+* @param handle: cumlHandle
+* @param y: Array of ground-truth response variables
+* @param y_hat: Array of predicted response variables
+* @param n: Number of elements in y and y_hat
+* @return: The R-squared value.
+*/
 float r2_score_py(const cumlHandle &handle, float *y, float *y_hat, int n);
 
 /**
-         * Calculates the "Coefficient of Determination" (R-Squared) score
-         * normalizing the sum of squared errors by the total sum of squares
-         * with double precision.
-         *
-         * This score indicates the proportionate amount of variation in an
-         * expected response variable is explained by the independent variables
-         * in a linear regression model. The larger the R-squared value, the
-         * more variability is explained by the linear regression model.
-         *
-         * @param handle: cumlHandle
-         * @param y: Array of ground-truth response variables
-         * @param y_hat: Array of predicted response variables
-         * @param n: Number of elements in y and y_hat
-         * @return: The R-squared value.
-         */
+* Calculates the "Coefficient of Determination" (R-Squared) score
+* normalizing the sum of squared errors by the total sum of squares
+* with double precision.
+*
+* This score indicates the proportion of the variation in an expected
+* response variable that is explained by the independent variables
+* in a linear regression model. The larger the R-squared value, the
+* more variability is explained by the linear regression model.
+*
+* @param handle: cumlHandle
+* @param y: Array of ground-truth response variables
+* @param y_hat: Array of predicted response variables
+* @param n: Number of elements in y and y_hat
+* @return: The R-squared value.
+*/
 double r2_score_py(const cumlHandle &handle, double *y, double *y_hat, int n);
 
 /**
-         * Calculates the "rand index"
-         *
-         * This metric is a measure of similarity between two data clusterings.
-         *
-         * @param handle: cumlHandle
-         * @param y: Array of response variables of the first clustering classifications
-         * @param y_hat: Array of response variables of the second clustering classifications
-         * @param n: Number of elements in y and y_hat
-         * @return: The rand index value
-         */
+* Calculates the "rand index"
+*
+* This metric is a measure of similarity between two data clusterings.
+*
+* @param handle: cumlHandle
+* @param y: Array of response variables of the first clustering classifications
+* @param y_hat: Array of response variables of the second clustering classifications
+* @param n: Number of elements in y and y_hat
+* @return: The rand index value
+*/
 
 double randIndex(const cumlHandle &handle, double *y, double *y_hat, int n);
 
 /**
-         * Calculates the "adjusted rand index"
-         *
-         * This metric is the corrected-for-chance version of the rand index 
-         *
-         * @param handle: cumlHandle
-         * @param y: Array of response variables of the first clustering classifications
-         * @param y_hat: Array of response variables of the second clustering classifications
-         * @param n: Number of elements in y and y_hat
-         * @param lower_class_range: the lowest value in the range of classes
-         * @param upper_class_range: the highest value in the range of classes
-         * @return: The adjusted rand index value
-         */
+* Calculates the "adjusted rand index"
+*
+* This metric is the corrected-for-chance version of the rand index.
+*
+* @param handle: cumlHandle
+* @param y: Array of response variables of the first clustering classifications
+* @param y_hat: Array of response variables of the second clustering classifications
+* @param n: Number of elements in y and y_hat
+* @param lower_class_range: the lowest value in the range of classes
+* @param upper_class_range: the highest value in the range of classes
+* @return: The adjusted rand index value
+*/
 double adjustedRandIndex(const cumlHandle &handle, const int *y,
                          const int *y_hat, const int n,
                          const int lower_class_range,
                          const int upper_class_range);
 
 /**
-         * Calculates the "Mutual Information score" between two clusters
-         *
-         * Mutual Information is a measure of the similarity between two labels of
-         * the same data.
-         *
-         * @param handle: cumlHandle
-         * @param y: Array of response variables of the first clustering classifications
-         * @param y_hat: Array of response variables of the second clustering classifications
-         * @param n: Number of elements in y and y_hat
-         * @param lower_class_range: the lowest value in the range of classes
-         * @param upper_class_range: the highest value in the range of classes
-         * @return: The mutual information score
-         */
-double adjustedRandIndex(const cumlHandle &handle, const int *y,
-                         const int *y_hat, const int n,
-                         const int lower_class_range,
-                         const int upper_class_range);
+* Calculates the "Mutual Information score" between two clusterings
+*
+* Mutual Information is a measure of the similarity between two labelings
+* of the same data.
+*
+* @param handle: cumlHandle
+* @param y: Array of response variables of the first clustering classifications
+* @param y_hat: Array of response variables of the second clustering classifications
+* @param n: Number of elements in y and y_hat
+* @param lower_class_range: the lowest value in the range of classes
+* @param upper_class_range: the highest value in the range of classes
+* @return: The mutual information score
+*/
+double mutualInfoScore(const cumlHandle &handle, const int *y, const int *y_hat,
+                       const int n, const int lower_class_range,
+                       const int upper_class_range);
 
 }  // namespace Metrics
 }  // namespace ML
\ No newline at end of file
diff --git a/cpp/src_prims/metrics/mutualInfoScore.h b/cpp/src_prims/metrics/mutualInfoScore.h
index 75923b68a5..9be7daba9d 100644
--- a/cpp/src_prims/metrics/mutualInfoScore.h
+++ b/cpp/src_prims/metrics/mutualInfoScore.h
@@ -52,13 +52,13 @@ __global__ void mutualInfoKernel(const int *dContingencyMatrix, const int *a,
   int i = threadIdx.y + blockIdx.y * blockDim.y;
 
   //thread-local variable to count the mutual info
-  double localMI = 0;
+  double localMI = 0.0;
 
   if (i < size && j < size && a[i] * b[j] != 0 &&
       dContingencyMatrix[i * size + j] != 0) {
     localMI += (double(dContingencyMatrix[i * size + j])) *
-               double(log(double(dContingencyMatrix[i * size + j])) -
-                      log(double(a[i] * b[j])));
+               (log(double(dContingencyMatrix[i * size + j])) -
+                log(double(a[i] * b[j])));
   }
 
   //specialize blockReduce for a 2D block of 1024 threads of type uint64_t
@@ -106,16 +106,15 @@ double mutualInfoScore(const T *firstClusterArray, const T *secondClusterArray,
                              stream));
 
   //workspace allocation
-  char *pWorkspace = nullptr;
   size_t workspaceSz = MLCommon::Metrics::getContingencyMatrixWorkspaceSize(
     size, firstClusterArray, stream, lowerLabelRange, upperLabelRange);
-  if (workspaceSz != 0) MLCommon::allocate(pWorkspace, workspaceSz);
+  device_buffer<char> pWorkspace(allocator, stream, workspaceSz);
 
   //calculating the contingency matrix
   MLCommon::Metrics::contingencyMatrix(
     firstClusterArray, secondClusterArray, (int)size,
-    (int *)dContingencyMatrix.data(), stream, (void *)pWorkspace, workspaceSz,
-    lowerLabelRange, upperLabelRange);
+    (int *)dContingencyMatrix.data(), stream, (void *)pWorkspace.data(),
+    workspaceSz, lowerLabelRange, upperLabelRange);
 
   //creating device buffers for all the parameters involved in ARI calculation
   //device variables
@@ -133,6 +132,8 @@ double mutualInfoScore(const T *firstClusterArray, const T *secondClusterArray,
     cudaMemsetAsync(b.data(), 0, numUniqueClasses * sizeof(int), stream));
   CUDA_CHECK(cudaMemsetAsync(d_MI.data(), 0, sizeof(double), stream));
 
+  CUDA_CHECK(cudaStreamSynchronize(stream));
+
   //calculating the row-wise sums
   MLCommon::LinAlg::reduce<int, int, int>(a.data(), dContingencyMatrix.data(),
                                           numUniqueClasses, numUniqueClasses, 0,
@@ -149,19 +150,22 @@ double mutualInfoScore(const T *firstClusterArray, const T *secondClusterArray,
   dim3 numBlocks(ceildiv<int>(size, numThreadsPerBlock.x),
                  ceildiv<int>(size, numThreadsPerBlock.y));
 
+  CUDA_CHECK(cudaStreamSynchronize(stream));
+
   //calling the kernel
   mutualInfoKernel<T, BLOCK_DIM_X, BLOCK_DIM_Y>
     <<<numBlocks, numThreadsPerBlock, 0, stream>>>(
       dContingencyMatrix.data(), a.data(), b.data(), numUniqueClasses,
       d_MI.data());
 
+  CUDA_CHECK(cudaStreamSynchronize(stream));
+
   //updating in the host memory
   MLCommon::updateHost(&h_MI, d_MI.data(), 1, stream);
 
-  //freeing the memories in the device
-  if (pWorkspace) CUDA_CHECK(cudaFree(pWorkspace));
+  CUDA_CHECK(cudaStreamSynchronize(stream));
 
-  return h_MI/size;
+  return h_MI / size;
 }
 
 };  //end namespace Metrics
diff --git a/cpp/test/prims/mutualInfoScore.cu b/cpp/test/prims/mutualInfoScore.cu
index e001fdd482..f47342260c 100644
--- a/cpp/test/prims/mutualInfoScore.cu
+++ b/cpp/test/prims/mutualInfoScore.cu
@@ -1,4 +1,4 @@
-    /*
+/*
  * Copyright (c) 2019, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,37 +14,32 @@
  * limitations under the License.
  */
 #include <gtest/gtest.h>
- #include "test_utils.h"
- #include <iostream>
- #include <random>
- #include <algorithm>
-#include "metrics/mutualInfoScore.h"
+#include <algorithm>
+#include <iostream>
+#include <random>
 #include "common/cuml_allocator.hpp"
 #include "metrics/contingencyMatrix.h"
+#include "metrics/mutualInfoScore.h"
+#include "test_utils.h"
 
-
-
-namespace MLCommon{
-namespace Metrics{
+namespace MLCommon {
+namespace Metrics {
 
 //parameter structure definition
-struct mutualInfoParam{
-
+struct mutualInfoParam {
   int nElements;
   int lowerLabelRange;
   int upperLabelRange;
   bool sameArrays;
   double tolerance;
-
 };
 
 //test fixture class
 template <typename T>
-class mutualInfoTest : public ::testing::TestWithParam<mutualInfoParam>{
-  protected:
+class mutualInfoTest : public ::testing::TestWithParam<mutualInfoParam> {
+ protected:
   //the constructor
   void SetUp() override {
-
     //getting the parameters
     params = ::testing::TestWithParam<mutualInfoParam>::GetParam();
 
@@ -57,22 +52,25 @@ class mutualInfoTest : public ::testing::TestWithParam<mutualInfoParam>{
     std::vector<int> arr2(nElements, 0);
     std::random_device rd;
     std::default_random_engine dre(rd());
-    std::uniform_int_distribution<int> intGenerator(lowerLabelRange, upperLabelRange);
+    std::uniform_int_distribution<int> intGenerator(lowerLabelRange,
+                                                    upperLabelRange);
 
-    std::generate(arr1.begin(), arr1.end(), [&](){return intGenerator(dre); });
-    if(params.sameArrays) {
-        arr2 = arr1;
+    std::generate(arr1.begin(), arr1.end(),
+                  [&]() { return intGenerator(dre); });
+    if (params.sameArrays) {
+      arr2 = arr1;
     } else {
-        std::generate(arr2.begin(), arr2.end(), [&](){return intGenerator(dre); });
+      std::generate(arr2.begin(), arr2.end(),
+                    [&]() { return intGenerator(dre); });
     }
 
     //generating the golden output
     //calculating the contingency matrix
     int numUniqueClasses = upperLabelRange - lowerLabelRange + 1;
-    size_t sizeOfMat = numUniqueClasses*numUniqueClasses * sizeof(int);
+    size_t sizeOfMat = numUniqueClasses * numUniqueClasses * sizeof(int);
     int *hGoldenOutput = (int *)malloc(sizeOfMat);
     memset(hGoldenOutput, 0, sizeOfMat);
-    int i,j;
+    int i, j;
     for (i = 0; i < nElements; i++) {
       int row = arr1[i] - lowerLabelRange;
       int column = arr2[i] - lowerLabelRange;
@@ -80,100 +78,84 @@ class mutualInfoTest : public ::testing::TestWithParam<mutualInfoParam>{
       hGoldenOutput[row * numUniqueClasses + column] += 1;
     }
 
-    int *a = (int *)malloc(numUniqueClasses*sizeof(int));
-    int *b = (int *)malloc(numUniqueClasses*sizeof(int));
-    memset(a, 0, numUniqueClasses*sizeof(int));
-    memset(b, 0, numUniqueClasses*sizeof(int));
+    int *a = (int *)malloc(numUniqueClasses * sizeof(int));
+    int *b = (int *)malloc(numUniqueClasses * sizeof(int));
+    memset(a, 0, numUniqueClasses * sizeof(int));
+    memset(b, 0, numUniqueClasses * sizeof(int));
 
-    
     //and also the reducing contingency matrix along row and column
-    for(i=0;i<numUniqueClasses;++i){
-        for(j=0;j<numUniqueClasses;++j){
-            a[i]+=hGoldenOutput[i*numUniqueClasses + j];
-            b[i]+=hGoldenOutput[j*numUniqueClasses + i];
-        }
+    for (i = 0; i < numUniqueClasses; ++i) {
+      for (j = 0; j < numUniqueClasses; ++j) {
+        a[i] += hGoldenOutput[i * numUniqueClasses + j];
+        b[i] += hGoldenOutput[j * numUniqueClasses + i];
+      }
     }
 
-
-//calculating the truth mutual information
-    for(int i =0; i<numUniqueClasses; ++i){
-        for(int j = 0; j<numUniqueClasses; ++j){
-
-            if(a[i]*b[j]!=0 && hGoldenOutput[i*numUniqueClasses + j]!=0){
-
-            truthmutualInfo+= (double)(hGoldenOutput[i*numUniqueClasses + j])*double(log((double)(hGoldenOutput[i*numUniqueClasses + j]))-log((double)(a[i]*b[j])));
-
-            }
-
+    //calculating the truth mutual information
+    for (int i = 0; i < numUniqueClasses; ++i) {
+      for (int j = 0; j < numUniqueClasses; ++j) {
+        if (a[i] * b[j] != 0 && hGoldenOutput[i * numUniqueClasses + j] != 0) {
+          truthmutualInfo +=
+            (double)(hGoldenOutput[i * numUniqueClasses + j]) *
+            double(log((double)(hGoldenOutput[i * numUniqueClasses + j])) -
+                   log((double)(a[i] * b[j])));
         }
+      }
     }
 
-    truthmutualInfo/=nElements;
-
-    
+    truthmutualInfo /= nElements;
 
     //allocating and initializing memory to the GPU
     CUDA_CHECK(cudaStreamCreate(&stream));
-    MLCommon::allocate(firstClusterArray,nElements,true);
-    MLCommon::allocate(secondClusterArray,nElements,true);
-
-    MLCommon::updateDevice(firstClusterArray,&arr1[0],(int)nElements,stream);
-    MLCommon::updateDevice(secondClusterArray,&arr2[0],(int)nElements,stream);
-    std::shared_ptr<MLCommon::deviceAllocator> allocator(new defaultDeviceAllocator);
+    MLCommon::allocate(firstClusterArray, nElements, true);
+    MLCommon::allocate(secondClusterArray, nElements, true);
 
+    MLCommon::updateDevice(firstClusterArray, &arr1[0], (int)nElements, stream);
+    MLCommon::updateDevice(secondClusterArray, &arr2[0], (int)nElements,
+                           stream);
+    std::shared_ptr<MLCommon::deviceAllocator> allocator(
+      new defaultDeviceAllocator);
 
     //calling the mutualInfo CUDA implementation
-    computedmutualInfo = MLCommon::Metrics::mutualInfoScore(firstClusterArray,secondClusterArray,nElements, lowerLabelRange, upperLabelRange, allocator,stream);
-
-    }
-
-    //the destructor
-    void TearDown() override
-    {
-        
-        CUDA_CHECK(cudaFree(firstClusterArray));
-        CUDA_CHECK(cudaFree(secondClusterArray));
-        CUDA_CHECK(cudaStreamDestroy(stream));
-
-
-    }
-
-    //declaring the data values
-    mutualInfoParam params;
-    T lowerLabelRange,upperLabelRange;
-    T* firstClusterArray=nullptr;
-    T* secondClusterArray = nullptr;
-    int nElements=0;
-    double truthmutualInfo=0;
-    double computedmutualInfo = 0;
-    cudaStream_t stream;
-
-    };
+    computedmutualInfo = MLCommon::Metrics::mutualInfoScore(
+      firstClusterArray, secondClusterArray, nElements, lowerLabelRange,
+      upperLabelRange, allocator, stream);
+  }
+
+  //test teardown: frees the device arrays and destroys the stream
+  void TearDown() override {
+    CUDA_CHECK(cudaFree(firstClusterArray));
+    CUDA_CHECK(cudaFree(secondClusterArray));
+    CUDA_CHECK(cudaStreamDestroy(stream));
+  }
+
+  //declaring the data values
+  mutualInfoParam params;
+  T lowerLabelRange, upperLabelRange;
+  T *firstClusterArray = nullptr;
+  T *secondClusterArray = nullptr;
+  int nElements = 0;
+  double truthmutualInfo = 0;
+  double computedmutualInfo = 0;
+  cudaStream_t stream;
+};
 
 //setting test parameter values
 const std::vector<mutualInfoParam> inputs = {
-    {199, 1, 10, false, 0.000001},
-    {200, 15, 100, false, 0.000001},
-    {100, 1, 20, false, 0.000001},
-    {10, 1, 10, false, 0.000001},
-   {198, 1, 100, false, 0.000001},
-    {300, 3, 99, false, 0.000001},
-    {199, 1, 10, true, 0.000001},
-    {200, 15, 100, true, 0.000001},
-    {100, 1, 20, true, 0.000001},
-    {10, 1, 10, true, 0.000001},
-   {198, 1, 100, true, 0.000001},
-    {300, 3, 99, true, 0.000001}
-};
-
+  {199, 1, 10, false, 0.000001},  {200, 15, 100, false, 0.000001},
+  {100, 1, 20, false, 0.000001},  {10, 1, 10, false, 0.000001},
+  {198, 1, 100, false, 0.000001}, {300, 3, 99, false, 0.000001},
+  {199, 1, 10, true, 0.000001},   {200, 15, 100, true, 0.000001},
+  {100, 1, 20, true, 0.000001},   {10, 1, 10, true, 0.000001},
+  {198, 1, 100, true, 0.000001},  {300, 3, 99, true, 0.000001}};
 
 //writing the test suite
 typedef mutualInfoTest<int> mutualInfoTestClass;
-TEST_P(mutualInfoTestClass, Result){
-    ASSERT_NEAR(computedmutualInfo, truthmutualInfo, params.tolerance);
+TEST_P(mutualInfoTestClass, Result) {
+  ASSERT_NEAR(computedmutualInfo, truthmutualInfo, params.tolerance);
 }
-INSTANTIATE_TEST_CASE_P(mutualInfo, mutualInfoTestClass,::testing::ValuesIn(inputs));
-
+INSTANTIATE_TEST_CASE_P(mutualInfo, mutualInfoTestClass,
+                        ::testing::ValuesIn(inputs));
 
-}//end namespace Metrics
-}//end namespace MLCommon
+}  //end namespace Metrics
+}  //end namespace MLCommon