From 1291502170f3cc515960dfd084029eafadd54529 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Thu, 8 Apr 2021 23:40:21 +0200 Subject: [PATCH] Fix NaN errors observed with ARIMA in CUDA 11.2 builds (#3730) Closes #3649 The error was in `batched_kalman_loop_kernel`: calculating `Y = a M X + b Y` with `b = 0` can still produce `NaN` if `Y` is uninitialized, because `0 * NaN` (or `0 * Inf`) is `NaN`. One possible fix would be to zero-initialize the component that was left uninitialized. The fix I chose is to remove the unnecessary read instead, so no uninitialized value is accessed and the redundant operations are skipped. Authors: - Louis Sugy (https://github.com/Nyrio) Approvers: - Dante Gama Dessavre (https://github.com/dantegd) URL: https://github.com/rapidsai/cuml/pull/3730 --- cpp/src/arima/batched_kalman.cu | 7 +++---- python/cuml/test/test_arima.py | 6 ------ 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/cpp/src/arima/batched_kalman.cu b/cpp/src/arima/batched_kalman.cu index a255a64f4d..d8ee3cf64a 100644 --- a/cpp/src/arima/batched_kalman.cu +++ b/cpp/src/arima/batched_kalman.cu @@ -49,14 +49,13 @@ DI void Mv_l(const double* A, const double* v, double* out) { } template -DI void Mv_l(double alpha, const double* A, const double* v, double beta, - double* out) { +DI void Mv_l(double alpha, const double* A, const double* v, double* out) { for (int i = 0; i < n; i++) { double sum = 0.0; for (int j = 0; j < n; j++) { sum += A[i + j * n] * v[j]; } - out[i] = alpha * sum + beta * out[i]; + out[i] = alpha * sum; } } @@ -179,7 +178,7 @@ __global__ void batched_kalman_loop_kernel( l_K[i] = _1_Fs * l_TP[i]; } } else - Mv_l(_1_Fs, l_TP, l_Z, 0.0, l_K); + Mv_l(_1_Fs, l_TP, l_Z, l_K); // 4. 
alpha = T*alpha + K*vs[it] + c // tmp = T*alpha diff --git a/python/cuml/test/test_arima.py b/python/cuml/test/test_arima.py index 260edeabd8..95380266c1 100644 --- a/python/cuml/test/test_arima.py +++ b/python/cuml/test/test_arima.py @@ -37,7 +37,6 @@ from collections import namedtuple import numpy as np import os -import rmm import warnings import pandas as pd @@ -270,11 +269,6 @@ def _statsmodels_to_cuml(ref_fits, cuml_model, order, seasonal_order, in statsmodels and cuML models (it depends on the order). """ - - if rmm._cuda.gpu.runtimeGetVersion() >= 11020: - pytest.skip("CUDA 11.2 nan failure, see " - "https://github.com/rapidsai/cuml/issues/3649") - nb = cuml_model.batch_size N = cuml_model.complexity x = np.zeros(nb * N, dtype=np.float64)