diff --git a/src/main/cpp/src/json_parser.cuh b/src/main/cpp/src/json_parser.cuh index 0fd908b8b4..ec0790aa6b 100644 --- a/src/main/cpp/src/json_parser.cuh +++ b/src/main/cpp/src/json_parser.cuh @@ -1513,10 +1513,10 @@ class json_parser { return number_token_len; case json_token::VALUE_NUMBER_FLOAT: { // number normalization: - // 0.03E-2 => 0.3E-5; infinity; - // 200.000 => 200.0, 351.980 => 351.98, 12345678900000000000.0 - // => 1.23456789E19 0.0000000000003 => 3.0E-13; 0.003 => 0.003; 0.0003 - // => 3.0E-4 leverage function: `get_current_float_parts` + // 0.03E-2 => 3.0E-4, 200.000 => 200.0, 351.980 => 351.98, + // 12345678900000000000.0 => 1.23456789E19, 1E308 => 1.0E308, + // 0.0000000000003 => 3.0E-13; 0.003 => 0.003; 0.0003 => 3.0E-4 + // 1.0E309 => "Infinity", -1E309 => "-Infinity" double d_value = spark_rapids_jni::detail::stod( cudf::string_view(current_token_start_pos, number_token_len)); return spark_rapids_jni::ftos_converter::double_normalization(d_value, destination); @@ -1602,6 +1602,7 @@ class json_parser { } return number_token_len; case json_token::VALUE_NUMBER_FLOAT: { + // number normalization: parse the token with stod, then apply double_normalization double d_value = spark_rapids_jni::detail::stod( cudf::string_view(current_token_start_pos, number_token_len)); return spark_rapids_jni::ftos_converter::double_normalization(d_value, destination); diff --git a/src/main/cpp/src/string_to_float_cudf.cuh b/src/main/cpp/src/string_to_float_cudf.cuh index 760c3faea8..5a7824d495 100644 --- a/src/main/cpp/src/string_to_float_cudf.cuh +++ b/src/main/cpp/src/string_to_float_cudf.cuh @@ -32,6 +32,13 @@ namespace detail { * to the appropriate float values. * * This function will also handle scientific notation format. + * + * This function is a copy of cudf::strings::detail::stod with + * the namespace changed to spark_rapids_jni::detail and with + * an overflow bug in `exp_ten` fixed. It is a short-term solution to + * resolve a bug in get_json_object. 
We should remove this file + * once the bug is fixed in cudf in the long term. + * The only difference from the cudf version is `if (exp_ten >= 1e8) break;` */ __device__ inline double stod(cudf::string_view const& d_str) { diff --git a/src/test/java/com/nvidia/spark/rapids/jni/GetJsonObjectTest.java b/src/test/java/com/nvidia/spark/rapids/jni/GetJsonObjectTest.java index b5379baa58..ea23c4c9ba 100644 --- a/src/test/java/com/nvidia/spark/rapids/jni/GetJsonObjectTest.java +++ b/src/test/java/com/nvidia/spark/rapids/jni/GetJsonObjectTest.java @@ -200,6 +200,8 @@ void getJsonObjectTest_Number_Normalization() { "[-0.0]", "[-0]", "[12345678999999999999999999]", + "[9.299999257686047e-0005603333574677677]", + "9.299999257686047e0005603333574677677", "[1E308]", "[1.0E309,-1E309,1E5000]", "0.3", @@ -214,6 +216,8 @@ void getJsonObjectTest_Number_Normalization() { "[-0.0]", "[0]", "[12345678999999999999999999]", + "[0.0]", + "\"Infinity\"", "[1.0E308]", "[\"Infinity\",\"-Infinity\",\"Infinity\"]", "0.3",