Skip to content

Commit

Permalink
Address comments
Browse files Browse the repository at this point in the history
Signed-off-by: Haoyang Li <haoyangl@nvidia.com>
  • Loading branch information
thirtiseven committed Apr 11, 2024
1 parent f18d6aa commit 907a31a
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 4 deletions.
9 changes: 5 additions & 4 deletions src/main/cpp/src/json_parser.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1513,10 +1513,10 @@ class json_parser {
return number_token_len;
case json_token::VALUE_NUMBER_FLOAT: {
// number normalization:
// 0.03E-2 => 0.3E-5; infinity;
// 200.000 => 200.0, 351.980 => 351.98, 12345678900000000000.0
// => 1.23456789E19 0.0000000000003 => 3.0E-13; 0.003 => 0.003; 0.0003
// => 3.0E-4 leverage function: `get_current_float_parts`
// 0.03E-2 => 3.0E-4, 200.000 => 200.0, 351.980 => 351.98,
// 12345678900000000000.0 => 1.23456789E19, 1E308 => 1.0E308,
// 0.0000000000003 => 3.0E-13, 0.003 => 0.003, 0.0003 => 3.0E-4,
// 1.0E309 => "Infinity", -1E309 => "-Infinity"
double d_value = spark_rapids_jni::detail::stod(
cudf::string_view(current_token_start_pos, number_token_len));
return spark_rapids_jni::ftos_converter::double_normalization(d_value, destination);
Expand Down Expand Up @@ -1602,6 +1602,7 @@ class json_parser {
}
return number_token_len;
case json_token::VALUE_NUMBER_FLOAT: {
// number normalization: parse the token as a double, then write its normalized form
double d_value = spark_rapids_jni::detail::stod(
cudf::string_view(current_token_start_pos, number_token_len));
return spark_rapids_jni::ftos_converter::double_normalization(d_value, destination);
Expand Down
7 changes: 7 additions & 0 deletions src/main/cpp/src/string_to_float_cudf.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ namespace detail {
* to the appropriate float values.
*
* This function will also handle scientific notation format.
*
* This function is a copy of cudf::strings::detail::stod, moved into
* the spark_rapids_jni::detail namespace, with an overflow bug in
* `exp_ten` fixed. It is a short-term workaround for a bug in
* get_json_object; this file should be removed once the bug is
* fixed in cudf.
* The only difference from the cudf version is: `if (exp_ten >= 1e8) break;`
*/
__device__ inline double stod(cudf::string_view const& d_str)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ void getJsonObjectTest_Number_Normalization() {
"[-0.0]",
"[-0]",
"[12345678999999999999999999]",
"[9.299999257686047e-0005603333574677677]",
"9.299999257686047e0005603333574677677",
"[1E308]",
"[1.0E309,-1E309,1E5000]",
"0.3",
Expand All @@ -214,6 +216,8 @@ void getJsonObjectTest_Number_Normalization() {
"[-0.0]",
"[0]",
"[12345678999999999999999999]",
"[0.0]",
"\"Infinity\"",
"[1.0E308]",
"[\"Infinity\",\"-Infinity\",\"Infinity\"]",
"0.3",
Expand Down

0 comments on commit 907a31a

Please sign in to comment.