From 873d997ab040f29900bbe0732650a6b2496675fe Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 17 Jul 2024 05:26:12 +0800
Subject: [PATCH 1/3] Update submodule cudf to
 e2b7e4370c8513811e9c72b30f499a5614b49f7c (#2231)

Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
 thirdparty/cudf                       | 2 +-
 thirdparty/cudf-pins/rapids-cmake.sha | 2 +-
 thirdparty/cudf-pins/versions.json    | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/thirdparty/cudf b/thirdparty/cudf
index 669db3ea4a..e2b7e4370c 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 669db3ea4a0c24a343c5619dd00904ad22ea215b
+Subproject commit e2b7e4370c8513811e9c72b30f499a5614b49f7c
diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha
index a02bd5d56a..072e6efd6a 100644
--- a/thirdparty/cudf-pins/rapids-cmake.sha
+++ b/thirdparty/cudf-pins/rapids-cmake.sha
@@ -1 +1 @@
-351fc88100ce381434f9dc16debbd32d2fc82a07
+fddf1c05f96f3a3ff50c358fdf634ba067001850
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index 599a204f7a..915a004aff 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -47,7 +47,7 @@
     {
       "always_download" : true,
       "git_shallow" : false,
-      "git_tag" : "363e2d65c02b216baf33e4219f27c0102936edee",
+      "git_tag" : "ff94539ff1569d990bbd6abb9c678b4d1991e5ba",
       "git_url" : "https://github.com/rapidsai/kvikio.git",
       "version" : "24.08"
     },
@@ -132,7 +132,7 @@
     {
       "always_download" : true,
       "git_shallow" : false,
-      "git_tag" : "429960808c3bccea2e7e1da177439546481bb684",
+      "git_tag" : "f91ca6f22195868e6493e38975f5c07029bd1977",
       "git_url" : "https://github.com/rapidsai/rmm.git",
       "version" : "24.08"
     },

From adda312e8420e5ad791304d2c21e09b9d556d2c0 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 17 Jul 2024 11:27:14 +0800
Subject: [PATCH 2/3] Update submodule cudf to
 093bcc94ccf156a7e39339a7c4bb7e86543187de (#2234)

Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
 thirdparty/cudf                    | 2 +-
 thirdparty/cudf-pins/versions.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/thirdparty/cudf b/thirdparty/cudf
index e2b7e4370c..093bcc94cc 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit e2b7e4370c8513811e9c72b30f499a5614b49f7c
+Subproject commit 093bcc94ccf156a7e39339a7c4bb7e86543187de
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index 915a004aff..61e3bfe97e 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -47,7 +47,7 @@
     {
       "always_download" : true,
       "git_shallow" : false,
-      "git_tag" : "ff94539ff1569d990bbd6abb9c678b4d1991e5ba",
+      "git_tag" : "ab3778c0462c2f713125a4e458f46bcd654b9d54",
       "git_url" : "https://github.com/rapidsai/kvikio.git",
       "version" : "24.08"
     },

From aff3696693bb2910b03c83dc1b5a8dfb28b49c86 Mon Sep 17 00:00:00 2001
From: Nghia Truong <7416935+ttnghia@users.noreply.github.com>
Date: Tue, 16 Jul 2024 23:15:01 -0700
Subject: [PATCH 3/3] Cleanup `get_json_object` (#2232)

* Rewrite `json_generator`

* Debug print

* Cleanup header

* Add `const` and `static` qualifiers

* Cleanup

Signed-off-by: Nghia Truong <nghiat@nvidia.com>

* Faster name concatenation

Signed-off-by: Nghia Truong <nghiat@nvidia.com>

* Add case path enum

Signed-off-by: Nghia Truong <nghiat@nvidia.com>

* Fix `try_skip_children`

Signed-off-by: Nghia Truong <nghiat@nvidia.com>

* Construct path command async

* More cleanup

Signed-off-by: Nghia Truong <nghiat@nvidia.com>

* Add comment

Signed-off-by: Nghia Truong <nghiat@nvidia.com>

* Fix format

Signed-off-by: Nghia Truong <nghiat@nvidia.com>

---------

Signed-off-by: Nghia Truong <nghiat@nvidia.com>
---
 src/main/cpp/src/get_json_object.cu  | 529 +++++++++++++--------------
 src/main/cpp/src/get_json_object.hpp |   9 +-
 src/main/cpp/src/json_parser.cuh     |  91 ++---
 3 files changed, 297 insertions(+), 332 deletions(-)

diff --git a/src/main/cpp/src/get_json_object.cu b/src/main/cpp/src/get_json_object.cu
index 47b1245307..b52754da4b 100644
--- a/src/main/cpp/src/get_json_object.cu
+++ b/src/main/cpp/src/get_json_object.cu
@@ -58,7 +58,7 @@ constexpr int max_path_depth = 16;
 /**
  * write JSON style
  */
-enum class write_style { RAW, QUOTED, FLATTEN };
+enum class write_style : int8_t { RAW, QUOTED, FLATTEN };
 
 /**
  * path instruction
@@ -66,13 +66,13 @@ enum class write_style { RAW, QUOTED, FLATTEN };
 struct path_instruction {
   __device__ inline path_instruction(path_instruction_type _type) : type(_type) {}
 
-  path_instruction_type type;
-
   // used when type is named type
   cudf::string_view name;
 
   // used when type is index
   int index{-1};
+
+  path_instruction_type type;
 };
 
 /**
@@ -83,30 +83,24 @@ struct path_instruction {
  */
 class json_generator {
  public:
-  __device__ json_generator(char* _output) : output(_output), output_len(0) {}
-  __device__ json_generator() : output(nullptr), output_len(0) {}
+  __device__ json_generator(int _offset = 0) : offset(_offset), output_len(0) {}
 
   // create a nested child generator based on this parent generator,
   // child generator is a view, parent and child share the same byte array
-  __device__ json_generator new_child_generator()
+  __device__ json_generator new_child_generator() const
   {
-    if (nullptr == output) {
-      return json_generator();
-    } else {
-      return json_generator(output + output_len);
-    }
+    return json_generator(offset + output_len);
   }
 
   // write [
   // add an extra comma if needed,
   // e.g.: when JSON content is: [[1,2,3]
   // writing a new [ should result: [[1,2,3],[
-  __device__ void write_start_array()
+  __device__ void write_start_array(char* out_begin)
   {
-    try_write_comma();
-
-    if (output) { *(output + output_len) = '['; }
+    try_write_comma(out_begin);
 
+    out_begin[offset + output_len] = '[';
     output_len++;
     array_depth++;
     // new array is empty
@@ -114,14 +108,12 @@ class json_generator {
   }
 
   // write ]
-  __device__ void write_end_array()
+  __device__ void write_end_array(char* out_begin)
   {
-    if (output) { *(output + output_len) = ']'; }
+    out_begin[offset + output_len] = ']';
     output_len++;
-
     // point to parent array
     array_depth--;
-
     // set parent array as non-empty because already had a closed child item.
     is_curr_array_empty = false;
   }
@@ -137,16 +129,16 @@ class json_generator {
   }
 
   // return true if it's in a array context and it's not writing the first item.
-  __device__ inline bool need_comma() { return (array_depth > 0 && !is_curr_array_empty); }
+  __device__ inline bool need_comma() const { return (array_depth > 0 && !is_curr_array_empty); }
 
   /**
    * write comma accroding to current generator state
    */
-  __device__ void try_write_comma()
+  __device__ void try_write_comma(char* out_begin)
   {
     if (need_comma()) {
       // in array context and writes first item
-      if (output) { *(output + output_len) = ','; }
+      out_begin[offset + output_len] = ',';
       output_len++;
     }
   }
@@ -156,24 +148,16 @@ class json_generator {
    * object/array, then copy to corresponding matched end object/array. return
    * false if JSON format is invalid return true if JSON format is valid
    */
-  __device__ bool copy_current_structure(json_parser& parser)
+  __device__ bool copy_current_structure(json_parser& parser, char* out_begin)
   {
     // first try add comma
-    try_write_comma();
+    try_write_comma(out_begin);
 
     if (array_depth > 0) { is_curr_array_empty = false; }
 
-    if (nullptr != output) {
-      auto copy_to       = output + output_len;
-      auto [b, copy_len] = parser.copy_current_structure(copy_to);
-      output_len += copy_len;
-      return b;
-    } else {
-      char* copy_to      = nullptr;
-      auto [b, copy_len] = parser.copy_current_structure(copy_to);
-      output_len += copy_len;
-      return b;
-    }
+    auto [b, copy_len] = parser.copy_current_structure(out_begin + offset + output_len);
+    output_len += copy_len;
+    return b;
   }
 
   /**
@@ -183,17 +167,12 @@ class json_generator {
    * then can not return a pointer and length pair (char *, len),
    * For number token, JSON parser can return a pair (char *, len)
    */
-  __device__ void write_raw(json_parser& parser)
+  __device__ void write_raw(json_parser& parser, char* out_begin)
   {
     if (array_depth > 0) { is_curr_array_empty = false; }
 
-    if (nullptr != output) {
-      auto copied = parser.write_unescaped_text(output + output_len);
-      output_len += copied;
-    } else {
-      auto len = parser.compute_unescaped_len();
-      output_len += len;
-    }
+    auto copied = parser.write_unescaped_text(out_begin + offset + output_len);
+    output_len += copied;
   }
 
   /**
@@ -227,34 +206,32 @@ class json_generator {
    * block
    */
   __device__ void write_child_raw_value(char* child_block_begin,
-                                        size_t child_block_len,
+                                        int child_block_len,
                                         bool write_outer_array_tokens)
   {
     bool insert_comma = need_comma();
 
     if (array_depth > 0) { is_curr_array_empty = false; }
 
-    if (nullptr != output) {
-      if (write_outer_array_tokens) {
-        if (insert_comma) {
-          *(child_block_begin + child_block_len + 2) = ']';
-          move_forward(child_block_begin, child_block_len, 2);
-          *(child_block_begin + 1) = '[';
-          *(child_block_begin)     = ',';
-        } else {
-          *(child_block_begin + child_block_len + 1) = ']';
-          move_forward(child_block_begin, child_block_len, 1);
-          *(child_block_begin) = '[';
-        }
+    if (write_outer_array_tokens) {
+      if (insert_comma) {
+        *(child_block_begin + child_block_len + 2) = ']';
+        move_forward(child_block_begin, child_block_len, 2);
+        *(child_block_begin + 1) = '[';
+        *(child_block_begin)     = ',';
       } else {
-        if (insert_comma) {
-          move_forward(child_block_begin, child_block_len, 1);
-          *(child_block_begin) = ',';
-        } else {
-          // do not need comma && do not need write outer array tokens
-          // do nothing, because child generator buff is directly after the
-          // parent generator
-        }
+        *(child_block_begin + child_block_len + 1) = ']';
+        move_forward(child_block_begin, child_block_len, 1);
+        *(child_block_begin) = '[';
+      }
+    } else {
+      if (insert_comma) {
+        move_forward(child_block_begin, child_block_len, 1);
+        *(child_block_begin) = ',';
+      } else {
+        // do not need comma && do not need write outer array tokens
+        // do nothing, because child generator buff is directly after the
+        // parent generator
       }
     }
 
@@ -270,7 +247,7 @@ class json_generator {
   // e.g.:  memory is: 1 2 0 0, begin is 1, len is 1, after moving,
   // memory is: 1 1 2 0.
   // Note: should move from end to begin to avoid overwrite buffer
-  __device__ void move_forward(char* begin, size_t len, int forward)
+  static __device__ void move_forward(char* begin, size_t len, int forward)
   {
     // TODO copy by 8 bytes
     char* pos = begin + len + forward - 1;
@@ -281,9 +258,8 @@ class json_generator {
     }
   }
 
-  __device__ inline size_t get_output_len() const { return output_len; }
-  __device__ inline char* get_output_start_position() const { return output; }
-  __device__ inline char* get_current_output_position() const { return output + output_len; }
+  __device__ inline int get_offset() const { return offset; }
+  __device__ inline int get_output_len() const { return output_len; }
 
   /**
    * generator may contain trash output, e.g.: generator writes some output,
@@ -294,13 +270,14 @@ class json_generator {
   __device__ inline void set_output_len(size_t len) { output_len = len; }
 
  private:
-  char* output;
-  size_t output_len;
+  int offset;  // offset from the global output buffer
+  int output_len;
+
+  int array_depth = 0;
 
   // whether already worte a item in current array
   // used to decide whether add a comma before writing out a new item.
   bool is_curr_array_empty;
-  int array_depth = 0;
 };
 
 /**
@@ -357,84 +334,95 @@ __device__ inline thrust::tuple<bool, int> path_match_index_wildcard(
   }
 }
 
-/**
- *
- * This function is rewritten from above commented recursive function.
- * this function is equivalent to the above commented recursive function.
- */
-__device__ bool evaluate_path(json_parser& p,
-                              json_generator& root_g,
-                              write_style root_style,
-                              cudf::device_span<path_instruction const> root_path)
-{
-  // manually maintained context stack in lieu of calling evaluate_path recursively.
-  struct context {
-    // current token
-    json_token token;
+enum class evaluation_case_path : int8_t {
+  INVALID                                           = -1,
+  START_ARRAY___EMPTY_PATH___FLATTEN_STYLE          = 2,
+  START_OBJECT___MATCHED_NAME_PATH                  = 4,
+  START_ARRAY___MATCHED_DOUBLE_WILDCARD             = 5,
+  START_ARRAY___MATCHED_WILDCARD___STYLE_NOT_QUOTED = 6,
+  START_ARRAY___MATCHED_WILDCARD                    = 7,
+  START_ARRAY___MATCHED_INDEX_AND_WILDCARD          = 8,
+  START_ARRAY___MATCHED_INDEX                       = 9
+};
 
-    // which case path that this task is from
-    int case_path;
+struct context {
+  // used to save current generator
+  json_generator g;
 
-    // used to save current generator
-    json_generator g;
+  // used to save child JSON generator for case path 6
+  json_generator child_g;
 
-    write_style style;
+  cudf::device_span<path_instruction const> path;
 
-    cudf::device_span<path_instruction const> path;
-    // is this context task is done
-    bool task_is_done;
+  // whether any output has been written
+  // dirty > 0 indicates success
+  int dirty;
 
-    // whether written output
-    // if dirty > 0, indicates success
-    int dirty;
+  // which case path that this task is from
+  evaluation_case_path case_path;
 
-    // for some case paths
-    bool is_first_enter;
+  // current token
+  json_token token;
 
-    // used to save child JSON generator for case path 8
-    json_generator child_g;
-  };
+  write_style style;
+
+  // for some case paths
+  bool is_first_enter;
+
+  // whether this context task is done
+  bool task_is_done;
+};
+
+/**
+ * @brief Parse a single json string using the provided command buffer.
+ *
+ * @param input The incoming json string
+ * @param path_commands The command buffer to be applied to the string
+ * @param out_buf Buffer user to store the string resulted from the query
+ * @return A pair containing the result code and the output size
+ */
+__device__ thrust::pair<bool, cudf::size_type> evaluate_path(
+  char_range input, cudf::device_span<path_instruction const> path_commands, char* out_buf)
+{
+  json_parser p{input};
+  p.next_token();
+  if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
 
   // define stack; plus 1 indicates root context task needs an extra memory
   context stack[max_path_depth + 1];
-  int stack_pos = 0;
+  int stack_size = 0;
 
   // push context function
-  auto push_context = [&stack, &stack_pos](json_token _token,
-                                           int _case_path,
-                                           json_generator _g,
-                                           write_style _style,
-                                           cudf::device_span<path_instruction const> _path) {
+  auto push_context = [&p, &stack, &stack_size](evaluation_case_path _case_path,
+                                                json_generator _g,
+                                                write_style _style,
+                                                cudf::device_span<path_instruction const> _path) {
     // no need to check stack is full
     // because Spark-Rapids already checked maximum length of `path_instruction`
-    auto& ctx          = stack[stack_pos];
-    ctx.token          = _token;
-    ctx.case_path      = _case_path;
+    auto& ctx          = stack[stack_size++];
     ctx.g              = _g;
-    ctx.style          = _style;
     ctx.path           = _path;
-    ctx.task_is_done   = false;
     ctx.dirty          = 0;
+    ctx.case_path      = _case_path;
+    ctx.token          = p.get_current_token();
+    ctx.style          = _style;
     ctx.is_first_enter = true;
-
-    stack_pos++;
+    ctx.task_is_done   = false;
   };
 
   // put the first context task
-  push_context(p.get_current_token(), -1, root_g, root_style, root_path);
+  push_context(evaluation_case_path::INVALID, json_generator{}, write_style::RAW, path_commands);
 
-  while (stack_pos > 0) {
-    auto& ctx = stack[stack_pos - 1];
+  while (stack_size > 0) {
+    auto& ctx = stack[stack_size - 1];
     if (!ctx.task_is_done) {
-      // task is not done.
-
       // case (VALUE_STRING, Nil) if style == RawStyle
       // case path 1
       if (json_token::VALUE_STRING == ctx.token && path_is_empty(ctx.path.size()) &&
           ctx.style == write_style::RAW) {
         // there is no array wildcard or slice parent, emit this string without
         // quotes write current string in parser to generator
-        ctx.g.write_raw(p);
+        ctx.g.write_raw(p, out_buf);
         ctx.dirty        = 1;
         ctx.task_is_done = true;
       }
@@ -445,10 +433,13 @@ __device__ bool evaluate_path(json_parser& p,
         // flatten this array into the parent
         if (json_token::END_ARRAY != p.next_token()) {
           // JSON validation check
-          if (json_token::ERROR == p.get_current_token()) { return false; }
+          if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
           // push back task
           // add child task
-          push_context(p.get_current_token(), 2, ctx.g, ctx.style, {nullptr, 0});
+          push_context(evaluation_case_path::START_ARRAY___EMPTY_PATH___FLATTEN_STYLE,
+                       ctx.g,
+                       ctx.style,
+                       {nullptr, 0});
         } else {
           // END_ARRAY
           ctx.task_is_done = true;
@@ -458,9 +449,9 @@ __device__ bool evaluate_path(json_parser& p,
       // case path 3
       else if (path_is_empty(ctx.path.size())) {
         // general case: just copy the child tree verbatim
-        if (!(ctx.g.copy_current_structure(p))) {
+        if (!(ctx.g.copy_current_structure(p, out_buf))) {
           // JSON validation check
-          return false;
+          return {false, 0};
         }
         ctx.dirty        = 1;
         ctx.task_is_done = true;
@@ -475,17 +466,17 @@ __device__ bool evaluate_path(json_parser& p,
           if (ctx.dirty > 0) {
             while (json_token::END_OBJECT != p.next_token()) {
               // JSON validation check
-              if (json_token::ERROR == p.get_current_token()) { return false; }
+              if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
 
               // skip FIELD_NAME token
               p.next_token();
               // JSON validation check
-              if (json_token::ERROR == p.get_current_token()) { return false; }
+              if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
 
               // skip value of FIELD_NAME
               if (!p.try_skip_children()) {
                 // JSON validation check
-                return false;
+                return {false, 0};
               }
             }
           }
@@ -498,7 +489,7 @@ __device__ bool evaluate_path(json_parser& p,
           bool found_expected_child = false;
           while (json_token::END_OBJECT != p.next_token()) {
             // JSON validation check
-            if (json_token::ERROR == p.get_current_token()) { return false; }
+            if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
 
             // need to try more children
             auto match_named = path_match_named(ctx.path);
@@ -508,13 +499,12 @@ __device__ bool evaluate_path(json_parser& p,
               // skip FIELD_NAME token
               p.next_token();
               // JSON validation check
-              if (json_token::ERROR == p.get_current_token()) { return false; }
+              if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
 
               // meets null token, it's not expected, return false
-              if (json_token::VALUE_NULL == p.get_current_token()) { return false; }
+              if (json_token::VALUE_NULL == p.get_current_token()) { return {false, 0}; }
               // push sub task; sub task will update the result of path 4
-              push_context(p.get_current_token(),
-                           4,
+              push_context(evaluation_case_path::START_OBJECT___MATCHED_NAME_PATH,
                            ctx.g,
                            ctx.style,
                            {ctx.path.data() + 1, ctx.path.size() - 1});
@@ -524,12 +514,12 @@ __device__ bool evaluate_path(json_parser& p,
               // skip FIELD_NAME token
               p.next_token();
               // JSON validation check
-              if (json_token::ERROR == p.get_current_token()) { return false; }
+              if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
 
               // current child is not expected, skip current child
               if (!p.try_skip_children()) {
                 // JSON validation check
-                return false;
+                return {false, 0};
               }
             }
           }
@@ -549,19 +539,18 @@ __device__ bool evaluate_path(json_parser& p,
         // behavior in Hive
         if (ctx.is_first_enter) {
           ctx.is_first_enter = false;
-          ctx.g.write_start_array();
+          ctx.g.write_start_array(out_buf);
         }
 
         if (p.next_token() != json_token::END_ARRAY) {
           // JSON validation check
-          if (json_token::ERROR == p.get_current_token()) { return false; }
-          push_context(p.get_current_token(),
-                       5,
+          if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
+          push_context(evaluation_case_path::START_ARRAY___MATCHED_DOUBLE_WILDCARD,
                        ctx.g,
                        write_style::FLATTEN,
                        {ctx.path.data() + 2, ctx.path.size() - 2});
         } else {
-          ctx.g.write_end_array();
+          ctx.g.write_end_array(out_buf);
           ctx.task_is_done = true;
         }
       }
@@ -594,17 +583,16 @@ __device__ bool evaluate_path(json_parser& p,
 
         if (p.next_token() != json_token::END_ARRAY) {
           // JSON validation check
-          if (json_token::ERROR == p.get_current_token()) { return false; }
+          if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
           // track the number of array elements and only emit an outer array if
           // we've written more than one element, this matches Hive's behavior
-          push_context(p.get_current_token(),
-                       6,
+          push_context(evaluation_case_path::START_ARRAY___MATCHED_WILDCARD___STYLE_NOT_QUOTED,
                        child_g,
                        next_style,
                        {ctx.path.data() + 1, ctx.path.size() - 1});
         } else {
-          char* child_g_start = child_g.get_output_start_position();
-          size_t child_g_len  = child_g.get_output_len();
+          char* child_g_start = out_buf + child_g.get_offset();
+          int child_g_len     = child_g.get_output_len();
           if (ctx.dirty > 1) {
             // add outer array tokens
             ctx.g.write_child_raw_value(
@@ -625,21 +613,20 @@ __device__ bool evaluate_path(json_parser& p,
                path_match_element(ctx.path, path_instruction_type::WILDCARD)) {
         if (ctx.is_first_enter) {
           ctx.is_first_enter = false;
-          ctx.g.write_start_array();
+          ctx.g.write_start_array(out_buf);
         }
         if (p.next_token() != json_token::END_ARRAY) {
           // JSON validation check
-          if (json_token::ERROR == p.get_current_token()) { return false; }
+          if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
 
           // wildcards can have multiple matches, continually update the dirty
           // count
-          push_context(p.get_current_token(),
-                       7,
+          push_context(evaluation_case_path::START_ARRAY___MATCHED_WILDCARD,
                        ctx.g,
                        write_style::QUOTED,
                        {ctx.path.data() + 1, ctx.path.size() - 1});
         } else {
-          ctx.g.write_end_array();
+          ctx.g.write_end_array(out_buf);
           ctx.task_is_done = true;
         }
       }
@@ -651,28 +638,27 @@ __device__ bool evaluate_path(json_parser& p,
 
         p.next_token();
         // JSON validation check
-        if (json_token::ERROR == p.get_current_token()) { return false; }
+        if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
         ctx.is_first_enter = false;
 
         int i = idx;
         while (i > 0) {
           if (p.get_current_token() == json_token::END_ARRAY) {
             // terminate, nothing has been written
-            return false;
+            return {false, 0};
           }
 
-          if (!p.try_skip_children()) { return false; }
+          if (!p.try_skip_children()) { return {false, 0}; }
 
           p.next_token();
           // JSON validation check
-          if (json_token::ERROR == p.get_current_token()) { return false; }
+          if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
 
           --i;
         }
 
         // i == 0
-        push_context(p.get_current_token(),
-                     8,
+        push_context(evaluation_case_path::START_ARRAY___MATCHED_INDEX_AND_WILDCARD,
                      ctx.g,
                      write_style::QUOTED,
                      {ctx.path.data() + 1, ctx.path.size() - 1});
@@ -684,176 +670,122 @@ __device__ bool evaluate_path(json_parser& p,
 
         p.next_token();
         // JSON validation check
-        if (json_token::ERROR == p.get_current_token()) { return false; }
+        if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
 
         int i = idx;
         while (i > 0) {
           if (p.get_current_token() == json_token::END_ARRAY) {
             // terminate, nothing has been written
-            return false;
+            return {false, 0};
           }
 
-          if (!p.try_skip_children()) { return false; }
+          if (!p.try_skip_children()) { return {false, 0}; }
 
           p.next_token();
           // JSON validation check
-          if (json_token::ERROR == p.get_current_token()) { return false; }
+          if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
 
           --i;
         }
 
         // i == 0
-        push_context(
-          p.get_current_token(), 9, ctx.g, ctx.style, {ctx.path.data() + 1, ctx.path.size() - 1});
+        push_context(evaluation_case_path::START_ARRAY___MATCHED_INDEX,
+                     ctx.g,
+                     ctx.style,
+                     {ctx.path.data() + 1, ctx.path.size() - 1});
       }
       // case _ =>
       // case path 12
       else {
-        if (!p.try_skip_children()) { return false; }
+        if (!p.try_skip_children()) { return {false, 0}; }
         // default case path, return false for this task
         ctx.dirty        = 0;
         ctx.task_is_done = true;
       }
-    } else {
-      // current context is done.
-
+    }       // if (!ctx.task_is_done)
+    else {  // current context is done.
       // pop current top context
-      stack_pos--;
+      stack_size--;
+
+      // has no parent task, stack is empty, will exit
+      if (stack_size == 0) { break; }
 
-      // pop parent task
+      // peek parent context task
       // update parent task info according to current task result
-      if (stack_pos > 0) {
-        // peek parent context task
-        auto& p_ctx = stack[stack_pos - 1];
-
-        // case (VALUE_STRING, Nil) if style == RawStyle
-        // case path 1
-        if (1 == ctx.case_path) {
-          // never happen
-        }
-        // path 2: case (START_ARRAY, Nil) if style == FlattenStyle
-        // path 5: case (START_ARRAY, Wildcard :: Wildcard :: xs)
-        // path 7: case (START_ARRAY, Wildcard :: xs)
-        else if (2 == ctx.case_path || 5 == ctx.case_path || 7 == ctx.case_path) {
+      auto& p_ctx = stack[stack_size - 1];
+
+      switch (ctx.case_path) {
+          // path 2: case (START_ARRAY, Nil) if style == FlattenStyle
+          // path 5: case (START_ARRAY, Wildcard :: Wildcard :: xs)
+          // path 7: case (START_ARRAY, Wildcard :: xs)
+        case evaluation_case_path::START_ARRAY___EMPTY_PATH___FLATTEN_STYLE:
+        case evaluation_case_path::START_ARRAY___MATCHED_DOUBLE_WILDCARD:
+        case evaluation_case_path::START_ARRAY___MATCHED_WILDCARD: {
           // collect result from child task
           p_ctx.dirty += ctx.dirty;
           // copy generator states to parent task;
           p_ctx.g = ctx.g;
+
+          break;
         }
-        // case (START_OBJECT, Named :: xs)
-        // case path 4
-        else if (4 == ctx.case_path) {
+
+          // case (START_OBJECT, Named :: xs)
+          // case path 4
+        case evaluation_case_path::START_OBJECT___MATCHED_NAME_PATH: {
           p_ctx.dirty = ctx.dirty;
           // copy generator states to parent task;
           p_ctx.g = ctx.g;
+
+          break;
         }
-        // case (START_ARRAY, Wildcard :: xs) if style != QuotedStyle
-        // case path 6
-        else if (6 == ctx.case_path) {
+
+          // case (START_ARRAY, Wildcard :: xs) if style != QuotedStyle
+          // case path 6
+        case evaluation_case_path::START_ARRAY___MATCHED_WILDCARD___STYLE_NOT_QUOTED: {
           // collect result from child task
           p_ctx.dirty += ctx.dirty;
           // update child generator for parent task
           p_ctx.child_g = ctx.g;
+
+          break;
         }
-        /* case (START_ARRAY, Index(idx) :: (xs@Wildcard :: _)) */
-        // case path 8
-        // case (START_ARRAY, Index(idx) :: xs)
-        // case path 9
-        else if (8 == ctx.case_path || 9 == ctx.case_path) {
+
+          /* case (START_ARRAY, Index(idx) :: (xs@Wildcard :: _)) */
+          // case path 8
+          // case (START_ARRAY, Index(idx) :: xs)
+          // case path 9
+        case evaluation_case_path::START_ARRAY___MATCHED_INDEX_AND_WILDCARD:
+        case evaluation_case_path::START_ARRAY___MATCHED_INDEX: {
           // collect result from child task
           p_ctx.dirty += ctx.dirty;
 
           // post logic:
           while (p.next_token() != json_token::END_ARRAY) {
             // JSON validation check
-            if (json_token::ERROR == p.get_current_token()) { return false; }
+            if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
             // advance the token stream to the end of the array
-            if (!p.try_skip_children()) { return false; }
+            if (!p.try_skip_children()) { return {false, 0}; }
           }
           // task is done
           p_ctx.task_is_done = true;
           // copy generator states to parent task;
           p_ctx.g = ctx.g;
-        }
-        // case path 3: case (_, Nil)
-        // case path 12: case _ =>
-        // others
-        else {
-          // never happen
-        }
-      } else {
-        // has no parent task, stack is empty, will exit
-      }
-    }
-  }
-
-  // copy output len
-  root_g.set_output_len(stack[0].g.get_output_len());
-  return stack[0].dirty > 0;
-}
 
-rmm::device_uvector<path_instruction> construct_path_commands(
-  std::vector<std::tuple<path_instruction_type, std::string, int64_t>> const& instructions,
-  cudf::string_scalar const& all_names_scalar,
-  rmm::cuda_stream_view stream,
-  rmm::device_async_resource_ref mr)
-{
-  int name_pos = 0;
-
-  // construct the path commands
-  std::vector<path_instruction> path_commands;
-  for (auto const& inst : instructions) {
-    auto const& [type, name, index] = inst;
-    switch (type) {
-      case path_instruction_type::WILDCARD:
-        path_commands.emplace_back(path_instruction{path_instruction_type::WILDCARD});
-        break;
-      case path_instruction_type::INDEX:
-        path_commands.emplace_back(path_instruction{path_instruction_type::INDEX});
-        path_commands.back().index = index;
-        break;
-      case path_instruction_type::NAMED:
-        path_commands.emplace_back(path_instruction{path_instruction_type::NAMED});
-        path_commands.back().name =
-          cudf::string_view(all_names_scalar.data() + name_pos, name.size());
-        name_pos += name.size();
-        break;
-      default: CUDF_FAIL("Invalid path instruction type");
-    }
-  }
-  // convert to uvector
-  return cudf::detail::make_device_uvector_sync(path_commands, stream, mr);
-}
-
-/**
- * @brief Parse a single json string using the provided command buffer
- *
- *
- * @param input The incoming json string
- * @param path_commands The command buffer to be applied to the string
- * @param out_buf Buffer user to store the string resulted from the query
- * @returns A pair containing the result code and the output buffer
- */
-__device__ thrust::pair<bool, cudf::size_type> get_json_object_single(
-  char_range input, cudf::device_span<path_instruction const> path_commands, char* out_buf)
-{
-  json_parser j_parser(input);
-  j_parser.next_token();
-  // JSON validation check
-  if (json_token::ERROR == j_parser.get_current_token()) { return {false, 0}; }
+          break;
+        }
 
-  json_generator generator(out_buf);
+        default:;  // Never happens!
+      }            // end switch (ctx.case_path)
 
-  bool const success = evaluate_path(j_parser, generator, write_style::RAW, path_commands);
+    }              // ctx.task_is_done
+  }                // while (stack_size > 0)
 
-  if (!success) {
-    // generator may contain trash output, e.g.: generator writes some output,
-    // then JSON format is invalid, the previous output becomes trash.
-    // set output as zero to tell second step
-    generator.set_output_len_zero();
-  }
+  auto const success = stack[0].dirty > 0;
 
-  return {success, static_cast<cudf::size_type>(generator.get_output_len())};
+  // The generator may contain trash output, e.g. the generator wrote some output
+  // and then the JSON format turned out to be invalid, so that output is trash.
+  // In that case we need to return the output size as zero.
+  return {success, success ? stack[0].g.get_output_len() : 0};
 }
 
 /**
@@ -895,11 +827,9 @@ __launch_bounds__(block_size, 1) CUDF_KERNEL
 
     auto const str = input.element<cudf::string_view>(tid);
     if (str.size_bytes() > 0) {
-      auto const max_size = offsets[tid + 1] - offsets[tid];
+      thrust::tie(is_valid, out_size) = evaluate_path(char_range{str}, path_commands, dst);
 
-      // If `max_size == 0`, do not pass in the dst pointer to prevent writing garbage data.
-      thrust::tie(is_valid, out_size) =
-        get_json_object_single(str, path_commands, max_size != 0 ? dst : nullptr);
+      auto const max_size = offsets[tid + 1] - offsets[tid];
       if (out_size > max_size) { *has_out_of_bound = true; }
     }
 
@@ -910,6 +840,35 @@ __launch_bounds__(block_size, 1) CUDF_KERNEL
   }
 }
 
+// Builds the path instruction list on the host and enqueues its copy to the device on `stream`.
+// Each NAMED instruction views the next `name.size()` bytes of `all_names_scalar`.
+// The host vector is returned as well because it must outlive the asynchronous copy.
+std::pair<rmm::device_uvector<path_instruction>, std::unique_ptr<std::vector<path_instruction>>>
+construct_path_commands(
+  std::vector<std::tuple<path_instruction_type, std::string, int64_t>> const& instructions,
+  cudf::string_scalar const& all_names_scalar,
+  rmm::cuda_stream_view stream)
+{
+  auto host_commands = std::make_unique<std::vector<path_instruction>>();
+  host_commands->reserve(instructions.size());
+  std::size_t names_offset{0};
+  for (auto const& [type, name, index] : instructions) {
+    auto& command = host_commands->emplace_back(path_instruction{type});
+    if (type == path_instruction_type::WILDCARD) { continue; }
+    if (type == path_instruction_type::INDEX) {
+      command.index = index;
+      continue;
+    }
+    if (type != path_instruction_type::NAMED) { CUDF_FAIL("Invalid path instruction type"); }
+    command.name = cudf::string_view(all_names_scalar.data() + names_offset, name.size());
+    names_offset += name.size();
+  }
+
+  auto device_commands = cudf::detail::make_device_uvector_async(
+    *host_commands, stream, rmm::mr::get_current_device_resource());
+  return {std::move(device_commands), std::move(host_commands)};
+}
+
 std::unique_ptr<cudf::column> get_json_object(
   cudf::strings_column_view const& input,
   std::vector<std::tuple<path_instruction_type, std::string, int64_t>> const& instructions,
@@ -919,13 +878,23 @@ std::unique_ptr<cudf::column> get_json_object(
   if (instructions.size() > max_path_depth) { CUDF_FAIL("JSONPath query exceeds maximum depth"); }
   if (input.is_empty()) { return cudf::make_empty_column(cudf::type_id::STRING); }
 
-  std::string all_names;
-  for (auto const& inst : instructions) {
-    all_names += std::get<1>(inst);
-  }
+  auto const all_names = [&] {
+    std::size_t length{0};
+    for (auto const& inst : instructions) {
+      length += (std::get<1>(inst)).length();
+    }
+
+    std::string all_names;
+    all_names.reserve(length);
+    for (auto const& inst : instructions) {
+      all_names += std::get<1>(inst);
+    }
+    return all_names;
+  }();
+
   auto const all_names_scalar = cudf::string_scalar(all_names, true, stream);
-  auto const path_commands    = construct_path_commands(
-    instructions, all_names_scalar, stream, rmm::mr::get_current_device_resource());
+  auto const [d_path_commands, h_path_commands] =
+    construct_path_commands(instructions, all_names_scalar, stream);
   auto const d_input_ptr = cudf::column_device_view::create(input.parent(), stream);
   auto const in_offsets  = cudf::detail::offsetalator_factory::make_input_iterator(input.offsets());
 
@@ -966,7 +935,7 @@ std::unique_ptr<cudf::column> get_json_object(
   get_json_object_kernel<block_size>
     <<<num_blocks, block_size, 0, stream.value()>>>(*d_input_ptr,
                                                     in_offsets,
-                                                    path_commands,
+                                                    d_path_commands,
                                                     out_stringviews.data(),
                                                     output_scratch.data(),
                                                     has_out_of_bound.data());
@@ -1009,7 +978,7 @@ std::unique_ptr<cudf::column> get_json_object(
   get_json_object_kernel<block_size>
     <<<num_blocks, block_size, 0, stream.value()>>>(*d_input_ptr,
                                                     out_offsets,
-                                                    path_commands,
+                                                    d_path_commands,
                                                     nullptr /*out_stringviews*/,
                                                     chars.data(),
                                                     has_out_of_bound.data());
diff --git a/src/main/cpp/src/get_json_object.hpp b/src/main/cpp/src/get_json_object.hpp
index bb3294b424..e13c1e42d7 100644
--- a/src/main/cpp/src/get_json_object.hpp
+++ b/src/main/cpp/src/get_json_object.hpp
@@ -16,18 +16,11 @@
 
 #pragma once
 
-#include <cudf/strings/string_view.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 
 #include <rmm/resource_ref.hpp>
 
-#include <thrust/optional.h>
-#include <thrust/pair.h>
-#include <thrust/scan.h>
-#include <thrust/tuple.h>
-
 #include <memory>
-#include <string>
 #include <vector>
 
 namespace spark_rapids_jni {
@@ -35,7 +28,7 @@ namespace spark_rapids_jni {
 /**
  * path instruction type
  */
-enum class path_instruction_type { WILDCARD, INDEX, NAMED };
+enum class path_instruction_type : int8_t { WILDCARD, INDEX, NAMED };
 
 /**
  * Extracts json object from a json string based on json path specified, and
diff --git a/src/main/cpp/src/json_parser.cuh b/src/main/cpp/src/json_parser.cuh
index 217ec0047b..10ad2e4fcc 100644
--- a/src/main/cpp/src/json_parser.cuh
+++ b/src/main/cpp/src/json_parser.cuh
@@ -61,7 +61,7 @@ constexpr int max_num_len = 1000;
 /**
  * JSON token enum
  */
-enum class json_token {
+enum class json_token : int8_t {
   // start token
   INIT = 0,
 
@@ -228,7 +228,7 @@ class json_parser {
   /**
    * @brief get the bit value for specified bit from a int64 number
    */
-  __device__ inline bool get_bit_value(int64_t number, int bitIndex)
+  static __device__ inline bool get_bit_value(int64_t number, int bitIndex)
   {
     // Shift the number right by the bitIndex to bring the desired bit to the rightmost position
     long shifted = number >> bitIndex;
@@ -242,7 +242,7 @@ class json_parser {
   /**
    * @brief set the bit value for specified bit to a int64 number
    */
-  __device__ inline void set_bit_value(int64_t& number, int bit_index, bool bit_value)
+  static __device__ inline void set_bit_value(int64_t& number, int bit_index, bool bit_value)
   {
     // Create a mask with a 1 at the desired bit index
     long mask = 1L << bit_index;
@@ -265,7 +265,7 @@ class json_parser {
   /**
    * is hex digits: 0-9, A-F, a-f
    */
-  __device__ inline bool is_hex_digit(char c) const
+  static __device__ inline bool is_hex_digit(char c)
   {
     return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
   }
@@ -273,12 +273,12 @@ class json_parser {
   /**
    * is 0 to 9 digit
    */
-  __device__ inline bool is_digit(char c) const { return (c >= '0' && c <= '9'); }
+  static __device__ inline bool is_digit(char c) { return (c >= '0' && c <= '9'); }
 
   /**
    * is white spaces: ' ', '\t', '\n' '\r'
    */
-  __device__ inline bool is_whitespace(char c) const
+  static __device__ inline bool is_whitespace(char c)
   {
     return c == ' ' || c == '\t' || c == '\n' || c == '\r';
   }
@@ -296,7 +296,7 @@ class json_parser {
   /**
    * check current char, if it's expected, then plus the position
    */
-  __device__ inline bool try_skip(char_range_reader& reader, char expected)
+  static __device__ inline bool try_skip(char_range_reader& reader, char expected)
   {
     if (!reader.eof() && reader.current_char() == expected) {
       reader.next();
@@ -305,7 +305,7 @@ class json_parser {
     return false;
   }
 
-  __device__ inline bool try_skip(cudf::size_type& pos, char expected)
+  __device__ inline bool try_skip(cudf::size_type& pos, char expected) const
   {
     if (!eof(pos) && chars[pos] == expected) {
       pos++;
@@ -343,7 +343,7 @@ class json_parser {
    * true is object, false is array
    * only has two contexts: object or array
    */
-  __device__ inline bool is_object_context()
+  __device__ inline bool is_object_context() const
   {
     return get_bit_value(context_stack, stack_size - 1);
   }
@@ -356,7 +356,7 @@ class json_parser {
   /**
    * is context stack is empty
    */
-  __device__ inline bool is_context_stack_empty() { return stack_size == 0; }
+  __device__ inline bool is_context_stack_empty() const { return stack_size == 0; }
 
   __device__ inline void set_current_error() { current_token = json_token::ERROR; }
 
@@ -431,7 +431,7 @@ class json_parser {
   /**
    * transform int value from [0, 15] to hex char
    */
-  __device__ inline char to_hex_char(unsigned int v)
+  static __device__ inline char to_hex_char(unsigned int v)
   {
     if (v < 10)
       return '0' + v;
@@ -446,7 +446,7 @@ class json_parser {
    * @param char to be escaped, c should in range [0, 31)
    * @param[out] escape output
    */
-  __device__ inline int escape_char(unsigned char c, char* output)
+  static __device__ inline int escape_char(unsigned char c, char* output)
   {
     if (nullptr == output) {
       switch (c) {
@@ -499,9 +499,9 @@ class json_parser {
     }
   }
 
-  __device__ inline int write_string(char_range_reader& str,
-                                     char* copy_destination,
-                                     escape_style w_style)
+  static __device__ inline int write_string(char_range_reader& str,
+                                            char* copy_destination,
+                                            escape_style w_style)
   {
     if (str.eof()) { return 0; }
     char const quote_char = str.current_char();
@@ -619,7 +619,7 @@ class json_parser {
    * is valid and length is the number of bytes needed to encode the string
    * in the given style.
    */
-  __device__ inline std::pair<bool, cudf::size_type> try_parse_string(
+  static __device__ inline std::pair<bool, cudf::size_type> try_parse_string(
     char_range_reader& str,
     char_range_reader to_match = char_range_reader(char_range::null()),
     escape_style w_style       = escape_style::UNESCAPED)
@@ -689,7 +689,7 @@ class json_parser {
     return std::make_pair(false, 0);
   }
 
-  __device__ inline bool try_match_char(char_range_reader& reader, char c)
+  static __device__ inline bool try_match_char(char_range_reader& reader, char c)
   {
     if (!reader.is_null()) {
       if (!reader.eof() && reader.current_char() == c) {
@@ -708,11 +708,11 @@ class json_parser {
    * skip the HEX chars in \u HEX HEX HEX HEX.
    * @return positive escaped ASCII value if success, -1 otherwise
    */
-  __device__ inline bool try_skip_escape_part(char_range_reader& str,
-                                              char_range_reader& to_match,
-                                              char*& copy_dest,
-                                              escape_style w_style,
-                                              int& output_size_bytes)
+  static __device__ inline bool try_skip_escape_part(char_range_reader& str,
+                                                     char_range_reader& to_match,
+                                                     char*& copy_dest,
+                                                     escape_style w_style,
+                                                     int& output_size_bytes)
   {
     // already skipped the first '\'
     // try skip second part
@@ -853,7 +853,7 @@ class json_parser {
    *     : ~ ["\\\u0000-\u001F]
    *     ;
    */
-  __device__ inline bool try_skip_safe_code_point(char_range_reader& str, char c)
+  static __device__ inline bool try_skip_safe_code_point(char_range_reader& str, char c)
   {
     // 1 the char is not quoted(' or ") char, here satisfy, do not need to check
     // again
@@ -873,7 +873,7 @@ class json_parser {
   /**
    * convert chars 0-9, a-f, A-F to int value
    */
-  __device__ inline uint8_t hex_value(char c)
+  static __device__ inline uint8_t hex_value(char c)
   {
     if (c >= '0' && c <= '9') return c - '0';
     if (c >= 'a' && c <= 'f') return c - 'a' + 10;
@@ -887,7 +887,7 @@ class json_parser {
    * @param character Single character
    * @return Number of bytes
    */
-  __device__ cudf::size_type bytes_in_char_utf8(cudf::char_utf8 character)
+  static __device__ cudf::size_type bytes_in_char_utf8(cudf::char_utf8 character)
   {
     return 1 + static_cast<cudf::size_type>((character & 0x0000'FF00u) > 0) +
            static_cast<cudf::size_type>((character & 0x00FF'0000u) > 0) +
@@ -900,7 +900,7 @@ class json_parser {
    * @param unchr Character code-point to convert.
    * @return Single UTF-8 character.
    */
-  __device__ cudf::char_utf8 codepoint_to_utf8(uint32_t unchr)
+  static __device__ cudf::char_utf8 codepoint_to_utf8(uint32_t unchr)
   {
     cudf::char_utf8 utf8 = 0;
     if (unchr < 0x0000'0080) {
@@ -935,7 +935,7 @@ class json_parser {
    * @param[out] str Output array.
    * @return The number of bytes in the character
    */
-  __device__ cudf::size_type from_char_utf8(cudf::char_utf8 character, char* str)
+  static __device__ cudf::size_type from_char_utf8(cudf::char_utf8 character, char* str)
   {
     cudf::size_type const chr_width = bytes_in_char_utf8(character);
     for (cudf::size_type idx = 0; idx < chr_width; ++idx) {
@@ -949,10 +949,10 @@ class json_parser {
    * try skip 4 HEX chars
    * in pattern: '\\' 'u' HEX HEX HEX HEX, it's a code point of unicode
    */
-  __device__ bool try_skip_unicode(char_range_reader& str,
-                                   char_range_reader& to_match,
-                                   char*& copy_dest,
-                                   int& output_size_bytes)
+  static __device__ bool try_skip_unicode(char_range_reader& str,
+                                          char_range_reader& to_match,
+                                          char*& copy_dest,
+                                          int& output_size_bytes)
   {
     // already parsed \u
     // now we expect 4 hex chars.
@@ -1042,7 +1042,7 @@ class json_parser {
    * verify max number digits length if enabled
    * e.g.: +1.23e-45 length is 5
    */
-  __device__ inline bool check_max_num_len(int number_digits_length)
+  static __device__ inline bool check_max_num_len(int number_digits_length)
   {
     return
       // disabled num len check
@@ -1362,10 +1362,10 @@ class json_parser {
   /**
    * get current token
    */
-  __device__ json_token get_current_token() { return current_token; }
+  __device__ json_token get_current_token() const { return current_token; }
 
   // TODO make this go away!!!!
-  __device__ inline char_range current_range()
+  __device__ inline char_range current_range() const
   {
     return chars.slice(current_token_start_pos, curr_pos - current_token_start_pos);
   }
@@ -1386,9 +1386,10 @@ class json_parser {
       return true;
     }
 
+    json_token t;
     int open = 1;
-    while (true) {
-      json_token t = next_token();
+    do {
+      t = next_token();
       if (t == json_token::START_OBJECT || t == json_token::START_ARRAY) {
         ++open;
       } else if (t == json_token::END_OBJECT || t == json_token::END_ARRAY) {
@@ -1396,10 +1397,11 @@ class json_parser {
       } else if (t == json_token::ERROR) {
         return false;
       }
-    }
+    } while (t != json_token::SUCCESS);
+    return false;
   }
 
-  __device__ cudf::size_type compute_unescaped_len() { return write_unescaped_text(nullptr); }
+  __device__ cudf::size_type compute_unescaped_len() const { return write_unescaped_text(nullptr); }
 
   /**
    * unescape current token text, then write to destination
@@ -1408,7 +1410,7 @@ class json_parser {
    *   writes 6 utf8 bytes: -28  -72 -83 -27 -101 -67
    * For number, write verbatim without normalization
    */
-  __device__ cudf::size_type write_unescaped_text(char* destination)
+  __device__ cudf::size_type write_unescaped_text(char* destination) const
   {
     switch (current_token) {
       case json_token::VALUE_STRING: {
@@ -1490,7 +1492,7 @@ class json_parser {
     return 0;
   }
 
-  __device__ cudf::size_type compute_escaped_len() { return write_escaped_text(nullptr); }
+  __device__ cudf::size_type compute_escaped_len() const { return write_escaped_text(nullptr); }
   /**
    * escape current token text, then write to destination
    * e.g.: '"' is a string with 1 char '"', writes out 4 chars '"' '\' '\"' '"'
@@ -1498,7 +1500,7 @@ class json_parser {
    *   writes 8 utf8 bytes: '"' -28  -72 -83 -27 -101 -67 '"'
    * For number, write verbatim without normalization
    */
-  __device__ cudf::size_type write_escaped_text(char* destination)
+  __device__ cudf::size_type write_escaped_text(char* destination) const
   {
     switch (current_token) {
       case json_token::VALUE_STRING: {
@@ -1580,7 +1582,7 @@ class json_parser {
    * return true if current token is FIELD_NAME and match successfully.
    * return false otherwise,
    */
-  __device__ bool match_current_field_name(cudf::string_view name)
+  __device__ bool match_current_field_name(cudf::string_view name) const
   {
     return match_current_field_name(char_range(name));
   }
@@ -1588,7 +1590,7 @@ class json_parser {
   /**
    * match current field name
    */
-  __device__ bool match_current_field_name(char_range name)
+  __device__ bool match_current_field_name(char_range name) const
   {
     if (json_token::FIELD_NAME == current_token) {
       char_range_reader reader(current_range());
@@ -1689,7 +1691,6 @@ class json_parser {
  private:
   char_range const chars;
   cudf::size_type curr_pos;
-  json_token current_token;
 
   // 64 bits long saves the nested object/array contexts
   // true(bit value 1) is JSON object context
@@ -1704,6 +1705,8 @@ class json_parser {
   // TODO remove if possible
   // used to store number token length
   cudf::size_type number_token_len;
+
+  json_token current_token;
 };
 
 }  // namespace spark_rapids_jni