From 100c3f7e03815bfd783e2b568ab7bab2b78baed8 Mon Sep 17 00:00:00 2001 From: Mattias Wadman Date: Sun, 2 Jul 2023 16:48:07 +0200 Subject: [PATCH] implode: Better invalid input validation and handling Error on non-number and nan codepoint, would assert before Replace negative codepoint and surrogate range with unicode replacement character, would assert before Fixes #1160 --- modules/oniguruma | 2 +- src/builtin.c | 24 +++++++++++++++++++++++- src/jv.c | 3 ++- src/jv.h | 1 + src/jv_type_private.h | 1 - tests/jq.test | 11 +++++++++++ 6 files changed, 38 insertions(+), 4 deletions(-) diff --git a/modules/oniguruma b/modules/oniguruma index d2f1a14ced..660f097dff 160000 --- a/modules/oniguruma +++ b/modules/oniguruma @@ -1 +1 @@ -Subproject commit d2f1a14ced5d5d461acac0da0d477ab240a7ab5f +Subproject commit 660f097dff67e331cef864d70abc96dc3332a37f diff --git a/src/builtin.c b/src/builtin.c index 3e99c37615..7e7c5719f9 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -1201,7 +1201,29 @@ static jv f_string_implode(jq_state *jq, jv a) { if (jv_get_kind(a) != JV_KIND_ARRAY) { return ret_error(a, jv_string("implode input must be an array")); } - return jv_string_implode(a); + + int len = jv_array_length(jv_copy(a)); + assert(len >= 0); + jv s = jv_string_empty(len); + + for (int i = 0; i < len; i++) { + jv n = jv_array_get(jv_copy(a), i); + if (jv_get_kind(n) != JV_KIND_NUMBER || jvp_number_is_nan(n)) { + jv_free(a); + jv_free(s); + return type_error(n, "codepoint must be a number"); + } + + int nv = jv_number_value(n); + jv_free(n); + // outside codepoint range or in utf16 surrogate pair range + if (nv < 0 || nv > 0x10FFFF || (nv >= 0xD800 && nv <= 0xDFFF)) + nv = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER + s = jv_string_append_codepoint(s, nv); + } + + jv_free(a); + return s; } static jv f_setpath(jq_state *jq, jv a, jv b, jv c) { return jv_setpath(a, b, c); } diff --git a/src/jv.c b/src/jv.c index 159b3f272f..b4ee8a2e7c 100644 --- a/src/jv.c +++ b/src/jv.c @@ -1368,7
+1368,8 @@ jv jv_string_implode(jv j) { assert(JVP_HAS_KIND(n, JV_KIND_NUMBER)); int nv = jv_number_value(n); jv_free(n); - if (nv > 0x10FFFF) + // outside codepoint range or in utf16 surrogate pair range + if (nv < 0 || nv > 0x10FFFF || (nv >= 0xD800 && nv <= 0xDFFF)) nv = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER s = jv_string_append_codepoint(s, nv); } diff --git a/src/jv.h b/src/jv.h index 8c96f822f0..446ffb06e6 100644 --- a/src/jv.h +++ b/src/jv.h @@ -63,6 +63,7 @@ jv jv_number(double); jv jv_number_with_literal(const char*); double jv_number_value(jv); int jv_is_integer(jv); +int jvp_number_is_nan(jv); int jv_number_has_literal(jv n); const char* jv_number_get_literal(jv); diff --git a/src/jv_type_private.h b/src/jv_type_private.h index 5996282ba5..a25254dc10 100644 --- a/src/jv_type_private.h +++ b/src/jv_type_private.h @@ -2,6 +2,5 @@ #define JV_TYPE_PRIVATE int jvp_number_cmp(jv, jv); -int jvp_number_is_nan(jv); #endif //JV_TYPE_PRIVATE diff --git a/tests/jq.test b/tests/jq.test index 4e0b950d3c..2c96438281 100644 --- a/tests/jq.test +++ b/tests/jq.test @@ -1910,3 +1910,14 @@ any(keys[]|tostring?;true) {"a":"1","b":"2","c":"3"} true + +# explode/implode +# test replacement character (65533) for outside codepoint range and 0xd800 (55296) - 0xdfff (57343) utf16 surrogate pair range +# 1.1 and 1.9 to test round down of non-ints +implode|explode +[-1,0,1,2,3,1114111,1114112,55295,55296,57343,57344,1.1,1.9] +[65533,0,1,2,3,1114111,65533,55295,65533,65533,57344,1,1] + +map(try implode catch .) +[123,["a"],[nan]] +["implode input must be an array","string (\"a\") codepoint must be a number","number (null) codepoint must be a number"]