Skip to content

Commit

Permalink
implode: Better invalid input validation and handling
Browse files Browse the repository at this point in the history
Error on non-number and NaN codepoints; these would trigger an assert before
Replace negative codepoints and codepoints in the surrogate range with the Unicode replacement character; these would trigger an assert before

Fixes #1160
  • Loading branch information
wader committed Jul 24, 2023
1 parent 3305596 commit 100c3f7
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 4 deletions.
2 changes: 1 addition & 1 deletion modules/oniguruma
Submodule oniguruma updated 1 files
+1 −1 Makefile.am
24 changes: 23 additions & 1 deletion src/builtin.c
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,29 @@ static jv f_string_implode(jq_state *jq, jv a) {
if (jv_get_kind(a) != JV_KIND_ARRAY) {
return ret_error(a, jv_string("implode input must be an array"));
}
return jv_string_implode(a);

int len = jv_array_length(jv_copy(a));
assert(len >= 0);
jv s = jv_string_empty(len);

for (int i = 0; i < len; i++) {
jv n = jv_array_get(jv_copy(a), i);
if (jv_get_kind(n) != JV_KIND_NUMBER || jvp_number_is_nan(n)) {
jv_free(a);
jv_free(s);
return type_error(n, "codepoint must be a number");
}

int nv = jv_number_value(n);
jv_free(n);
// outside codepoint range or in utf16 surrogate pair range
if (nv < 0 || nv > 0x10FFFF || (nv >= 0xD800 && nv <= 0xDFFF))
nv = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER
s = jv_string_append_codepoint(s, nv);
}

jv_free(a);
return s;
}

/* Thin wrapper: forwards a, b and c unchanged to jv_setpath() and returns
 * its result. The jq state argument is not used here. */
static jv f_setpath(jq_state *jq, jv a, jv b, jv c) {
  jv result = jv_setpath(a, b, c);
  return result;
}
Expand Down
3 changes: 2 additions & 1 deletion src/jv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1368,7 +1368,8 @@ jv jv_string_implode(jv j) {
assert(JVP_HAS_KIND(n, JV_KIND_NUMBER));
int nv = jv_number_value(n);
jv_free(n);
if (nv > 0x10FFFF)
// outside codepoint range or in utf16 surrogate pair range
if (nv < 0 || nv > 0x10FFFF || (nv >= 0xD800 && nv <= 0xDFFF))
nv = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER
s = jv_string_append_codepoint(s, nv);
}
Expand Down
1 change: 1 addition & 0 deletions src/jv.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ jv jv_number(double);
jv jv_number_with_literal(const char*);
double jv_number_value(jv);
int jv_is_integer(jv);
int jvp_number_is_nan(jv);

int jv_number_has_literal(jv n);
const char* jv_number_get_literal(jv);
Expand Down
1 change: 0 additions & 1 deletion src/jv_type_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,5 @@
#define JV_TYPE_PRIVATE

int jvp_number_cmp(jv, jv);
int jvp_number_is_nan(jv);

#endif //JV_TYPE_PRIVATE
11 changes: 11 additions & 0 deletions tests/jq.test
Original file line number Diff line number Diff line change
Expand Up @@ -1910,3 +1910,14 @@ any(keys[]|tostring?;true)
{"a":"1","b":"2","c":"3"}
true


# explode/implode
# test replacement character (65533) for outside codepoint range and 0xd800 (55296) - 0xdfff (57343) utf16 surrogate pair range
# 1.1 and 1.9 to test round down of non-ints
implode|explode
[-1,0,1,2,3,1114111,1114112,55295,55296,57343,57344,1.1,1.9]
[65533,0,1,2,3,1114111,65533,55295,65533,65533,57344,1,1]

map(try implode catch .)
[123,["a"],[nan]]
["implode input must be an array","string (\"a\") codepoint must be a number","number (null) codepoint must be a number"]

0 comments on commit 100c3f7

Please sign in to comment.