From 323c6329256bbcb66ba80f69f3584d7eede1120c Mon Sep 17 00:00:00 2001 From: Nicolas Williams Date: Sat, 28 Jan 2017 18:56:55 -0600 Subject: [PATCH 1/3] jv int64 and uint64 support --- configure.ac | 2 + src/jv.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++- src/jv.h | 17 ++++++- src/jv_parse.c | 49 +++++++++++++++++++- src/jv_print.c | 18 +++++++- 5 files changed, 204 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index 59432fdd4c..445042f63a 100644 --- a/configure.ac +++ b/configure.ac @@ -80,6 +80,8 @@ if test "x$valgrind_cmd" = "x" ; then fi AC_CHECK_FUNCS(memmem) AC_CHECK_FUNCS(mkstemp) +AC_CHECK_FUNCS(strtoimax) +AC_CHECK_FUNCS(strtoumax) AC_CHECK_HEADER("shlwapi.h",[have_win32=1;]) AM_CONDITIONAL([WIN32], [test "x$have_win32" = x1]) diff --git a/src/jv.c b/src/jv.c index e064baf572..64e358448f 100644 --- a/src/jv.c +++ b/src/jv.c @@ -47,6 +47,10 @@ jv_kind jv_get_kind(jv x) { return x.kind_flags & KIND_MASK; } +jv_subkind jv_get_subkind(jv x) { + return x.subkind_flags & KIND_MASK; +} + const char* jv_kind_name(jv_kind k) { switch (k) { case JV_KIND_INVALID: return ""; @@ -138,12 +142,72 @@ static void jvp_invalid_free(jv x) { */ jv jv_number(double x) { - jv j = {JV_KIND_NUMBER, 0, 0, 0, {.number = x}}; + jv j = {JV_KIND_NUMBER, JV_SUBKIND_NONE, 0, 0, {.number = x}}; + return j; +} + +jv jv_int64(int64_t x) { +#ifndef JQ_OMIT_INTS + jv j = {JV_KIND_NUMBER, JV_SUBKIND_INT64, 0, 0, {.int64 = x}}; +#else + jv j = {JV_KIND_NUMBER, JV_SUBKIND_INT64, 0, 0, {.number = x}}; +#endif + return j; +} + +jv jv_uint64(uint64_t x) { +#ifndef JQ_OMIT_INTS + jv j = {JV_KIND_NUMBER, JV_SUBKIND_UINT64, 0, 0, {.uint64 = x}}; +#else + jv j = {JV_KIND_NUMBER, JV_SUBKIND_UINT64, 0, 0, {.number = x}}; +#endif return j; } double jv_number_value(jv j) { assert(jv_get_kind(j) == JV_KIND_NUMBER); +#ifndef JQ_OMIT_INTS + char sk = jv_get_subkind(j); + if (sk == JV_SUBKIND_NONE) + return j.u.number; + if (sk == JV_SUBKIND_INT64) + return j.u.int64; + assert(sk == JV_SUBKIND_UINT64); + return j.u.uint64; +#else + return j.u.number; +#endif +} + +int64_t jv_int64_value(jv j) +{ + assert(jv_get_kind(j) == JV_KIND_NUMBER); +#ifndef JQ_OMIT_INTS + char sk = jv_get_subkind(j); + if (sk == JV_SUBKIND_INT64) + return j.u.int64; + if (sk == JV_SUBKIND_UINT64) { + if (j.u.uint64 <= INT64_MAX) + return (int64_t)j.u.uint64; + return INT64_MAX; + } +#endif + return j.u.number; +} + +uint64_t jv_uint64_value(jv j) +{ + assert(jv_get_kind(j) == JV_KIND_NUMBER); +#ifndef JQ_OMIT_INTS + char sk = jv_get_subkind(j); + if (sk == JV_SUBKIND_UINT64) + return j.u.uint64; + if (sk == JV_SUBKIND_INT64) { + if (j.u.int64 >= 0) + return j.u.int64; + return 0; + } +#endif return j.u.number; } @@ -151,6 +215,13 @@ int jv_is_integer(jv j){ if(jv_get_kind(j) != JV_KIND_NUMBER){ return 0; } +#ifndef JQ_OMIT_INTS + char sk = jv_get_subkind(j); + if (sk != JV_SUBKIND_NONE) { + assert(sk == JV_SUBKIND_UINT64 || sk == JV_SUBKIND_INT64); + return 1; + } +#endif double x = jv_number_value(j); if(x != x || x > INT_MAX || x < INT_MIN){ return 0; @@ -159,6 +230,42 @@ int jv_is_integer(jv j){ return x == (int)x; } +int jv_is_int64(jv j) +{ + if(jv_get_kind(j) != JV_KIND_NUMBER){ + return 0; + } +#ifndef JQ_OMIT_INTS + char sk = jv_get_subkind(j); + if (sk == JV_SUBKIND_INT64) + return 1; + if (sk == JV_SUBKIND_UINT64) { + if (j.u.uint64 <= INT64_MAX) + return 1; + return 0; + } +#endif + return jv_is_integer(j); +} + +int jv_is_uint64(jv j) +{ + if(jv_get_kind(j) != JV_KIND_NUMBER){ + return 0; + } +#ifndef JQ_OMIT_INTS + char sk = jv_get_subkind(j); + if (sk == JV_SUBKIND_UINT64) + return 1; + if (sk == JV_SUBKIND_INT64) { + if (j.u.int64 >= 0) + return 1; + return 0; + } +#endif + return jv_is_integer(j); +} + /* * Arrays (internal helpers) */ @@ -1302,9 +1409,21 @@ int jv_identical(jv a, jv b) { case JV_KIND_OBJECT: r = a.u.ptr == b.u.ptr; break; - case JV_KIND_NUMBER: + case JV_KIND_NUMBER: { +#ifndef JQ_OMIT_INTS + char ask = jv_get_subkind(a); + char bsk = jv_get_subkind(b); + if (ask != bsk) + r = jv_number_value(a) == jv_number_value(b); + else if (ask == JV_SUBKIND_NONE) + r = memcmp(&a.u.number, &b.u.number, sizeof(a.u.number)) == 0; + else + r = a.u.int64 == b.u.int64; /* this handles int64 and uint64 */ +#else r = memcmp(&a.u.number, &b.u.number, sizeof(a.u.number)) == 0; +#endif break; + } default: r = 1; break; diff --git a/src/jv.h b/src/jv.h index 9e74c9d8ae..f9b9bae982 100644 --- a/src/jv.h +++ b/src/jv.h @@ -16,18 +16,26 @@ typedef enum { JV_KIND_OBJECT } jv_kind; +typedef enum { + JV_SUBKIND_NONE, + JV_SUBKIND_INT64, + JV_SUBKIND_UINT64, +} jv_subkind; + struct jv_refcnt; /* All of the fields of this struct are private. Really. Do not play with them. */ typedef struct { unsigned char kind_flags; - unsigned char pad_; + unsigned char subkind_flags; unsigned short offset; /* array offsets */ int size; union { struct jv_refcnt* ptr; double number; + int64_t int64; + uint64_t uint64; } u; } jv; @@ -37,6 +45,7 @@ typedef struct { */ jv_kind jv_get_kind(jv); +jv_subkind jv_get_subkind(jv); const char* jv_kind_name(jv_kind); static int jv_is_valid(jv x) { return jv_get_kind(x) != JV_KIND_INVALID; } @@ -61,8 +70,14 @@ jv jv_false(void); jv jv_bool(int); jv jv_number(double); +jv jv_int64(int64_t); +jv jv_uint64(uint64_t); double jv_number_value(jv); +int64_t jv_int64_value(jv); +uint64_t jv_uint64_value(jv); int jv_is_integer(jv); +int jv_is_int64(jv); +int jv_is_uint64(jv); jv jv_array(void); jv jv_array_sized(int); diff --git a/src/jv_parse.c b/src/jv_parse.c index 51ad9f0947..2343b226d9 100644 --- a/src/jv_parse.c +++ b/src/jv_parse.c @@ -1,7 +1,10 @@ +#include +#include +#include +#include #include #include #include -#include #include "jv.h" #include "jv_dtoa.h" #include "jv_unicode.h" @@ -496,8 +499,52 @@ static pfunc check_literal(struct jv_parser* p) { double d = jvp_strtod(&p->dtoa, p->tokenbuf, &end); if (end == 0 || *end != 0) return "Invalid numeric literal"; + +#ifndef JQ_OMIT_INTS + if (d == (int64_t)d || d == (uint64_t)d) { + if (d >= INT64_MIN && d <= INT64_MAX) { + TRY(value(p, jv_int64(d))); + goto out; + } else if (d >= 0 && d <= UINT64_MAX) { + TRY(value(p, jv_uint64(d))); + goto out; + } + + char *q = p->tokenbuf; + int is_signed = 0; + while (isspace(*q)) + q++; + if (*q == '-') { + is_signed = 1; + q++; + } + errno = 0; + if (is_signed) { +#ifdef HAVE_STRTOIMAX + int64_t i64 = strtoimax(p->tokenbuf, &q, 10); +#else + int64_t i64 = strtoll(p->tokenbuf, &q, 10); +#endif + if (q == end && i64 < 0 && errno == 0) { + TRY(value(p, jv_int64(i64))); + goto out; + } + } else { +#ifdef HAVE_STRTOUMAX + uint64_t u64 = strtoumax(p->tokenbuf, &q, 10); +#else + uint64_t u64 = strtoull(p->tokenbuf, &q, 10); +#endif + if (q == end && errno == 0) { + TRY(value(p, jv_int64(u64))); + goto out; + } + } + } +#endif TRY(value(p, jv_number(d))); } +out: p->tokenpos = 0; return 0; } diff --git a/src/jv_print.c b/src/jv_print.c index ce4a59afa0..94dcbd0d1b 100644 --- a/src/jv_print.c +++ b/src/jv_print.c @@ -1,6 +1,7 @@ #include -#include +#include #include +#include #include #ifdef WIN32 @@ -182,6 +183,21 @@ static void jv_dump_term(struct dtoa_context* C, jv x, int flags, int indent, FI put_str("true", F, S, flags & JV_PRINT_ISATTY); break; case JV_KIND_NUMBER: { +#ifndef JQ_OMIT_INTS + if (jv_is_int64(x)) { + int64_t i64 = jv_int64_value(x); + char buf[21]; + (void) snprintf(buf, sizeof(buf), "%" PRId64, i64); + put_str(buf, F, S, flags & JV_PRINT_ISATTY); + break; + } else if (jv_is_uint64(x)) { + uint64_t u64 = jv_uint64_value(x); + char buf[21]; + (void) snprintf(buf, sizeof(buf), "%" PRIu64, u64); + put_str(buf, F, S, flags & JV_PRINT_ISATTY); + break; + } +#endif double d = jv_number_value(x); if (d != d) { // JSON doesn't have NaN, so we'll render it as "null" From d066c61c68a99556c5faac7a1e025f3cb3b8c8e8 Mon Sep 17 00:00:00 2001 From: Nicolas Williams Date: Mon, 30 Jan 2017 14:30:49 -0600 Subject: [PATCH 2/3] fixup parser bugs --- src/jv.c | 8 ++++++ src/jv_parse.c | 66 +++++++++++++++++++++++--------------------------- 2 files changed, 38 insertions(+), 36 deletions(-) diff --git a/src/jv.c b/src/jv.c index 64e358448f..fe0dc58ce9 100644 --- a/src/jv.c +++ b/src/jv.c @@ -191,6 +191,10 @@ int64_t jv_int64_value(jv j) return (int64_t)j.u.uint64; return INT64_MAX; } + if (j.u.number > 0 && (int64_t)j.u.number < 0) + return INT64_MAX; + if (j.u.number < 0 && (int64_t)j.u.number > 0) + return INT64_MIN; #endif return j.u.number; } @@ -207,6 +211,10 @@ uint64_t jv_uint64_value(jv j) return j.u.int64; return 0; } + if (j.u.number < 0) + return 0; + if (j.u.number > (double)UINT64_MAX) + return UINT64_MAX; #endif return j.u.number; } diff --git a/src/jv_parse.c b/src/jv_parse.c index 2343b226d9..617fb97e76 100644 --- a/src/jv_parse.c +++ b/src/jv_parse.c @@ -495,53 +495,47 @@ static pfunc check_literal(struct jv_parser* p) { } else { // FIXME: better parser p->tokenbuf[p->tokenpos] = 0; - char* end = 0; - double d = jvp_strtod(&p->dtoa, p->tokenbuf, &end); - if (end == 0 || *end != 0) - return "Invalid numeric literal"; + char *end = 0; #ifndef JQ_OMIT_INTS - if (d == (int64_t)d || d == (uint64_t)d) { - if (d >= INT64_MIN && d <= INT64_MAX) { - TRY(value(p, jv_int64(d))); - goto out; - } else if (d >= 0 && d <= UINT64_MAX) { - TRY(value(p, jv_uint64(d))); - goto out; - } - - char *q = p->tokenbuf; - int is_signed = 0; - while (isspace(*q)) - q++; - if (*q == '-') { - is_signed = 1; - q++; - } - errno = 0; - if (is_signed) { + char *q = p->tokenbuf; + int is_signed = 0; + while (isspace(*q)) + q++; + if (*q == '-') { + is_signed = 1; + q++; + } + errno = 0; + if (is_signed) { #ifdef HAVE_STRTOIMAX - int64_t i64 = strtoimax(p->tokenbuf, &q, 10); + int64_t i64 = strtoimax(p->tokenbuf, &end, 10); #else - int64_t i64 = strtoll(p->tokenbuf, &q, 10); + int64_t i64 = strtoll(p->tokenbuf, &end, 10); #endif - if (q == end && i64 < 0 && errno == 0) { - TRY(value(p, jv_int64(i64))); - goto out; - } - } else { + if (end != 0 && *end == 0 && i64 < 0 && errno == 0) { + TRY(value(p, jv_int64(i64))); + goto out; + } + } else { #ifdef HAVE_STRTOUMAX - uint64_t u64 = strtoumax(p->tokenbuf, &q, 10); + uint64_t u64 = strtoumax(p->tokenbuf, &end, 10); #else - uint64_t u64 = strtoull(p->tokenbuf, &q, 10); + uint64_t u64 = strtoull(p->tokenbuf, &end, 10); #endif - if (q == end && errno == 0) { - TRY(value(p, jv_int64(u64))); - goto out; - } + if (end != 0 && *end == 0 && errno == 0) { + TRY(value(p, jv_uint64(u64))); + goto out; } } #endif + double d = jvp_strtod(&p->dtoa, p->tokenbuf, &end); + if (end == 0 || *end != 0) + return "Invalid numeric literal"; + /* + * So there was a decimal or exponent; this might still be an + * integer, but we'll go with double. + */ TRY(value(p, jv_number(d))); } out: From eb77bae83a779bc239c184dfa1f71f50e956dd98 Mon Sep 17 00:00:00 2001 From: Nicolas Williams Date: Thu, 2 Feb 2017 17:21:30 -0600 Subject: [PATCH 3/3] Add `tointeger` and `isinteger` --- src/builtin.c | 29 +++++++++++++++++++++++++++++ src/jv.c | 1 + 2 files changed, 30 insertions(+) diff --git a/src/builtin.c b/src/builtin.c index aa0ab4d5eb..3c5a3f8196 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -363,6 +363,33 @@ static jv f_tonumber(jq_state *jq, jv input) { return type_error(input, "cannot be parsed as a number"); } +static jv f_toint(jq_state *jq, jv input) { + input = f_tonumber(jq, input); + if (!jv_is_valid(input)) + return input; + if (jv_is_int64(input) || jv_is_uint64(input)) + return input; + double d = jv_number_value(input); + if (d < 0 && d >= INT64_MIN) { + int64_t i = d; + if (i < 0) + return jv_int64(i); + } else if (d < UINT64_MAX) { + uint64_t u = d; + if (d == (double)u) + return jv_uint64(u); + } + return jv_number(nearbyint(d)); +} + +static jv f_isint(jq_state *jq, jv input) { + if (jv_get_kind(input) != JV_KIND_NUMBER) + return type_error(input, "only numbers can be integers"); + if (jv_is_int64(input) || jv_is_uint64(input) || jv_is_integer(input)) + return jv_true(); + return jv_false(); +} + static jv f_length(jq_state *jq, jv input) { if (jv_get_kind(input) == JV_KIND_ARRAY) { return jv_number(jv_array_length(input)); @@ -1285,6 +1312,8 @@ static const struct cfunction function_list[] = { {(cfunction_ptr)f_json_parse, "fromjson", 1}, {(cfunction_ptr)f_tonumber, "tonumber", 1}, {(cfunction_ptr)f_tostring, "tostring", 1}, + {(cfunction_ptr)f_toint, "tointeger", 1}, + {(cfunction_ptr)f_isint, "isinteger", 1}, {(cfunction_ptr)f_keys, "keys", 1}, {(cfunction_ptr)f_keys_unsorted, "keys_unsorted", 1}, {(cfunction_ptr)f_startswith, "startswith", 2}, diff --git a/src/jv.c b/src/jv.c index fe0dc58ce9..1ae809ec33 100644 --- a/src/jv.c +++ b/src/jv.c @@ -231,6 +231,7 @@ int jv_is_integer(jv j){ } #endif double x = jv_number_value(j); + /* XXX Check against actual double min/max integers */ if(x != x || x > INT_MAX || x < INT_MIN){ return 0; }