Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

jv: Add some support for 64 bit ints in a very conservative way (ALTERNATIVE) #1327

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ if test "x$valgrind_cmd" = "x" ; then
fi
AC_CHECK_FUNCS(memmem)
AC_CHECK_FUNCS(mkstemp)
AC_CHECK_FUNCS(strtoimax)
AC_CHECK_FUNCS(strtoumax)

AC_CHECK_HEADER("shlwapi.h",[have_win32=1;])
AM_CONDITIONAL([WIN32], [test "x$have_win32" = x1])
Expand Down
29 changes: 29 additions & 0 deletions src/builtin.c
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,33 @@ static jv f_tonumber(jq_state *jq, jv input) {
return type_error(input, "cannot be parsed as a number");
}

static jv f_toint(jq_state *jq, jv input) {
input = f_tonumber(jq, input);
if (!jv_is_valid(input))
return input;
if (jv_is_int64(input) || jv_is_uint64(input))
return input;
double d = jv_number_value(input);
if (d < 0 && d >= INT64_MIN) {
int64_t i = d;
if (i < 0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's happening here? Is this a guard against the cast turning -0.5 into 0 instead of -1?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's just for deciding whether to return a signed 64-bit integer representation or unsigned 64-bit integer representation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But if the check is false, the next code that runs is return jv_number(nearbyint(d));, which is a double representation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These checks should use the nextafter stuff from below for INT64_MIN and UINT64_MAX

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eh? No, there's an else if at 377.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps I misunderstand.
Line 373 is checking to see if d is negative AND d fits into an int64_t
Line 374 does the cast
What does line 375 check?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yes, i'm the one who's confused here. I need to s/else // here :)

return jv_int64(i);
} else if (d < UINT64_MAX) {
uint64_t u = d;
if (d == (double)u)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also this one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto. Here we know that d is positive, and if it's also less than UINT64_MAX then we can represent it as a uint64_t.

return jv_uint64(u);
}
return jv_number(nearbyint(d));
}

static jv f_isint(jq_state *jq, jv input) {
if (jv_get_kind(input) != JV_KIND_NUMBER)
return type_error(input, "only numbers can be integers");
if (jv_is_int64(input) || jv_is_uint64(input) || jv_is_integer(input))
return jv_true();
return jv_false();
}

static jv f_length(jq_state *jq, jv input) {
if (jv_get_kind(input) == JV_KIND_ARRAY) {
return jv_number(jv_array_length(input));
Expand Down Expand Up @@ -1285,6 +1312,8 @@ static const struct cfunction function_list[] = {
{(cfunction_ptr)f_json_parse, "fromjson", 1},
{(cfunction_ptr)f_tonumber, "tonumber", 1},
{(cfunction_ptr)f_tostring, "tostring", 1},
{(cfunction_ptr)f_toint, "tointeger", 1},
{(cfunction_ptr)f_isint, "isinteger", 1},
{(cfunction_ptr)f_keys, "keys", 1},
{(cfunction_ptr)f_keys_unsorted, "keys_unsorted", 1},
{(cfunction_ptr)f_startswith, "startswith", 2},
Expand Down
132 changes: 130 additions & 2 deletions src/jv.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ jv_kind jv_get_kind(jv x) {
return x.kind_flags & KIND_MASK;
}

jv_subkind jv_get_subkind(jv x) {
return x.subkind_flags & KIND_MASK;
}

const char* jv_kind_name(jv_kind k) {
switch (k) {
case JV_KIND_INVALID: return "<invalid>";
Expand Down Expand Up @@ -138,27 +142,139 @@ static void jvp_invalid_free(jv x) {
*/

jv jv_number(double x) {
jv j = {JV_KIND_NUMBER, 0, 0, 0, {.number = x}};
jv j = {JV_KIND_NUMBER, JV_SUBKIND_NONE, 0, 0, {.number = x}};
return j;
}

jv jv_int64(int64_t x) {
#ifndef JQ_OMIT_INTS
jv j = {JV_KIND_NUMBER, JV_SUBKIND_INT64, 0, 0, {.int64 = x}};
#else
jv j = {JV_KIND_NUMBER, JV_SUBKIND_INT64, 0, 0, {.number = x}};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This uses .number, but has a JV_SUBKIND_INT64?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops! (It would have no effect, given that other code that would check it would be omitted. But it's still a bug.)

#endif
return j;
}

jv jv_uint64(uint64_t x) {
#ifndef JQ_OMIT_INTS
jv j = {JV_KIND_NUMBER, JV_SUBKIND_UINT64, 0, 0, {.uint64 = x}};
#else
jv j = {JV_KIND_NUMBER, JV_SUBKIND_UINT64, 0, 0, {.number = x}};
#endif
return j;
}

double jv_number_value(jv j) {
assert(jv_get_kind(j) == JV_KIND_NUMBER);
#ifndef JQ_OMIT_INTS
char sk = jv_get_subkind(j);
if (sk == JV_SUBKIND_NONE)
return j.u.number;
if (sk == JV_SUBKIND_INT64)
return j.u.int64;
assert(sk == JV_SUBKIND_UINT64);
return j.u.uint64;
#else
return j.u.number;
#endif
}

int64_t jv_int64_value(jv j)
{
assert(jv_get_kind(j) == JV_KIND_NUMBER);
#ifndef JQ_OMIT_INTS
char sk = jv_get_subkind(j);
if (sk == JV_SUBKIND_INT64)
return j.u.int64;
if (sk == JV_SUBKIND_UINT64) {
if (j.u.uint64 <= INT64_MAX)
return (int64_t)j.u.uint64;
return INT64_MAX;
}
if (j.u.number > 0 && (int64_t)j.u.number < 0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm concerned this might be processor-specific behavior and prone to breaking.
How about this instead:

if (j.u.number > nextafter(INT64_MAX, 0))
  return INT64_MAX;
if (j.u.number < nextafter(INT64_MIN, 0))
  return INT64_MIN;

Basically, we get the next double in the direction of zero from whatever we get after converting INT64_MAX to double.
If we're farther from 0 than that value, then we had to be at least INT64_MAX.
The same logic holds for INT64_MIN.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this! Thanks!

return INT64_MAX;
if (j.u.number < 0 && (int64_t)j.u.number > 0)
return INT64_MIN;
#endif
return j.u.number;
}

uint64_t jv_uint64_value(jv j)
{
assert(jv_get_kind(j) == JV_KIND_NUMBER);
#ifndef JQ_OMIT_INTS
char sk = jv_get_subkind(j);
if (sk == JV_SUBKIND_UINT64)
return j.u.uint64;
if (sk == JV_SUBKIND_INT64) {
if (j.u.int64 >= 0)
return j.u.int64;
return 0;
}
if (j.u.number < 0)
return 0;
if (j.u.number > (double)UINT64_MAX)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, perhaps something like this:

if (j.u.number > nextafter(UINT64_MAX, 0))
  return UINT64_MAX;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes!

return UINT64_MAX;
#endif
return j.u.number;
}

int jv_is_integer(jv j){
if(jv_get_kind(j) != JV_KIND_NUMBER){
return 0;
}
#ifndef JQ_OMIT_INTS
char sk = jv_get_subkind(j);
if (sk != JV_SUBKIND_NONE) {
assert(sk == JV_SUBKIND_UINT64 || sk == JV_SUBKIND_INT64);
return 1;
}
#endif
double x = jv_number_value(j);
/* XXX Check against actual double min/max integers */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I seem to repeat myself. 😜
Assuming that x != x is for NaN detection...

if (isnan(j.u.number) || j.u.number > nextafter(INT_MAX, 0) || j.u.number < nextafter(INT_MIN, 0))
  return 0;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough.

if(x != x || x > INT_MAX || x < INT_MIN){
return 0;
}

return x == (int)x;
}

int jv_is_int64(jv j)
{
if(jv_get_kind(j) != JV_KIND_NUMBER){
return 0;
}
#ifndef JQ_OMIT_INTS
char sk = jv_get_subkind(j);
if (sk == JV_SUBKIND_INT64)
return 1;
if (sk == JV_SUBKIND_UINT64) {
if (j.u.uint64 <= INT64_MAX)
return 1;
return 0;
}
#endif
return jv_is_integer(j);
}

int jv_is_uint64(jv j)
{
if(jv_get_kind(j) != JV_KIND_NUMBER){
return 0;
}
#ifndef JQ_OMIT_INTS
char sk = jv_get_subkind(j);
if (sk == JV_SUBKIND_UINT64)
return 1;
if (sk == JV_SUBKIND_INT64) {
if (j.u.int64 >= 0)
return 1;
return 0;
}
#endif
return jv_is_integer(j);
}

/*
* Arrays (internal helpers)
*/
Expand Down Expand Up @@ -1302,9 +1418,21 @@ int jv_identical(jv a, jv b) {
case JV_KIND_OBJECT:
r = a.u.ptr == b.u.ptr;
break;
case JV_KIND_NUMBER:
case JV_KIND_NUMBER: {
#ifndef JQ_OMIT_INTS
char ask = jv_get_subkind(a);
char bsk = jv_get_subkind(b);
if (ask != bsk)
r = jv_number_value(a) == jv_number_value(b);
else if (ask == JV_SUBKIND_NONE)
r = memcmp(&a.u.number, &b.u.number, sizeof(a.u.number)) == 0;
else
r = a.u.int64 == b.u.int64; /* this handles int64 and uint64 */
#else
r = memcmp(&a.u.number, &b.u.number, sizeof(a.u.number)) == 0;
#endif
break;
}
default:
r = 1;
break;
Expand Down
17 changes: 16 additions & 1 deletion src/jv.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,26 @@ typedef enum {
JV_KIND_OBJECT
} jv_kind;

typedef enum {
JV_SUBKIND_NONE,
JV_SUBKIND_INT64,
JV_SUBKIND_UINT64,
} jv_subkind;

struct jv_refcnt;

/* All of the fields of this struct are private.
Really. Do not play with them. */
typedef struct {
unsigned char kind_flags;
unsigned char pad_;
unsigned char subkind_flags;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And now our padding is gone. 😢

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, yes, we lose the padding. I suppose I could encode sub-kind into kind_flags. We could always do that later if ever we want to use the padding for something else.

unsigned short offset; /* array offsets */
int size;
union {
struct jv_refcnt* ptr;
double number;
int64_t int64;
uint64_t uint64;
} u;
} jv;

Expand All @@ -37,6 +45,7 @@ typedef struct {
*/

jv_kind jv_get_kind(jv);
jv_subkind jv_get_subkind(jv);
const char* jv_kind_name(jv_kind);
static int jv_is_valid(jv x) { return jv_get_kind(x) != JV_KIND_INVALID; }

Expand All @@ -61,8 +70,14 @@ jv jv_false(void);
jv jv_bool(int);

jv jv_number(double);
jv jv_int64(int64_t);
jv jv_uint64(uint64_t);
double jv_number_value(jv);
int64_t jv_int64_value(jv);
uint64_t jv_uint64_value(jv);
int jv_is_integer(jv);
int jv_is_int64(jv);
int jv_is_uint64(jv);

jv jv_array(void);
jv jv_array_sized(int);
Expand Down
45 changes: 43 additions & 2 deletions src/jv_parse.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "jv.h"
#include "jv_dtoa.h"
#include "jv_unicode.h"
Expand Down Expand Up @@ -492,12 +495,50 @@ static pfunc check_literal(struct jv_parser* p) {
} else {
// FIXME: better parser
p->tokenbuf[p->tokenpos] = 0;
char* end = 0;
char *end = 0;

#ifndef JQ_OMIT_INTS
char *q = p->tokenbuf;
int is_signed = 0;
while (isspace(*q))
q++;
if (*q == '-') {
is_signed = 1;
q++;
}
errno = 0;
if (is_signed) {
#ifdef HAVE_STRTOIMAX
int64_t i64 = strtoimax(p->tokenbuf, &end, 10);
#else
int64_t i64 = strtoll(p->tokenbuf, &end, 10);
#endif
if (end != 0 && *end == 0 && i64 < 0 && errno == 0) {
TRY(value(p, jv_int64(i64)));
goto out;
}
} else {
#ifdef HAVE_STRTOUMAX
uint64_t u64 = strtoumax(p->tokenbuf, &end, 10);
#else
uint64_t u64 = strtoull(p->tokenbuf, &end, 10);
#endif
if (end != 0 && *end == 0 && errno == 0) {
TRY(value(p, jv_uint64(u64)));
goto out;
}
}
#endif
double d = jvp_strtod(&p->dtoa, p->tokenbuf, &end);
if (end == 0 || *end != 0)
return "Invalid numeric literal";
/*
* So there was a decimal or exponent; this might still be an
* integer, but we'll go with double.
*/
TRY(value(p, jv_number(d)));
}
out:
p->tokenpos = 0;
return 0;
}
Expand Down
18 changes: 17 additions & 1 deletion src/jv_print.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <assert.h>
#include <stdio.h>
#include <inttypes.h>
#include <float.h>
#include <stdio.h>
#include <string.h>

#ifdef WIN32
Expand Down Expand Up @@ -182,6 +183,21 @@ static void jv_dump_term(struct dtoa_context* C, jv x, int flags, int indent, FI
put_str("true", F, S, flags & JV_PRINT_ISATTY);
break;
case JV_KIND_NUMBER: {
#ifndef JQ_OMIT_INTS
if (jv_is_int64(x)) {
int64_t i64 = jv_int64_value(x);
char buf[21];
(void) snprintf(buf, sizeof(buf), "%" PRId64, i64);
put_str(buf, F, S, flags & JV_PRINT_ISATTY);
break;
} else if (jv_is_uint64(x)) {
uint64_t u64 = jv_uint64_value(x);
char buf[21];
(void) snprintf(buf, sizeof(buf), "%" PRIu64, u64);
put_str(buf, F, S, flags & JV_PRINT_ISATTY);
break;
}
#endif
double d = jv_number_value(x);
if (d != d) {
// JSON doesn't have NaN, so we'll render it as "null"
Expand Down