Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow, and strip, comments (#, // or /*) #2548

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ endif

### Tests (make check)

TESTS = tests/mantest tests/jqtest tests/shtest tests/utf8test tests/base64test
TESTS = tests/mantest tests/jqtest tests/shtest tests/utf8test tests/base64test tests/commenttest
if !WIN32
TESTS += tests/optionaltest
endif
Expand Down Expand Up @@ -233,6 +233,7 @@ EXTRA_DIST = $(DOC_FILES) $(man_MANS) $(TESTS) $(TEST_LOG_COMPILER) \
tests/setup tests/torture/input0.json \
tests/optional.test tests/man.test tests/manonig.test \
tests/jq.test tests/onig.test tests/base64.test \
tests/commenttest \
tests/utf8-truncate.jq tests/jq-f-test.sh \
tests/no-main-program.jq tests/yes-main-program.jq

Expand Down
1 change: 1 addition & 0 deletions src/jv.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ enum {
JV_PARSE_SEQ = 1,
JV_PARSE_STREAMING = 2,
JV_PARSE_STREAM_ERRORS = 4,
JV_PARSE_STRIP_COMMENTS = 8
};

jv jv_parse(const char* string);
Expand Down
61 changes: 56 additions & 5 deletions src/jv_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ enum last_seen {
};

struct jv_parser {
pfunc (*scan)(struct jv_parser* p, char ch, jv* out);
const char* curr_buf;
int curr_buf_length;
int curr_buf_pos;
Expand Down Expand Up @@ -65,9 +66,14 @@ struct jv_parser {
unsigned int last_ch_was_ws:1;
};

static pfunc scan_json(struct jv_parser* p, char ch, jv* out);
static pfunc scan_line_comment(struct jv_parser* p, char ch, jv* out);
static pfunc scan_slash_comment(struct jv_parser* p, char ch, jv* out);
static pfunc scan_c_comment(struct jv_parser* p, char ch, jv* out);

static void parser_init(struct jv_parser* p, int flags) {
p->flags = flags;
p->scan = scan_json;
if ((p->flags & JV_PARSE_STREAMING)) {
p->path = jv_array();
} else {
Expand Down Expand Up @@ -639,12 +645,42 @@ static int stream_is_top_num(struct jv_parser* p) {
#define is_top_num(p) \
(((p)->flags & JV_PARSE_STREAMING) ? stream_is_top_num((p)) : parse_is_top_num((p)))

static pfunc scan(struct jv_parser* p, char ch, jv* out) {
p->column++;
if (ch == '\n') {
p->line++;
p->column = 0;

static pfunc scan_line_comment(struct jv_parser* p, char ch, jv* out) {
if(ch == '\n')
p->scan = scan_json;
return 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have some commentary in the commit message about why this change?

Copy link
Member

@wader wader Feb 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying to understand the 0 -> OK change. OK is static const presult OK = "output produced"; so i guess something else must have changed before the change? only reference i can find is in #2548 (comment). Will digg more later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I see that, but I'm not sure why that's needed, and anyways, there's places where we look for OK that need to be changed correspondingly.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iirc the reason for this is fairly straightforward, which is that in the below block of jv_parse.c:

jv jv_parser_next(struct jv_parser* p) {
...
  while (!msg && p->curr_buf_pos < p->curr_buf_length) {
    ...
    msg = scan(p, ch, &value);
  }

the desired behavior, when a comment is encountered, is to simply ignore it and keep on going. Therefore the while loop must keep on going, which requires that !msg is true. Any other result will break the while loop, which would be incorrect.

That said, given that the return type of scan() is a pointer, it might be better for the scan_xxx() functions to all return NULL where they currently are returning of 0.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@liquidaty thanks for the explanation. think i tried to return OK instead when played around a bit with the code and pretty sure it also worked for some reason, was confusing

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it would be useful to define IGNORE_AND_CONTINUE as a presult equal to NULL (or alternatively, not null, with other corresponding changes e.g. !msg becomes msg == IGNORE_AND_CONTINUE)

}

static pfunc scan_c_comment_close(struct jv_parser* p, char ch, jv* out) {
if(ch == '/') {
p->scan = scan_json;
} else {
p->scan = scan_c_comment;
}
return 0;
}

static pfunc scan_c_comment(struct jv_parser* p, char ch, jv* out) {
if(ch == '*') {
p->scan = scan_c_comment_close;
}
return 0;
}

static pfunc scan_slash_comment(struct jv_parser* p, char ch, jv* out) {
if(ch == '/') {
p->scan = scan_line_comment;
return 0;
}
if(ch == '*') {
p->scan = scan_c_comment;
return 0;
}
return "Incomplete comment token; slash must be followed by another slash or asterisk";
}

static pfunc scan_json(struct jv_parser* p, char ch, jv* out) {
if ((p->flags & JV_PARSE_SEQ)
&& ch == '\036' /* ASCII RS; see draft-ietf-json-sequence-07 */) {
if (check_truncation(p)) {
Expand All @@ -662,9 +698,15 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) {
*out = jv_invalid();
return OK;
}

presult answer = 0;
p->last_ch_was_ws = 0;
if (p->st == JV_PARSER_NORMAL) {
if(ch == '/' && (p->flags & JV_PARSE_STRIP_COMMENTS)) {
p->scan = scan_slash_comment;
return answer;
}

chclass cls = classify(ch);
if (cls == WHITESPACE)
p->last_ch_was_ws = 1;
Expand Down Expand Up @@ -705,6 +747,15 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) {
return answer;
}

static pfunc scan(struct jv_parser* p, char ch, jv* out) {
p->column++;
if (ch == '\n') {
p->line++;
p->column = 0;
}
return p->scan(p, ch, out);
}

struct jv_parser* jv_parser_new(int flags) {
struct jv_parser* p = jv_mem_alloc(sizeof(struct jv_parser));
parser_init(p, flags);
Expand Down
5 changes: 5 additions & 0 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ static void usage(int code, int keep_it_short) {
" --stream-errors implies --stream and report parse error as\n"
" an array;\n"
" --seq parse input/output as application/json-seq;\n"
" --strip-comments accept and strip comments from input;\n"
" -f, --from-file file load filter from the file;\n"
" -L directory search modules from the directory;\n"
" --arg name value set $name to the string value;\n"
Expand Down Expand Up @@ -471,6 +472,10 @@ int main(int argc, char* argv[]) {
parser_flags |= JV_PARSE_STREAMING;
continue;
}
if (isoption(argv[i], 0, "strip-comments", &short_opts)) {
parser_flags |= JV_PARSE_STRIP_COMMENTS;
continue;
}
if (isoption(argv[i], 0, "stream-errors", &short_opts)) {
parser_flags |= JV_PARSE_STREAMING | JV_PARSE_STREAM_ERRORS;
continue;
Expand Down
15 changes: 15 additions & 0 deletions tests/commenttest
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/sh

. "${0%/*}/setup" "$@"

if [ "$(echo '{"hi":"there"/*comment*/}' | $VALGRIND $Q $JQ --strip-comments '.hi')" != '"there"' ]; then
echo "C-style comment test failed"
exit 1
fi

if [ "$(printf '{"hi"://comment\n"there"}' | $VALGRIND $Q $JQ --strip-comments '.hi')" != '"there"' ]; then
echo "C++-style comment test failed"
exit 1
fi

exit 0