From c6cec91c8c3abac2149c121f39c25240b3746c5f Mon Sep 17 00:00:00 2001 From: pkoppstein Date: Sun, 9 Jul 2023 03:55:42 -0400 Subject: [PATCH 1/2] much faster and simpler version of transpose, with stream-oriented min and max Define stream-oriented min/1 max/1 so that a much faster and much shorter implementation of `transpose` can be provided, while preserving symmetry between min and max. (Nothing in this change would preclude adding stream-oriented versions of min_by and max_by in the future.) --- docs/content/manual/manual.yml | 14 +++++++++++--- src/builtin.jq | 14 +++++--------- tests/jq.test | 4 ++++ 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/docs/content/manual/manual.yml b/docs/content/manual/manual.yml index 183e4a5d45..02f7076770 100644 --- a/docs/content/manual/manual.yml +++ b/docs/content/manual/manual.yml @@ -1426,10 +1426,13 @@ sections: input: '[{"foo":1, "bar":10}, {"foo":3, "bar":100}, {"foo":1, "bar":1}]' output: ['[[{"foo":1, "bar":10}, {"foo":1, "bar":1}], [{"foo":3, "bar":100}]]'] - - title: "`min`, `max`, `min_by(path_exp)`, `max_by(path_exp)`" + - title: "`min`, `max`, `min(stream)`, `max(stream)`, `min_by(path_exp)`, `max_by(path_exp)`" body: | - Find the minimum or maximum element of the input array. + `min` finds the minimum element of the input array, and + `min(stream)` finds the minimum item in the stream. + + `max` and `max(stream)` similarly find the maximum element. The `min_by(path_exp)` and `max_by(path_exp)` functions allow you to specify a particular field or property to examine, e.g. 
@@ -1439,10 +1442,15 @@ sections: - program: 'min' input: '[5,4,2,7]' output: ['2'] + + - program: 'min(1,2,3,0.1)' + input: null + output: ['0.1'] + - program: 'max_by(.foo)' input: '[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' output: ['{"foo":2, "bar":3}'] - + - title: "`unique`, `unique_by(path_exp)`" body: | diff --git a/src/builtin.jq b/src/builtin.jq index 146a64a36b..4402aec15a 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -1,3 +1,4 @@ +def first(g): label $out | g | ., break $out; def halt_error: halt_error(5); def error(msg): msg|error; def map(f): [.[] | f]; @@ -6,6 +7,8 @@ def sort_by(f): _sort_by_impl(map([f])); def group_by(f): _group_by_impl(map([f])); def unique: group_by(.) | map(.[0]); def unique_by(f): group_by(f) | map(.[0]); +def min(s): reduce s as $x (first(s); if $x < . then $x else . end); +def max(s): reduce s as $x (first(s); if $x > . then $x else . end); def max_by(f): _max_by_impl(map([f])); def min_by(f): _min_by_impl(map([f])); def add: reduce .[] as $x (null; . + $x); @@ -154,7 +157,7 @@ def range($init; $upto; $by): if $by > 0 then $init|while(. < $upto; . + $by) elif $by < 0 then $init|while(. > $upto; . + $by) else empty end; -def first(g): label $out | g | ., break $out; + def isempty(g): first((g|false), true); def all(generator; condition): isempty(generator|condition and empty); def any(generator; condition): isempty(generator|condition or empty)|not; @@ -181,14 +184,7 @@ def combinations(n): | combinations; # transpose a possibly jagged matrix, quickly; # rows are padded with nulls so the result is always rectangular. -def transpose: - if . == [] then [] - else . as $in - | (map(length) | max) as $max - | length as $length - | reduce range(0; $max) as $j - ([]; . + [reduce range(0;$length) as $i ([]; . + [ $in[$i][$j] ] )] ) - end; +def transpose: [range(0; max(.[]|length)) as $i | [.[][$i]]]; def in(xs): . as $x | xs | has($x); def inside(xs): . 
as $x | xs | contains($x); def repeat(exp): diff --git a/tests/jq.test b/tests/jq.test index 4e693452bd..7c7f1b1e47 100644 --- a/tests/jq.test +++ b/tests/jq.test @@ -1352,6 +1352,10 @@ unique [] [null,null,null,null] +min(1,2,0.1) +null +0.1 + .foo[.baz] {"foo":{"bar":4},"baz":"bar"} 4 From 2270f38be39ba0ffcaca5e3c59ff6b194e19bc14 Mon Sep 17 00:00:00 2001 From: pkoppstein Date: Sun, 9 Jul 2023 17:17:19 -0400 Subject: [PATCH 2/2] min(s) and max(s) use boxing technique to avoid problem with `input` It turns out that the boxing technique is noticeably faster than using setpath. Also revise transpose to use max/0 as that is faster than using the new max(s). The row-length maximum defaults to 0 so that `[] | transpose` still yields [] (max of an empty array is null, which range/2 rejects). --- docs/content/manual/manual.yml | 2 ++ src/builtin.jq | 19 +++++++++++++++---- tests/jq.test | 9 +++++++-- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/docs/content/manual/manual.yml b/docs/content/manual/manual.yml index 02f7076770..ae02b31221 100644 --- a/docs/content/manual/manual.yml +++ b/docs/content/manual/manual.yml @@ -1434,6 +1434,8 @@ sections: `max` and `max(stream)` similarly find the maximum element. + `min(empty)` and `max(empty)` both emit nothing. + The `min_by(path_exp)` and `max_by(path_exp)` functions allow you to specify a particular field or property to examine, e.g. `min_by(.foo)` finds the object with the smallest `foo` field. diff --git a/src/builtin.jq b/src/builtin.jq index 4402aec15a..9dc732ed86 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -7,8 +7,19 @@ def sort_by(f): _sort_by_impl(map([f])); def group_by(f): _group_by_impl(map([f])); def unique: group_by(.) | map(.[0]); def unique_by(f): group_by(f) | map(.[0]); -def min(s): reduce s as $x (first(s); if $x < . then $x else . end); -def max(s): reduce s as $x (first(s); if $x > . then $x else . 
end); +# max(s) and min(s) use boxing technique for the sake of `input`: +def max(s): + reduce (s|[.]) as $x (null; + if . == null then $x + else if $x > . 
then $x end # for speed + end ) + | select(.)[0]; +def min(s): + reduce (s|[.]) as $x (null; + if . == null then $x + else if $x < . then $x end # for speed + end ) + | select(.)[0]; def max_by(f): _max_by_impl(map([f])); def min_by(f): _min_by_impl(map([f])); def add: reduce .[] as $x (null; . + $x); @@ -157,7 +168,6 @@ def range($init; $upto; $by): if $by > 0 then $init|while(. < $upto; . + $by) elif $by < 0 then $init|while(. > $upto; . + $by) else empty end; - def isempty(g): first((g|false), true); def all(generator; condition): isempty(generator|condition and empty); def any(generator; condition): isempty(generator|condition or empty)|not; @@ -184,7 +194,8 @@ def combinations(n): | combinations; # transpose a possibly jagged matrix, quickly; # rows are padded with nulls so the result is always rectangular. -def transpose: [range(0; max(.[]|length)) as $i | [.[][$i]]]; +# Using map(length) turns out to be faster than using max/1; `// 0` handles [] (max of [] is null) +def transpose: [range(0; map(length)|max // 0) as $i | [.[][$i]]]; def in(xs): . as $x | xs | has($x); def inside(xs): . as $x | xs | contains($x); def repeat(exp): diff --git a/tests/jq.test b/tests/jq.test index 7c7f1b1e47..fb03d2f383 100644 --- a/tests/jq.test +++ b/tests/jq.test @@ -1352,9 +1352,14 @@ unique [] [null,null,null,null] -min(1,2,0.1) +[min(1,2,0.1), max(1,2,0.1)] null -0.1 +[0.1,2] + + +[min(empty),max(empty)] +null +[] .foo[.baz] {"foo":{"bar":4},"baz":"bar"}