From e0459795975fd3b8d8e42c1b53a3f4b517c391a5 Mon Sep 17 00:00:00 2001 From: Eloy Coto Date: Tue, 14 Nov 2023 20:47:59 +0100 Subject: [PATCH] refactor: move bsearch function to C-code This commit fixes issue #527 and move the bsearch function to a native C-code. The performance is a bit better: Testing script: ```bash clear if [[ `uname` == Darwin ]]; then MAX_MEMORY_UNITS=KB else MAX_MEMORY_UNITS=MB fi export TIMEFMT='%J %U user %S system %P cpu %*E total'$'\n'\ 'avg shared (code): %X KB'$'\n'\ 'avg unshared (data/stack): %D KB'$'\n'\ 'total (sum): %K KB'$'\n'\ 'max memory: %M '$MAX_MEMORY_UNITS''$'\n'\ 'page faults from disk: %F'$'\n'\ 'other page faults: %R' echo "JQ code bsearch" time /usr/bin/jq -n '[range(30000000)] | bsearch(3000)' echo "C code bsearch" time ./jq -n '[range(30000000)] | bsearch(3000)' ```` Results: ``` JQ code bsearch 3000 /usr/bin/jq -n '[range(30000000)] | bsearch(3000)' 8.63s user 0.77s system 98% cpu 9.542 total avg shared (code): 0 KB avg unshared (data/stack): 0 KB total (sum): 0 KB max memory: 823 MB page faults from disk: 1 other page faults: 432828 C code bsearch 3000 ./jq -n '[range(30000000)] | bsearch(3000)' 8.44s user 0.74s system 99% cpu 9.249 total avg shared (code): 0 KB avg unshared (data/stack): 0 KB total (sum): 0 KB max memory: 824 MB page faults from disk: 0 other page faults: 432766 ``` The results may be better if we can use jvp_array_read, and there is no need to copy/free the input array in each iteration. I guess that is like that for API pourposes when the libjq is in use with multiple threads in place. Signed-off-by: Eloy Coto --- src/builtin.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++- src/builtin.jq | 31 -------------------------- 2 files changed, 58 insertions(+), 32 deletions(-) diff --git a/src/builtin.c b/src/builtin.c index cf4792c446..a38fc5a5dc 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -46,7 +46,6 @@ void *alloca (size_t); #include "jv_private.h" #include "util.h" - #define BINOP(name) \ static jv f_ ## name(jq_state *jq, jv input, jv a, jv b) { \ jv_free(input); \ @@ -807,6 +806,63 @@ static jv f_sort_by_impl(jq_state *jq, jv input, jv keys) { } } +/* Assuming the input array is sorted, bsearch/1 returns */ +/* the index of the target if the target is in the input array; and otherwise */ +/* (-1 - ix), where ix is the insertion point that would leave the array sorted. */ +/* If the input is not sorted, bsearch will terminate but with irrelevant results. */ +static jv f_bsearch(jq_state *jq, jv input, jv target) { + assert(jv_get_kind(input) == JV_KIND_ARRAY); + assert(jv_get_kind(target) == JV_KIND_NUMBER); + int len = jv_array_length(jv_copy(input)); + if (len == 0) { + jv_free(input); + jv_free(target); + return jv_number(-1); + } else if (len == 1) { + int result = jv_cmp(target, jv_array_get(jv_copy(input), 0)); + jv_free(input); + if (result == 0 ) { + return jv_number(0); + } else if (result > 0) { + return jv_number(-2); + } else { + return jv_number(-1); + } + } + + int start = 0; + int end = len - 1; + jv answer = jv_null(); + while (start .[1] ; - if .[2] != null then (.[1] = -1) # i.e. break - else - ( ( (.[1] + .[0]) / 2 ) | floor ) as $mid - | $in[$mid] as $monkey - | if $monkey == $target then (.[2] = $mid) # success - elif .[0] == .[1] then (.[1] = -1) # failure - elif $monkey < $target then (.[0] = ($mid + 1)) - else (.[1] = ($mid - 1)) - end - end ) - | if .[2] == null then # compute the insertion point - if $in[ .[0] ] < $target then (-2 -.[0]) - else (-1 -.[0]) - end - else .[2] - end - end; - # Apply f to composite entities recursively, and to atoms def walk(f): def w: