Skip to content

Commit

Permalink
Add random functions
Browse files Browse the repository at this point in the history
- Functions include: `rand`, `randint`, `shuffle`, `rand_select`, and
  `rand_select_rep`.
- Currently this is done with a seeded Mersenne Twister, but the room
  exists to add new PRNGs.
- The build and test of TinyMT was added nonrecursively to the existing
  automake file, due to a) not needing the entire library, and b) the
  library not supporting a standard `check` target.

fixes #677 and fixes #1038
  • Loading branch information
erikbrinkman committed Dec 1, 2017
1 parent 0c9eace commit d157bc5
Show file tree
Hide file tree
Showing 14 changed files with 494 additions and 6 deletions.
5 changes: 5 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
[submodule "modules/oniguruma"]
path = modules/oniguruma
url = https://github.com/kkos/oniguruma.git

[submodule "modules/TinyMT"]
path = modules/TinyMT
url = https://github.com/MersenneTwister-Lab/TinyMT.git
ignore = untracked
15 changes: 12 additions & 3 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ LIBJQ_INCS = src/builtin.h src/bytecode.h src/compile.h \
src/exec_stack.h src/jq_parser.h src/jv_alloc.h src/jv_dtoa.h \
src/jv_unicode.h src/jv_utf8_tables.h src/lexer.l src/libm.h \
src/linker.h src/locfile.h src/opcode_list.h src/parser.y \
src/util.h
src/util.h src/rand.h

LIBJQ_SRC = src/builtin.c src/bytecode.c src/compile.c src/execute.c \
src/jq_test.c src/jv.c src/jv_alloc.c src/jv_aux.c \
src/jv_dtoa.c src/jv_file.c src/jv_parse.c src/jv_print.c \
src/jv_unicode.c src/linker.c src/locfile.c src/util.c \
${LIBJQ_INCS}
src/rand.c modules/TinyMT/tinymt/tinymt64.c ${LIBJQ_INCS}

### C build options

Expand Down Expand Up @@ -117,9 +117,18 @@ if ENABLE_ALL_STATIC
jq_LDFLAGS += -all-static
endif

### TinyMT

AM_CFLAGS += -I$(srcdir)/modules/TinyMT/tinymt

check_PROGRAMS = modules/TinyMT/tinymt/check64
modules_TinyMT_tinymt_check64_SOURCES = modules/TinyMT/tinymt/check64.c
modules_TinyMT_tinymt_check64_LDADD = modules/TinyMT/tinymt/tinymt64.o


### Tests (make check)

TESTS = tests/optionaltest tests/mantest tests/jqtest tests/onigtest tests/shtest tests/utf8test tests/base64test
TESTS = tests/optionaltest tests/mantest tests/jqtest tests/onigtest tests/shtest tests/utf8test tests/base64test tests/mttest
TESTS_ENVIRONMENT = NO_VALGRIND=$(NO_VALGRIND)


Expand Down
26 changes: 26 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,32 @@ AC_CHECK_MATH_FUNC(y0, [.5])
AC_CHECK_MATH_FUNC(y1, [.5])
AC_CHECK_MATH_FUNC(yn, [1,.5])

dnl Check random sources
dnl Try to link a tiny program that invokes SYS_getrandom through syscall.
dnl The outcome is recorded in the shell variable have_getrandom_syscall.
AC_MSG_CHECKING(for the Linux getrandom() syscall)
AC_LINK_IFELSE(
[
AC_LANG_SOURCE([[
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/random.h>
int main() {
char buffer;
const size_t buflen = sizeof(buffer);
(void)syscall(SYS_getrandom, &buffer, buflen);
return 0;
}
]])
],[have_getrandom_syscall=yes],[have_getrandom_syscall=no])
AC_MSG_RESULT($have_getrandom_syscall)

dnl When the link succeeded define HAVE_GETRANDOM_SYSCALL for the C sources.
dnl Otherwise look for the /dev/urandom file and print a notice if it exists.
if test "$have_getrandom_syscall" = yes; then
AC_DEFINE(HAVE_GETRANDOM_SYSCALL, 1,
[Define to 1 if the Linux getrandom() syscall is available])
else
AC_CHECK_FILES("/dev/urandom", [AC_MSG_NOTICE(could not link getrandom falling back to /dev/urandom)])
fi

dnl Thread local storage
have___thread=no
AC_MSG_CHECKING(for thread-local storage)
Expand Down
40 changes: 39 additions & 1 deletion docs/content/2.download/default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ body:
jq is licensed under the MIT license. For all of the gory
details, read the file `COPYING` in the source distribution.
jq uses Mersenne Twister as a fast PRNG; its license is located in the
[copyright](#copyright) section.
### Linux
Expand Down Expand Up @@ -168,3 +169,40 @@ body:
the YAML docs, and you'll still need the Ruby dependencies to
build the manpage.
### Copyright
jq uses Mersenne Twister for fast pseudorandom number generation,
specifically
[TinyMT](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/TINYMT/). The
following is the verbatim license:
Copyright (c) 2011, 2013 Mutsuo Saito, Makoto Matsumoto,
Hiroshima University and The University of Tokyo.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of the Hiroshima University nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
82 changes: 82 additions & 0 deletions docs/content/3.manual/manual.yml
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,13 @@ sections:
Remaining arguments are positional JSON text arguments. These
are available to the jq program as `$ARGS.positional[]`.
* `--rand-seed seed-int`:
Set the seed for randomness generation to anything interpretable as an
unsigned long. If unspecified, the seed will be taken from the best
source of randomness (currently /dev/urandom if it's available, and
otherwise the current time `time(NULL)`).
* `--run-tests [filename]`:
Runs the tests in the given file or standard input. This must
Expand Down Expand Up @@ -1329,6 +1336,81 @@ sections:
input: '[{"foo":1, "bar":14}, {"foo":2, "bar":3}]'
output: ['{"foo":2, "bar":3}']

- title: "`rand`"
body: |
Generate a random number greater than or equal to zero and less than
one.
*NOTE: This function should not be used for security purposes.*
Also see the command line argument `--rand-seed`.
examples:
- program: 'rand | . >= 0 and . < 1'
input: 'null'
output: ['true']

- title: "`randint`, `randint(b)`, `randint(a; b)`"
body: |
Generate a random integer greater than or equal to `a` and less
than `b`. `a` is zero when unspecified. The first form reads `b` from
its input. The range must have at least one number in it, e.g. `b`
must be greater than zero if `a` is unspecified. Inputs will be cast to
an integer, so `randint(5)` and `randint(5.6)` are equivalent.
*NOTE: This function should not be used for security purposes.*
Also see the command line argument `--rand-seed`.
examples:
- program: 'randint | . >= 0 and . < 5'
input: '5'
output: ['true']
- program: 'randint(-5; 5 + 1) | . >= -5 and . <= 5'
input: 'null'
output: ['true']
- program: '.[randint(length)] | . == 1 or . == 5 or . == 17'
input: '[1, 5, 17]'
output: ['true']

- title: "`shuffle`"
body: |
Randomly permute an array.
*NOTE: This function should not be used for security purposes.*
Also see the command line argument `--rand-seed`.
examples:
- program: 'shuffle | sort == [1, 5, 17]'
input: '[1, 5, 17]'
output: ['true']

- title: "`rand_select(n)`, `rand_select(stream; n)`, `rand_select_rep(n)`"
body: |
Randomly select `n` elements from an array or stream. The stream
version is significantly slower for large inputs. `rand_select_rep`
does so with repetition.
*NOTE: This function should not be used for security purposes.*
Also see the command line argument `--rand-seed`.
examples:
- program: 'rand_select(2) | map(. == 1 or . == 5 or . == 17) | all'
input: '[1, 5, 17]'
output: ['true']
- program: 'rand_select(1, 5, 17; 2) | map(. == 1 or . == 5 or . == 17) | all'
input: 'null'
output: ['true']
- program: 'rand_select_rep(2) | map(. == 1 or . == 5 or . == 17) | all'
input: '[1, 5, 17]'
output: ['true']

- title: "`unique`, `unique_by(path_exp)`"
body: |
Expand Down
1 change: 1 addition & 0 deletions modules/TinyMT
Submodule TinyMT added at 53206a
65 changes: 65 additions & 0 deletions src/builtin.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ void *alloca (size_t);
#include <time.h>
#include "builtin.h"
#include "compile.h"
#include "rand.h"
#include "jq_parser.h"
#include "bytecode.h"
#include "linker.h"
Expand Down Expand Up @@ -1058,6 +1059,66 @@ static jv f_error(jq_state *jq, jv input, jv msg) {
return jv_invalid_with_msg(msg);
}

// Random functions
static jv f_rand(jq_state *jq, jv input) {
  // Builtin `rand`: yields a random number in [0, 1).
  // The input value plays no part in the result, so it is released first
  // and the sample is then drawn from jq_rand_double().
  jv_free(input);
  double sample = jq_rand_double();
  return jv_number(sample);
}

static jv f_randint(jq_state *jq, jv input) {
  // Builtin `randint`: random int in [0, input).
  //
  // input: the exclusive upper bound; must be a number with
  //        1 <= input < 2^64.
  // Returns a jv number on success, or the type_error() result when the
  // input is not a number or lies outside the accepted range.

  // Check the kind before reading the value as a number
  // (NOTE(review): jv_number_value presumably only accepts numbers - confirm).
  if (jv_get_kind(input) != JV_KIND_NUMBER) {
    return type_error(input, "cannot be used as a randint limit, as it is not a number");
  }

  double max_val = jv_number_value(jv_copy(input));

  // 2^64 written as a double. (double)UINT64_MAX rounds up to exactly this
  // value, so the limit has to be exclusive: converting 2^64 (or anything
  // larger) to uint64_t is undefined behaviour.
  const double too_large = 18446744073709551616.0;

  // Written as a negated conjunction so that NaN, for which every comparison
  // is false, is rejected instead of reaching the cast below.
  if (!(max_val >= 1 && max_val < too_large)) {
    return type_error(input, "number invalid, less than 1 or too large");
  }

  jv_free(input);
  return jv_number(jq_rand_int((uint64_t)max_val));
}

static jv f_shuffle(jq_state *jq, jv input) {
  // Builtin `shuffle`: randomly permute an array.
  // Non-array inputs are reported through type_error(); otherwise the
  // permuted array is returned.
  if (jv_get_kind(input) != JV_KIND_ARRAY) {
    return type_error(input, "cannot be shuffled, as it is not an array");
  }

  const int count = jv_array_length(jv_copy(input));
  int pos = 0;
  while (pos < count) {
    // Pick a partner slot at or after pos (assuming jq_rand_int(n) yields a
    // value in [0, n)) and exchange the two elements.
    const int remaining = count - pos;
    const int partner = pos + (int)jq_rand_int((unsigned long)remaining);
    jv held = jv_array_get(jv_copy(input), pos);
    jv incoming = jv_array_get(jv_copy(input), partner);
    input = jv_array_set(input, pos, incoming);
    input = jv_array_set(input, partner, held);
    ++pos;
  }
  return input;
}

static jv f_rand_select(jq_state *jq, jv input, jv j_num) {
  // Builtin `rand_select(n)`: select j_num elements without replacement
  // from the input array.
  // More efficient than `shuffle | slice`
  //
  // input: the array to sample from.
  // j_num: how many elements to pick; truncated toward zero, and the
  //        truncated value must lie in [0, length of input].
  // Returns an array of the selected elements, or the type_error2() result
  // when either argument is unusable.
  if (jv_get_kind(input) != JV_KIND_ARRAY) {
    return type_error2(input, j_num, "can't select from a non array");
  }

  // Check the kind before reading the value as a number
  // (NOTE(review): jv_number_value presumably only accepts numbers - confirm).
  if (jv_get_kind(j_num) != JV_KIND_NUMBER) {
    return type_error2(input, j_num, "can't select a non-numeric number of elements");
  }

  const int length = jv_array_length(jv_copy(input));
  const double requested = jv_number_value(jv_copy(j_num));

  // Range-check while still a double: narrowing NaN or an out-of-range value
  // to int is undefined behaviour. The open interval (-1, length + 1) is
  // exactly the set of doubles that truncate into [0, length]; NaN fails
  // both comparisons and is rejected.
  if (!(requested > -1 && requested < (double)length + 1)) {
    return type_error2(input, j_num, "can't select less than 0 or more than than the input");
  }

  const int num = (int)requested;
  if (num == 0) {
    jv_free(input);
    jv_free(j_num);
    return jv_array();
  }

  // Start from a shuffled copy of the first num elements, then let each
  // later element replace a random slot with probability num / (i + 1).
  jv result = f_shuffle(jq, jv_array_slice(jv_copy(input), 0, num));
  for (int i = num; i < length; ++i) {
    if (jq_rand_double() < (double)num / (double)(i + 1)) {
      jv selected = jv_array_get(jv_copy(input), i);
      result = jv_array_set(result, (int)jq_rand_int((unsigned long)num), selected);
    }
  }
  jv_free(j_num);
  jv_free(input);
  return result;
}

// FIXME Should autoconf check for this!
#ifndef WIN32
extern char **environ;
Expand Down Expand Up @@ -1631,6 +1692,10 @@ static const struct cfunction function_list[] = {
{(cfunction_ptr)f_min_by_impl, "_min_by_impl", 2},
{(cfunction_ptr)f_max_by_impl, "_max_by_impl", 2},
{(cfunction_ptr)f_error, "error", 2},
{(cfunction_ptr)f_rand, "rand", 1},
{(cfunction_ptr)f_randint, "randint", 1},
{(cfunction_ptr)f_shuffle, "shuffle", 1},
{(cfunction_ptr)f_rand_select, "rand_select", 2},
{(cfunction_ptr)f_format, "format", 2},
{(cfunction_ptr)f_env, "env", 1},
{(cfunction_ptr)f_halt, "halt", 1},
Expand Down
23 changes: 23 additions & 0 deletions src/builtin.jq
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,26 @@ def JOIN($idx; stream; idx_expr; join_expr):
stream | [., $idx[idx_expr]] | join_expr;
def IN(s): reduce (first(select(. == s)) | true) as $v (false; if . or $v then true else false end);
def IN(src; s): reduce (src|IN(s)) as $v (false; if . or $v then true else false end);

# Random aliases
# randint(upper): evaluates `upper` against the current input and pipes each
# result into the zero-argument `randint`, which reads its limit from its input.
def randint(upper): upper | randint;
# randint(lower; upper): floors both bounds, draws a random offset over the
# width of the range and shifts it by the floored lower bound. Raises an
# error when the floored upper bound does not exceed the floored lower bound.
def randint(lower; upper):
  (lower | floor) as $a
  | (upper | floor) as $b
  | if $b > $a
    then ($b - $a | randint) + $a
    else error("randint upper limit must be greater than the lower \($b) <= \($a)")
    end;
# rand_select_rep(n): builds an array of n elements drawn from the input
# array with repetition; each draw indexes the input at `length | randint`.
# Raises an error when n is less than 0.
def rand_select_rep(n):
  if n >= 0
  then . as $array | [range(n) | $array | .[length | randint]]
  else error("\(type) (\(.)) and \(n | type) (\(n)) can't select less than 0 from the input")
  end;
# rand_select(stream; $n): keeps up to $n items while consuming `stream`.
# The first $n items are kept as-is; each later item overwrites a random kept
# slot when `rand` falls below $n divided by the number of items seen so far.
# The kept items are shuffled before being returned. Raises an error when $n
# is less than 0 and yields [] when $n is 0.
def rand_select(stream; $n):
  if $n < 0 then error("rand_select can't select less than 0 elements")
  elif $n == 0 then []
  else
    reduce stream as $x (
      {seen: 0, kept: []};
      .seen += 1
      | if .seen <= $n then .kept += [$x]
        elif rand < $n / .seen then .kept[randint($n)] = $x
        else .
        end
    )
    | .kept
    | shuffle
  end;
Loading

0 comments on commit d157bc5

Please sign in to comment.