diff --git a/docs/content/manual/manual.yml b/docs/content/manual/manual.yml
index 2f4bd3c014..5f334f49c4 100644
--- a/docs/content/manual/manual.yml
+++ b/docs/content/manual/manual.yml
@@ -772,6 +772,9 @@ sections:
           `null` can be added to any value, and returns the other
           value unchanged.
 
+          A numeric byte value between 0 and 255, inclusive, can be
+          added to a binary string value.
+
         examples:
           - program: '.a + 1'
             input: '{"a": 7}'
@@ -1414,6 +1417,41 @@ sections:
             input: '[1, "1", [1]]'
             output: ['"1"', '"1"', '"[1]"']
 
+      - title: "`tobinary`"
+        body: |
+
+          The `tobinary` function is like `tostring`, but its output
+          will be a string which when output to jq's output stream
+          will be base64-encoded, and which if added with other
+          strings will produce a binary string value.
+
+          Internally the binary string may be represented efficiently,
+          and may not be encoded until it is output or until it is
+          passed to `tostring`.  Adding a byte value (integer value
+          between 0 and 255, inclusive) to a binary string is allowed,
+          and will append that byte to it.
+
+      - title: "`tobinary_bytearray`"
+        body: |
+
+          The `tobinary_bytearray` function is like `tobinary`, but
+          when output by jq it will be represented as an array of
+          small non-negative byte value integers.
+
+      - title: "`tobinary_utf8`"
+        body: |
+
+          The `tobinary_utf8` function is like `tobinary`, but when
+          output by jq it will be converted to UTF-8 with bad
+          character replacements.
+
+      - title: "`tobinary(bytes)`"
+        body: |
+
+          This function constructs a binary string value like
+          `tobinary` but consisting of the byte values output by
+          `bytes`.
+
       - title: "`type`"
         body: |
 
@@ -1421,6 +1459,13 @@ sections:
           string, which is one of null, boolean, number, string, array
           or object.
 
+      - title: "`stringtype`"
+        body: |
+
+          Strings can be UTF-8 strings or binary strings.  The
+          `stringtype` builtin outputs `"UTF-8"` or `"binary"` when
+          given a string as input.
+
         examples:
           - program: 'map(type)'
             input: '[0, false, [], {}, null, "hello"]'
@@ -2038,7 +2083,9 @@ sections:
           * `@base64d`:
 
             The inverse of `@base64`, input is decoded as specified by RFC 4648.
-            Note\: If the decoded string is not UTF-8, the results are undefined.
+            The result will be a binary string as if `tobinary_utf8`
+            was used, meaning that on output bad characters will be
+            replaced.
 
           This syntax can be combined with string interpolation in a
           useful way. You can follow a `@foo` token with a string
diff --git a/jq.1.prebuilt b/jq.1.prebuilt
index c4f99dd603..91e902cdcb 100644
--- a/jq.1.prebuilt
+++ b/jq.1.prebuilt
@@ -742,6 +742,9 @@ The operator \fB+\fR takes two filters, applies them both to the same input, and
 .P
 \fBnull\fR can be added to any value, and returns the other value unchanged\.
 .
+.P
+A numeric byte value between 0 and 255, inclusive, can be added to a binary string value\.
+.
 .IP "" 4
 .
 .nf
@@ -1483,9 +1486,27 @@ jq \'\.[] | tostring\'
 .
 .IP "" 0
 .
+.SS "tobinary"
+The \fBtobinary\fR function is like \fBtostring\fR, but its output will be a string which when output to jq\'s output stream will be base64\-encoded, and which if added with other strings will produce a binary string value\.
+.
+.P
+Internally the binary string may be represented efficiently, and may not be encoded until it is output or until it is passed to \fBtostring\fR\. Adding a byte value (integer value between 0 and 255, inclusive) to a binary string is allowed, and will append that byte to it\.
+.
+.SS "tobinary_bytearray"
+The \fBtobinary_bytearray\fR function is like \fBtobinary\fR, but when output by jq it will be represented as an array of small non\-negative byte value integers\.
+.
+.SS "tobinary_utf8"
+The \fBtobinary_utf8\fR function is like \fBtobinary\fR, but when output by jq it will be converted to UTF\-8 with bad character replacements\.
+.
+.SS "tobinary(bytes)"
+This function constructs a binary string value like \fBtobinary\fR but consisting of the byte values output by \fBbytes\fR\.
+.
 .SS "type"
 The \fBtype\fR function returns the type of its argument as a string, which is one of null, boolean, number, string, array or object\.
 .
+.SS "stringtype"
+Strings can be UTF\-8 strings or binary strings\. The \fBstringtype\fR builtin outputs \fB"UTF\-8"\fR or \fB"binary"\fR when given a string as input\.
+.
 .IP "" 4
 .
 .nf
@@ -2216,7 +2237,7 @@ The input is converted to base64 as specified by RFC 4648\.
 \fB@base64d\fR:
 .
 .IP
-The inverse of \fB@base64\fR, input is decoded as specified by RFC 4648\. Note\e: If the decoded string is not UTF\-8, the results are undefined\.
+The inverse of \fB@base64\fR, input is decoded as specified by RFC 4648\. The result will be a binary string as if \fBtobinary_utf8\fR was used, meaning that on output bad characters will be replaced\.
 .
 .P
 This syntax can be combined with string interpolation in a useful way\. You can follow a \fB@foo\fR token with a string literal\. The contents of the string literal will \fInot\fR be escaped\. However, all interpolations made inside that string literal will be escaped\. For instance,
diff --git a/src/builtin.c b/src/builtin.c
index b38d4c2f4f..37ed57df56 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -102,6 +102,14 @@ static jv f_plus(jq_state *jq, jv input, jv a, jv b) {
     return jv_array_concat(a, b);
   } else if (jv_get_kind(a) == JV_KIND_OBJECT && jv_get_kind(b) == JV_KIND_OBJECT) {
     return jv_object_merge(a, b);
+  } else if (jv_get_kind(a) == JV_KIND_STRING &&
+             jv_get_string_kind(a) != JV_STRING_KIND_UTF8 &&
+             jv_get_kind(b) == JV_KIND_NUMBER) {
+    int c = jv_number_value(b);
+    if (c < 0 || c > 255)
+      return type_error2(a, b, "cannot be added because the latter is not a valid byte value");
+    unsigned char uc = c;
+    return jv_binary_append_buf(a, &uc, 1);
   } else {
     return type_error2(a, b, "cannot be added");
   }
@@ -511,7 +519,15 @@ static jv f_length(jq_state *jq, jv input) {
 
 static jv f_tostring(jq_state *jq, jv input) {
   if (jv_get_kind(input) == JV_KIND_STRING) {
-    return input;
+    switch (jv_get_string_kind(input)) {
+    case JV_STRING_KIND_UTF8:
+      return input;
+    default:
+      jv o = jv_string_sized(jv_string_value(input),
+                             jv_string_length_bytes(jv_copy(input)));
+      jv_free(input);
+      return o;
+    }
   } else {
     return jv_dump_string(input, 0);
   }
@@ -525,24 +541,6 @@ static jv f_utf8bytelength(jq_state *jq, jv input) {
 
 #define CHARS_ALPHANUM "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
 
-static const unsigned char BASE64_ENCODE_TABLE[64 + 1] = CHARS_ALPHANUM "+/";
-static const unsigned char BASE64_INVALID_ENTRY = 0xFF;
-static const unsigned char BASE64_DECODE_TABLE[255] = {
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  62, // +
-  0xFF, 0xFF, 0xFF,
-  63, // /
-  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // 0-9
-  0xFF, 0xFF, 0xFF,
-  99, // =
-  0xFF, 0xFF, 0xFF,
-  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // A-Z
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  // a-z
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
-};
-
-
 static jv escape_string(jv input, const char* escapings) {
 
   assert(jv_get_kind(input) == JV_KIND_STRING);
@@ -560,7 +558,7 @@ static jv escape_string(jv input, const char* escapings) {
   const char* i = jv_string_value(input);
   const char* end = i + jv_string_length_bytes(jv_copy(input));
   const char* cstart;
-  int c = 0;
+  uint32_t c = 0;
   while ((i = jvp_utf8_next((cstart = i), end, &c))) {
     if (c < 128 && lookup[c]) {
       ret = jv_string_append_str(ret, lookup[c]);
@@ -693,70 +691,10 @@ static jv f_format(jq_state *jq, jv input, jv fmt) {
     return line;
   } else if (!strcmp(fmt_s, "base64")) {
     jv_free(fmt);
-    input = f_tostring(jq, input);
-    jv line = jv_string("");
-    const unsigned char* data = (const unsigned char*)jv_string_value(input);
-    int len = jv_string_length_bytes(jv_copy(input));
-    for (int i=0; i<len; i+=3) {
-      uint32_t code = 0;
-      int n = len - i >= 3 ? 3 : len-i;
-      for (int j=0; j<3; j++) {
-        code <<= 8;
-        code |= j < n ? (unsigned)data[i+j] : 0;
-      }
-      char buf[4];
-      for (int j=0; j<4; j++) {
-        buf[j] = BASE64_ENCODE_TABLE[(code >> (18 - j*6)) & 0x3f];
-      }
-      if (n < 3) buf[3] = '=';
-      if (n < 2) buf[2] = '=';
-      line = jv_string_append_buf(line, buf, sizeof(buf));
-    }
-    jv_free(input);
-    return line;
+    return jv_binary_to_base64(f_tostring(jq, input));
   } else if (!strcmp(fmt_s, "base64d")) {
     jv_free(fmt);
-    input = f_tostring(jq, input);
-    const unsigned char* data = (const unsigned char*)jv_string_value(input);
-    int len = jv_string_length_bytes(jv_copy(input));
-    size_t decoded_len = (3 * len) / 4; // 3 usable bytes for every 4 bytes of input
-    char *result = jv_mem_calloc(decoded_len, sizeof(char));
-    memset(result, 0, decoded_len * sizeof(char));
-    uint32_t ri = 0;
-    int input_bytes_read=0;
-    uint32_t code = 0;
-    for (int i=0; i<len && data[i] != '='; i++) {
-      if (BASE64_DECODE_TABLE[data[i]] == BASE64_INVALID_ENTRY) {
-        free(result);
-        return type_error(input, "is not valid base64 data");
-      }
-
-      code <<= 6;
-      code |= BASE64_DECODE_TABLE[data[i]];
-      input_bytes_read++;
-
-      if (input_bytes_read == 4) {
-        result[ri++] = (code >> 16) & 0xFF;
-        result[ri++] = (code >> 8) & 0xFF;
-        result[ri++] = code & 0xFF;
-        input_bytes_read = 0;
-        code = 0;
-      }
-    }
-    if (input_bytes_read == 3) {
-      result[ri++] = (code >> 10) & 0xFF;
-      result[ri++] = (code >> 2) & 0xFF;
-    } else if (input_bytes_read == 2) {
-      result[ri++] = (code >> 4) & 0xFF;
-    } else if (input_bytes_read == 1) {
-      free(result);
-      return type_error(input, "trailing base64 byte found");
-    }
-
-    jv line = jv_string_sized(result, ri);
-    jv_free(input);
-    free(result);
-    return line;
+    return jv_binary_from_base64(f_tostring(jq, input));
   } else {
     jv_free(input);
     return jv_invalid_with_msg(jv_string_concat(fmt, jv_string(" is not a valid format")));
@@ -1603,13 +1541,24 @@ static jv f_strftime(jq_state *jq, jv a, jv b) {
 
   const char *fmt = jv_string_value(b);
   size_t alloced = strlen(fmt) + 100;
-  char *buf = alloca(alloced);
+  char *buf;
+  if (alloced > 2048)
+    buf = jv_mem_alloc(alloced);
+  else
+    buf = alloca(alloced);
   size_t n = strftime(buf, alloced, fmt, &tm);
   jv_free(b);
   /* POSIX doesn't provide errno values for strftime() failures; weird */
-  if (n == 0 || n > alloced)
+  if (n == 0 || n > alloced) {
+    if (alloced > 2048)
+      jv_mem_free(buf);
     return jv_invalid_with_msg(jv_string("strftime/1: unknown system failure"));
-  return jv_string(buf);
+  }
+  if (alloced < 2048)
+    return jv_string(buf);
+  b = jv_string(buf);
+  jv_mem_free(buf);
+  return b;
 }
 #else
 static jv f_strftime(jq_state *jq, jv a, jv b) {
@@ -1678,6 +1627,54 @@ static jv f_current_line(jq_state *jq, jv a) {
   return jq_util_input_get_current_line(jq);
 }
 
+static jv f_tobinary(jq_state *jq, jv a) {
+  switch (jv_get_kind(a)) {
+  case JV_KIND_STRING:
+    a.subkind = JV_STRING_KIND_BINARY;
+    return a;
+  case JV_KIND_ARRAY:
+    int len = jv_array_length(jv_copy(a));
+    unsigned char *b = jv_mem_alloc(len);
+    jv_array_foreach(a, i, x) {
+      if (jv_get_kind(x) != JV_KIND_NUMBER) {
+        char errbuf[15];
+
+        jv_mem_free(b);
+        return jv_invalid_with_msg(jv_string_fmt("Not a byte value at array index %d: %s", i,
+                                                 jv_dump_string_trunc(jv_copy(x), errbuf, sizeof(errbuf))));
+      }
+      /* XXX No validation that `x' is an integer... */
+      b[i] = jv_number_value(x);
+    }
+    jv_free(a);
+    a = jv_binary_sized(b, len);
+    jv_mem_free(b);
+    return a;
+  default:
+    return ret_error(a, jv_string("Only strings and arrays of byte values can be converted to binary"));
+  }
+}
+
+static jv f_tobinary_bytearray(jq_state *jq, jv a) {
+  a = f_tobinary(jq, a);
+  a.subkind = JV_STRING_KIND_BINARY_BYTEARRAY;
+  return a;
+}
+
+static jv f_tobinary_utf8(jq_state *jq, jv a) {
+  a = f_tobinary(jq, a);
+  a.subkind = JV_STRING_KIND_BINARY_UTF8;
+  return a;
+}
+
+static jv f_stringtype(jq_state *jq, jv a) {
+  if (jv_get_kind(a) != JV_KIND_STRING)
+    return type_error(a, "is not a string");
+  jv b = jv_string(jv_string_kind_name(jv_get_string_kind(a)));
+  jv_free(a);
+  return b;
+}
+
 #define LIBM_DD(name) \
   {(cfunction_ptr)f_ ## name,  #name, 1},
 #define LIBM_DD_NO(name)
@@ -1769,6 +1766,10 @@ static const struct cfunction function_list[] = {
   {(cfunction_ptr)f_now, "now", 1},
   {(cfunction_ptr)f_current_filename, "input_filename", 1},
   {(cfunction_ptr)f_current_line, "input_line_number", 1},
+  {(cfunction_ptr)f_tobinary, "tobinary", 1},
+  {(cfunction_ptr)f_tobinary_bytearray, "tobinary_bytearray", 1},
+  {(cfunction_ptr)f_tobinary_utf8, "tobinary_utf8", 1},
+  {(cfunction_ptr)f_stringtype, "stringtype", 1},
 };
 #undef LIBM_DDDD_NO
 #undef LIBM_DDD_NO
diff --git a/src/builtin.jq b/src/builtin.jq
index a13d7845bf..b6341efb8a 100644
--- a/src/builtin.jq
+++ b/src/builtin.jq
@@ -281,3 +281,5 @@ def JOIN($idx; stream; idx_expr; join_expr):
   stream | [., $idx[idx_expr]] | join_expr;
 def IN(s): any(s == .; .);
 def IN(src; s): any(src == s; .);
+
+def tobinary(bytes): reduce bytes as $byte (""|tobinary; . + $byte);
diff --git a/src/execute.c b/src/execute.c
index adf3773799..557841123a 100644
--- a/src/execute.c
+++ b/src/execute.c
@@ -11,6 +11,7 @@
 
 #include "jv_alloc.h"
 #include "jq_parser.h"
+#include "jv_unicode.h"
 #include "locfile.h"
 #include "jv.h"
 #include "jq.h"
@@ -673,20 +674,47 @@ jv jq_next(jq_state *jq) {
     case INDEX_OPT: {
       jv t = stack_pop(jq);
       jv k = stack_pop(jq);
-      // detect invalid path expression like path(reverse | .a)
-      if (!path_intact(jq, jv_copy(t))) {
-        char keybuf[15];
-        char objbuf[30];
-        jv msg = jv_string_fmt(
-            "Invalid path expression near attempt to access element %s of %s",
-            jv_dump_string_trunc(k, keybuf, sizeof(keybuf)),
-            jv_dump_string_trunc(t, objbuf, sizeof(objbuf)));
-        set_error(jq, jv_invalid_with_msg(msg));
-        goto do_backtrack;
+      jv v;
+      if (jv_get_kind(t) == JV_KIND_STRING && jv_get_kind(k) == JV_KIND_NUMBER) {
+        switch (jv_get_string_kind(t)) {
+        case JV_STRING_KIND_UTF8:
+          v = jv_string_append_codepoint(jv_string(""), jv_string_index(t, jv_number_value(k)));
+          break;
+        case JV_STRING_KIND_BINARY:
+        case JV_STRING_KIND_BINARY_BYTEARRAY:
+        case JV_STRING_KIND_BINARY_UTF8:
+          const char *s = jv_string_value(t);
+          int len = jv_string_length_bytes(jv_copy(t));
+          int idx = jv_number_value(k);
+
+          if (idx < 0)
+            idx += idx;
+          if (idx < 0 || idx >= len)
+            goto do_backtrack;
+          v = jv_number(((unsigned char *)s)[idx]);
+          jv_free(t);
+          break;
+        default:
+          set_error(jq, jv_invalid_with_msg(jv_string("Internal error: unknown string sub-type")));
+          goto do_backtrack;
+        }
+      } else {
+        // detect invalid path expression like path(reverse | .a)
+        if (!path_intact(jq, jv_copy(t))) {
+          char keybuf[15];
+          char objbuf[30];
+          jv msg = jv_string_fmt(
+                                 "Invalid path expression near attempt to access element %s of %s",
+                                 jv_dump_string_trunc(k, keybuf, sizeof(keybuf)),
+                                 jv_dump_string_trunc(t, objbuf, sizeof(objbuf)));
+          set_error(jq, jv_invalid_with_msg(msg));
+          goto do_backtrack;
+        }
+        v = jv_get(t, jv_copy(k));
+        if (jv_is_valid(v))
+          path_append(jq, k, jv_copy(v));
       }
-      jv v = jv_get(t, jv_copy(k));
       if (jv_is_valid(v)) {
-        path_append(jq, k, jv_copy(v));
         stack_push(jq, v);
       } else {
         jv_free(k);
@@ -721,7 +749,8 @@ jv jq_next(jq_state *jq) {
     case EACH_OPT: {
       jv container = stack_pop(jq);
       // detect invalid path expression like path(reverse | .[])
-      if (!path_intact(jq, jv_copy(container))) {
+      if (jv_get_kind(container) != JV_KIND_STRING &&
+          !path_intact(jq, jv_copy(container))) {
         char errbuf[30];
         jv msg = jv_string_fmt(
             "Invalid path expression near attempt to iterate through %s",
@@ -758,6 +787,44 @@ jv jq_next(jq_state *jq) {
           key = jv_object_iter_key(container, idx);
           value = jv_object_iter_value(container, idx);
         }
+      } else if (jv_get_kind(container) == JV_KIND_STRING) {
+        switch (jv_get_string_kind(container)) {
+        case JV_STRING_KIND_UTF8: {
+          const char *s = jv_string_value(container);
+          const char *next = s;
+          int len = jv_string_length_bytes(jv_copy(container));
+          const char *end = s + len;
+          int c;
+          if (opcode == EACH || opcode == EACH_OPT) {
+            idx = 0;
+          } else {
+            next = s + idx;
+          }
+          keep_going = idx < len;
+          next = jvp_utf8_next(next, end, &c);
+          idx = next - s;
+          value = jv_string_append_codepoint(jv_string(""), c);
+          is_last = jvp_utf8_next(next, end, &c) == 0;
+          break;
+        }
+        case JV_STRING_KIND_BINARY:
+        case JV_STRING_KIND_BINARY_BYTEARRAY:
+        case JV_STRING_KIND_BINARY_UTF8:
+          const unsigned char *s = (const unsigned char *)jv_string_value(container);
+          int len = jv_string_length_bytes(jv_copy(container));
+          if (opcode == EACH || opcode == EACH_OPT) {
+            idx = 0;
+          } else {
+            idx++;
+          }
+          keep_going = idx < len;
+          value = jv_string_append_codepoint(jv_string(""), s[idx]);
+          is_last = idx == len -1;
+          break;
+        default:
+          set_error(jq, jv_invalid_with_msg(jv_string("Internal error: unknown string sub-type")));
+          goto do_backtrack;
+        }
       } else {
         assert(opcode == EACH || opcode == EACH_OPT);
         if (opcode == EACH) {
@@ -777,15 +844,17 @@ jv jq_next(jq_state *jq) {
         goto do_backtrack;
       } else if (is_last) {
         // we don't need to make a backtrack point
-        jv_free(container);
-        path_append(jq, key, jv_copy(value));
+        if (jv_get_kind(container) != JV_KIND_STRING)
+          path_append(jq, key, jv_copy(value));
         stack_push(jq, value);
+        jv_free(container);
       } else {
         struct stack_pos spos = stack_get_pos(jq);
         stack_push(jq, container);
         stack_push(jq, jv_number(idx));
         stack_save(jq, pc - 1, spos);
-        path_append(jq, key, jv_copy(value));
+        if (jv_get_kind(container) != JV_KIND_STRING)
+          path_append(jq, key, jv_copy(value));
         stack_push(jq, value);
       }
       break;
diff --git a/src/jq.h b/src/jq.h
index 5269de3ff8..9c6b1592f9 100644
--- a/src/jq.h
+++ b/src/jq.h
@@ -54,9 +54,15 @@ jv jq_get_attr(jq_state *, jv);
  */
 typedef struct jq_util_input_state jq_util_input_state;
 typedef void (*jq_util_msg_cb)(void *, const char *);
+typedef enum {
+  JQ_UTIL_PARSE_SLURP  = 1,
+  JQ_UTIL_PARSE_BINARY = 2,
+} jq_util_parser_enum;
+
+
 
 jq_util_input_state *jq_util_input_init(jq_util_msg_cb, void *);
-void jq_util_input_set_parser(jq_util_input_state *, jv_parser *, int);
+void jq_util_input_set_parser(jq_util_input_state *, jv_parser *, jq_util_parser_enum);
 void jq_util_input_free(jq_util_input_state **);
 void jq_util_input_add_input(jq_util_input_state *, const char *);
 int jq_util_input_errors(jq_util_input_state *);
diff --git a/src/jv.c b/src/jv.c
index 498a14149d..5ffd5c4988 100644
--- a/src/jv.c
+++ b/src/jv.c
@@ -144,6 +144,24 @@ jv jv_bool(int x) {
   return x ? JV_TRUE : JV_FALSE;
 }
 
+jv_string_kind jv_get_string_kind(jv v) {
+  assert(jv_get_kind(v) == JV_KIND_STRING);
+  return v.subkind;
+}
+
+const char* jv_string_kind_name(jv_string_kind k) {
+  switch (k) {
+  case JV_STRING_KIND_UTF8:
+    return "UTF-8";
+  case JV_STRING_KIND_BINARY:
+  case JV_STRING_KIND_BINARY_BYTEARRAY:
+  case JV_STRING_KIND_BINARY_UTF8:
+    return "binary";
+  default:
+    return "<unknown>";
+  }
+}
+
 /*
  * Invalid objects, with optional error messages
  */
@@ -1094,10 +1112,10 @@ static jv jvp_string_copy_replace_bad(const char* data, uint32_t length) {
   uint32_t maxlength = length * 3 + 1; // worst case: all bad bytes, each becomes a 3-byte U+FFFD
   jvp_string* s = jvp_string_alloc(maxlength);
   char* out = s->data;
-  int c = 0;
+  uint32_t c = 0;
 
   while ((i = jvp_utf8_next((cstart = i), end, &c))) {
-    if (c == -1) {
+    if (c == (uint32_t)-1) {
       c = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER
     }
     out += jvp_utf8_encode(c, out);
@@ -1167,8 +1185,8 @@ static jv jvp_string_append(jv string, const char* data, uint32_t len) {
     memcpy(news->data, s->data, currlen);
     memcpy(news->data + currlen, data, len);
     news->data[currlen + len] = 0;
+    jv r = {JVP_FLAGS_STRING, string.subkind, 0, 0, {&news->refcnt}};
     jvp_string_free(string);
-    jv r = {JVP_FLAGS_STRING, 0, 0, 0, {&news->refcnt}};
     return r;
   }
 }
@@ -1245,6 +1263,148 @@ static int jvp_string_equal(jv a, jv b) {
   return memcmp(stra->data, strb->data, jvp_string_length(stra)) == 0;
 }
 
+/*
+ * Binary strings (public API)
+ */
+
+jv jv_binary_sized(const unsigned char *str, int len) {
+  jv b = jvp_string_new((const char *)str, len);
+  b.subkind = JV_STRING_KIND_BINARY;
+  return b;
+}
+
+jv jv_binary(const unsigned char *str) {
+  /* The input is NUL-terminated, but otherwise binary */
+  return jv_binary_sized(str, strlen((const char *)str));
+}
+
+int jv_binary_length(jv j) {
+  assert(JVP_HAS_KIND(j, JV_KIND_STRING));
+  int r = jvp_string_length(jvp_string_ptr(j));
+  jv_free(j);
+  return r;
+}
+
+jv jv_binary_slice(jv j, int start, int end) {
+  assert(JVP_HAS_KIND(j, JV_KIND_STRING));
+  const unsigned char *s = (const unsigned char *)jv_string_value(j);
+  int len = jv_string_length_bytes(jv_copy(j));
+  jv res;
+
+  jvp_clamp_slice_params(len, &start, &end);
+  assert(0 <= start && start <= end && end <= len);
+
+  /* See note in jv_string_slice() */
+  res = jv_binary_sized(s + start, end - start);
+  jv_free(j);
+  return res;
+}
+
+jv jv_binary_append_buf(jv a, const unsigned char *buf, int len) {
+  return jvp_string_append(a, (const char *)buf, len);
+}
+
+jv jv_binary_from_string(jv j) {
+  j.subkind = JV_STRING_KIND_BINARY;
+  return j;
+}
+
+#define CHARS_ALPHANUM "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
+
+static const unsigned char BASE64_ENCODE_TABLE[64 + 1] = CHARS_ALPHANUM "+/";
+static const unsigned char BASE64_INVALID_ENTRY = 0xFF;
+static const unsigned char BASE64_DECODE_TABLE[255] = {
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+  62, // +
+  0xFF, 0xFF, 0xFF,
+  63, // /
+  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // 0-9
+  0xFF, 0xFF, 0xFF,
+  99, // =
+  0xFF, 0xFF, 0xFF,
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // A-Z
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  // a-z
+  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+jv jv_binary_to_base64(jv input) {
+  const unsigned char* data = (const unsigned char*)jv_string_value(input);
+  int len = jv_string_length_bytes(jv_copy(input));
+  jv out = jv_string("");
+  for (int i=0; i<len; i+=3) {
+    uint32_t code = 0;
+    int n = len - i >= 3 ? 3 : len-i;
+    for (int j=0; j<3; j++) {
+      code <<= 8;
+      code |= j < n ? (unsigned)data[i+j] : 0;
+    }
+    char buf[4];
+    for (int j=0; j<4; j++) {
+      buf[j] = BASE64_ENCODE_TABLE[(code >> (18 - j*6)) & 0x3f];
+    }
+    if (n < 3) buf[3] = '=';
+    if (n < 2) buf[2] = '=';
+    out = jv_string_append_buf(out, buf, sizeof(buf));
+  }
+  jv_free(input);
+  return out;
+}
+
+jv jv_binary_from_base64(jv input) {
+  const unsigned char* data = (const unsigned char*)jv_string_value(input);
+  int len = jv_string_length_bytes(jv_copy(input));
+  size_t decoded_len = (3 * len) / 4; // 3 usable bytes for every 4 bytes of input
+  unsigned char *result = jv_mem_calloc(decoded_len, sizeof(char));
+  memset(result, 0, decoded_len * sizeof(char));
+  uint32_t ri = 0;
+  int input_bytes_read=0;
+  uint32_t code = 0;
+  for (int i=0; i<len && data[i] != '='; i++) {
+    if (BASE64_DECODE_TABLE[data[i]] == BASE64_INVALID_ENTRY) {
+      char errbuf[15];
+
+      jv err = jv_invalid_with_msg(jv_string_fmt("Invalid base64 data (%s)",
+                                                 jv_dump_string_trunc(jv_copy(input), errbuf, sizeof(errbuf))));
+      jv_free(input);
+      free(result);
+      return err;
+    }
+
+    code <<= 6;
+    code |= BASE64_DECODE_TABLE[data[i]];
+    input_bytes_read++;
+
+    if (input_bytes_read == 4) {
+      result[ri++] = (code >> 16) & 0xFF;
+      result[ri++] = (code >> 8) & 0xFF;
+      result[ri++] = code & 0xFF;
+      input_bytes_read = 0;
+      code = 0;
+    }
+  }
+  if (input_bytes_read == 3) {
+    result[ri++] = (code >> 10) & 0xFF;
+    result[ri++] = (code >> 2) & 0xFF;
+  } else if (input_bytes_read == 2) {
+    result[ri++] = (code >> 4) & 0xFF;
+  } else if (input_bytes_read == 1) {
+    char errbuf[15];
+
+    jv err = jv_invalid_with_msg(jv_string_fmt("Invalid base64 data (trailing base64 byte found) (%s)",
+                                               jv_dump_string_trunc(jv_copy(input), errbuf, sizeof(errbuf))));
+    jv_free(input);
+    free(result);
+    return err;
+  }
+
+  jv line = jv_binary_sized(result, ri);
+  line.subkind = JV_STRING_KIND_BINARY_UTF8;
+  jv_free(input);
+  free(result);
+  return line;
+}
+
 /*
  * Strings (public API)
  */
@@ -1264,6 +1424,12 @@ jv jv_string(const char* str) {
   return jv_string_sized(str, strlen(str));
 }
 
+jv jv_string_from_binary(jv j) {
+  jv r = jv_string_sized(jv_string_value(j), jv_string_length_bytes(jv_copy(j)));
+  jv_free(j);
+  return r;
+}
+
 int jv_string_length_bytes(jv j) {
   assert(JVP_HAS_KIND(j, JV_KIND_STRING));
   int r = jvp_string_length(jvp_string_ptr(j));
@@ -1275,12 +1441,49 @@ int jv_string_length_codepoints(jv j) {
   assert(JVP_HAS_KIND(j, JV_KIND_STRING));
   const char* i = jv_string_value(j);
   const char* end = i + jv_string_length_bytes(jv_copy(j));
-  int c = 0, len = 0;
+  uint32_t c = 0;
+  int len = 0;
   while ((i = jvp_utf8_next(i, end, &c))) len++;
   jv_free(j);
   return len;
 }
 
+uint32_t jv_string_index(jv j, int idx) {
+  assert(JVP_HAS_KIND(j, JV_KIND_STRING));
+  const char* i = jv_string_value(j);
+  const char* end = i + jv_string_length_bytes(jv_copy(j));
+  uint32_t c = 0;
+  switch (jv_get_string_kind(j)) {
+  case JV_STRING_KIND_UTF8:
+    if (idx < 0) {
+      idx += jv_string_length_codepoints(jv_copy(j));
+      if (idx < 0)
+        return 0;
+    }
+    while (i < end && idx >= 0) {
+      i = jvp_utf8_next(i, end, &c);
+      idx--;
+    }
+    if (i == end && idx != -1)
+      c = 0;
+    break;
+  case JV_STRING_KIND_BINARY:
+  case JV_STRING_KIND_BINARY_BYTEARRAY:
+  case JV_STRING_KIND_BINARY_UTF8:
+    if (idx < 0)
+      idx += end - i;
+    if (idx < 0)
+      return 0;
+    if (i + idx >= end)
+      return 0;
+    c = ((const unsigned char *)i)[idx];
+    break;
+  default:
+    break;
+  }
+  jv_free(j);
+  return c;
+}
 
 jv jv_string_indexes(jv j, jv k) {
   assert(JVP_HAS_KIND(j, JV_KIND_STRING));
@@ -1317,18 +1520,27 @@ jv jv_string_split(jv j, jv sep) {
   assert(jv_get_refcnt(a) == 1);
 
   if (seplen == 0) {
-    int c;
+    uint32_t c;
     while ((jstr = jvp_utf8_next(jstr, jend, &c)))
-      a = jv_array_append(a, jv_string_append_codepoint(jv_string(""), c));
+      a = jv_array_append(a, jv_string_append_codepoint(j.subkind == JV_STRING_KIND_UTF8 ?
+                                                          jv_string("") :
+                                                          jv_binary((const unsigned char *)""),
+                                                          c));
   } else {
     for (p = jstr; p < jend; p = s + seplen) {
       s = _jq_memmem(p, jend - p, sepstr, seplen);
       if (s == NULL)
         s = jend;
-      a = jv_array_append(a, jv_string_sized(p, s - p));
-      // Add an empty string to denote that j ends on a sep
+      a = jv_array_append(a,
+                          j.subkind == JV_STRING_KIND_UTF8 ?
+                            jv_string_sized(p, s - p) :
+                            jv_binary_sized((const unsigned char *)p, s - p));
+      // Add an empty string o denote that j ends on a sep
       if (s + seplen == jend && seplen != 0)
-        a = jv_array_append(a, jv_string(""));
+        a = jv_array_append(a,
+                            j.subkind == JV_STRING_KIND_UTF8 ?
+                              jv_string("") :
+                              jv_binary((const unsigned char *)""));
     }
   }
   jv_free(j);
@@ -1342,7 +1554,7 @@ jv jv_string_explode(jv j) {
   int len = jv_string_length_bytes(jv_copy(j));
   const char* end = i + len;
   jv a = jv_array_sized(len);
-  int c;
+  uint32_t c;
   while ((i = jvp_utf8_next(i, end, &c)))
     a = jv_array_append(a, jv_number(c));
   jv_free(j);
@@ -1385,11 +1597,13 @@ const char* jv_string_value(jv j) {
 
 jv jv_string_slice(jv j, int start, int end) {
   assert(JVP_HAS_KIND(j, JV_KIND_STRING));
+  if (j.subkind != JV_STRING_KIND_UTF8)
+    return jv_binary_slice(j, start, end);
   const char *s = jv_string_value(j);
   int len = jv_string_length_bytes(jv_copy(j));
   int i;
   const char *p, *e;
-  int c;
+  uint32_t c;
   jv res;
 
   jvp_clamp_slice_params(len, &start, &end);
@@ -1402,7 +1616,7 @@ jv jv_string_slice(jv j, int start, int end) {
       jv_free(j);
       return jv_string_empty(16);
     }
-    if (c == -1) {
+    if (c == (uint32_t)-1) {
       jv_free(j);
       return jv_invalid_with_msg(jv_string("Invalid UTF-8 string"));
     }
@@ -1414,7 +1628,7 @@ jv jv_string_slice(jv j, int start, int end) {
       e = s + len;
       break;
     }
-    if (c == -1) {
+    if (c == (uint32_t)-1) {
       jv_free(j);
       return jv_invalid_with_msg(jv_string("Invalid UTF-8 string"));
     }
@@ -1433,27 +1647,31 @@ jv jv_string_slice(jv j, int start, int end) {
 }
 
 jv jv_string_concat(jv a, jv b) {
+  jv_string_kind subkind = a.subkind | b.subkind;
   a = jvp_string_append(a, jv_string_value(b),
                         jvp_string_length(jvp_string_ptr(b)));
   jv_free(b);
+  a.subkind = subkind;
   return a;
 }
 
 jv jv_string_append_buf(jv a, const char* buf, int len) {
-  if (jvp_utf8_is_valid(buf, buf+len)) {
-    a = jvp_string_append(a, buf, len);
-  } else {
-    jv b = jvp_string_copy_replace_bad(buf, len);
-    a = jv_string_concat(a, b);
-  }
-  return a;
+  if (a.subkind != JV_STRING_KIND_UTF8)
+    return jvp_string_append(a, buf, len);
+  if (jvp_utf8_is_valid(buf, buf+len))
+    return jvp_string_append(a, buf, len);
+  jv b = jvp_string_copy_replace_bad(buf, len);
+  return jv_string_concat(a, b);
 }
 
 jv jv_string_append_codepoint(jv a, uint32_t c) {
+  if (a.subkind != JV_STRING_KIND_UTF8 && c < 256) {
+    unsigned char uc = c;
+    return jvp_string_append(a, (char *)&uc, 1);
+  }
   char buf[5];
   int len = jvp_utf8_encode(c, buf);
-  a = jvp_string_append(a, buf, len);
-  return a;
+  return jvp_string_append(a, buf, len);
 }
 
 jv jv_string_append_str(jv a, const char* str) {
diff --git a/src/jv.h b/src/jv.h
index 8c96f822f0..ef764f2fcb 100644
--- a/src/jv.h
+++ b/src/jv.h
@@ -16,13 +16,26 @@ typedef enum {
   JV_KIND_OBJECT
 } jv_kind;
 
+typedef enum {
+  /* String subtypes */
+  JV_STRING_KIND_UTF8,
+  JV_STRING_KIND_BINARY,            /* prints as base64 */
+  JV_STRING_KIND_BINARY_BYTEARRAY,  /* prints as array of byte values */
+  JV_STRING_KIND_BINARY_UTF8,       /* prints as UTF-8 with bad character substitutions */
+  /* Maybe add empty and 1-element arrays as array subtypes to avoid allocations? */
+  /*
+   * XXX TODO MAYBE: merge with JVP_FLAGS concept, move to using kind_flags field,
+   * and/or combine the old pad_ and kind/kind_flags fields.
+   */
+} jv_string_kind;
+
 struct jv_refcnt;
 
 /* All of the fields of this struct are private.
    Really. Do not play with them. */
 typedef struct {
   unsigned char kind_flags;
-  unsigned char pad_;
+  unsigned char subkind;
   unsigned short offset;  /* array offsets */
   int size;
   union {
@@ -40,6 +53,9 @@ jv_kind jv_get_kind(jv);
 const char* jv_kind_name(jv_kind);
 static int jv_is_valid(jv x) { return jv_get_kind(x) != JV_KIND_INVALID; }
 
+jv_string_kind jv_get_string_kind(jv);
+const char* jv_string_kind_name(jv_string_kind);
+
 jv jv_copy(jv);
 void jv_free(jv);
 
@@ -105,13 +121,24 @@ jv jv_array_indexes(jv, jv);
 #endif
 
 
+jv jv_binary_sized(const unsigned char *, int);
+jv jv_binary(const unsigned char *);
+int jv_binary_length(jv);
+jv jv_binary_slice(jv, int, int);
+jv jv_binary_append_buf(jv, const unsigned char *, int);
+jv jv_binary_to_base64(jv);
+jv jv_binary_from_base64(jv);
+jv jv_binary_from_string(jv);
+
 jv jv_string(const char*);
 jv jv_string_sized(const char*, int);
+jv jv_string_from_binary(jv);
 jv jv_string_empty(int len);
 int jv_string_length_bytes(jv);
 int jv_string_length_codepoints(jv);
 unsigned long jv_string_hash(jv);
 const char* jv_string_value(jv);
+uint32_t jv_string_index(jv, int);
 jv jv_string_indexes(jv j, jv k);
 jv jv_string_slice(jv j, int start, int end);
 jv jv_string_concat(jv, jv);
diff --git a/src/jv_print.c b/src/jv_print.c
index d1db88aa89..49e14cd52c 100644
--- a/src/jv_print.c
+++ b/src/jv_print.c
@@ -113,16 +113,40 @@ static void put_indent(int n, int flags, FILE* fout, jv* strout, int T) {
   }
 }
 
-static void jvp_dump_string(jv str, int ascii_only, FILE* F, jv* S, int T) {
+static void jvp_dump_string(struct dtoa_context *C, jv str, int ascii_only, FILE* F, jv* S, int T) {
   assert(jv_get_kind(str) == JV_KIND_STRING);
+  if (jv_get_string_kind(str) == JV_STRING_KIND_BINARY_BYTEARRAY) {
+    const unsigned char *s = (const unsigned char *)jv_string_value(str);
+    char buf[JVP_DTOA_FMT_MAX_LEN];
+    int i, len = jv_string_length_bytes(jv_copy(str));
+
+    put_char('[', F, S, T);
+    for (i = 0; i < len; i++) {
+      /* XXX This is way too slow */
+      /* XXX Need to do indentation */
+      put_str(jvp_dtoa_fmt(C, buf, s[i]), F, S, 0 /* XXX flags */);
+      if (i < len - 1)
+        put_char(',', F, S, T);
+    }
+    put_char(']', F, S, T);
+    return;
+  }
+  if (jv_get_string_kind(str) == JV_STRING_KIND_BINARY) {
+    // TODO: Add several different ways to represent binary.
+    //       We should have: base64, hex, array of bytes, and
+    //       maybe even a just-8bit representation
+    str = jv_binary_to_base64(str);
+  } else if (jv_get_string_kind(str) == JV_STRING_KIND_BINARY_UTF8) {
+    str = jv_string_from_binary(str);
+  }
   const char* i = jv_string_value(str);
   const char* end = i + jv_string_length_bytes(jv_copy(str));
   const char* cstart;
-  int c = 0;
+  uint32_t c = 0;
   char buf[32];
   put_char('"', F, S, T);
   while ((i = jvp_utf8_next((cstart = i), end, &c))) {
-    assert(c != -1);
+    assert(c != (uint32_t)-1);
     int unicode_escape = 0;
     if (0x20 <= c && c <= 0x7E) {
       // printable ASCII
@@ -176,7 +200,7 @@ static void jvp_dump_string(jv str, int ascii_only, FILE* F, jv* S, int T) {
       put_str(buf, F, S, T);
     }
   }
-  assert(c != -1);
+  assert(c != (uint32_t)-1);
   put_char('"', F, S, T);
 }
 
@@ -205,7 +229,7 @@ static void jv_dump_term(struct dtoa_context* C, jv x, int flags, int indent, FI
       jv msg = jv_invalid_get_msg(jv_copy(x));
       if (jv_get_kind(msg) == JV_KIND_STRING) {
         put_str("<invalid:", F, S, flags & JV_PRINT_ISATTY);
-        jvp_dump_string(msg, flags | JV_PRINT_ASCII, F, S, flags & JV_PRINT_ISATTY);
+        jvp_dump_string(C, msg, flags | JV_PRINT_ASCII, F, S, flags & JV_PRINT_ISATTY);
         put_str(">", F, S, flags & JV_PRINT_ISATTY);
       } else {
         put_str("<invalid>", F, S, flags & JV_PRINT_ISATTY);
@@ -250,7 +274,7 @@ static void jv_dump_term(struct dtoa_context* C, jv x, int flags, int indent, FI
     break;
   }
   case JV_KIND_STRING:
-    jvp_dump_string(x, flags & JV_PRINT_ASCII, F, S, flags & JV_PRINT_ISATTY);
+    jvp_dump_string(C, x, flags & JV_PRINT_ASCII, F, S, flags & JV_PRINT_ISATTY);
     if (flags & JV_PRINT_REFCOUNT)
       put_refcnt(C, refcnt, F, S, flags & JV_PRINT_ISATTY);
     break;
@@ -337,7 +361,7 @@ static void jv_dump_term(struct dtoa_context* C, jv x, int flags, int indent, FI
 
       first = 0;
       if (color) put_str(FIELD_COLOR, F, S, flags & JV_PRINT_ISATTY);
-      jvp_dump_string(key, flags & JV_PRINT_ASCII, F, S, flags & JV_PRINT_ISATTY);
+      jvp_dump_string(C, key, flags & JV_PRINT_ASCII, F, S, flags & JV_PRINT_ISATTY);
       jv_free(key);
       if (color) put_str(COLRESET, F, S, flags & JV_PRINT_ISATTY);
 
diff --git a/src/jv_unicode.c b/src/jv_unicode.c
index d197349f48..275547a0d9 100644
--- a/src/jv_unicode.c
+++ b/src/jv_unicode.c
@@ -26,12 +26,12 @@ const char* jvp_utf8_backtrack(const char* start, const char* min, int *missing_
   return start;
 }
 
-const char* jvp_utf8_next(const char* in, const char* end, int* codepoint_ret) {
+const char* jvp_utf8_next(const char* in, const char* end, uint32_t *codepoint_ret) {
   assert(in <= end);
   if (in == end) {
     return 0;
   }
-  int codepoint = -1;
+  uint32_t codepoint = 0xffffffff;
   unsigned char first = (unsigned char)in[0];
   int length = utf8_coding_length[first];
   if ((first & 0x80) == 0) {
@@ -50,7 +50,7 @@ const char* jvp_utf8_next(const char* in, const char* end, int* codepoint_ret) {
       unsigned ch = (unsigned char)in[i];
       if (utf8_coding_length[ch] != UTF8_CONTINUATION_BYTE){
         /* Invalid UTF8 sequence - not followed by the right number of continuation bytes */
-        codepoint = -1;
+        codepoint = 0xffffffff;
         length = i;
         break;
       }
@@ -75,9 +75,9 @@ const char* jvp_utf8_next(const char* in, const char* end, int* codepoint_ret) {
 }
 
 int jvp_utf8_is_valid(const char* in, const char* end) {
-  int codepoint;
+  uint32_t codepoint;
   while ((in = jvp_utf8_next(in, end, &codepoint))) {
-    if (codepoint == -1) return 0;
+    if (codepoint == 0xffffffff) return 0;
   }
   return 1;
 }
@@ -90,15 +90,16 @@ int jvp_utf8_decode_length(char startchar) {
 	else return 4;                                 // 1111 ____
 }
 
-int jvp_utf8_encode_length(int codepoint) {
+int jvp_utf8_encode_length(uint32_t codepoint) {
+  assert(codepoint <= 0x10FFFF);
   if (codepoint <= 0x7F) return 1;
   else if (codepoint <= 0x7FF) return 2;
   else if (codepoint <= 0xFFFF) return 3;
   else return 4;
 }
 
-int jvp_utf8_encode(int codepoint, char* out) {
-  assert(codepoint >= 0 && codepoint <= 0x10FFFF);
+int jvp_utf8_encode(uint32_t codepoint, char* out) {
+  assert(codepoint <= 0x10FFFF);
   char* start = out;
   if (codepoint <= 0x7F) {
     *out++ = codepoint;
diff --git a/src/jv_unicode.h b/src/jv_unicode.h
index 558721a8fd..ce511ee08a 100644
--- a/src/jv_unicode.h
+++ b/src/jv_unicode.h
@@ -1,12 +1,14 @@
 #ifndef JV_UNICODE_H
 #define JV_UNICODE_H
 
+#include <stdint.h>
+
 const char* jvp_utf8_backtrack(const char* start, const char* min, int *missing_bytes);
-const char* jvp_utf8_next(const char* in, const char* end, int* codepoint);
+const char* jvp_utf8_next(const char* in, const char* end, uint32_t *codepoint);
 int jvp_utf8_is_valid(const char* in, const char* end);
 
 int jvp_utf8_decode_length(char startchar);
 
-int jvp_utf8_encode_length(int codepoint);
-int jvp_utf8_encode(int codepoint, char* out);
+int jvp_utf8_encode_length(uint32_t codepoint);
+int jvp_utf8_encode(uint32_t codepoint, char* out);
 #endif
diff --git a/src/jv_utf8_tables.h b/src/jv_utf8_tables.h
index f1a4252fce..544f6be78a 100644
--- a/src/jv_utf8_tables.h
+++ b/src/jv_utf8_tables.h
@@ -33,5 +33,5 @@ static const unsigned char utf8_coding_bits[] =
   0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
   0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
   0x07, 0x07, 0x07, 0x07, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
-static const int utf8_first_codepoint[] =
+static const uint32_t utf8_first_codepoint[] =
  {0x00, 0x00, 0x80, 0x800, 0x10000};
diff --git a/src/util.c b/src/util.c
index 250bdf75f4..f87f144b76 100644
--- a/src/util.c
+++ b/src/util.c
@@ -41,6 +41,7 @@ void *alloca (size_t);
 #include "util.h"
 #include "jq.h"
 #include "jv_alloc.h"
+#include "jv_unicode.h"
 
 #ifdef WIN32
 FILE *fopen(const char *fname, const char *mode) {
@@ -189,6 +190,7 @@ struct jq_util_input_state {
   size_t buf_valid_len;
   jv current_filename;
   size_t current_line;
+  jq_util_parser_enum flags;
 };
 
 static void fprinter(void *data, const char *fname) {
@@ -210,13 +212,16 @@ jq_util_input_state *jq_util_input_init(jq_util_msg_cb err_cb, void *err_cb_data
   return new_state;
 }
 
-void jq_util_input_set_parser(jq_util_input_state *state, jv_parser *parser, int slurp) {
+void jq_util_input_set_parser(jq_util_input_state *state,
+                              jv_parser *parser,
+                              jq_util_parser_enum flags) {
   assert(!jv_is_valid(state->slurped));
   state->parser = parser;
+  state->flags = flags;
 
-  if (parser == NULL && slurp)
+  if (parser == NULL && (flags & JQ_UTIL_PARSE_SLURP))
     state->slurped = jv_string("");
-  else if (slurp)
+  else if ((flags & JQ_UTIL_PARSE_SLURP))
     state->slurped = jv_array();
   else
     state->slurped = jv_invalid();
@@ -279,7 +284,9 @@ static int jq_util_input_read_more(jq_util_input_state *state) {
         state->current_filename = jv_string("<stdin>");
       } else {
         state->current_input = fopen(f, "r");
-        state->current_filename = jv_string(f);
+        state->current_filename = jvp_utf8_is_valid(f, f + strlen(f)) ?
+            jv_string(f) :
+            jv_binary((const unsigned char *)f);
         if (!state->current_input) {
           state->err_cb(state->err_cb_data, f);
           state->failures++;
@@ -406,7 +413,10 @@ jv jq_util_input_next_input(jq_util_input_state *state) {
         continue;
       if (jv_is_valid(state->slurped)) {
         // Slurped raw input
-        state->slurped = jv_string_concat(state->slurped, jv_string_sized(state->buf, state->buf_valid_len));
+        if (state->flags & JQ_UTIL_PARSE_BINARY)
+          state->slurped = jv_string_concat(state->slurped, jv_binary_sized((const unsigned char *)state->buf, state->buf_valid_len));
+        else
+          state->slurped = jv_string_concat(state->slurped, jv_string_sized(state->buf, state->buf_valid_len));
       } else {
         if (!jv_is_valid(value))
           value = jv_string("");
diff --git a/tests/base64.test b/tests/base64.test
index 0f82b0b71d..80e3927e9b 100644
--- a/tests/base64.test
+++ b/tests/base64.test
@@ -27,9 +27,9 @@
 # invalid base64 characters (whitespace)
 . | try @base64d catch .
 "Not base64 data"
-"string (\"Not base64...) is not valid base64 data"
+"Invalid base64 data (\"Not base64...)"
 
 # invalid base64 (too many bytes, QUJD = "ABCD"
 . | try @base64d catch .
 "QUJDa"
-"string (\"QUJDa\") trailing base64 byte found"
+"Invalid base64 data (trailing base64 byte found) (\"QUJDa\")"