Skip to content

Commit

Permalink
JSON::GeneratorError expose invalid object
Browse files Browse the repository at this point in the history
Fix: ruby#710

Makes it easier to debug why a given tree of objects can't
be dumped as JSON.

Co-Authored-By: Étienne Barrié <etienne.barrie@gmail.com>
  • Loading branch information
byroot and etiennebarrie committed Nov 25, 2024
1 parent 55015fa commit 03d7414
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 50 deletions.
54 changes: 41 additions & 13 deletions ext/json/ext/generator/generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,28 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data

static int usascii_encindex, utf8_encindex, binary_encindex;

#ifdef RBIMPL_ATTR_NORETURN
RBIMPL_ATTR_NORETURN()
#endif
static void raise_generator_error_str(VALUE invalid_object, VALUE str)
{
VALUE exc = rb_exc_new_str(eGeneratorError, str);
rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
rb_exc_raise(exc);
}

#ifdef RBIMPL_ATTR_NORETURN
RBIMPL_ATTR_NORETURN()
#endif
static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
VALUE str = rb_vsprintf(fmt, args);
va_end(args);
raise_generator_error_str(invalid_object, str);
}

/* Converts in_string to a JSON string (without the wrapping '"'
* characters) in FBuffer out_buffer.
*
Expand Down Expand Up @@ -867,6 +889,17 @@ static inline int enc_utf8_compatible_p(int enc_idx)
return 0;
}

static VALUE encode_json_string_try(VALUE str)
{
return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
}

static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
{
raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
return Qundef;
}

static inline VALUE ensure_valid_encoding(VALUE str)
{
int encindex = RB_ENCODING_GET(str);
Expand All @@ -886,7 +919,7 @@ static inline VALUE ensure_valid_encoding(VALUE str)
}
}

str = rb_funcall(str, i_encode, 1, Encoding_UTF_8);
str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
}
return str;
}
Expand All @@ -909,7 +942,7 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
}
break;
default:
rb_raise(rb_path2class("JSON::GeneratorError"), "source sequence is illegal/malformed utf-8");
raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
break;
}
fbuffer_append_char(buffer, '"');
Expand Down Expand Up @@ -957,10 +990,8 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
char allow_nan = state->allow_nan;
VALUE tmp = rb_funcall(obj, i_to_s, 0);
if (!allow_nan) {
if (isinf(value)) {
rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", tmp);
} else if (isnan(value)) {
rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", tmp);
if (isinf(value) || isnan(value)) {
raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", tmp);
}
}
fbuffer_append_str(buffer, tmp);
Expand Down Expand Up @@ -1008,7 +1039,7 @@ static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON
default:
general:
if (state->strict) {
rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
} else if (rb_respond_to(obj, i_to_json)) {
tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
Check_Type(tmp, T_STRING);
Expand Down Expand Up @@ -1036,10 +1067,6 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc)
struct generate_json_data *data = (struct generate_json_data *)d;
fbuffer_free(data->buffer);

if (RBASIC_CLASS(exc) == rb_path2class("Encoding::UndefinedConversionError")) {
exc = rb_exc_new_str(eGeneratorError, rb_funcall(exc, rb_intern("message"), 0));
}

rb_exc_raise(exc);

return Qundef;
Expand Down Expand Up @@ -1537,10 +1564,11 @@ void Init_generator(void)
VALUE mExt = rb_define_module_under(mJSON, "Ext");
VALUE mGenerator = rb_define_module_under(mExt, "Generator");

rb_global_variable(&eGeneratorError);
eGeneratorError = rb_path2class("JSON::GeneratorError");

rb_global_variable(&eNestingError);
eNestingError = rb_path2class("JSON::NestingError");
rb_gc_register_mark_object(eGeneratorError);
rb_gc_register_mark_object(eNestingError);

cState = rb_define_class_under(mGenerator, "State", rb_cObject);
rb_define_alloc_func(cState, cState_s_allocate);
Expand Down
34 changes: 19 additions & 15 deletions java/src/json/ext/Generator.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.jruby.RubyHash;
import org.jruby.RubyString;
import org.jruby.RubySymbol;
import org.jruby.RubyException;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
Expand Down Expand Up @@ -254,7 +255,7 @@ void generate(ThreadContext context, Session session, RubyFloat object, OutputSt

if (Double.isInfinite(value) || Double.isNaN(value)) {
if (!session.getState(context).allowNaN()) {
throw Utils.newException(context, Utils.M_GENERATOR_ERROR, object + " not allowed in JSON");
throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable();
}
}

Expand Down Expand Up @@ -429,20 +430,23 @@ int guessSize(ThreadContext context, Session session, RubyString object) {
void generate(ThreadContext context, Session session, RubyString object, OutputStream buffer) throws IOException {
try {
object = ensureValidEncoding(context, object);
StringEncoder stringEncoder = session.getStringEncoder(context);
ByteList byteList = object.getByteList();
switch (object.scanForCodeRange()) {
case StringSupport.CR_7BIT:
stringEncoder.encodeASCII(context, byteList, buffer);
break;
case StringSupport.CR_VALID:
stringEncoder.encode(context, byteList, buffer);
break;
default:
throw stringEncoder.invalidUtf8(context);
}
} catch (RaiseException re) {
throw Utils.newException(context, Utils.M_GENERATOR_ERROR, re.getMessage());
RubyException exc = Utils.buildGeneratorError(context, object, re.getMessage());
exc.setCause(re.getException());
throw exc.toThrowable();
}

StringEncoder stringEncoder = session.getStringEncoder(context);
ByteList byteList = object.getByteList();
switch (object.scanForCodeRange()) {
case StringSupport.CR_7BIT:
stringEncoder.encodeASCII(context, byteList, buffer);
break;
case StringSupport.CR_VALID:
stringEncoder.encode(context, byteList, buffer);
break;
default:
throw Utils.buildGeneratorError(context, object, "source sequence is illegal/malformed utf-8").toThrowable();
}
}
};
Expand Down Expand Up @@ -506,7 +510,7 @@ void generate(ThreadContext context, Session session, IRubyObject object, Output
RubyString generateNew(ThreadContext context, Session session, IRubyObject object) {
GeneratorState state = session.getState(context);
if (state.strict()) {
throw Utils.newException(context, Utils.M_GENERATOR_ERROR, object + " not allowed in JSON");
throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable();
} else if (object.respondsTo("to_json")) {
IRubyObject result = object.callMethod(context, "to_json", state);
if (result instanceof RubyString) return (RubyString)result;
Expand Down
2 changes: 1 addition & 1 deletion java/src/json/ext/StringEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,6 @@ private void escapeCodeUnit(char c, int auxOffset) {

@Override
protected RaiseException invalidUtf8(ThreadContext context) {
return Utils.newException(context, Utils.M_GENERATOR_ERROR, "source sequence is illegal/malformed utf-8");
return Utils.newException(context, Utils.M_GENERATOR_ERROR, "source sequence is illegal/malformed utf-8");
}
}
12 changes: 12 additions & 0 deletions java/src/json/ext/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,18 @@ static RaiseException newException(ThreadContext context,
return excptn.toThrowable();
}

static RubyException buildGeneratorError(ThreadContext context, IRubyObject invalidObject, RubyString message) {
RuntimeInfo info = RuntimeInfo.forRuntime(context.runtime);
RubyClass klazz = info.jsonModule.get().getClass(M_GENERATOR_ERROR);
RubyException excptn = (RubyException)klazz.newInstance(context, message, Block.NULL_BLOCK);
excptn.setInstanceVariable("@invalid_object", invalidObject);
return excptn;
}

static RubyException buildGeneratorError(ThreadContext context, IRubyObject invalidObject, String message) {
return buildGeneratorError(context, invalidObject, context.runtime.newString(message));
}

static byte[] repeat(ByteList a, int n) {
return repeat(a.unsafeBytes(), a.begin(), a.length(), n);
}
Expand Down
18 changes: 17 additions & 1 deletion lib/json/common.rb
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,23 @@ class CircularDatastructure < NestingError; end
# :startdoc:

# This exception is raised if a generator or unparser error occurs.
class GeneratorError < JSONError; end
class GeneratorError < JSONError
attr_reader :invalid_object

def initialize(message, invalid_object = nil)
super(message)
@invalid_object = invalid_object
end

def detailed_message(...)
if @invalid_object.nil?
super
else
"#{super}\nInvalid object: #{@invalid_object.inspect}"
end
end
end

# For backwards compatibility
UnparserError = GeneratorError # :nodoc:

Expand Down
32 changes: 17 additions & 15 deletions lib/json/truffle_ruby/generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ def utf8_to_json(string, script_safe = false) # :nodoc:
string
end

def utf8_to_json_ascii(string, script_safe = false) # :nodoc:
string = string.b
def utf8_to_json_ascii(original_string, script_safe = false) # :nodoc:
string = original_string.b
map = script_safe ? SCRIPT_SAFE_MAP : MAP
string.gsub!(/[\/"\\\x0-\x1f]/n) { map[$&] || $& }
string.gsub!(/(
Expand All @@ -74,7 +74,7 @@ def utf8_to_json_ascii(string, script_safe = false) # :nodoc:
)+ |
[\x80-\xc1\xf5-\xff] # invalid
)/nx) { |c|
c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'"
c.size == 1 and raise GeneratorError.new("invalid utf8 byte: '#{c}'", original_string)
s = c.encode(::Encoding::UTF_16BE, ::Encoding::UTF_8).unpack('H*')[0]
s.force_encoding(::Encoding::BINARY)
s.gsub!(/.{4}/n, '\\\\u\&')
Expand All @@ -83,7 +83,7 @@ def utf8_to_json_ascii(string, script_safe = false) # :nodoc:
string.force_encoding(::Encoding::UTF_8)
string
rescue => e
raise GeneratorError.wrap(e)
raise GeneratorError.new(e.message, original_string)
end

def valid_utf8?(string)
Expand Down Expand Up @@ -306,8 +306,10 @@ def generate(obj)
else
result = obj.to_json(self)
end
JSON::TruffleRuby::Generator.valid_utf8?(result) or raise GeneratorError,
"source sequence #{result.inspect} is illegal/malformed utf-8"
JSON::TruffleRuby::Generator.valid_utf8?(result) or raise GeneratorError.new(
"source sequence #{result.inspect} is illegal/malformed utf-8",
obj
)
result
end

Expand Down Expand Up @@ -364,10 +366,10 @@ def generate(obj)
begin
string = string.encode(::Encoding::UTF_8)
rescue Encoding::UndefinedConversionError => error
raise GeneratorError, error.message
raise GeneratorError.new(error.message, string)
end
end
raise GeneratorError, "source sequence is illegal/malformed utf-8" unless string.valid_encoding?
raise GeneratorError.new("source sequence is illegal/malformed utf-8", string) unless string.valid_encoding?

if /["\\\x0-\x1f]/n.match?(string)
buf << string.gsub(/["\\\x0-\x1f]/n, MAP)
Expand Down Expand Up @@ -403,7 +405,7 @@ module Object
# special method #to_json was defined for some object.
def to_json(state = nil, *)
if state && State.from_state(state).strict?
raise GeneratorError, "#{self.class} not allowed in JSON"
raise GeneratorError.new("#{self.class} not allowed in JSON", self)
else
to_s.to_json
end
Expand Down Expand Up @@ -454,7 +456,7 @@ def json_transform(state)

result = +"#{result}#{key_json}#{state.space_before}:#{state.space}"
if state.strict? && !(false == value || true == value || nil == value || String === value || Array === value || Hash === value || Integer === value || Float === value)
raise GeneratorError, "#{value.class} not allowed in JSON"
raise GeneratorError.new("#{value.class} not allowed in JSON", value)
elsif value.respond_to?(:to_json)
result << value.to_json(state)
else
Expand Down Expand Up @@ -507,7 +509,7 @@ def json_transform(state)
result << delim unless first
result << state.indent * depth if indent
if state.strict? && !(false == value || true == value || nil == value || String === value || Array === value || Hash === value || Integer === value || Float === value)
raise GeneratorError, "#{value.class} not allowed in JSON"
raise GeneratorError.new("#{value.class} not allowed in JSON", value)
elsif value.respond_to?(:to_json)
result << value.to_json(state)
else
Expand Down Expand Up @@ -536,13 +538,13 @@ def to_json(state = nil, *)
if state.allow_nan?
to_s
else
raise GeneratorError, "#{self} not allowed in JSON"
raise GeneratorError.new("#{self} not allowed in JSON", self)
end
when nan?
if state.allow_nan?
to_s
else
raise GeneratorError, "#{self} not allowed in JSON"
raise GeneratorError.new("#{self} not allowed in JSON", self)
end
else
to_s
Expand All @@ -558,7 +560,7 @@ def to_json(state = nil, *args)
state = State.from_state(state)
if encoding == ::Encoding::UTF_8
unless valid_encoding?
raise GeneratorError, "source sequence is illegal/malformed utf-8"
raise GeneratorError.new("source sequence is illegal/malformed utf-8", self)
end
string = self
else
Expand All @@ -570,7 +572,7 @@ def to_json(state = nil, *args)
%("#{JSON::TruffleRuby::Generator.utf8_to_json(string, state.script_safe)}")
end
rescue Encoding::UndefinedConversionError => error
raise ::JSON::GeneratorError, error.message
raise ::JSON::GeneratorError.new(error.message, self)
end

# Module that holds the extending methods if, the String module is
Expand Down
17 changes: 12 additions & 5 deletions test/json/json_generator_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -250,17 +250,20 @@ def test_fast_state
end

def test_allow_nan
assert_raise(GeneratorError) { generate([JSON::NaN]) }
error = assert_raise(GeneratorError) { generate([JSON::NaN]) }
assert_same JSON::NaN, error.invalid_object
assert_equal '[NaN]', generate([JSON::NaN], :allow_nan => true)
assert_raise(GeneratorError) { fast_generate([JSON::NaN]) }
assert_raise(GeneratorError) { pretty_generate([JSON::NaN]) }
assert_equal "[\n NaN\n]", pretty_generate([JSON::NaN], :allow_nan => true)
assert_raise(GeneratorError) { generate([JSON::Infinity]) }
error = assert_raise(GeneratorError) { generate([JSON::Infinity]) }
assert_same JSON::Infinity, error.invalid_object
assert_equal '[Infinity]', generate([JSON::Infinity], :allow_nan => true)
assert_raise(GeneratorError) { fast_generate([JSON::Infinity]) }
assert_raise(GeneratorError) { pretty_generate([JSON::Infinity]) }
assert_equal "[\n Infinity\n]", pretty_generate([JSON::Infinity], :allow_nan => true)
assert_raise(GeneratorError) { generate([JSON::MinusInfinity]) }
error = assert_raise(GeneratorError) { generate([JSON::MinusInfinity]) }
assert_same JSON::MinusInfinity, error.invalid_object
assert_equal '[-Infinity]', generate([JSON::MinusInfinity], :allow_nan => true)
assert_raise(GeneratorError) { fast_generate([JSON::MinusInfinity]) }
assert_raise(GeneratorError) { pretty_generate([JSON::MinusInfinity]) }
Expand Down Expand Up @@ -487,9 +490,13 @@ def test_invalid_encoding_string
["\x82\xAC\xEF".b].to_json
end

assert_raise(JSON::GeneratorError) do
{ foo: "\x82\xAC\xEF".b }.to_json
badly_encoded = "\x82\xAC\xEF".b
exception = assert_raise(JSON::GeneratorError) do
{ foo: badly_encoded }.to_json
end

assert_kind_of EncodingError, exception.cause
assert_same badly_encoded, exception.invalid_object
end

class MyCustomString < String
Expand Down

0 comments on commit 03d7414

Please sign in to comment.