Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the option to iteratively encode JSON. #29

Merged
merged 7 commits into from
Aug 13, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,21 @@ Installing
Using
-----

To encode an object as canonical JSON:

.. code:: python

import canonicaljson
assert canonicaljson.encode_canonical_json({}) == b'{}'

The underlying JSON implementation can be choosen with the following:
There's also an iterator version:

.. code:: python

import canonicaljson
assert b''.join(canonicaljson.iterencode_canonical_json({})) == b'{}'

The underlying JSON implementation can be chosen with the following:

.. code:: python

Expand Down
53 changes: 44 additions & 9 deletions canonicaljson.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,6 @@ def _default(obj):
obj.__class__.__name__)


# ideally we'd set ensure_ascii=False, but the ensure_ascii codepath is so
# much quicker (assuming c speedups are enabled) that it's actually much
# quicker to let it do that and then substitute back (it's about 2.5x faster).
#
# (in any case, simplejson's ensure_ascii doesn't get U+2028 and U+2029 right,
# as per https://github.com/simplejson/simplejson/issues/206).
#

# Declare these in the module scope, but they get configured in
# set_json_library.
_canonical_encoder = None
Expand All @@ -54,6 +46,16 @@ def set_json_library(json_lib):
json_lib: The module to use for JSON encoding. Must have a
`JSONEncoder` property.
"""

# ideally we'd set ensure_ascii=False, but the ensure_ascii codepath is so
# much quicker (assuming c speedups are enabled) that it's actually much
# quicker to let it do that and then substitute back (it's about 2.5x
# faster).
#
# (in any case, simplejson's ensure_ascii doesn't get U+2028 and U+2029
# right, as per https://github.com/simplejson/simplejson/issues/206).
#

global _canonical_encoder
_canonical_encoder = json_lib.JSONEncoder(
ensure_ascii=True,
Expand Down Expand Up @@ -160,12 +162,45 @@ def encode_canonical_json(json_object):
return _unascii(s)


def iterencode_canonical_json(json_object):
    """Iteratively produce the canonical JSON encoding of an object.

    The output is the shortest UTF-8 JSON encoding, with dictionary keys
    lexicographically sorted by unicode code point, delivered piece by
    piece instead of as one value.

    Args:
        json_object (dict): The JSON object to encode.

    Returns:
        generator which yields bytes encoding the JSON object
    """
    encoded_pieces = _canonical_encoder.iterencode(json_object)
    for piece in encoded_pieces:
        # Each piece from the encoder is passed through _unascii before
        # being handed to the caller, mirroring encode_canonical_json.
        yield _unascii(piece)


def encode_pretty_printed_json(json_object):
    """Encodes the JSON object dict as human readable ascii bytes.

    Args:
        json_object (dict): The JSON object to encode.

    Returns:
        bytes encoding the JSON object
    """
    # Encode to text with the module-level pretty encoder, then convert the
    # resulting string to ASCII bytes.
    return _pretty_encoder.encode(json_object).encode("ascii")


def iterencode_pretty_printed_json(json_object):
    """Iteratively encode the JSON object dict as human readable ascii bytes.

    Args:
        json_object (dict): The JSON object to encode.

    Returns:
        generator which yields bytes encoding the JSON object
    """
    text_chunks = _pretty_encoder.iterencode(json_object)
    for text in text_chunks:
        # The encoder yields text; convert each chunk to ASCII bytes.
        encoded = text.encode("ascii")
        yield encoded


if platform.python_implementation() == "PyPy": # pragma: no cover
# pypy ships with an optimised JSON encoder/decoder that is faster than
# simplejson's C extension.
Expand Down
6 changes: 6 additions & 0 deletions test_canonicaljson.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from canonicaljson import (
encode_canonical_json,
encode_pretty_printed_json,
iterencode_canonical_json,
iterencode_pretty_printed_json,
set_json_library,
)

Expand Down Expand Up @@ -62,8 +64,12 @@ def test_encode_canonical(self):
b'"\\\\u1234"',
)

# Iteratively encoding should work.
self.assertEqual(list(iterencode_canonical_json({})), [b'{}'])

def test_encode_pretty_printed(self):
    # The empty dict should encode to b'{}' in one shot...
    whole = encode_pretty_printed_json({})
    self.assertEqual(whole, b'{}')

    # ...and as a single chunk when encoded iteratively.
    chunks = list(iterencode_pretty_printed_json({}))
    self.assertEqual(chunks, [b'{}'])

def test_frozen_dict(self):
self.assertEqual(
Expand Down