gh-140793: Improve documentation and tests for the ensure_ascii option in the json module (GH-140906)

* Document that ensure_ascii=True forces escaping not only non-ASCII, but also
  non-printable characters (the only affected ASCII character is U+007F).
* Ensure that the help output for the json module does not exceed 80
  columns (except one long line in an example and generated lines).
* Add more tests.
This commit is contained in:
Serhiy Storchaka
2025-11-08 12:07:27 +02:00
committed by GitHub
parent 8cec3d3a9d
commit 7e90bac3cc
6 changed files with 89 additions and 43 deletions

View File

@@ -183,8 +183,10 @@ Basic Usage
:param bool ensure_ascii: :param bool ensure_ascii:
If ``True`` (the default), the output is guaranteed to If ``True`` (the default), the output is guaranteed to
have all incoming non-ASCII characters escaped. have all incoming non-ASCII and non-printable characters escaped.
If ``False``, these characters will be outputted as-is. If ``False``, all characters will be outputted as-is, except for
the characters that must be escaped: quotation mark, reverse solidus,
and the control characters U+0000 through U+001F.
:param bool check_circular: :param bool check_circular:
If ``False``, the circular reference check for container types is skipped If ``False``, the circular reference check for container types is skipped
@@ -495,8 +497,10 @@ Encoders and Decoders
:class:`bool` or ``None``. If *skipkeys* is true, such items are simply skipped. :class:`bool` or ``None``. If *skipkeys* is true, such items are simply skipped.
If *ensure_ascii* is true (the default), the output is guaranteed to If *ensure_ascii* is true (the default), the output is guaranteed to
have all incoming non-ASCII characters escaped. If *ensure_ascii* is have all incoming non-ASCII and non-printable characters escaped.
false, these characters will be output as-is. If *ensure_ascii* is false, all characters will be output as-is, except for
the characters that must be escaped: quotation mark, reverse solidus,
and the control characters U+0000 through U+001F.
If *check_circular* is true (the default), then lists, dicts, and custom If *check_circular* is true (the default), then lists, dicts, and custom
encoded objects will be checked for circular references during encoding to encoded objects will be checked for circular references during encoding to
@@ -636,7 +640,7 @@ UTF-32, with UTF-8 being the recommended default for maximum interoperability.
As permitted, though not required, by the RFC, this module's serializer sets As permitted, though not required, by the RFC, this module's serializer sets
*ensure_ascii=True* by default, thus escaping the output so that the resulting *ensure_ascii=True* by default, thus escaping the output so that the resulting
strings only contain ASCII characters. strings only contain printable ASCII characters.
Other than the *ensure_ascii* parameter, this module is defined strictly in Other than the *ensure_ascii* parameter, this module is defined strictly in
terms of conversion between Python objects and terms of conversion between Python objects and

View File

@@ -127,8 +127,9 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
instead of raising a ``TypeError``. instead of raising a ``TypeError``.
If ``ensure_ascii`` is false, then the strings written to ``fp`` can If ``ensure_ascii`` is false, then the strings written to ``fp`` can
contain non-ASCII characters if they appear in strings contained in contain non-ASCII and non-printable characters if they appear in strings
``obj``. Otherwise, all such characters are escaped in JSON strings. contained in ``obj``. Otherwise, all such characters are escaped in JSON
strings.
If ``check_circular`` is false, then the circular reference check If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will for container types will be skipped and a circular reference will
@@ -144,10 +145,11 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
level of 0 will only insert newlines. ``None`` is the most compact level of 0 will only insert newlines. ``None`` is the most compact
representation. representation.
If specified, ``separators`` should be an ``(item_separator, key_separator)`` If specified, ``separators`` should be an ``(item_separator,
tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and key_separator)`` tuple. The default is ``(', ', ': ')`` if *indent* is
``(',', ': ')`` otherwise. To get the most compact JSON representation, ``None`` and ``(',', ': ')`` otherwise. To get the most compact JSON
you should specify ``(',', ':')`` to eliminate whitespace. representation, you should specify ``(',', ':')`` to eliminate
whitespace.
``default(obj)`` is a function that should return a serializable version ``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError. of obj or raise TypeError. The default simply raises TypeError.
@@ -188,9 +190,10 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
(``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
instead of raising a ``TypeError``. instead of raising a ``TypeError``.
If ``ensure_ascii`` is false, then the return value can contain non-ASCII If ``ensure_ascii`` is false, then the return value can contain
characters if they appear in strings contained in ``obj``. Otherwise, all non-ASCII and non-printable characters if they appear in strings
such characters are escaped in JSON strings. contained in ``obj``. Otherwise, all such characters are escaped in
JSON strings.
If ``check_circular`` is false, then the circular reference check If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will for container types will be skipped and a circular reference will
@@ -206,10 +209,11 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
level of 0 will only insert newlines. ``None`` is the most compact level of 0 will only insert newlines. ``None`` is the most compact
representation. representation.
If specified, ``separators`` should be an ``(item_separator, key_separator)`` If specified, ``separators`` should be an ``(item_separator,
tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and key_separator)`` tuple. The default is ``(', ', ': ')`` if *indent* is
``(',', ': ')`` otherwise. To get the most compact JSON representation, ``None`` and ``(',', ': ')`` otherwise. To get the most compact JSON
you should specify ``(',', ':')`` to eliminate whitespace. representation, you should specify ``(',', ':')`` to eliminate
whitespace.
``default(obj)`` is a function that should return a serializable version ``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError. of obj or raise TypeError. The default simply raises TypeError.
@@ -280,11 +284,12 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None,
``object_hook`` will be used instead of the ``dict``. This feature ``object_hook`` will be used instead of the ``dict``. This feature
can be used to implement custom decoders (e.g. JSON-RPC class hinting). can be used to implement custom decoders (e.g. JSON-RPC class hinting).
``object_pairs_hook`` is an optional function that will be called with the ``object_pairs_hook`` is an optional function that will be called with
result of any object literal decoded with an ordered list of pairs. The the result of any object literal decoded with an ordered list of pairs.
return value of ``object_pairs_hook`` will be used instead of the ``dict``. The return value of ``object_pairs_hook`` will be used instead of the
This feature can be used to implement custom decoders. If ``object_hook`` ``dict``. This feature can be used to implement custom decoders. If
is also defined, the ``object_pairs_hook`` takes priority. ``object_hook`` is also defined, the ``object_pairs_hook`` takes
priority.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg; otherwise ``JSONDecoder`` is used. kwarg; otherwise ``JSONDecoder`` is used.
@@ -305,11 +310,12 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None,
``object_hook`` will be used instead of the ``dict``. This feature ``object_hook`` will be used instead of the ``dict``. This feature
can be used to implement custom decoders (e.g. JSON-RPC class hinting). can be used to implement custom decoders (e.g. JSON-RPC class hinting).
``object_pairs_hook`` is an optional function that will be called with the ``object_pairs_hook`` is an optional function that will be called with
result of any object literal decoded with an ordered list of pairs. The the result of any object literal decoded with an ordered list of pairs.
return value of ``object_pairs_hook`` will be used instead of the ``dict``. The return value of ``object_pairs_hook`` will be used instead of the
This feature can be used to implement custom decoders. If ``object_hook`` ``dict``. This feature can be used to implement custom decoders. If
is also defined, the ``object_pairs_hook`` takes priority. ``object_hook`` is also defined, the ``object_pairs_hook`` takes
priority.
``parse_float``, if specified, will be called with the string ``parse_float``, if specified, will be called with the string
of every JSON float to be decoded. By default this is equivalent to of every JSON float to be decoded. By default this is equivalent to

View File

@@ -297,10 +297,10 @@ class JSONDecoder(object):
place of the given ``dict``. This can be used to provide custom place of the given ``dict``. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting). deserializations (e.g. to support JSON-RPC class hinting).
``object_pairs_hook``, if specified, will be called with the result of ``object_pairs_hook``, if specified, will be called with the result
every JSON object decoded with an ordered list of pairs. The return of every JSON object decoded with an ordered list of pairs. The
value of ``object_pairs_hook`` will be used instead of the ``dict``. return value of ``object_pairs_hook`` will be used instead of the
This feature can be used to implement custom decoders. ``dict``. This feature can be used to implement custom decoders.
If ``object_hook`` is also defined, the ``object_pairs_hook`` takes If ``object_hook`` is also defined, the ``object_pairs_hook`` takes
priority. priority.

View File

@@ -111,9 +111,10 @@ class JSONEncoder(object):
encoding of keys that are not str, int, float, bool or None. encoding of keys that are not str, int, float, bool or None.
If skipkeys is True, such items are simply skipped. If skipkeys is True, such items are simply skipped.
If ensure_ascii is true, the output is guaranteed to be str If ensure_ascii is true, the output is guaranteed to be str objects
objects with all incoming non-ASCII characters escaped. If with all incoming non-ASCII and non-printable characters escaped.
ensure_ascii is false, the output can contain non-ASCII characters. If ensure_ascii is false, the output can contain non-ASCII and
non-printable characters.
If check_circular is true, then lists, dicts, and custom encoded If check_circular is true, then lists, dicts, and custom encoded
objects will be checked for circular references during encoding to objects will be checked for circular references during encoding to
@@ -134,14 +135,15 @@ class JSONEncoder(object):
indent level. An indent level of 0 will only insert newlines. indent level. An indent level of 0 will only insert newlines.
None is the most compact representation. None is the most compact representation.
If specified, separators should be an (item_separator, key_separator) If specified, separators should be an (item_separator,
tuple. The default is (', ', ': ') if *indent* is ``None`` and key_separator) tuple. The default is (', ', ': ') if *indent* is
(',', ': ') otherwise. To get the most compact JSON representation, ``None`` and (',', ': ') otherwise. To get the most compact JSON
you should specify (',', ':') to eliminate whitespace. representation, you should specify (',', ':') to eliminate
whitespace.
If specified, default is a function that gets called for objects If specified, default is a function that gets called for objects
that can't otherwise be serialized. It should return a JSON encodable that can't otherwise be serialized. It should return a JSON
version of the object or raise a ``TypeError``. encodable version of the object or raise a ``TypeError``.
""" """

View File

@@ -8,13 +8,12 @@ CASES = [
('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
('controls', '"controls"'), ('controls', '"controls"'),
('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
('\x00\x1f\x7f', '"\\u0000\\u001f\\u007f"'),
('{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'), ('{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'),
(' s p a c e d ', '" s p a c e d "'), (' s p a c e d ', '" s p a c e d "'),
('\U0001d120', '"\\ud834\\udd20"'), ('\U0001d120', '"\\ud834\\udd20"'),
('\u03b1\u03a9', '"\\u03b1\\u03a9"'), ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'), ("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
] ]
class TestEncodeBasestringAscii: class TestEncodeBasestringAscii:

View File

@@ -32,6 +32,29 @@ class TestUnicode:
j = self.dumps(u + "\n", ensure_ascii=False) j = self.dumps(u + "\n", ensure_ascii=False)
self.assertEqual(j, f'"{u}\\n"') self.assertEqual(j, f'"{u}\\n"')
def test_ascii_non_printable_encode(self):
    """Check how ASCII control characters and DEL (U+007F) are serialized.

    With default options every character of the sample, DEL included,
    must come out escaped.  With ``ensure_ascii=False`` the characters
    below U+0020 are still escaped but DEL is emitted verbatim.
    """
    sample = '\b\t\n\f\r\x00\x1f\x7f'
    expectations = (
        # (extra keyword arguments for dumps, expected JSON text)
        ({}, '"\\b\\t\\n\\f\\r\\u0000\\u001f\\u007f"'),
        ({'ensure_ascii': False}, '"\\b\\t\\n\\f\\r\\u0000\\u001f\x7f"'),
    )
    for options, expected in expectations:
        self.assertEqual(self.dumps(sample, **options), expected)
def test_ascii_non_printable_decode(self):
    """Check decoding of ASCII control characters and DEL (U+007F).

    * The short escapes ``\\b \\t \\n \\f \\r`` decode to the matching
      control characters.
    * A raw character in U+0000..U+001F inside a string is rejected with
      ``JSONDecodeError`` by default, and accepted when ``strict=False``.
    * A raw DEL (U+007F) is accepted by default.
    """
    self.assertEqual(self.loads('"\\b\\t\\n\\f\\r"'),
                     '\b\t\n\f\r')
    s = ''.join(map(chr, range(32)))
    for c in s:
        # One sub-test per character: a failure report names the offending
        # code point and the remaining characters are still checked.
        with self.subTest(c=c):
            self.assertRaises(self.JSONDecodeError, self.loads, f'"{c}"')
    self.assertEqual(self.loads(f'"{s}"', strict=False), s)
    self.assertEqual(self.loads('"\x7f"'), '\x7f')
def test_escaped_decode(self):
    """Check which backslash escapes the decoder accepts.

    The two-character escapes for ``b t n f r`` and for the quotation
    mark, reverse solidus and solidus must decode to the corresponding
    characters.  A backslash followed by any other character in
    U+0000..U+00FF must raise ``JSONDecodeError``, both by default and
    with ``strict=False``.  That set includes a bare ``\\u`` with no hex
    digits after it.
    """
    self.assertEqual(self.loads('"\\b\\t\\n\\f\\r"'), '\b\t\n\f\r')
    self.assertEqual(self.loads('"\\"\\\\\\/"'), '"\\/')
    valid_escapes = set('"\\/bfnrt')
    # Walk the code points in order (not via a set of strings, whose
    # iteration order changes with hash randomization) so runs are
    # reproducible.
    for c in map(chr, range(0x100)):
        if c in valid_escapes:
            continue
        # One sub-test per character so a failure names the escape that
        # was wrongly accepted and the rest are still exercised.
        with self.subTest(c=c):
            self.assertRaises(self.JSONDecodeError, self.loads, f'"\\{c}"')
            self.assertRaises(self.JSONDecodeError, self.loads, f'"\\{c}"', strict=False)
def test_big_unicode_encode(self): def test_big_unicode_encode(self):
u = '\U0001d120' u = '\U0001d120'
self.assertEqual(self.dumps(u), '"\\ud834\\udd20"') self.assertEqual(self.dumps(u), '"\\ud834\\udd20"')
@@ -48,6 +71,18 @@ class TestUnicode:
s = f'"\\u{i:04x}"' s = f'"\\u{i:04x}"'
self.assertEqual(self.loads(s), u) self.assertEqual(self.loads(s), u)
def test_single_surrogate_encode(self):
    """Check serialization of a lone high and a lone low surrogate.

    By default each surrogate is written as a ``\\uXXXX`` escape; with
    ``ensure_ascii=False`` it is written through unchanged.
    """
    cases = (
        # (input string, default output, output with ensure_ascii=False)
        ('\uD83D', '"\\ud83d"', '"\ud83d"'),
        ('\uDC0D', '"\\udc0d"', '"\udc0d"'),
    )
    for surrogate, escaped, verbatim in cases:
        self.assertEqual(self.dumps(surrogate), escaped)
        self.assertEqual(self.dumps(surrogate, ensure_ascii=False), verbatim)
def test_single_surrogate_decode(self):
    """Check parsing of a lone high and a lone low surrogate.

    Each surrogate is supplied both as a raw character and as a
    ``\\uXXXX`` escape; every form must decode to the surrogate itself.
    """
    cases = (
        # (JSON document, expected decoded string)
        ('"\uD83D"', '\ud83d'),
        ('"\\uD83D"', '\ud83d'),
        ('"\udc0d"', '\udc0d'),
        ('"\\udc0d"', '\udc0d'),
    )
    for document, expected in cases:
        self.assertEqual(self.loads(document), expected)
def test_unicode_preservation(self): def test_unicode_preservation(self):
self.assertEqual(type(self.loads('""')), str) self.assertEqual(type(self.loads('""')), str)
self.assertEqual(type(self.loads('"a"')), str) self.assertEqual(type(self.loads('"a"')), str)