gh-136702: Deprecate passing non-ascii *encoding* (str) to encodings.normalize_encoding (#140030)
Closes #136702
This commit is contained in:
@@ -23,6 +23,12 @@ Pending removal in Python 3.17
|
|||||||
(Contributed by Shantanu Jain in :gh:`91896`.)
|
(Contributed by Shantanu Jain in :gh:`91896`.)
|
||||||
|
|
||||||
|
|
||||||
|
* :mod:`encodings`:
|
||||||
|
|
||||||
|
- Passing non-ASCII *encoding* names to :func:`encodings.normalize_encoding`
|
||||||
|
is deprecated and scheduled for removal in Python 3.17.
|
||||||
|
(Contributed by Stan Ulbrych in :gh:`136702`.)
|
||||||
|
|
||||||
* :mod:`typing`:
|
* :mod:`typing`:
|
||||||
|
|
||||||
- Before Python 3.14, old-style unions were implemented using the private class
|
- Before Python 3.14, old-style unions were implemented using the private class
|
||||||
|
|||||||
@@ -796,6 +796,10 @@ class MimeParameters(TokenList):
|
|||||||
value = urllib.parse.unquote(value, encoding='latin-1')
|
value = urllib.parse.unquote(value, encoding='latin-1')
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
|
# Explicitly look up the codec for warning generation, see gh-140030
|
||||||
|
# Can be removed in 3.17
|
||||||
|
import codecs
|
||||||
|
codecs.lookup(charset)
|
||||||
value = value.decode(charset, 'surrogateescape')
|
value = value.decode(charset, 'surrogateescape')
|
||||||
except (LookupError, UnicodeEncodeError):
|
except (LookupError, UnicodeEncodeError):
|
||||||
# XXX: there should really be a custom defect for
|
# XXX: there should really be a custom defect for
|
||||||
|
|||||||
@@ -460,6 +460,10 @@ def collapse_rfc2231_value(value, errors='replace',
|
|||||||
charset = fallback_charset
|
charset = fallback_charset
|
||||||
rawbytes = bytes(text, 'raw-unicode-escape')
|
rawbytes = bytes(text, 'raw-unicode-escape')
|
||||||
try:
|
try:
|
||||||
|
# Explicitly look up the codec for warning generation, see gh-140030
|
||||||
|
# Can be removed in 3.17
|
||||||
|
import codecs
|
||||||
|
codecs.lookup(charset)
|
||||||
return str(rawbytes, charset, errors)
|
return str(rawbytes, charset, errors)
|
||||||
except LookupError:
|
except LookupError:
|
||||||
# charset is not a known codec.
|
# charset is not a known codec.
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
|
|||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
|
||||||
"""#"
|
"""
|
||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
import sys
|
import sys
|
||||||
@@ -56,6 +56,12 @@ def normalize_encoding(encoding):
|
|||||||
if isinstance(encoding, bytes):
|
if isinstance(encoding, bytes):
|
||||||
encoding = str(encoding, "ascii")
|
encoding = str(encoding, "ascii")
|
||||||
|
|
||||||
|
if not encoding.isascii():
|
||||||
|
import warnings
|
||||||
|
warnings.warn(
|
||||||
|
"Support for non-ascii encoding names will be removed in 3.17",
|
||||||
|
DeprecationWarning, stacklevel=2)
|
||||||
|
|
||||||
return _normalize_encoding(encoding)
|
return _normalize_encoding(encoding)
|
||||||
|
|
||||||
def search_function(encoding):
|
def search_function(encoding):
|
||||||
|
|||||||
@@ -3886,15 +3886,14 @@ class CodecNameNormalizationTest(unittest.TestCase):
|
|||||||
self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa-8', 2, 3, 4))
|
self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa-8', 2, 3, 4))
|
||||||
self.assertEqual(codecs.lookup('TEST.AAA---8'), ('test.aaa---8', 2, 3, 4))
|
self.assertEqual(codecs.lookup('TEST.AAA---8'), ('test.aaa---8', 2, 3, 4))
|
||||||
self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa---8', 2, 3, 4))
|
self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa---8', 2, 3, 4))
|
||||||
self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
|
|
||||||
self.assertEqual(codecs.lookup('TEST.AAA.8'), ('test.aaa.8', 2, 3, 4))
|
self.assertEqual(codecs.lookup('TEST.AAA.8'), ('test.aaa.8', 2, 3, 4))
|
||||||
self.assertEqual(codecs.lookup('TEST.AAA...8'), ('test.aaa...8', 2, 3, 4))
|
self.assertEqual(codecs.lookup('TEST.AAA...8'), ('test.aaa...8', 2, 3, 4))
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
|
||||||
|
|
||||||
def test_encodings_normalize_encoding(self):
|
def test_encodings_normalize_encoding(self):
|
||||||
# encodings.normalize_encoding() ignores non-ASCII characters.
|
|
||||||
normalize = encodings.normalize_encoding
|
normalize = encodings.normalize_encoding
|
||||||
self.assertEqual(normalize('utf_8'), 'utf_8')
|
self.assertEqual(normalize('utf_8'), 'utf_8')
|
||||||
self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
|
|
||||||
self.assertEqual(normalize('utf 8'), 'utf_8')
|
self.assertEqual(normalize('utf 8'), 'utf_8')
|
||||||
# encodings.normalize_encoding() doesn't convert
|
# encodings.normalize_encoding() doesn't convert
|
||||||
# characters to lower case.
|
# characters to lower case.
|
||||||
@@ -3902,6 +3901,11 @@ class CodecNameNormalizationTest(unittest.TestCase):
|
|||||||
self.assertEqual(normalize('utf.8'), 'utf.8')
|
self.assertEqual(normalize('utf.8'), 'utf.8')
|
||||||
self.assertEqual(normalize('utf...8'), 'utf...8')
|
self.assertEqual(normalize('utf...8'), 'utf...8')
|
||||||
|
|
||||||
|
# Non-ASCII *encoding* is deprecated.
|
||||||
|
with self.assertWarnsRegex(DeprecationWarning,
|
||||||
|
"Support for non-ascii encoding names will be removed in 3.17"):
|
||||||
|
self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
@@ -5738,7 +5738,8 @@ Content-Disposition: inline; filename*=utf-8\udce2\udc80\udc9d''myfile.txt
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
msg = email.message_from_string(m)
|
msg = email.message_from_string(m)
|
||||||
self.assertEqual(msg.get_filename(), 'myfile.txt')
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
self.assertEqual(msg.get_filename(), 'myfile.txt')
|
||||||
|
|
||||||
def test_rfc2231_single_tick_in_filename_extended(self):
|
def test_rfc2231_single_tick_in_filename_extended(self):
|
||||||
eq = self.assertEqual
|
eq = self.assertEqual
|
||||||
|
|||||||
@@ -247,7 +247,15 @@ class TestContentTypeHeader(TestHeaderBase):
|
|||||||
decoded = args[2] if l>2 and args[2] is not DITTO else source
|
decoded = args[2] if l>2 and args[2] is not DITTO else source
|
||||||
header = 'Content-Type:' + ' ' if source else ''
|
header = 'Content-Type:' + ' ' if source else ''
|
||||||
folded = args[3] if l>3 else header + decoded + '\n'
|
folded = args[3] if l>3 else header + decoded + '\n'
|
||||||
h = self.make_header('Content-Type', source)
|
# Both rfc2231 test cases with utf-8%E2%80%9D raise warnings,
|
||||||
|
# clear encoding cache to ensure test isolation.
|
||||||
|
if 'utf-8%E2%80%9D' in source and 'ascii' not in source:
|
||||||
|
import encodings
|
||||||
|
encodings._cache.clear()
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
h = self.make_header('Content-Type', source)
|
||||||
|
else:
|
||||||
|
h = self.make_header('Content-Type', source)
|
||||||
self.assertEqual(h.content_type, content_type)
|
self.assertEqual(h.content_type, content_type)
|
||||||
self.assertEqual(h.maintype, maintype)
|
self.assertEqual(h.maintype, maintype)
|
||||||
self.assertEqual(h.subtype, subtype)
|
self.assertEqual(h.subtype, subtype)
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
:mod:`encodings`: Deprecate passing a non-ASCII *encoding* name to
|
||||||
|
:func:`encodings.normalize_encoding` and schedule removal of support for
|
||||||
|
Python 3.17.
|
||||||
Reference in New Issue
Block a user