gh-130631: Make join_header_words() more similar to the original Perl version (GH-130632)

* Always quote strings with non-ASCII characters.
* Allow some non-separator and non-control characters (like "." or "-")
  to be unquoted.
* Always quote strings that end with "\n".
* Use the fullmatch() method for clarity and optimization.
This commit is contained in:
Serhiy Storchaka
2025-04-09 11:08:04 +03:00
committed by GitHub
parent 16dcb576f7
commit 7ebbd27144
3 changed files with 23 additions and 9 deletions

View File

@@ -430,6 +430,7 @@ def split_header_words(header_values):
if pairs: result.append(pairs) if pairs: result.append(pairs)
return result return result
HEADER_JOIN_TOKEN_RE = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists): def join_header_words(lists):
"""Do the inverse (almost) of the conversion done by split_header_words. """Do the inverse (almost) of the conversion done by split_header_words.
@@ -437,10 +438,10 @@ def join_header_words(lists):
Takes a list of lists of (key, value) pairs and produces a single header Takes a list of lists of (key, value) pairs and produces a single header
value. Attribute values are quoted if needed. value. Attribute values are quoted if needed.
>>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]]) >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
'text/plain; charset="iso-8859-1"' 'text/plain; charset="iso-8859/1"'
>>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]]) >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
'text/plain, charset="iso-8859-1"' 'text/plain, charset="iso-8859/1"'
""" """
headers = [] headers = []
@@ -448,7 +449,7 @@ def join_header_words(lists):
attr = [] attr = []
for k, v in pairs: for k, v in pairs:
if v is not None: if v is not None:
if not re.search(r"^\w+$", v): if not HEADER_JOIN_TOKEN_RE.fullmatch(v):
v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \
v = '"%s"' % v v = '"%s"' % v
k = "%s=%s" % (k, v) k = "%s=%s" % (k, v)

View File

@@ -285,11 +285,21 @@ Got: '%s'
("foo=bar;bar=baz", "foo=bar; bar=baz"), ("foo=bar;bar=baz", "foo=bar; bar=baz"),
('foo bar baz', "foo; bar; baz"), ('foo bar baz', "foo; bar; baz"),
(r'foo="\"" bar="\\"', r'foo="\""; bar="\\"'), (r'foo="\"" bar="\\"', r'foo="\""; bar="\\"'),
("föo=bär", 'föo="bär"'),
('foo,,,bar', 'foo, bar'), ('foo,,,bar', 'foo, bar'),
('foo=bar,bar=baz', 'foo=bar, bar=baz'), ('foo=bar,bar=baz', 'foo=bar, bar=baz'),
("foo=\n", 'foo=""'),
('foo="\n"', 'foo="\n"'),
('foo=bar\n', 'foo=bar'),
('foo="bar\n"', 'foo="bar\n"'),
('foo=bar\nbaz', 'foo=bar; baz'),
('foo="bar\nbaz"', 'foo="bar\nbaz"'),
('text/html; charset=iso-8859-1', ('text/html; charset=iso-8859-1',
'text/html; charset="iso-8859-1"'), 'text/html; charset=iso-8859-1'),
('text/html; charset="iso-8859/1"',
'text/html; charset="iso-8859/1"'),
('foo="bar"; port="80,81"; discard, bar=baz', ('foo="bar"; port="80,81"; discard, bar=baz',
'foo=bar; port="80,81"; discard, bar=baz'), 'foo=bar; port="80,81"; discard, bar=baz'),
@@ -297,8 +307,8 @@ Got: '%s'
(r'Basic realm="\"foo\\\\bar\""', (r'Basic realm="\"foo\\\\bar\""',
r'Basic; realm="\"foo\\\\bar\""'), r'Basic; realm="\"foo\\\\bar\""'),
('n; foo="foo;_", bar=foo!_', ('n; foo="foo;_", bar="foo,_"',
'n; foo="foo;_", bar="foo!_"'), 'n; foo="foo;_", bar="foo,_"'),
] ]
for arg, expect in tests: for arg, expect in tests:
@@ -553,7 +563,7 @@ class CookieTests(unittest.TestCase):
self.assertIsNone(cookie.value) self.assertIsNone(cookie.value)
self.assertEqual(cookie.name, '"spam"') self.assertEqual(cookie.name, '"spam"')
self.assertEqual(lwp_cookie_str(cookie), ( self.assertEqual(lwp_cookie_str(cookie), (
r'"spam"; path="/foo/"; domain="www.acme.com"; ' r'"spam"; path="/foo/"; domain=www.acme.com; '
'path_spec; discard; version=0')) 'path_spec; discard; version=0'))
old_str = repr(c) old_str = repr(c)
c.save(ignore_expires=True, ignore_discard=True) c.save(ignore_expires=True, ignore_discard=True)

View File

@@ -0,0 +1,3 @@
:func:`!http.cookiejar.join_header_words` is now more similar to the original
Perl version. It now quotes the same set of characters and always quotes
values that end with ``"\n"``.