diff --git a/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch b/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch new file mode 100644 index 0000000000..6da5b5ba42 --- /dev/null +++ b/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch @@ -0,0 +1,344 @@ +From d33bc21414e283c9e6fe7f6caf69e2ed60d66c82 Mon Sep 17 00:00:00 2001 +From: Sam Bull +Date: Sun, 28 Jan 2024 17:09:58 +0000 +Subject: [PATCH] Improve validation in HTTP parser (#8074) (#8078) + +Co-authored-by: Paul J. Dorn +Co-authored-by: Sviatoslav Sydorenko (Святослав Сидоренко) + +(cherry picked from commit 33ccdfb0a12690af5bb49bda2319ec0907fa7827) + +CVE: CVE-2024-23829 + +Upstream-Status: Backport [https://github.com/aio-libs/aiohttp/commit/d33bc21414e283c9e6fe7f6caf69e2ed60d66c82] + +Signed-off-by: Soumya Sambu +--- + CONTRIBUTORS.txt | 1 + + aiohttp/http_parser.py | 30 ++++---- + tests/test_http_parser.py | 139 +++++++++++++++++++++++++++++++++++++- + 3 files changed, 155 insertions(+), 15 deletions(-) + +diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt +index f8a8df5..b9cdf75 100644 +--- a/CONTRIBUTORS.txt ++++ b/CONTRIBUTORS.txt +@@ -240,6 +240,7 @@ Panagiotis Kolokotronis + Pankaj Pandey + Pau Freixes + Paul Colomiets ++Paul J. Dorn + Paulius Šileikis + Paulus Schoutsen + Pavel Kamaev +diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py +index 175eb7f..91784b3 100644 +--- a/aiohttp/http_parser.py ++++ b/aiohttp/http_parser.py +@@ -76,10 +76,11 @@ ASCIISET: Final[Set[str]] = set(string.printable) + # tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / + # "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA + # token = 1*tchar +-METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+") +-VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)") +-HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]") +-HEXDIGIT = re.compile(rb"[0-9a-fA-F]+") ++_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~") ++TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+") ++VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII) ++DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII) ++HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+") + + + class RawRequestMessage(NamedTuple): +@@ -145,6 +146,7 @@ class HeadersParser: + self, lines: List[bytes] + ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]: + headers: CIMultiDict[str] = CIMultiDict() ++ # note: "raw" does not mean inclusion of OWS before/after the field value + raw_headers = [] + + lines_idx = 0 +@@ -158,13 +160,14 @@ class HeadersParser: + except ValueError: + raise InvalidHeader(line) from None + ++ if len(bname) == 0: ++ raise InvalidHeader(bname) ++ + # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2 + if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"} + raise InvalidHeader(line) + + bvalue = bvalue.lstrip(b" \t") +- if HDRRE.search(bname): +- raise InvalidHeader(bname) + if len(bname) > self.max_field_size: + raise LineTooLong( + "request header name {}".format( +@@ -173,6 +176,9 @@ class HeadersParser: + str(self.max_field_size), + str(len(bname)), + ) ++ name = bname.decode("utf-8", "surrogateescape") ++ if not TOKENRE.fullmatch(name): ++ raise InvalidHeader(bname) + + header_length = len(bvalue) + +@@ -220,7 +226,6 @@ class HeadersParser: + ) + + bvalue = bvalue.strip(b" \t") +- name = bname.decode("utf-8", "surrogateescape") + value = bvalue.decode("utf-8", "surrogateescape") + + # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5 +@@ -348,7 +353,8 @@ class HttpParser(abc.ABC, Generic[_MsgT]): + + # Shouldn't allow +/- or other number formats. + # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2 +- if not length_hdr.strip(" \t").isdecimal(): ++ # msg.headers is already stripped of leading/trailing wsp ++ if not DIGITS.fullmatch(length_hdr): + raise InvalidHeader(CONTENT_LENGTH) + + return int(length_hdr) +@@ -582,7 +588,7 @@ class HttpRequestParser(HttpParser[RawRequestMessage]): + ) + + # method +- if not METHRE.match(method): ++ if not TOKENRE.fullmatch(method): + raise BadStatusLine(method) + + # version +@@ -690,8 +696,8 @@ class HttpResponseParser(HttpParser[RawResponseMessage]): + raise BadStatusLine(line) + version_o = HttpVersion(int(match.group(1)), int(match.group(2))) + +- # The status code is a three-digit number +- if len(status) != 3 or not status.isdecimal(): ++ # The status code is a three-digit ASCII number, no padding ++ if len(status) != 3 or not DIGITS.fullmatch(status): + raise BadStatusLine(line) + status_i = int(status) + +@@ -844,7 +850,7 @@ class HttpPayloadParser: + if self._lax: # Allow whitespace in lax mode. + size_b = size_b.strip() + +- if not re.fullmatch(HEXDIGIT, size_b): ++ if not re.fullmatch(HEXDIGITS, size_b): + exc = TransferEncodingError( + chunk[:pos].decode("ascii", "surrogateescape") + ) +diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py +index 4b185c9..bcf6058 100644 +--- a/tests/test_http_parser.py ++++ b/tests/test_http_parser.py +@@ -2,7 +2,8 @@ + + import asyncio + import re +-from typing import Any, List ++from contextlib import nullcontext ++from typing import Any, Dict, List + from unittest import mock + from urllib.parse import quote + +@@ -168,12 +169,28 @@ def test_cve_2023_37276(parser) -> None: + parser.feed_data(text) + + ++@pytest.mark.parametrize( ++ "rfc9110_5_6_2_token_delim", ++ r'"(),/:;<=>?@[\]{}', ++) ++def test_bad_header_name(parser: Any, rfc9110_5_6_2_token_delim: str) -> None: ++ text = f"POST / HTTP/1.1\r\nhead{rfc9110_5_6_2_token_delim}er: val\r\n\r\n".encode() ++ expectation = pytest.raises(http_exceptions.BadHttpMessage) ++ if rfc9110_5_6_2_token_delim == ":": ++ # Inserting colon into header just splits name/value earlier. ++ expectation = nullcontext() ++ with expectation: ++ parser.feed_data(text) ++ ++ + @pytest.mark.parametrize( + "hdr", + ( + # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length + "Content-Length: -5", + "Content-Length: +256", ++ "Content-Length: \N{superscript one}", ++ "Content-Length: \N{mathematical double-struck digit one}", + "Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5 + "Bar: abc\ndef", + "Baz: abc\x00def", +@@ -240,6 +257,20 @@ def test_whitespace_before_header(parser) -> None: + parser.feed_data(text) + + ++def test_parse_unusual_request_line(parser) -> None: ++ if not isinstance(response, HttpResponseParserPy): ++ pytest.xfail("Regression test for Py parser. May match C behaviour later.") ++ text = b"#smol //a HTTP/1.3\r\n\r\n" ++ messages, upgrade, tail = parser.feed_data(text) ++ assert len(messages) == 1 ++ msg, _ = messages[0] ++ assert msg.compression is None ++ assert not msg.upgrade ++ assert msg.method == "#smol" ++ assert msg.path == "//a" ++ assert msg.version == (1, 3) ++ ++ + def test_parse(parser) -> None: + text = b"GET /test HTTP/1.1\r\n\r\n" + messages, upgrade, tail = parser.feed_data(text) +@@ -533,6 +564,43 @@ def test_headers_content_length_err_2(parser) -> None: + parser.feed_data(text) + + ++_pad: Dict[bytes, str] = { ++ b"": "empty", ++ # not a typo. Python likes triple zero ++ b"\000": "NUL", ++ b" ": "SP", ++ b" ": "SPSP", ++ # not a typo: both 0xa0 and 0x0a in case of 8-bit fun ++ b"\n": "LF", ++ b"\xa0": "NBSP", ++ b"\t ": "TABSP", ++} ++ ++ ++@pytest.mark.parametrize("hdr", [b"", b"foo"], ids=["name-empty", "with-name"]) ++@pytest.mark.parametrize("pad2", _pad.keys(), ids=["post-" + n for n in _pad.values()]) ++@pytest.mark.parametrize("pad1", _pad.keys(), ids=["pre-" + n for n in _pad.values()]) ++def test_invalid_header_spacing(parser, pad1: bytes, pad2: bytes, hdr: bytes) -> None: ++ text = b"GET /test HTTP/1.1\r\n" b"%s%s%s: value\r\n\r\n" % (pad1, hdr, pad2) ++ expectation = pytest.raises(http_exceptions.BadHttpMessage) ++ if pad1 == pad2 == b"" and hdr != b"": ++ # one entry in param matrix is correct: non-empty name, not padded ++ expectation = nullcontext() ++ if pad1 == pad2 == hdr == b"": ++ if not isinstance(response, HttpResponseParserPy): ++ pytest.xfail("Regression test for Py parser. May match C behaviour later.") ++ with expectation: ++ parser.feed_data(text) ++ ++ ++def test_empty_header_name(parser) -> None: ++ if not isinstance(response, HttpResponseParserPy): ++ pytest.xfail("Regression test for Py parser. May match C behaviour later.") ++ text = b"GET /test HTTP/1.1\r\n" b":test\r\n\r\n" ++ with pytest.raises(http_exceptions.BadHttpMessage): ++ parser.feed_data(text) ++ ++ + def test_invalid_header(parser) -> None: + text = b"GET /test HTTP/1.1\r\n" b"test line\r\n\r\n" + with pytest.raises(http_exceptions.BadHttpMessage): +@@ -655,6 +723,34 @@ def test_http_request_bad_status_line(parser) -> None: + assert r"\n" not in exc_info.value.message + + ++_num: Dict[bytes, str] = { ++ # dangerous: accepted by Python int() ++ # unicodedata.category("\U0001D7D9") == 'Nd' ++ "\N{mathematical double-struck digit one}".encode(): "utf8digit", ++ # only added for interop tests, refused by Python int() ++ # unicodedata.category("\U000000B9") == 'No' ++ "\N{superscript one}".encode(): "utf8number", ++ "\N{superscript one}".encode("latin-1"): "latin1number", ++} ++ ++ ++@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values()) ++def test_http_request_bad_status_line_number( ++ parser: Any, nonascii_digit: bytes ++) -> None: ++ text = b"GET /digit HTTP/1." + nonascii_digit + b"\r\n\r\n" ++ with pytest.raises(http_exceptions.BadStatusLine): ++ parser.feed_data(text) ++ ++ ++def test_http_request_bad_status_line_separator(parser: Any) -> None: ++ # single code point, old, multibyte NFKC, multibyte NFKD ++ utf8sep = "\N{arabic ligature sallallahou alayhe wasallam}".encode() ++ text = b"GET /ligature HTTP/1" + utf8sep + b"1\r\n\r\n" ++ with pytest.raises(http_exceptions.BadStatusLine): ++ parser.feed_data(text) ++ ++ + def test_http_request_upgrade(parser) -> None: + text = ( + b"GET /test HTTP/1.1\r\n" +@@ -670,6 +766,31 @@ def test_http_request_upgrade(parser) -> None: + assert tail == b"some raw data" + + ++def test_http_request_parser_utf8_request_line(parser) -> None: ++ if not isinstance(response, HttpResponseParserPy): ++ pytest.xfail("Regression test for Py parser. May match C behaviour later.") ++ messages, upgrade, tail = parser.feed_data( ++ # note the truncated unicode sequence ++ b"GET /P\xc3\xbcnktchen\xa0\xef\xb7 HTTP/1.1\r\n" + ++ # for easier grep: ASCII 0xA0 more commonly known as non-breaking space ++ # note the leading and trailing spaces ++ "sTeP: \N{latin small letter sharp s}nek\t\N{no-break space} " ++ "\r\n\r\n".encode() ++ ) ++ msg = messages[0][0] ++ ++ assert msg.method == "GET" ++ assert msg.path == "/Pünktchen\udca0\udcef\udcb7" ++ assert msg.version == (1, 1) ++ assert msg.headers == CIMultiDict([("STEP", "ßnek\t\xa0")]) ++ assert msg.raw_headers == ((b"sTeP", "ßnek\t\xa0".encode()),) ++ assert not msg.should_close ++ assert msg.compression is None ++ assert not msg.upgrade ++ assert not msg.chunked ++ assert msg.url.path == URL("/P%C3%BCnktchen\udca0\udcef\udcb7").path ++ ++ + def test_http_request_parser_utf8(parser) -> None: + text = "GET /path HTTP/1.1\r\nx-test:тест\r\n\r\n".encode() + messages, upgrade, tail = parser.feed_data(text) +@@ -719,9 +840,15 @@ def test_http_request_parser_two_slashes(parser) -> None: + assert not msg.chunked + + +-def test_http_request_parser_bad_method(parser) -> None: ++@pytest.mark.parametrize( ++ "rfc9110_5_6_2_token_delim", ++ [bytes([i]) for i in rb'"(),/:;<=>?@[\]{}'], ++) ++def test_http_request_parser_bad_method( ++ parser, rfc9110_5_6_2_token_delim: bytes ++) -> None: + with pytest.raises(http_exceptions.BadStatusLine): +- parser.feed_data(b'=":(e),[T];?" /get HTTP/1.1\r\n\r\n') ++ parser.feed_data(rfc9110_5_6_2_token_delim + b'ET" /get HTTP/1.1\r\n\r\n') + + + def test_http_request_parser_bad_version(parser) -> None: +@@ -907,6 +1034,12 @@ def test_http_response_parser_code_not_int(response) -> None: + response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n") + + ++@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values()) ++def test_http_response_parser_code_not_ascii(response, nonascii_digit: bytes) -> None: ++ with pytest.raises(http_exceptions.BadStatusLine): ++ response.feed_data(b"HTTP/1.1 20" + nonascii_digit + b" test\r\n\r\n") ++ ++ + def test_http_request_chunked_payload(parser) -> None: + text = b"GET /test HTTP/1.1\r\n" b"transfer-encoding: chunked\r\n\r\n" + msg, payload = parser.feed_data(text)[0][0] +-- +2.40.0 + diff --git a/meta-python/recipes-devtools/python/python3-aiohttp_3.8.6.bb b/meta-python/recipes-devtools/python/python3-aiohttp_3.8.6.bb index 50103da47d..c8ce113c42 100644 --- a/meta-python/recipes-devtools/python/python3-aiohttp_3.8.6.bb +++ b/meta-python/recipes-devtools/python/python3-aiohttp_3.8.6.bb @@ -11,6 +11,7 @@ SRC_URI += "file://CVE-2024-23334.patch \ file://CVE-2023-49082.patch \ file://CVE-2024-27306.patch \ file://CVE-2025-53643.patch \ + file://CVE-2024-23829.patch \ " SRC_URI[sha256sum] = "b0cf2a4501bff9330a8a5248b4ce951851e415bdcce9dc158e76cfd55e15085c"