From 2349197bea8ebd1bf57a68f4a6549d8fd7585e66 Mon Sep 17 00:00:00 2001
From: Chenhao <24435007+tylzh97@users.noreply.github.com>
Date: Wed, 22 Oct 2025 20:39:31 +0800
Subject: [PATCH] Fix: bug in `decode_definite_long_string()` that causes
 incorrect chunk length calculation (#265)

Upstream-Status: Backport [https://github.com/agronholm/cbor2/commit/2349197bea8ebd1bf57a68f4a6549d8fd7585e66]
CVE: CVE-2025-64076
Signed-off-by: Vijay Anusuri
---
 docs/versionhistory.rst |  2 ++
 source/decoder.c        |  8 +++++++-
 tests/test_decoder.py   | 22 ++++++++++++++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/docs/versionhistory.rst b/docs/versionhistory.rst
index c8566ca..21960ff 100644
--- a/docs/versionhistory.rst
+++ b/docs/versionhistory.rst
@@ -8,6 +8,8 @@ This library adheres to `Semantic Versioning `_.
 **5.6.3** (2024-04-11)
 
 - Fixed decoding of epoch-based dates being affected by the local time zone in the C extension
+- Fixed a read(-1) vulnerability caused by boundary handling error
+  (`#264 <https://github.com/agronholm/cbor2/issues/264>`_; PR by @tylzh97)
 
 **5.6.2** (2024-02-19)
 
diff --git a/source/decoder.c b/source/decoder.c
index 6fd74ce..bea7736 100644
--- a/source/decoder.c
+++ b/source/decoder.c
@@ -757,7 +757,7 @@ decode_definite_long_string(CBORDecoderObject *self, Py_ssize_t length)
     char *buffer = NULL;
     while (left) {
         // Read up to 65536 bytes of data from the stream
-        Py_ssize_t chunk_length = 65536 - buffer_size;
+        Py_ssize_t chunk_length = 65536 - buffer_length;
         if (left < chunk_length)
             chunk_length = left;
 
@@ -827,7 +827,13 @@ decode_definite_long_string(CBORDecoderObject *self, Py_ssize_t length)
                 memcpy(buffer, bytes_buffer + consumed, unconsumed);
             }
             buffer_length = unconsumed;
+        } else {
+            // All bytes consumed, reset buffer_length
+            buffer_length = 0;
         }
+
+        Py_DECREF(chunk);
+        chunk = NULL;
     }
 
     if (ret && string_namespace_add(self, ret, length) == -1)
diff --git a/tests/test_decoder.py b/tests/test_decoder.py
index 485c604..47e6ac9 100644
--- a/tests/test_decoder.py
+++ b/tests/test_decoder.py
@@ -260,6 +260,28 @@ def test_string_oversized(impl) -> None:
         (impl.loads(unhexlify("aeaeaeaeaeaeaeaeae0108c29843d90100d8249f0000aeaeffc26ca799")),)
 
 
+def test_string_issue_264_multiple_chunks_utf8_boundary(impl) -> None:
+    """Test for Issue #264: UTF-8 characters split across multiple 65536-byte chunk boundaries."""
+    import struct
+
+    # Construct: 65535 'a' + '€' (3 bytes) + 65533 'b' + '€' (3 bytes) + 100 'd'
+    # Total: 131174 bytes, which spans 3 chunks (65536 + 65536 + 102)
+    total_bytes = 65535 + 3 + 65533 + 3 + 100
+
+    payload = b"\x7a" + struct.pack(">I", total_bytes)  # major type 3, 4-byte length
+    payload += b"a" * 65535
+    payload += "€".encode()  # U+20AC: E2 82 AC
+    payload += b"b" * 65533
+    payload += "€".encode()
+    payload += b"d" * 100
+
+    expected = "a" * 65535 + "€" + "b" * 65533 + "€" + "d" * 100
+
+    result = impl.loads(payload)
+    assert result == expected
+    assert len(result) == 131170  # 65535 + 1 + 65533 + 1 + 100 characters
+
+
 @pytest.mark.parametrize(
     "payload, expected",
     [
-- 
2.43.0