diff --git a/docs/nested_dynamic_arrays.rst b/docs/nested_dynamic_arrays.rst index 5a06f01a..a094c8f9 100644 --- a/docs/nested_dynamic_arrays.rst +++ b/docs/nested_dynamic_arrays.rst @@ -5,8 +5,3 @@ The ``eth-abi`` library supports the Solidity ABIv2 encoding format for nested dynamic arrays. This means that values for data types such as the following are legal and encodable/decodable: ``int[][]``, ``string[]``, ``string[2]``, etc. - -.. warning:: - - Though Solidity's ABIv2 has mostly been finalized, the specification is - technically still in development and may change. diff --git a/eth_abi/decoding.py b/eth_abi/decoding.py index 118a73c4..bc7a827b 100644 --- a/eth_abi/decoding.py +++ b/eth_abi/decoding.py @@ -19,6 +19,7 @@ ) from eth_abi.exceptions import ( InsufficientDataBytes, + InvalidPointer, NonEmptyPaddingBytes, ) from eth_abi.utils.numeric import ( @@ -78,13 +79,13 @@ def __init__(self, *args, **kwargs): self._frames = [] self._total_offset = 0 - def seek_in_frame(self, pos, *args, **kwargs): + def seek_in_frame(self, pos: int, *args: Any, **kwargs: Any) -> None: """ Seeks relative to the total offset of the current contextual frames. """ self.seek(self._total_offset + pos, *args, **kwargs) - def push_frame(self, offset): + def push_frame(self, offset: int) -> None: """ Pushes a new contextual frame onto the stack with the given offset and a return position at the current cursor position then seeks to the new @@ -131,6 +132,13 @@ def __call__(self, stream: ContextFramesBytesIO) -> Any: class HeadTailDecoder(BaseDecoder): + """ + Decoder for a dynamic element of a dynamic container (a dynamic array, or a sized + array or tuple that contains dynamic elements). A dynamic element consists of a + pointer, aka offset, which is located in the head section of the encoded container, + and the actual value, which is located in the tail section of the encoding. + """ + is_dynamic = True tail_decoder = None @@ -141,13 +149,18 @@ def validate(self): if self.tail_decoder is None: raise ValueError("No `tail_decoder` set") - def decode(self, stream): + def decode(self, stream: ContextFramesBytesIO) -> Any: + # Decode the offset and move the stream cursor forward 32 bytes start_pos = decode_uint_256(stream) - + # Jump ahead to the start of the value stream.push_frame(start_pos) + + # assertion check for mypy if self.tail_decoder is None: raise AssertionError("`tail_decoder` is None") + # Decode the value value = self.tail_decoder(stream) + # Return the cursor stream.pop_frame() return value @@ -172,8 +185,43 @@ def validate(self): if self.decoders is None: raise ValueError("No `decoders` set") + def validate_pointers(self, stream: ContextFramesBytesIO) -> None: + """ + Verify that all pointers point to a valid location in the stream. + """ + current_location = stream.tell() + len_of_head = sum( + decoder.array_size if hasattr(decoder, "array_size") else 1 + for decoder in self.decoders + ) + end_of_offsets = current_location + 32 * len_of_head + total_stream_length = len(stream.getbuffer()) + for decoder in self.decoders: + if isinstance(decoder, HeadTailDecoder): + # the next 32 bytes are a pointer + offset = decode_uint_256(stream) + indicated_idx = current_location + offset + if ( + indicated_idx < end_of_offsets + or indicated_idx >= total_stream_length + ): + # the pointer is indicating its data is located either within the + # offsets section of the stream or beyond the end of the stream, + # both of which are invalid + raise InvalidPointer( + "Invalid pointer in tuple at location " + f"{stream.tell() - 32} in payload" + ) + else: + # the next 32 bytes are not a pointer, so progress the stream per + # the decoder + decoder(stream) + # return the stream to its original location for actual decoding + stream.seek(current_location) + @to_tuple # type: ignore[misc] # untyped decorator def decode(self, stream: ContextFramesBytesIO) -> Generator[Any, None, None]: + self.validate_pointers(stream) for decoder in self.decoders: yield decoder(stream) @@ -248,6 +296,30 @@ def from_type_str(cls, abi_type, registry): # If array dimension is dynamic return DynamicArrayDecoder(item_decoder=item_decoder) + def validate_pointers(self, stream: ContextFramesBytesIO, array_size: int) -> None: + """ + Verify that all pointers point to a valid location in the stream. + """ + if isinstance(self.item_decoder, HeadTailDecoder): + current_location = stream.tell() + end_of_offsets = current_location + 32 * array_size + total_stream_length = len(stream.getbuffer()) + for _ in range(array_size): + offset = decode_uint_256(stream) + indicated_idx = current_location + offset + if ( + indicated_idx < end_of_offsets + or indicated_idx >= total_stream_length + ): + # the pointer is indicating its data is located either within the + # offsets section of the stream or beyond the end of the stream, + # both of which are invalid + raise InvalidPointer( + "Invalid pointer in array at location " + f"{stream.tell() - 32} in payload" + ) + stream.seek(current_location) + class SizedArrayDecoder(BaseArrayDecoder): array_size = None @@ -261,6 +333,8 @@ def __init__(self, **kwargs): def decode(self, stream): if self.item_decoder is None: raise AssertionError("`item_decoder` is None") + + self.validate_pointers(stream, self.array_size) for _ in range(self.array_size): yield self.item_decoder(stream) @@ -275,6 +349,8 @@ def decode(self, stream): stream.push_frame(32) if self.item_decoder is None: raise AssertionError("`item_decoder` is None") + + self.validate_pointers(stream, array_size) for _ in range(array_size): yield self.item_decoder(stream) stream.pop_frame() diff --git a/eth_abi/exceptions.py b/eth_abi/exceptions.py index 2ab204ad..d3626147 100644 --- a/eth_abi/exceptions.py +++ b/eth_abi/exceptions.py @@ -47,8 +47,7 @@ class DecodingError(Exception): class InsufficientDataBytes(DecodingError): """ - Raised when there are insufficient data to decode a value for a given ABI - type. + Raised when there are insufficient data to decode a value for a given ABI type. """ @@ -58,6 +57,12 @@ class NonEmptyPaddingBytes(DecodingError): """ +class InvalidPointer(DecodingError): + """ + Raised when the pointer to a value in the ABI encoding is invalid. + """ + + class ParseError(parsimonious.ParseError): # type: ignore[misc] # subclasses Any """ Raised when an ABI type string cannot be parsed. diff --git a/newsfragments/226.bugfix.rst b/newsfragments/226.bugfix.rst new file mode 100644 index 00000000..630c1045 --- /dev/null +++ b/newsfragments/226.bugfix.rst @@ -0,0 +1 @@ +During decoding, verify all pointers in arrays and tuples point to a valid location in the payload diff --git a/setup.py b/setup.py index 9e38f7f5..9e204d4d 100644 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ ], "test": [ "pytest>=7.0.0", + "pytest-timeout>=2.0.0", "pytest-xdist>=2.4.0", "pytest-pythonpath>=0.7.1", "eth-hash[pycryptodome]", diff --git a/tests/abi/test_decode.py b/tests/abi/test_decode.py index f56045ec..2c195a9c 100644 --- a/tests/abi/test_decode.py +++ b/tests/abi/test_decode.py @@ -7,6 +7,7 @@ ) from eth_abi.exceptions import ( InsufficientDataBytes, + InvalidPointer, ) from eth_abi.grammar import ( parse, @@ -68,7 +69,6 @@ def test_abi_decode_for_single_dynamic_types( ) (actual,) = decode([type_str], abi_encoding, strict=strict) - assert actual == expected @@ -198,3 +198,54 @@ def test_abi_decode_with_shorter_data_than_32_bytes(types, hex_data, expected): # without the flag set (i.e. assert the default behavior is always ``strict=True``). with pytest.raises(InsufficientDataBytes): decode(types, bytes.fromhex(hex_data)) + + +@pytest.mark.parametrize( + "typestring,malformed_payload", + ( + ( + ["uint256[][][][][][][][][][]"], + ("0" * 62 + "20") * 10 + "00" * 2056, + ), + ( + ["uint256[][][][][][][][][][][][]"], + "0" * 62 + "20" + "0" * 62 + "a0" + ("0" * 62 + "20") * 9 + "00" * 1024, + ), + ( + ["uint8[]", "uint[2]", "uint8[]"], + "00000000000000000000000000000000000000000000000000000000000000600000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", # noqa: E501 + ), + ( + ["(uint8[2],uint8[])"], + "00000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000", # noqa: E501 + ), + ( + ["(uint8[2],uint8[])"], + "00000000000000000000000000000000000000000000000000000000000000f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000", # noqa: E501 + ), + ( + ["uint8[]"], + "00000000000000000000000000000000000000000000000000000000000000f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000", # noqa: E501 + ), + ( + ["(uint8[],uint8[8],uint8[])"], + "0000000000000000000000000000000000000000000000000000000000000020000000000000000000000000000000000000000000000000000000000000012000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", # noqa: E501 + ), + ), + ids=( + "nested array with all pointers equal to 0x20", + "nested array user example", + "separate dynamic, sized, dynamic arrays", + "tuple of sized array and dynamic array", + "pointer beyond end of data for tuple", + "pointer beyond end of data for array", + "tuple of arrays to check length of head section of tuple calcd correctly", + ), +) +@pytest.mark.timeout(1) +def test_decode_nested_dynamic_array_with_invalid_pointer_fails_fast( + typestring, malformed_payload +): + malformed_payload_bytes = bytearray.fromhex(malformed_payload) + with pytest.raises(InvalidPointer, match=r"^Invalid pointer in"): + decode(typestring, malformed_payload_bytes)