diff --git a/src/segy/arrays.py b/src/segy/arrays.py
index fc88e62..0e1f24f 100644
--- a/src/segy/arrays.py
+++ b/src/segy/arrays.py
@@ -18,8 +18,12 @@
 from pandas import DataFrame
 
 if TYPE_CHECKING:
+    from typing import Literal
+
     from numpy.typing import NDArray
 
+    OrderKACF = Literal[None, "K", "A", "C", "F"]
+
 
 class SegyArray(np.ndarray):  # type: ignore[type-arg]
     """Base class for array interface. Like ndarray but extensible."""
@@ -33,6 +37,18 @@ def __array_finalize__(self, obj: NDArray[Any] | None) -> None:
         if obj is None:
             return
 
+    def copy(self, order: OrderKACF = "K") -> SegyArray:
+        """Return a copy of the array that preserves padding bytes as-is.
+
+        The array is viewed as raw void ("V") items before copying, so the copy
+        carries every byte of each item, padding included, and is then viewed
+        back as the original dtype. This matters for SEG-Y data where not all
+        fields are parsed but the raw binary content must be preserved.
+        """
+        void_view = self.view("V")  # reinterpret each item as opaque bytes
+        void_copy = np.copy(void_view, order=order, subok=True)  # keeps subclass
+        return void_copy.view(self.dtype)  # type: ignore[no-any-return]
+
 
 class HeaderArray(SegyArray):
     """Header ndarray with convenience features."""
diff --git a/tests/test_arrays.py b/tests/test_arrays.py
new file mode 100644
index 0000000..8b66f08
--- /dev/null
+++ b/tests/test_arrays.py
@@ -0,0 +1,24 @@
+"""Test NumPy array subclasses."""
+
+import numpy as np
+
+from segy.arrays import SegyArray
+
+
+def test_segy_array_copy() -> None:
+    """Test that copy() reproduces the raw buffer, padding included."""
+    buffer_expected = np.asarray([0, 1, 2, 3, 4], dtype="uint16").tobytes()
+
+    dtype = np.dtype(
+        {
+            "names": ["f1", "f2"],
+            "offsets": [0, 4],
+            "formats": ["uint16", "uint16"],
+            "itemsize": 10,  # bytes 2-3 and 6-9 of each item are padding
+        }
+    )
+    segy_array = SegyArray(np.frombuffer(buffer_expected, dtype=dtype))
+    segy_array_copy = segy_array.copy()
+
+    assert segy_array_copy.tobytes() == buffer_expected
+    assert np.may_share_memory(segy_array_copy, segy_array) is False