Skip to content

Commit

Permalink
Override NumPy's copy method in SegyArray to ensure uninterpreted bytes are preserved. (#78)
Browse files Browse the repository at this point in the history

* Add special copy method to SegyArray class

A 'copy' method, which preserves raw binary data and padding bytes, has been added to the SegyArray class in arrays.py. This is essential for SEG-Y data processing, where some fields are left unparsed but the integrity of the raw binary data must still be preserved.

* Update literal values and copy method static type checking

* Simplify array copy process

* make mypy happy

* Add test for SegyArray copy method

A test has been added to confirm that the copy method of SegyArray reproduces the exact underlying buffer of a SEG-Y array. The test also verifies that the original array and its copy do not share memory.

---------

Co-authored-by: Altay Sansal <altay.sansal@tgs.com>
  • Loading branch information
tasansal and Altay Sansal authored Apr 5, 2024
1 parent af1922c commit 6c2949c
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 0 deletions.
16 changes: 16 additions & 0 deletions src/segy/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@
from pandas import DataFrame

if TYPE_CHECKING:
from typing import Literal

from numpy.typing import NDArray

OrderKACF = Literal[None, "K", "A", "C", "F"]


class SegyArray(np.ndarray): # type: ignore[type-arg]
"""Base class for array interface. Like ndarray but extensible."""
Expand All @@ -33,6 +37,18 @@ def __array_finalize__(self, obj: NDArray[Any] | None) -> None:
if obj is None:
return

def copy(self, order: OrderKACF = "K") -> SegyArray:
    """Return a byte-exact copy of the array, padding bytes included.

    The array is first reinterpreted as opaque void ("V") items, that view
    is duplicated with ``np.copy``, and the duplicate is then viewed again
    with the original dtype. Going through the void view is what carries
    over the raw bytes that no named field covers, which matters for SEG-Y
    data where only some of the fields are parsed.

    Args:
        order: Memory layout of the copy; forwarded to ``np.copy``.

    Returns:
        A new array with the same dtype as ``self``.
    """
    # subok=True asks np.copy to pass array subclasses through.
    raw_duplicate = np.copy(self.view("V"), order=order, subok=True)
    return raw_duplicate.view(self.dtype)  # type: ignore[no-any-return]


class HeaderArray(SegyArray):
"""Header ndarray with convenience features."""
Expand Down
24 changes: 24 additions & 0 deletions tests/test_arrays.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Test Numpy array subclasses."""

import numpy as np

from segy.arrays import SegyArray


def test_segy_array_copy() -> None:
    """Check that copying a SegyArray reproduces its buffer byte for byte."""
    # Two uint16 fields at byte offsets 0 and 4 inside a 10-byte item; the
    # remaining bytes of each item belong to no field (padding).
    padded_dtype = np.dtype(
        {
            "names": ["f1", "f2"],
            "formats": ["uint16", "uint16"],
            "offsets": [0, 4],
            "itemsize": 10,
        }
    )
    # Five uint16 values -> 10 bytes, i.e. exactly one item of padded_dtype.
    raw_bytes = np.arange(5, dtype="uint16").tobytes()

    original = SegyArray(np.frombuffer(raw_bytes, dtype=padded_dtype))
    duplicate = original.copy()

    # Every byte, including the padding, must match the source buffer.
    assert duplicate.tobytes() == raw_bytes
    # The copy must not share memory with the original.
    assert np.may_share_memory(duplicate, original) is False

0 comments on commit 6c2949c

Please sign in to comment.