diff --git a/src/segy/constants.py b/src/segy/constants.py new file mode 100644 index 0000000..6a7dfbf --- /dev/null +++ b/src/segy/constants.py @@ -0,0 +1,3 @@ +"""Constant values used in SEG_Y.""" + +REV1_BASE16 = 0x01_00 # hex -> int = 256 diff --git a/src/segy/factory.py b/src/segy/factory.py index ab9bf1a..ce2d4b3 100644 --- a/src/segy/factory.py +++ b/src/segy/factory.py @@ -11,6 +11,7 @@ from segy.arrays import HeaderArray from segy.arrays import TraceArray +from segy.constants import REV1_BASE16 from segy.schema.base import Endianness from segy.schema.format import ScalarType from segy.schema.segy import SegyStandard @@ -103,8 +104,11 @@ def samples_per_trace(self) -> int: return cast(int, self.spec.trace.data.samples) @property - def segy_revision(self) -> SegyStandard | None: + def segy_revision(self) -> SegyStandard: """Revision of the SEG-Y file.""" + if self.spec.segy_standard is None: + return SegyStandard.REV0 + return self.spec.segy_standard def create_textual_header(self, text: str | None = None) -> bytes: @@ -147,9 +151,12 @@ def create_binary_header(self, update: dict[str, Any] | None = None) -> bytes: binary_spec = self.spec.binary_header bin_header = HeaderArray(np.zeros(shape=1, dtype=binary_spec.dtype)) - rev0 = self.segy_revision == SegyStandard.REV0 - if self.segy_revision is not None and not rev0: - bin_header["segy_revision"] = self.segy_revision.value * 256 + if self.segy_revision == SegyStandard.REV1: + bin_header["segy_revision"] = REV1_BASE16 + elif self.segy_revision >= SegyStandard.REV2: + minor, major = np.modf(self.segy_revision.value) + bin_header["segy_revision_major"] = major + bin_header["segy_revision_minor"] = minor bin_header["sample_interval"] = self.sample_interval bin_header["orig_sample_interval"] = self.sample_interval diff --git a/src/segy/file.py b/src/segy/file.py index 61aea81..433dad2 100644 --- a/src/segy/file.py +++ b/src/segy/file.py @@ -13,6 +13,7 @@ from segy.accessors import TraceAccessor from segy.arrays 
import HeaderArray from segy.config import SegySettings +from segy.constants import REV1_BASE16 from segy.exceptions import EndiannessInferenceError from segy.indexing import DataIndexer from segy.indexing import HeaderIndexer @@ -76,7 +77,7 @@ def infer_endianness( bin_spec.endianness = endianness bin_hdr = np.frombuffer(buffer, dtype=bin_spec.dtype) - revision = bin_hdr["segy_revision"].item() / 256.0 + revision = bin_hdr["segy_revision"].item() / REV1_BASE16 sample_increment = bin_hdr["sample_interval"].item() sample_format_int = bin_hdr["data_sample_format"].item() @@ -239,6 +240,9 @@ def binary_header(self) -> HeaderArray: little_endian = TransformFactory.create("byte_swap", Endianness.LITTLE) transforms.add_transform(little_endian) + interpret_revision = TransformFactory.create("segy_revision") + transforms.add_transform(interpret_revision) + return HeaderArray(transforms.apply(bin_hdr)) def _update_spec(self) -> None: diff --git a/src/segy/transforms.py b/src/segy/transforms.py index 6c3412d..9b0af8c 100644 --- a/src/segy/transforms.py +++ b/src/segy/transforms.py @@ -8,6 +8,8 @@ import numpy as np +from segy.constants import REV1_BASE16 +from segy.schema import SegyStandard from segy.schema.base import Endianness if TYPE_CHECKING: @@ -197,6 +199,31 @@ def transform(self, data: NDArray[Any]) -> NDArray[Any]: return func(data.astype(cast_dtype)) # type: ignore +class SegyRevisionTransform(Transform): + """Interpret the SEG-Y revision field in binary header.""" + + def __init__(self) -> None: + super().__init__() + + def transform(self, data: NDArray[Any]) -> NDArray[Any]: + """Parse SEG-Y standard from binary header.""" + if data.dtype.names is not None and "segy_revision" not in data.dtype.names: + return data # rev0, no-op + + # Rev1 needs special treatment. + # Rev1 is 16-bit with Q-point between the bytes. That means + # SEG-Y 1.0 is written as 00000001 00000000 in binary, 256 in base-10.
+ if data["segy_revision"] == REV1_BASE16: + data["segy_revision"] = SegyStandard.REV1 + + # Rev2 doesn't need special treatment because it splits into + # two 8-bit integers for major and minor versions. + # SEG-Y Rev2.0 is 00000010 00000000 in binary, (2, 0) in base-10 + # SEG-Y Rev2.1 is 00000010 00000001 in binary, (2, 1) in base-10 + + return data + + class TraceTransform(Transform): """Composite transform to apply header and data pipeline to trace. @@ -235,6 +262,7 @@ class TransformFactory: transform_map: dict[str, type[Transform]] = { "byte_swap": ByteSwapTransform, "ibm_float": IbmFloatTransform, + "segy_revision": SegyRevisionTransform, "trace": TraceTransform, } diff --git a/tests/test_segy_factory.py b/tests/test_segy_factory.py index 37a652a..07d63a4 100644 --- a/tests/test_segy_factory.py +++ b/tests/test_segy_factory.py @@ -25,13 +25,14 @@ class SegyFactoryTestConfig: """Dataclass to configure common test patterns.""" - segy_standard: SegyStandard + segy_standard: SegyStandard | None endianness: Endianness sample_interval: int samples_per_trace: int SEGY_FACTORY_TEST_CONFIGS = [ + SegyFactoryTestConfig(None, Endianness.BIG, 2000, 51), SegyFactoryTestConfig(SegyStandard.REV0, Endianness.BIG, 2000, 51), SegyFactoryTestConfig(SegyStandard.REV1, Endianness.LITTLE, 3000, 1), SegyFactoryTestConfig(SegyStandard.REV0, Endianness.BIG, 5000, 10), @@ -128,7 +129,7 @@ def test_binary_header_default( mock_segy_factory.samples_per_trace, mock_segy_factory.samples_per_trace, SEGY_FORMAT_MAP[mock_segy_factory.sample_format], - mock_segy_factory.segy_revision.value * 256, # type: ignore[union-attr] + mock_segy_factory.segy_revision.value * 256, 0, # fixed length trace flag 0, # extended text headers ) @@ -152,7 +153,7 @@ def test_binary_header_custom(self, mock_segy_factory: SegyFactory) -> None: mock_segy_factory.samples_per_trace, mock_segy_factory.samples_per_trace, SEGY_FORMAT_MAP[mock_segy_factory.sample_format], - mock_segy_factory.segy_revision.value * 256, #
type: ignore[union-attr] + mock_segy_factory.segy_revision.value * 256, 1, # fixed length trace flag 2, # extended text headers ) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 0522346..ab518f3 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -3,16 +3,21 @@ from __future__ import annotations from typing import TYPE_CHECKING -from typing import Any +from typing import cast import numpy as np import pytest +from segy import SegyFactory +from segy.schema import SegyStandard from segy.schema.base import Endianness +from segy.standards import get_segy_standard from segy.transforms import TransformFactory from segy.transforms import TransformPipeline if TYPE_CHECKING: + from typing import Any + from numpy.typing import NDArray @@ -128,6 +133,47 @@ def test_ibm_float_field(self, mock_header_ibm: NDArray[Any]) -> None: assert transformed_header[0].item() == expected.item() +class TestRevisionTransform: + """Test SEG-Y revision transforms. This tests both transform and factory.""" + + @pytest.mark.parametrize("endian", ["little", "big"]) + @pytest.mark.parametrize(("major", "minor"), [(0, 0), (1, 0), (2, 0)]) + def test_rev_parse(self, endian: str, major: int, minor: int) -> None: + """Test SEG-Y revision parsing.""" + # Set parameters + rev_float = float(f"{major}.{minor}") + revision = SegyStandard(rev_float) + endianness = Endianness(endian) + + # Create binary header with factory + spec = get_segy_standard(revision) + spec.endianness = endianness + segy_factory = SegyFactory(spec) + bin_header_bytes = bytearray(segy_factory.create_binary_header()) + bin_header = np.frombuffer(bin_header_bytes, dtype=spec.binary_header) + + # Set up and apply transform + transform = TransformFactory.create("segy_revision") + transformed_bin_header = transform.apply(bin_header) + + header_fields = cast(tuple[str], transformed_bin_header.dtype.names) + + if revision == SegyStandard.REV0: + assert "segy_revision" not in header_fields + assert
"segy_revision_major" not in header_fields + assert "segy_revision_minor" not in header_fields + + elif revision == SegyStandard.REV1: + assert transformed_bin_header["segy_revision"].squeeze() == major + assert "segy_revision_major" not in header_fields + assert "segy_revision_minor" not in header_fields + + elif revision == SegyStandard.REV2: + assert transformed_bin_header["segy_revision_major"].squeeze() == major + assert transformed_bin_header["segy_revision_minor"].squeeze() == minor + assert "segy_revision" not in header_fields + + class TestTransformPipeline: """Tests for transform pipeline and transform integration."""