Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interpret SEG-Y revision in binary header #190

Merged
merged 13 commits into from
Sep 18, 2024
3 changes: 3 additions & 0 deletions src/segy/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Constant values used in SEG_Y."""

# Raw value of the 16-bit revision field for SEG-Y 1.0: the major revision
# sits in the high byte and the low byte is zero, i.e. 0x0100.
REV1_BASE16 = 1 << 8  # == 256 in decimal
15 changes: 11 additions & 4 deletions src/segy/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from segy.arrays import HeaderArray
from segy.arrays import TraceArray
from segy.constants import REV1_BASE16
from segy.schema.base import Endianness
from segy.schema.format import ScalarType
from segy.schema.segy import SegyStandard
Expand Down Expand Up @@ -103,8 +104,11 @@ def samples_per_trace(self) -> int:
return cast(int, self.spec.trace.data.samples)

@property
def segy_revision(self) -> SegyStandard | None:
def segy_revision(self) -> SegyStandard:
"""Revision of the SEG-Y file."""
if self.spec.segy_standard is None:
return SegyStandard.REV0

return self.spec.segy_standard

def create_textual_header(self, text: str | None = None) -> bytes:
Expand Down Expand Up @@ -147,9 +151,12 @@ def create_binary_header(self, update: dict[str, Any] | None = None) -> bytes:
binary_spec = self.spec.binary_header
bin_header = HeaderArray(np.zeros(shape=1, dtype=binary_spec.dtype))

rev0 = self.segy_revision == SegyStandard.REV0
if self.segy_revision is not None and not rev0:
bin_header["segy_revision"] = self.segy_revision.value * 256
if self.segy_revision == SegyStandard.REV1:
bin_header["segy_revision"] = REV1_BASE16
elif self.segy_revision >= SegyStandard.REV2:
minor, major = np.modf(self.segy_revision.value)
bin_header["segy_revision_major"] = major
bin_header["segy_revision_minor"] = minor

bin_header["sample_interval"] = self.sample_interval
bin_header["orig_sample_interval"] = self.sample_interval
Expand Down
6 changes: 5 additions & 1 deletion src/segy/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from segy.accessors import TraceAccessor
from segy.arrays import HeaderArray
from segy.config import SegySettings
from segy.constants import REV1_BASE16
from segy.exceptions import EndiannessInferenceError
from segy.indexing import DataIndexer
from segy.indexing import HeaderIndexer
Expand Down Expand Up @@ -76,7 +77,7 @@ def infer_endianness(
bin_spec.endianness = endianness
bin_hdr = np.frombuffer(buffer, dtype=bin_spec.dtype)

revision = bin_hdr["segy_revision"].item() / 256.0
revision = bin_hdr["segy_revision"].item() / REV1_BASE16
sample_increment = bin_hdr["sample_interval"].item()
sample_format_int = bin_hdr["data_sample_format"].item()

Expand Down Expand Up @@ -239,6 +240,9 @@ def binary_header(self) -> HeaderArray:
little_endian = TransformFactory.create("byte_swap", Endianness.LITTLE)
transforms.add_transform(little_endian)

interpret_revision = TransformFactory.create("segy_revision")
transforms.add_transform(interpret_revision)

return HeaderArray(transforms.apply(bin_hdr))

def _update_spec(self) -> None:
Expand Down
28 changes: 28 additions & 0 deletions src/segy/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

import numpy as np

from segy.constants import REV1_BASE16
from segy.schema import SegyStandard
from segy.schema.base import Endianness

if TYPE_CHECKING:
Expand Down Expand Up @@ -197,6 +199,31 @@ def transform(self, data: NDArray[Any]) -> NDArray[Any]:
return func(data.astype(cast_dtype)) # type: ignore


class SegyRevisionTransform(Transform):
    """Interpret the SEG-Y revision field in binary header.

    Only headers that carry a single ``segy_revision`` field are modified.
    Headers without that field are returned untouched: this covers layouts
    with no revision field at all as well as layouts that store the revision
    as separate ``segy_revision_major`` / ``segy_revision_minor`` fields.
    """

    def __init__(self) -> None:
        super().__init__()

    def transform(self, data: NDArray[Any]) -> NDArray[Any]:
        """Parse SEG-Y standard from binary header.

        Args:
            data: Binary header array. It is modified in place when it has a
                ``segy_revision`` field holding the raw Rev1 value.

        Returns:
            The same array, with the raw Rev1 value replaced by the value of
            ``SegyStandard.REV1``.
        """
        field_names = data.dtype.names

        # Nothing to interpret for unstructured arrays or for layouts that
        # have no combined ``segy_revision`` field.
        if field_names is None or "segy_revision" not in field_names:
            return data

        # Rev1 needs special treatment.
        # Rev1 is 16-bit with Q-point between the bytes. That means
        # SEG-Y 1.0 is written as 00000001 00000000 in binary, 256 in decimal.
        # A boolean mask is used instead of ``if array == value`` so headers
        # holding more than one record do not raise an ambiguity error.
        revision = data["segy_revision"]
        revision[revision == REV1_BASE16] = SegyStandard.REV1.value

        # Rev2 doesn't need special treatment because it splits into
        # two 8-bit integers for major and minor versions.
        # SEG-Y Rev2.0 is 00000010 00000000 in binary, (2, 0) in decimal
        # SEG-Y Rev2.1 is 00000010 00000001 in binary, (2, 1) in decimal

        return data


class TraceTransform(Transform):
"""Composite transform to apply header and data pipeline to trace.

Expand Down Expand Up @@ -235,6 +262,7 @@ class TransformFactory:
transform_map: dict[str, type[Transform]] = {
"byte_swap": ByteSwapTransform,
"ibm_float": IbmFloatTransform,
"segy_revision": SegyRevisionTransform,
"trace": TraceTransform,
}

Expand Down
7 changes: 4 additions & 3 deletions tests/test_segy_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@
class SegyFactoryTestConfig:
"""Dataclass to configure common test patterns."""

segy_standard: SegyStandard
segy_standard: SegyStandard | None
endianness: Endianness
sample_interval: int
samples_per_trace: int


SEGY_FACTORY_TEST_CONFIGS = [
SegyFactoryTestConfig(None, Endianness.BIG, 2000, 51),
SegyFactoryTestConfig(SegyStandard.REV0, Endianness.BIG, 2000, 51),
SegyFactoryTestConfig(SegyStandard.REV1, Endianness.LITTLE, 3000, 1),
SegyFactoryTestConfig(SegyStandard.REV0, Endianness.BIG, 5000, 10),
Expand Down Expand Up @@ -128,7 +129,7 @@ def test_binary_header_default(
mock_segy_factory.samples_per_trace,
mock_segy_factory.samples_per_trace,
SEGY_FORMAT_MAP[mock_segy_factory.sample_format],
mock_segy_factory.segy_revision.value * 256, # type: ignore[union-attr]
mock_segy_factory.segy_revision.value * 256,
0, # fixed length trace flag
0, # extended text headers
)
Expand All @@ -152,7 +153,7 @@ def test_binary_header_custom(self, mock_segy_factory: SegyFactory) -> None:
mock_segy_factory.samples_per_trace,
mock_segy_factory.samples_per_trace,
SEGY_FORMAT_MAP[mock_segy_factory.sample_format],
mock_segy_factory.segy_revision.value * 256, # type: ignore[union-attr]
mock_segy_factory.segy_revision.value * 256,
1, # fixed length trace flag
2, # extended text headers
)
Expand Down
48 changes: 47 additions & 1 deletion tests/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,21 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Any
from typing import cast

import numpy as np
import pytest

from segy import SegyFactory
from segy.schema import SegyStandard
from segy.schema.base import Endianness
from segy.standards import get_segy_standard
from segy.transforms import TransformFactory
from segy.transforms import TransformPipeline

if TYPE_CHECKING:
from typing import Any

from numpy.typing import NDArray


Expand Down Expand Up @@ -128,6 +133,47 @@ def test_ibm_float_field(self, mock_header_ibm: NDArray[Any]) -> None:
assert transformed_header[0].item() == expected.item()


class TestRevisionTransform:
    """Test SEG-Y revision transforms. This tests both transform and factory."""

    @pytest.mark.parametrize("endian", ["little", "big"])
    @pytest.mark.parametrize(("major", "minor"), [(0, 0), (1, 0), (2, 0)])
    def test_rev_parse(self, endian: str, major: int, minor: int) -> None:
        """Test that the revision written by the factory is parsed back."""
        # Set parameters
        rev_float = float(f"{major}.{minor}")
        revision = SegyStandard(rev_float)
        endianness = Endianness(endian)

        # Create binary header with factory. The bytes are copied into a
        # bytearray so the resulting array is writable for the transform.
        spec = get_segy_standard(revision)
        spec.endianness = endianness
        segy_factory = SegyFactory(spec)
        bin_header_bytes = bytearray(segy_factory.create_binary_header())
        bin_header = np.frombuffer(bin_header_bytes, dtype=spec.binary_header.dtype)

        # Set up and apply transform
        transform = TransformFactory.create("segy_revision")
        transformed_bin_header = transform.apply(bin_header)

        # The binary header is a structured array, so names is never None here.
        header_fields = cast(tuple[str, ...], transformed_bin_header.dtype.names)

        if revision == SegyStandard.REV0:
            # Rev0 headers carry no revision information at all.
            assert "segy_revision" not in header_fields
            assert "segy_revision_major" not in header_fields
            assert "segy_revision_minor" not in header_fields

        elif revision == SegyStandard.REV1:
            # Rev1 uses one combined field; the transform maps it to the major.
            assert transformed_bin_header["segy_revision"].squeeze() == major
            assert "segy_revision_major" not in header_fields
            assert "segy_revision_minor" not in header_fields

        elif revision == SegyStandard.REV2:
            # Rev2 keeps major and minor in separate fields.
            assert transformed_bin_header["segy_revision_major"].squeeze() == major
            assert transformed_bin_header["segy_revision_minor"].squeeze() == minor
            assert "segy_revision" not in header_fields


class TestTransformPipeline:
"""Tests for transform pipeline and transform integration."""

Expand Down
Loading