MAINT: Make format and extension provider properties
mferrera committed Jun 12, 2024
1 parent b3be25b commit 8d7e62c
Showing 10 changed files with 245 additions and 225 deletions.
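
In short, the file format and extension move from stored fields to computed properties on the object data providers, and ValidFormats becomes an Enum. A rough before/after sketch of the call pattern, where `provider` stands for any ObjectDataProvider instance (the snippet is illustrative, not code from the commit):

# Before: format and extension were precomputed and stored on the descriptor.
objdata = provider.get_objectdata()
fmt, extension = objdata.fmt, objdata.extension

# After: they are properties on the provider itself, derived on access.
fmt, extension = provider.fmt, provider.extension
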
84 changes: 37 additions & 47 deletions src/fmu/dataio/_definitions.py
@@ -2,7 +2,6 @@

from __future__ import annotations

from dataclasses import dataclass, field
from enum import Enum, unique
from typing import Final, Type

@@ -21,52 +20,43 @@ class ConfigurationError(ValueError):
pass


@dataclass
class ValidFormats:
surface: dict[str, str] = field(
default_factory=lambda: {
"irap_binary": ".gri",
}
)
grid: dict[str, str] = field(
default_factory=lambda: {
"hdf": ".hdf",
"roff": ".roff",
}
)
cube: dict[str, str] = field(
default_factory=lambda: {
"segy": ".segy",
}
)
table: dict[str, str] = field(
default_factory=lambda: {
"hdf": ".hdf",
"csv": ".csv",
"parquet": ".parquet",
}
)
polygons: dict[str, str] = field(
default_factory=lambda: {
"hdf": ".hdf",
"csv": ".csv", # columns will be X Y Z, ID
"csv|xtgeo": ".csv", # use default xtgeo columns: X_UTME, ... POLY_ID
"irap_ascii": ".pol",
}
)
points: dict[str, str] = field(
default_factory=lambda: {
"hdf": ".hdf",
"csv": ".csv", # columns will be X Y Z
"csv|xtgeo": ".csv", # use default xtgeo columns: X_UTME, Y_UTMN, Z_TVDSS
"irap_ascii": ".poi",
}
)
dictionary: dict[str, str] = field(
default_factory=lambda: {
"json": ".json",
}
)
class ValidFormats(Enum):
surface = {
"irap_binary": ".gri",
}

grid = {
"hdf": ".hdf",
"roff": ".roff",
}

cube = {
"segy": ".segy",
}

table = {
"hdf": ".hdf",
"csv": ".csv",
"parquet": ".parquet",
}

polygons = {
"hdf": ".hdf",
"csv": ".csv", # columns will be X Y Z, ID
"csv|xtgeo": ".csv", # use default xtgeo columns: X_UTME, ... POLY_ID
"irap_ascii": ".pol",
}

points = {
"hdf": ".hdf",
"csv": ".csv", # columns will be X Y Z
"csv|xtgeo": ".csv", # use default xtgeo columns: X_UTME, Y_UTMN, Z_TVDSS
"irap_ascii": ".poi",
}

dictionary = {
"json": ".json",
}


STANDARD_TABLE_INDEX_COLUMNS: Final[dict[str, list[str]]] = {
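For reference, a minimal sketch of how the enum-based ValidFormats is read (member and key names come from the diff above; the assertions themselves are only illustrative):

from fmu.dataio._definitions import ValidFormats

# Each member's value is the mapping from file format name to extension.
assert ValidFormats.surface.value["irap_binary"] == ".gri"
assert ValidFormats.table.value["parquet"] == ".parquet"

# Iterating the enum lists every supported object type and its formats.
supported = {member.name: sorted(member.value) for member in ValidFormats}
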
5 changes: 2 additions & 3 deletions src/fmu/dataio/aggregation.py
@@ -244,8 +244,7 @@ def _generate_aggrd_metadata(
}
etemp = dataio.ExportData(config=config, name=self.name)

objectdata_provider = objectdata_provider_factory(obj=obj, dataio=etemp)
objdata = objectdata_provider.get_objectdata()
objdata = objectdata_provider_factory(obj=obj, dataio=etemp)

template["tracklog"] = [generate_meta_tracklog()[0].model_dump(mode="json")]
template["file"] = {
@@ -262,7 +261,7 @@ def _generate_aggrd_metadata(
template["data"]["name"] = self.name
if self.tagname:
template["data"]["tagname"] = self.tagname
if bbox := objectdata_provider.get_bbox():
if bbox := objdata.get_bbox():
template["data"]["bbox"] = bbox.model_dump(mode="json", exclude_none=True)

self._metadata = template
79 changes: 40 additions & 39 deletions src/fmu/dataio/providers/objectdata/_base.py
@@ -4,10 +4,10 @@
from copy import deepcopy
from dataclasses import dataclass, field
from datetime import datetime
from typing import TYPE_CHECKING, Final, TypeVar
from typing import TYPE_CHECKING, Final
from warnings import warn

from fmu.dataio._definitions import ConfigurationError
from fmu.dataio._definitions import ConfigurationError, ValidFormats
from fmu.dataio._logging import null_logger
from fmu.dataio._utils import generate_description
from fmu.dataio.datastructure._internal.internal import AllowedContent, UnsetAnyContent
@@ -24,20 +24,15 @@
from fmu.dataio.datastructure.meta.content import BoundingBox2D, BoundingBox3D
from fmu.dataio.datastructure.meta.enums import FMUClassEnum
from fmu.dataio.datastructure.meta.specification import AnySpecification
from fmu.dataio.types import Efolder, Inferrable, Layout, Subtype
from fmu.dataio.types import Efolder, Inferrable, Layout

logger: Final = null_logger(__name__)

V = TypeVar("V")


@dataclass
class DerivedObjectDescriptor:
subtype: Subtype
layout: Layout
efolder: Efolder | str
fmt: str
extension: str
table_index: list[str] | None


@@ -76,8 +71,6 @@ class ObjectDataProvider(Provider):
_metadata: dict = field(default_factory=dict)
name: str = field(default="")
efolder: str = field(default="")
extension: str = field(default="")
fmt: str = field(default="")
time0: datetime | None = field(default=None)
time1: datetime | None = field(default=None)

@@ -87,8 +80,6 @@ def __post_init__(self) -> None:
obj_data = self.get_objectdata()

self.name = named_stratigraphy.name
self.extension = obj_data.extension
self.fmt = obj_data.fmt
self.efolder = obj_data.efolder

if self.dataio.forcefolder:
@@ -149,6 +140,40 @@ def __post_init__(self) -> None:
self._metadata["description"] = generate_description(self.dataio.description)
logger.info("Derive all metadata for data object... DONE")

@property
@abstractmethod
def classname(self) -> FMUClassEnum:
raise NotImplementedError

@property
@abstractmethod
def extension(self) -> str:
raise NotImplementedError

@property
@abstractmethod
def fmt(self) -> str:
raise NotImplementedError

@abstractmethod
def get_bbox(self) -> BoundingBox2D | BoundingBox3D | None:
raise NotImplementedError

@abstractmethod
def get_spec(self) -> AnySpecification | None:
raise NotImplementedError

@abstractmethod
def get_objectdata(self) -> DerivedObjectDescriptor:
raise NotImplementedError

def get_metadata(self) -> AnyContent | UnsetAnyContent:
return (
UnsetAnyContent.model_validate(self._metadata)
if self._metadata["content"] == "unset"
else AnyContent.model_validate(self._metadata)
)

def _get_validated_content(self, content: str | dict | None) -> AllowedContent:
"""Check content and return a validated model."""
logger.info("Evaluate content")
@@ -254,37 +279,13 @@ def _get_timedata(self) -> Time | None:

return Time(t0=start, t1=stop)

@property
@abstractmethod
def classname(self) -> FMUClassEnum:
raise NotImplementedError

@abstractmethod
def get_spec(self) -> AnySpecification | None:
raise NotImplementedError

@abstractmethod
def get_bbox(self) -> BoundingBox2D | BoundingBox3D | None:
raise NotImplementedError

@abstractmethod
def get_objectdata(self) -> DerivedObjectDescriptor:
raise NotImplementedError

def get_metadata(self) -> AnyContent | UnsetAnyContent:
return (
UnsetAnyContent.model_validate(self._metadata)
if self._metadata["content"] == "unset"
else AnyContent.model_validate(self._metadata)
)

@staticmethod
def _validate_get_ext(fmt: str, subtype: str, validator: dict[str, V]) -> V:
def _validate_get_ext(fmt: str, validator: ValidFormats) -> str:
"""Validate that fmt (file format) matches data and return legal extension."""
try:
return validator[fmt]
return validator.value[fmt]
except KeyError:
raise ConfigurationError(
f"The file format {fmt} is not supported. ",
f"Valid {subtype} formats are: {list(validator.keys())}",
f"Valid formats are: {list(validator.value.keys())}",
)
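
A small sketch of the narrowed helper's behaviour (import paths follow the diff headers; calling the private staticmethod directly like this is only for illustration):

from fmu.dataio._definitions import ConfigurationError, ValidFormats
from fmu.dataio.providers.objectdata._base import ObjectDataProvider

# A known format resolves to its extension via the enum member's mapping.
assert ObjectDataProvider._validate_get_ext("csv", ValidFormats.table) == ".csv"

# An unknown format raises ConfigurationError listing the valid format names.
try:
    ObjectDataProvider._validate_get_ext("xlsx", ValidFormats.table)
except ConfigurationError as err:
    print(err)
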
11 changes: 8 additions & 3 deletions src/fmu/dataio/providers/objectdata/_faultroom.py
@@ -26,6 +26,14 @@ class FaultRoomSurfaceProvider(ObjectDataProvider):
def classname(self) -> FMUClassEnum:
return FMUClassEnum.surface

@property
def extension(self) -> str:
return self._validate_get_ext(self.fmt, ValidFormats.dictionary)

@property
def fmt(self) -> str:
return self.dataio.dict_fformat

def get_bbox(self) -> BoundingBox3D:
"""Derive data.bbox for FaultRoomSurface."""
logger.info("Get bbox for FaultRoomSurface")
@@ -53,10 +61,7 @@ def get_spec(self) -> FaultRoomSurfaceSpecification:
def get_objectdata(self) -> DerivedObjectDescriptor:
"""Derive object data for FaultRoomSurface"""
return DerivedObjectDescriptor(
subtype="JSON",
layout="faultroom_triangulated",
efolder="maps",
fmt=(fmt := self.dataio.dict_fformat),
extension=self._validate_get_ext(fmt, "JSON", ValidFormats().dictionary),
table_index=None,
)
15 changes: 10 additions & 5 deletions src/fmu/dataio/providers/objectdata/_provider.py
@@ -168,19 +168,24 @@ class DictionaryDataProvider(ObjectDataProvider):
def classname(self) -> FMUClassEnum:
return FMUClassEnum.dictionary

def get_spec(self) -> None:
"""Derive data.spec for dict."""
@property
def extension(self) -> str:
return self._validate_get_ext(self.fmt, ValidFormats.dictionary)

@property
def fmt(self) -> str:
return self.dataio.dict_fformat

def get_bbox(self) -> None:
"""Derive data.bbox for dict."""

def get_spec(self) -> None:
"""Derive data.spec for dict."""

def get_objectdata(self) -> DerivedObjectDescriptor:
"""Derive object data for dict."""
return DerivedObjectDescriptor(
subtype="JSON",
layout="dictionary",
efolder="dictionaries",
fmt=(fmt := self.dataio.dict_fformat),
extension=self._validate_get_ext(fmt, "JSON", ValidFormats().dictionary),
table_index=None,
)
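
For orientation, a hypothetical provider written against the new pattern (it mirrors DictionaryDataProvider above; the class is not part of the commit, and import paths are taken from the diff headers):

from dataclasses import dataclass

from fmu.dataio._definitions import ValidFormats
from fmu.dataio.datastructure.meta.enums import FMUClassEnum
from fmu.dataio.providers.objectdata._base import (
    DerivedObjectDescriptor,
    ObjectDataProvider,
)


@dataclass
class HypotheticalJsonProvider(ObjectDataProvider):
    """Illustrative only: format and extension are computed properties."""

    @property
    def classname(self) -> FMUClassEnum:
        return FMUClassEnum.dictionary

    @property
    def fmt(self) -> str:
        # The format name still comes from the export settings.
        return self.dataio.dict_fformat

    @property
    def extension(self) -> str:
        # The extension is derived on access from fmt and the enum member.
        return self._validate_get_ext(self.fmt, ValidFormats.dictionary)

    def get_bbox(self) -> None:
        """No bounding box for a dictionary-like object."""

    def get_spec(self) -> None:
        """No specification for a dictionary-like object."""

    def get_objectdata(self) -> DerivedObjectDescriptor:
        # The descriptor no longer carries subtype, fmt or extension.
        return DerivedObjectDescriptor(
            layout="dictionary",
            efolder="dictionaries",
            table_index=None,
        )
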
34 changes: 22 additions & 12 deletions src/fmu/dataio/providers/objectdata/_tables.py
@@ -64,6 +64,17 @@ class DataFrameDataProvider(ObjectDataProvider):
def classname(self) -> FMUClassEnum:
return FMUClassEnum.table

@property
def extension(self) -> str:
return self._validate_get_ext(self.fmt, ValidFormats.table)

@property
def fmt(self) -> str:
return self.dataio.table_fformat

def get_bbox(self) -> None:
"""Derive data.bbox for pd.DataFrame."""

def get_spec(self) -> TableSpecification:
"""Derive data.spec for pd.DataFrame."""
logger.info("Get spec for pd.DataFrame (tables)")
@@ -72,18 +83,12 @@ def get_spec(self) -> TableSpecification:
size=int(self.obj.size),
)

def get_bbox(self) -> None:
"""Derive data.bbox for pd.DataFrame."""

def get_objectdata(self) -> DerivedObjectDescriptor:
"""Derive object data for pd.DataFrame."""
table_index = _derive_index(self.dataio.table_index, list(self.obj.columns))
return DerivedObjectDescriptor(
subtype="DataFrame",
layout="table",
efolder="tables",
fmt=(fmt := self.dataio.table_fformat),
extension=self._validate_get_ext(fmt, "DataFrame", ValidFormats().table),
table_index=table_index,
)

@@ -96,6 +101,17 @@ class ArrowTableDataProvider(ObjectDataProvider):
def classname(self) -> FMUClassEnum:
return FMUClassEnum.table

@property
def extension(self) -> str:
return self._validate_get_ext(self.fmt, ValidFormats.table)

@property
def fmt(self) -> str:
return self.dataio.arrow_fformat

def get_bbox(self) -> None:
"""Derive data.bbox for pyarrow.Table."""

def get_spec(self) -> TableSpecification:
"""Derive data.spec for pyarrow.Table."""
logger.info("Get spec for pyarrow (tables)")
@@ -104,17 +120,11 @@ def get_spec(self) -> TableSpecification:
size=self.obj.num_columns * self.obj.num_rows,
)

def get_bbox(self) -> None:
"""Derive data.bbox for pyarrow.Table."""

def get_objectdata(self) -> DerivedObjectDescriptor:
"""Derive object data from pyarrow.Table."""
table_index = _derive_index(self.dataio.table_index, self.obj.column_names)
return DerivedObjectDescriptor(
subtype="ArrowTable",
layout="table",
efolder="tables",
fmt=(fmt := self.dataio.arrow_fformat),
extension=self._validate_get_ext(fmt, "ArrowTable", ValidFormats().table),
table_index=table_index,
)
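
Both table providers now resolve their file metadata the same way; a sketch of the resulting invariant, assuming `provider` is a DataFrameDataProvider or ArrowTableDataProvider already built by objectdata_provider_factory:

from fmu.dataio._definitions import ValidFormats

# DataFrameDataProvider takes fmt from dataio.table_fformat, and
# ArrowTableDataProvider takes it from dataio.arrow_fformat; in both cases
# the extension is looked up in ValidFormats.table on access.
assert provider.extension == ValidFormats.table.value[provider.fmt]
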
(Diffs for the remaining 4 changed files are not shown.)
