Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalise data loading #50

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
88 changes: 39 additions & 49 deletions ubermagtable/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,9 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ubermagutil.typesystem as ts
import ubermagutil.units

import ubermagtable.util as uu


@ts.typesystem(
data=ts.Typed(expected_type=pd.DataFrame), units=ts.Typed(expected_type=dict)
)
class Table:
"""Tabular data class.

Expand Down Expand Up @@ -57,59 +51,50 @@ def __init__(self, data, units, x=None, attributes=None):
self.x = x
self.attributes = attributes if attributes is not None else {}
self.attributes.setdefault("fourierspace", False)
# Detect duplicated lines as a last step to make use of the checks when
# assigning 'x' as independent variable.
self._duplicated_lines = any(self.data.duplicated(subset=self.x, keep="last"))
if self._duplicated_lines:
self.data.drop_duplicates(
subset=self.x,
keep="last",
inplace=True,
ignore_index=True, # reset the index to 0, 1, ..., n-1
)

@classmethod
def fromfile(cls, filename, /, x=None, rename=True):
"""Reads an OOMMF ``.odt`` or mumax3 ``.txt`` scalar data file and
returns a ``ubermagtable.Table`` object.

Parameters
----------
filename : str

OOMMF ``.odt`` or mumax3 ``.txt`` file.

x : str, optional

Independent variable name. Defaults to ``None``.

rename : bool, optional

If ``rename=True``, the column names are renamed with their shorter
versions. Defaults to ``True``.
@property
def data(self):
"""Scalar data of the drive.

Returns
-------
ubermagtable.Table

Table object.

Examples
--------
1. Defining ``ubermagtable.Table`` by reading an OOMMF ``.odt`` file.
pd.DataFrame
"""
return self._data

>>> import os
>>> import ubermagtable as ut
...
>>> odtfile = os.path.join(os.path.dirname(__file__),
... 'tests', 'test_sample',
... 'oommf-hysteresis1.odt')
>>> table = ut.Table.fromfile(odtfile, x='B_hysteresis')
@data.setter
def data(self, data):
    """Store ``data`` as the scalar data of the table.

    Parameters
    ----------
    data : pandas.DataFrame

        Object kept as the table's data.

    Raises
    ------
    TypeError

        If ``data`` is not a ``pandas.DataFrame``.

    """
    if isinstance(data, pd.DataFrame):
        self._data = data
        return
    raise TypeError(f"Invalid {type(data)=}; expected 'pandas.DataFrame'.")

2. Defining ``ubermagtable.Table`` by reading a mumax3 ``.txt`` file.
@property
def units(self):
"""Units of the scalar data.

>>> odtfile = os.path.join(os.path.dirname(__file__),
... 'tests', 'test_sample', 'mumax3-file1.txt')
>>> table = ut.Table.fromfile(odtfile, x='t')
Returns
-------
dict

Keys are the columns in the ``data`` property, values the respective units.
"""
cols = uu.columns(filename, rename=rename)
return self._units

return cls(
data=pd.DataFrame(uu.data(filename), columns=cols),
units=uu.units(filename, rename=rename),
x=x,
)
@units.setter
def units(self, units):
    """Store ``units`` as the units mapping of the table.

    Parameters
    ----------
    units : dict

        Object kept as the table's units.

    Raises
    ------
    TypeError

        If ``units`` is not a ``dict``.

    """
    if not isinstance(units, dict):
        # Quote only the type name, mirroring the message of the ``data`` setter.
        raise TypeError(f"Invalid {type(units)=}; expected 'dict'.")
    self._units = units

@property
def x(self):
Expand Down Expand Up @@ -199,6 +184,11 @@ def xmax(self):
"""
return self.data[self.x].iloc[-1]

@property
def deduplicated(self):
"""Indicate whether duplicated rows were detected when the table was created.

``__init__`` checks ``data`` for rows duplicated with respect to ``x`` and, if
any are found, drops them keeping the last occurrence. This property exposes
the result of that check.

Returns
-------
bool

"""
return self._duplicated_lines

def apply(self, func, columns=None, args=(), **kwargs):
r"""Apply function.

Expand Down
33 changes: 31 additions & 2 deletions ubermagtable/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,34 @@ def test_init(self):
assert isinstance(table.data, pd.DataFrame)

def test_fromfile(self):
"""Load each sample file with and without renaming and compare the tables."""
for odtfile in self.odtfiles:
table = ut.Table.fromfile(odtfile, rename=False)
check_table(table)

# Same file again, this time with renamed columns.
table_short_names = ut.Table.fromfile(odtfile, rename=True)
check_table(table_short_names)

# Renaming must not change the number of rows or columns.
assert len(table.data) == len(table_short_names.data)
assert len(table.data.columns) == len(table_short_names.data.columns)

def test_columns(self):
for odtfile in self.odtfiles:
for rename in [True, False]:
table = ut.Table.fromfile(odtfile)
check_table(table)
table = ut.Table.fromfile(odtfile, rename=rename)
columns = table.data.columns
assert all(isinstance(column, str) for column in columns)
assert len(columns) == len(set(columns)) # unique column names

def test_units(self):
"""Check that ``units`` is a dict with string keys and string values."""
for odtfile in self.odtfiles:
for rename in [True, False]:
table = ut.Table.fromfile(odtfile, rename=rename)
units = table.units
assert isinstance(units, dict)
assert all(isinstance(unit, str) for unit in units.keys())
assert all(isinstance(unit, str) for unit in units.values())
assert "J" in units.values()  # an energy column (J) is expected in every file
assert "" in units.values()  # so is at least one column without units

def test_xy(self):
table = ut.Table.fromfile(self.odtfiles[0], x="t")
Expand Down Expand Up @@ -178,6 +202,11 @@ def test_oommf_mel(self):
assert len(columns) == 16

def test_oommf_issue1(self):
"""The odt file contains columns ``Oxs_Exchange6Ngbr:...`` and
``My_Exchange6Ngbr:...``. During processing we remove the ``Oxs_`` or ``My_``
prefix and subsequently lose the "duplicated" columns in the pandas dataframe.

"""
table = ut.Table.fromfile(self.odtfiles[-1])
columns = table.data.columns.to_list()
assert len(columns) == 30
Expand Down
55 changes: 0 additions & 55 deletions ubermagtable/tests/test_util.py

This file was deleted.

2 changes: 0 additions & 2 deletions ubermagtable/util/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
"""Utility tools"""
from .util import columns, data, units
Loading
Loading