Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: make pytables get_atom_data non-stateful #30074

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 20 additions & 13 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
from pandas.io.formats.printing import adjoin, pprint_thing

if TYPE_CHECKING:
from tables import File, Node # noqa:F401
from tables import File, Node, Col # noqa:F401


# versioning attribute
Expand Down Expand Up @@ -1092,6 +1092,9 @@ def remove(self, key: str, where=None, start=None, stop=None):
except KeyError:
# the key is not a valid store, re-raising KeyError
raise
except AssertionError:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what are these from?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are unrelated to most of the PR, but related to the push a couple of weeks ago to get rid of `except Exception`. In cases where we can't (yet) get rid of `except Exception`, at least re-raising assertion errors is useful for debugging.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is a sticking point, I'm happy to revert and do this elsewhere.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's fine. If possible, can you add a comment indicating where it's raised from?

# surface any assertion errors for e.g. debugging
raise
except Exception:
# In tests we get here with ClosedFileError, TypeError, and
# _table_mod.NoSuchNodeError. TODO: Catch only these?
Expand Down Expand Up @@ -1519,6 +1522,9 @@ def info(self) -> str:
if s is not None:
keys.append(pprint_thing(s.pathname or k))
values.append(pprint_thing(s or "invalid_HDFStore node"))
except AssertionError:
# surface any assertion errors for e.g. debugging
raise
except Exception as detail:
keys.append(k)
dstr = pprint_thing(detail)
Expand Down Expand Up @@ -1680,7 +1686,7 @@ def _write_to_group(
self._handle.remove_node(group, recursive=True)
group = None

# we don't want to store a table node at all if are object is 0-len
# we don't want to store a table node at all if our object is 0-len
# as there are not dtypes
if getattr(value, "empty", None) and (format == "table" or append):
return
Expand Down Expand Up @@ -2355,11 +2361,9 @@ def set_atom_string(self, itemsize: int, data_converted: np.ndarray):
self.typ = self.get_atom_string(data_converted.shape, itemsize)
self.set_data(data_converted.astype(f"|S{itemsize}", copy=False))

def get_atom_coltype(self, kind=None):
def get_atom_coltype(self, kind: str) -> Type["Col"]:
""" return the PyTables column class for this column """
if kind is None:
kind = self.kind
if self.kind.startswith("uint"):
if kind.startswith("uint"):
k4 = kind[4:]
col_name = f"UInt{k4}Col"
else:
Expand All @@ -2368,8 +2372,8 @@ def get_atom_coltype(self, kind=None):

return getattr(_tables(), col_name)

def get_atom_data(self, block, kind=None):
return self.get_atom_coltype(kind=kind)(shape=block.shape[0])
def get_atom_data(self, shape, kind: str) -> "Col":
return self.get_atom_coltype(kind=kind)(shape=shape[0])

def set_atom_complex(self, block):
self.kind = block.dtype.name
Expand All @@ -2379,7 +2383,7 @@ def set_atom_complex(self, block):

def set_atom_data(self, block):
self.kind = block.dtype.name
self.typ = self.get_atom_data(block)
self.typ = self.get_atom_data(block.shape, kind=block.dtype.name)
self.set_data(block.values)

def set_atom_categorical(self, block):
Expand All @@ -2388,19 +2392,22 @@ def set_atom_categorical(self, block):

values = block.values
codes = values.codes
self.kind = "integer"
self.dtype = codes.dtype.name

if values.ndim > 1:
raise NotImplementedError("only support 1-d categoricals")

assert codes.dtype.name.startswith("int"), codes.dtype.name

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I follow the motivation for this. Seems out of scope.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making sure that I understand correctly that the codes are always signed integers.

# write the codes; must be in a block shape
self.ordered = values.ordered
self.typ = self.get_atom_data(block, kind=codes.dtype.name)
self.typ = self.get_atom_data(block.shape, kind=codes.dtype.name)
self.set_data(block.values)

# write the categories
self.meta = "category"
self.metadata = np.array(block.values.categories, copy=False).ravel()
assert self.kind == "integer", self.kind
assert self.dtype == codes.dtype.name, codes.dtype.name

def get_atom_datetime64(self, block):
return _tables().Int64Col(shape=block.shape[0])
Expand Down Expand Up @@ -2553,7 +2560,7 @@ def validate_names(self):
def get_atom_string(self, shape, itemsize):
return _tables().StringCol(itemsize=itemsize)

def get_atom_data(self, block, kind=None):
def get_atom_data(self, shape, kind: str) -> "Col":
return self.get_atom_coltype(kind=kind)()

def get_atom_datetime64(self, block):
Expand Down