Skip to content

Commit

Permalink
REF: collect pytables DataCol.set_data calls in one place (pandas-dev…
Browse files: browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and proost committed Dec 19, 2019
1 parent 6eea19c commit b275e0d
Showing 1 changed file with 8 additions and 51 deletions.
59 changes: 8 additions & 51 deletions pandas/io/pytables.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2334,26 +2334,14 @@ def set_kind(self):
if self.typ is None:
self.typ = getattr(self.description, self.cname, None)

def set_atom(self, block, data_converted, use_str: bool):
def set_atom(self, block):
""" create and setup my atom from the block b """

# short-cut certain block types
if block.is_categorical:
self.set_atom_categorical(block)
elif block.is_datetimetz:
self.set_atom_datetime64tz(block)
elif block.is_datetime:
self.set_atom_datetime64(block)
elif block.is_timedelta:
self.set_atom_timedelta64(block)
elif block.is_complex:
self.set_atom_complex(block)

elif use_str:
self.set_atom_string(data_converted)
else:
# set as a data block
self.set_atom_data(block)

@classmethod
def _get_atom(cls, values: Union[np.ndarray, ABCExtensionArray]) -> "Col":
Expand Down Expand Up @@ -2391,10 +2379,6 @@ def _get_atom(cls, values: Union[np.ndarray, ABCExtensionArray]) -> "Col":
def get_atom_string(cls, shape, itemsize):
return _tables().StringCol(itemsize=itemsize, shape=shape[0])

def set_atom_string(self, data_converted: np.ndarray):
self.kind = "string"
self.set_data(data_converted)

@classmethod
def get_atom_coltype(cls, kind: str) -> Type["Col"]:
""" return the PyTables column class for this column """
Expand All @@ -2411,60 +2395,35 @@ def get_atom_coltype(cls, kind: str) -> Type["Col"]:
def get_atom_data(cls, shape, kind: str) -> "Col":
return cls.get_atom_coltype(kind=kind)(shape=shape[0])

def set_atom_complex(self, block):
self.kind = block.dtype.name
self.set_data(block.values)

def set_atom_data(self, block):
self.kind = block.dtype.name
self.set_data(block.values)

def set_atom_categorical(self, block):
# currently only supports a 1-D categorical
# in a 1-D block

values = block.values
codes = values.codes

if values.ndim > 1:
raise NotImplementedError("only support 1-d categoricals")

assert codes.dtype.name.startswith("int"), codes.dtype.name

# write the codes; must be in a block shape
self.ordered = values.ordered
self.set_data(block.values)

# write the categories
self.meta = "category"
self.metadata = np.array(block.values.categories, copy=False).ravel()
assert self.kind == "integer", self.kind
assert self.dtype == codes.dtype.name, codes.dtype.name
self.metadata = np.array(values.categories, copy=False).ravel()

@classmethod
def get_atom_datetime64(cls, shape):
return _tables().Int64Col(shape=shape[0])

def set_atom_datetime64(self, block):
self.kind = "datetime64"
self.set_data(block.values)

def set_atom_datetime64tz(self, block):

# store a converted timezone
self.tz = _get_tz(block.values.tz)

self.kind = "datetime64"
self.set_data(block.values)

@classmethod
def get_atom_timedelta64(cls, shape):
return _tables().Int64Col(shape=shape[0])

def set_atom_timedelta64(self, block):
self.kind = "timedelta64"
self.set_data(block.values)

@property
def shape(self):
return getattr(self.data, "shape", None)
Expand Down Expand Up @@ -3946,7 +3905,7 @@ def get_blk_items(mgr, blocks):
existing_col = None

new_name = name or f"values_block_{i}"
data_converted, use_str = _maybe_convert_for_string_atom(
data_converted = _maybe_convert_for_string_atom(
new_name,
b,
existing_col=existing_col,
Expand All @@ -3961,7 +3920,8 @@ def get_blk_items(mgr, blocks):
col = klass.create_for_block(i=i, name=new_name, version=self.version)
col.values = list(b_items)
col.typ = typ
col.set_atom(block=b, data_converted=data_converted, use_str=use_str)
col.set_atom(block=b)
col.set_data(data_converted)
col.update_info(self.info)
col.set_pos(j)

Expand Down Expand Up @@ -4830,10 +4790,9 @@ def _unconvert_index(data, kind: str, encoding=None, errors="strict"):
def _maybe_convert_for_string_atom(
name: str, block, existing_col, min_itemsize, nan_rep, encoding, errors
):
use_str = False

if not block.is_object:
return block.values, use_str
return block.values

dtype_name = block.dtype.name
inferred_type = lib.infer_dtype(block.values, skipna=False)
Expand All @@ -4848,9 +4807,7 @@ def _maybe_convert_for_string_atom(
)

elif not (inferred_type == "string" or dtype_name == "object"):
return block.values, use_str

use_str = True
return block.values

block = block.fillna(nan_rep, downcast=False)
if isinstance(block, list):
Expand Down Expand Up @@ -4893,7 +4850,7 @@ def _maybe_convert_for_string_atom(
itemsize = eci

data_converted = data_converted.astype(f"|S{itemsize}", copy=False)
return data_converted, use_str
return data_converted


def _convert_string_array(data, encoding, errors, itemsize=None):
Expand Down

0 comments on commit b275e0d

Please sign in to comment.