Skip to content

Commit

Permalink
Finalize NaN as attribute values
Browse files Browse the repository at this point in the history
Solves #938
  • Loading branch information
MariusWirtz committed Jan 30, 2024
1 parent f1209a8 commit de9ca52
Show file tree
Hide file tree
Showing 3 changed files with 183 additions and 144 deletions.
160 changes: 81 additions & 79 deletions TM1py/Services/CellService.py
Original file line number Diff line number Diff line change
Expand Up @@ -2187,7 +2187,7 @@ def execute_mdx_csv(self, mdx: Union[str, MdxBuilder], top: int = None, skip: in
csv_dialect: 'csv.Dialect' = None, line_separator: str = "\r\n", value_separator: str = ",",
sandbox_name: str = None, include_attributes: bool = False, use_iterative_json: bool = False,
use_compact_json: bool = False, use_blob: bool = False, mdx_headers: bool = False,
**kwargs) -> (Dict, str):
**kwargs) -> str:
""" Optimized for performance. Get csv string of coordinates and values.
:param mdx: Valid MDX Query
Expand All @@ -2207,8 +2207,6 @@ def execute_mdx_csv(self, mdx: Union[str, MdxBuilder], top: int = None, skip: in
:param use_compact_json: bool
:param use_blob: Has better performance on datasets > 1M cells and lower memory footprint in any case.
:param mdx_headers: boolean, fully qualified hierarchy name as header instead of simple dimension name
:param empty_string_attribute_as_string: boolean, indicate whether you want to have empty strings (True) when
attributes are empty, or nan (False) when attributes are empty.
:return: String
"""
if use_blob:
Expand Down Expand Up @@ -2259,7 +2257,7 @@ def execute_view_csv(self, cube_name: str, view_name: str, private: bool = False
line_separator: str = "\r\n", value_separator: str = ",", sandbox_name: str = None,
use_iterative_json: bool = False, use_compact_json: bool = False, use_blob: bool = False,
arranged_axes: Tuple[List, List, List] = None, mdx_headers: bool = False,
empty_string_attribute_as_string: bool = False, **kwargs) -> (Dict, str):
**kwargs) -> str:
""" Optimized for performance. Get csv string of coordinates and values.
:param cube_name: String, name of the cube
Expand All @@ -2283,8 +2281,6 @@ def execute_view_csv(self, cube_name: str, view_name: str, private: bool = False
Allows function to skip retrieval of cellset composition.
E.g.: arranged_axes=(["Year"], ["Region","Product"], ["Period", "Version"])
:param mdx_headers: boolean, fully qualified hierarchy name as header instead of simple dimension name
:param empty_string_attribute_as_string: boolean, indicate whether you want to have empty strings (True) when
attributes are empty or nan (False) when attributes are empty.
:return: dict, String
"""
if use_blob:
Expand Down Expand Up @@ -2321,16 +2317,14 @@ def execute_view_csv(self, cube_name: str, view_name: str, private: bool = False
skip_consolidated_cells=skip_consolidated_cells,
skip_rule_derived_cells=skip_rule_derived_cells, csv_dialect=csv_dialect,
line_separator=line_separator, value_separator=value_separator,
sandbox_name=sandbox_name, mdx_headers=mdx_headers,
empty_string_attribute_as_string=empty_string_attribute_as_string, **kwargs)
sandbox_name=sandbox_name, mdx_headers=mdx_headers, **kwargs)

return self.extract_cellset_csv(
cellset_id=cellset_id, skip_zeros=skip_zeros, top=top, skip=skip,
skip_consolidated_cells=skip_consolidated_cells,
skip_rule_derived_cells=skip_rule_derived_cells, csv_dialect=csv_dialect,
line_separator=line_separator, value_separator=value_separator, sandbox_name=sandbox_name,
use_compact_json=use_compact_json, mdx_headers=mdx_headers,
empty_string_attribute_as_string=empty_string_attribute_as_string, **kwargs)
use_compact_json=use_compact_json, mdx_headers=mdx_headers, **kwargs)

def execute_mdx_elements_value_dict(self, mdx: str, top: int = None, skip: int = None, skip_zeros: bool = True,
skip_consolidated_cells: bool = False, skip_rule_derived_cells: bool = False,
Expand Down Expand Up @@ -2402,15 +2396,22 @@ def execute_mdx_dataframe(self, mdx: Union[str, MdxBuilder], top: int = None, sk
:param fillna_string_attributes_value: Any, value with which to replace na if fillna_string_attributes is True
:return: Pandas Dataframe
"""
# necessary to assure column order in line with cube view

if (fillna_numeric_attributes or fillna_string_attributes) and not include_attributes:
raise ValueError('Include attributes must be True if fillna_numeric or fillna_string is True.')

# necessary to assure column order in line with cube view
if shaped:
skip_zeros = False

if use_blob:
if any([
fillna_numeric_attributes,
fillna_numeric_attributes_value,
fillna_string_attributes,
fillna_string_attributes_value]
):
raise ValueError("fillna attributes' feature must not be used with use_blob as True")

raw_csv = self.execute_mdx_csv(
mdx=mdx,
top=top,
Expand All @@ -2434,10 +2435,10 @@ def execute_mdx_dataframe(self, mdx: Union[str, MdxBuilder], top: int = None, sk
sandbox_name=sandbox_name, include_attributes=include_attributes,
use_iterative_json=use_iterative_json, use_compact_json=use_compact_json,
shaped=shaped, mdx_headers=mdx_headers,
fillna_numeric=fillna_numeric_attributes,
fillna_numeric_value=fillna_numeric_attributes_value,
fillna_string=fillna_string_attributes,
fillna_string_value=fillna_string_attributes_value,
fillna_numeric_attributes=fillna_numeric_attributes,
fillna_numeric_attributes_value=fillna_numeric_attributes_value,
fillna_string_attributes=fillna_string_attributes,
fillna_string_attributes_value=fillna_string_attributes_value,
**kwargs)

@require_pandas
Expand Down Expand Up @@ -2676,8 +2677,7 @@ def execute_view_dataframe(self, cube_name: str, view_name: str, private: bool =
skip_rule_derived_cells: bool = False, sandbox_name: str = None,
use_iterative_json: bool = False, use_blob: bool = False, shaped: bool = False,
arranged_axes: Tuple[List, List, List] = None,
mdx_headers: bool = False, fillna_numeric:bool=False, fillna_string:bool = False,
fillna_numeric_value: Any = None, fillna_string_value:Any = None, **kwargs) -> 'pd.DataFrame':
mdx_headers: bool = False, **kwargs) -> 'pd.DataFrame':
""" Optimized for performance. Get Pandas DataFrame from an existing Cube View
Context dimensions are omitted in the resulting Dataframe !
Cells with Zero/null are omitted !
Expand Down Expand Up @@ -2736,9 +2736,7 @@ def execute_view_dataframe(self, cube_name: str, view_name: str, private: bool =
skip_consolidated_cells=skip_consolidated_cells,
skip_rule_derived_cells=skip_rule_derived_cells,
sandbox_name=sandbox_name, use_iterative_json=use_iterative_json,
shaped=shaped, mdx_headers=mdx_headers,
fillna_numeric=fillna_numeric, fillna_string=fillna_string,
**kwargs)
shaped=shaped, mdx_headers=mdx_headers, **kwargs)

def execute_view_cellcount(self, cube_name: str, view_name: str, private: bool = False, sandbox_name: str = None,
**kwargs) -> int:
Expand Down Expand Up @@ -3187,7 +3185,7 @@ def extract_cellset_raw(
skip_contexts=skip_contexts,
include_hierarchies=include_hierarchies,
sandbox_name=sandbox_name,
**kwargs)
**{**kwargs, 'delete_cellset': False})
cells = self.extract_cellset_cells_raw(cellset_id=cellset_id,
cell_properties=cell_properties,
top=top,
Expand Down Expand Up @@ -3698,8 +3696,6 @@ def extract_cellset_csv(
use_compact_json: bool = False,
include_headers: bool = True,
mdx_headers: bool = False,
fillna_numeric: bool = False,
fillna_string: bool = False,
**kwargs) -> str:
""" Execute cellset and return only the 'Content', in csv format
Expand All @@ -3718,16 +3714,15 @@ def extract_cellset_csv(
:param use_compact_json: boolean
:param include_headers: boolean
:param mdx_headers: boolean. Fully qualified hierarchy name as header instead of simple dimension name
:return: attributes: dict of attributes by dimension, and: Raw format from TM1.
:return: Raw format from TM1.
"""
delete_cellset = kwargs.pop('delete_cellset', True)
postpone_delete_cellset = fillna_numeric or fillna_string
delete_cellset = False if postpone_delete_cellset else delete_cellset

cube, _, rows, columns = self.extract_cellset_composition(
cellset_id,
delete_cellset=False,
sandbox_name=sandbox_name, **kwargs)
sandbox_name=sandbox_name,
**kwargs)

cellset_dict = self.extract_cellset_raw(
cellset_id,
Expand Down Expand Up @@ -3771,7 +3766,7 @@ def extract_cellset_csv_iter_json(
sandbox_name: str = None,
include_attributes: bool = False,
mdx_headers: bool = False,
**kwargs) -> tuple[dict[str, list[str]], str]:
**kwargs) -> str:
""" Execute cellset and return only the 'Content', in csv format
:param cellset_id: String; ID of existing cellset
Expand Down Expand Up @@ -3907,7 +3902,7 @@ def extract_cellset_csv_iter_json(

# comply with prior implementations: return empty string when cellset is empty
if csv_body.getvalue() == "":
return attributes_by_dimension, ""
return ""

# prepare header
if include_attributes:
Expand All @@ -3920,7 +3915,7 @@ def extract_cellset_csv_iter_json(
csv_header_writer.writerow(row_headers + column_headers + ['Value'])

cellset_response.close()
return attributes_by_dimension, csv_header.getvalue() + csv_body.getvalue().strip()
return csv_header.getvalue() + csv_body.getvalue().strip()

@require_pandas
def extract_cellset_dataframe(
Expand All @@ -3937,10 +3932,10 @@ def extract_cellset_dataframe(
use_compact_json: bool = False,
shaped: bool = False,
mdx_headers: bool = False,
fillna_numeric: bool = False,
fillna_numeric_value: Any = 0,
fillna_string: bool = False,
fillna_string_value: Any = '',
fillna_numeric_attributes: bool = False,
fillna_numeric_attributes_value: Any = 0,
fillna_string_attributes: bool = False,
fillna_string_attributes_value: Any = '',
**kwargs) -> 'pd.DataFrame':
""" Build pandas data frame from cellset_id
Expand All @@ -3966,60 +3961,67 @@ def extract_cellset_dataframe(
cellset_id=cellset_id, top=top, skip=skip, skip_zeros=skip_zeros,
skip_rule_derived_cells=skip_rule_derived_cells, skip_consolidated_cells=skip_consolidated_cells,
value_separator='~', sandbox_name=sandbox_name, include_attributes=include_attributes,
mdx_headers=mdx_headers, fillna_numeric=fillna_numeric, fillna_string=fillna_string,
fillna_numeric_value=fillna_numeric_value, fillna_string_value=fillna_string_value, **kwargs)
mdx_headers=mdx_headers, **kwargs)
else:
raw_csv = self.extract_cellset_csv(
cellset_id=cellset_id, top=top, skip=skip, skip_zeros=skip_zeros,
skip_rule_derived_cells=skip_rule_derived_cells, skip_consolidated_cells=skip_consolidated_cells,
value_separator='~', sandbox_name=sandbox_name, include_attributes=include_attributes,
use_compact_json=use_compact_json, mdx_headers=mdx_headers, fillna_numeric=fillna_numeric,
fillna_string=fillna_string, fillna_numeric_value=fillna_numeric_value,
fillna_string_value=fillna_string_value, **kwargs)

attribute_types_by_dimension = {}
use_compact_json=use_compact_json, mdx_headers=mdx_headers,
# dont delete cellset if attribute types must be retrieved later
delete_cellset=not any([fillna_string_attributes, fillna_string_attributes]), **kwargs)

if fillna_numeric or fillna_string:
_, _, rows, columns = self.extract_cellset_composition(
cellset_id,
delete_cellset=False,
sandbox_name=sandbox_name, **kwargs)

metadata = self.extract_cellset_metadata_raw(cellset_id=cellset_id,
elem_properties=['Name'],
member_properties=['Name', 'Attributes'] if include_attributes else None,
top=1,
skip=0,
skip_contexts=True,
include_hierarchies=False,
sandbox_name=sandbox_name,
delete_cellset=True,
**kwargs)

# gets the attribute names from the first member from the first tuple of each axis.
attributes_by_dimension = dict(zip(
rows + columns,
[list(member['Attributes'].keys()) for axes in metadata['Axes'][::-1] for member in
axes['Tuples'][0]['Members']]))
attribute_types_by_dimension = None
if fillna_string_attributes or fillna_string_attributes:
attribute_types_by_dimension = self._extract_attribute_types_by_dimension(
cellset_id=cellset_id,
sandbox_name=sandbox_name,
delete_cellset=True,
**kwargs)

element_service = self.get_element_service()
return build_dataframe_from_csv(raw_csv, sep="~", shaped=shaped,
fillna_numeric_attributes=fillna_numeric_attributes,
fillna_string_attributes=fillna_string_attributes,
fillna_numeric_attributes_value=fillna_numeric_attributes_value,
fillna_string_attributes_value=fillna_string_attributes_value,
attribute_types_by_dimension=attribute_types_by_dimension, **kwargs)

for dimension in rows + columns:
attribute_types_by_dimension[dimension] = element_service.get_element_types(
'}ElementAttributes_' + dimension.split('].[')[0][1:],
'}ElementAttributes_' + dimension.split('].[')[0][1:])
def _extract_attribute_types_by_dimension(self, cellset_id: str, sandbox_name: str, delete_cellset: bool, **kwargs):
    """ Determine the element-attribute types per dimension for an existing cellset.

    Reads the cellset composition (row and column dimensions), takes the attribute
    names from the first member of the first tuple on each axis, and maps each of
    those attribute names to its element type as defined in the corresponding
    ``}ElementAttributes_<dimension>`` cube.

    :param cellset_id: String; ID of an existing cellset
    :param sandbox_name: String; name of the sandbox, or None
    :param delete_cellset: bool; if True the cellset is deleted during the
        metadata extraction step
    :return: dict of dicts: {dimension: {attribute_name: attribute_type}}
    """
    attribute_types_by_dimension = {}

    _, _, rows, columns = self.extract_cellset_composition(
        cellset_id,
        delete_cellset=False,
        sandbox_name=sandbox_name, **kwargs)

    # top=1 is sufficient: attribute names are identical across members of an axis
    metadata = self.extract_cellset_metadata_raw(
        cellset_id=cellset_id,
        elem_properties=['Name'],
        member_properties=['Name', 'Attributes'],
        top=1,
        skip=0,
        skip_contexts=True,
        include_hierarchies=False,
        sandbox_name=sandbox_name,
        delete_cellset=delete_cellset,
        **kwargs)

    # gets the attribute names from the first member from the first tuple of each axis.
    # Axes are reversed so that the order matches rows + columns.
    attributes_by_dimension = dict(zip(
        rows + columns,
        [list(member['Attributes'].keys()) for axes in metadata['Axes'][::-1] for member in
         axes['Tuples'][0]['Members']]))

    element_service = self.get_element_service()
    for dimension in rows + columns:
        # '[dim].[hier]' -> 'dim'; hoisted so the split is computed once
        dimension_name = dimension.split('].[')[0][1:]
        attribute_cube = '}ElementAttributes_' + dimension_name
        element_types = element_service.get_element_types(attribute_cube, attribute_cube)

        # keep only the attributes that actually appear on the cellset members
        attribute_types_by_dimension[dimension] = {
            attribute_name: attribute_type
            for attribute_name, attribute_type in element_types.items()
            if attribute_name in attributes_by_dimension[dimension]}

    return attribute_types_by_dimension

@tidy_cellset
@require_pandas
Expand Down
Loading

0 comments on commit de9ca52

Please sign in to comment.