Skip to content

Commit

Permalink
Finish up shaped arg for _dataframe functions
Browse files Browse the repository at this point in the history
  • Loading branch information
MariusWirtz committed Apr 12, 2023
1 parent 9056758 commit dac6867
Show file tree
Hide file tree
Showing 6 changed files with 201 additions and 61 deletions.
82 changes: 55 additions & 27 deletions TM1py/Services/CellService.py
Original file line number Diff line number Diff line change
Expand Up @@ -1964,6 +1964,7 @@ def execute_mdx_dataframe(self, mdx: Union[str, MdxBuilder], top: int = None, sk
Comes at a cost of 3-5% performance.
:param use_compact_json: bool
:param use_blob: Has better performance on datasets > 1M cells and lower memory footprint in any case.
:param shaped: preserve shape of view/mdx in data frame
:return: Pandas Dataframe
"""
if use_blob:
Expand All @@ -1980,7 +1981,7 @@ def execute_mdx_dataframe(self, mdx: Union[str, MdxBuilder], top: int = None, sk
value_separator="~",
use_blob=use_blob)

return build_dataframe_from_csv(raw_csv, sep='~', shaped=shaped, **kwargs)
return build_dataframe_from_csv(raw_csv, sep='~', skip_zeros=skip_zeros, shaped=shaped, **kwargs)

cellset_id = self.create_cellset(mdx, sandbox_name=sandbox_name, **kwargs)
return self.extract_cellset_dataframe(cellset_id, top=top, skip=skip, skip_zeros=skip_zeros,
Expand All @@ -1993,25 +1994,59 @@ def execute_mdx_dataframe(self, mdx: Union[str, MdxBuilder], top: int = None, sk

@require_pandas
def execute_mdx_dataframe_shaped(self, mdx: str, sandbox_name: str = None, display_attribute: bool = False,
                                 use_iterative_json: bool = False, use_blob: bool = False,
                                 **kwargs) -> 'pd.DataFrame':
    """ Retrieves data from cube in the shape of the query.
    Dimensions on rows can be stacked. One dimension must be placed on columns. Title selections are ignored.

    :param mdx: a valid MDX query
    :param sandbox_name: str
    :param display_attribute: bool, show element name or first attribute from MDX PROPERTIES clause.
        Only supported in the default (cellset-based) retrieval path.
    :param use_iterative_json: bool, stream the JSON response iteratively (lower memory footprint)
    :param use_blob: bool, retrieve data via blob (better performance on large datasets)
    :param kwargs: forwarded to the underlying retrieval function
    :return: pd.DataFrame shaped like the query
    :raises ValueError: if 'display_attribute' is combined with 'use_blob' or 'use_iterative_json',
        or if 'use_blob' and 'use_iterative_json' are both True
    """
    # attributes can only be resolved through the cellset-based (default) path
    if display_attribute and any([use_blob, use_iterative_json]):
        raise ValueError("When 'use_blob' or 'use_iterative_json' is True, 'display_attribute' must be False")

    # default case: create a cellset and extract it in shaped form
    if not any([use_blob, use_iterative_json]):
        cellset_id = self.create_cellset(
            mdx=mdx,
            sandbox_name=sandbox_name)
        return self.extract_cellset_dataframe_shaped(
            cellset_id=cellset_id,
            delete_cellset=True,
            sandbox_name=sandbox_name,
            display_attribute=display_attribute,
            **kwargs)

    if all([use_blob, use_iterative_json]):
        raise ValueError("'use_blob' and 'use_iterative_json' must not be used together")

    # ijson approach
    if use_iterative_json:
        return self.execute_mdx_dataframe(
            mdx=mdx,
            shaped=True,
            sandbox_name=sandbox_name,
            use_iterative_json=use_iterative_json,
            use_blob=False,
            **kwargs)

    # blob approach
    return self.execute_mdx_dataframe(
        mdx=mdx,
        shaped=True,
        sandbox_name=sandbox_name,
        use_blob=True,
        **kwargs)

@require_pandas
def execute_view_dataframe_shaped(self, cube_name: str, view_name: str, private: bool = False,
sandbox_name: str = None, use_iterative_json: bool = False,
use_blob: bool = False,
**kwargs) -> 'pd.DataFrame':
use_blob: bool = False, **kwargs) -> 'pd.DataFrame':
""" Retrieves data from cube in the shape of the query.
Dimensions on rows can be stacked. One dimension must be placed on columns. Title selections are ignored.
Expand All @@ -2020,10 +2055,13 @@ def execute_view_dataframe_shaped(self, cube_name: str, view_name: str, private:
:param private:
:param sandbox_name: str
:param use_blob
:param use_iterative_json
:param kwargs:
:return:
"""
if not use_blob and not use_iterative_json:

# default approach
if not any([use_blob, use_iterative_json]):
cellset_id = self.create_cellset_from_view(
cube_name=cube_name,
view_name=view_name,
Expand All @@ -2035,9 +2073,10 @@ def execute_view_dataframe_shaped(self, cube_name: str, view_name: str, private:
sandbox_name=sandbox_name,
**kwargs)

if use_blob and use_iterative_json:
if all([use_blob, use_iterative_json]):
raise ValueError("'use_blob' and 'use_iterative_json' must not be used together")

# ijson approach
if use_iterative_json:
return self.execute_view_dataframe(
cube_name=cube_name,
Expand All @@ -2046,32 +2085,21 @@ def execute_view_dataframe_shaped(self, cube_name: str, view_name: str, private:
shaped=True,
sandbox_name=sandbox_name,
use_iterative_json=use_iterative_json,
# use blob for public views
use_blob=False)
use_blob=False,
**kwargs)

# case use_blob
# blob approach
if private:
raise ValueError("view must be public when 'use_blob' argument is True")

view_service = ViewService(self._rest)
view = view_service.get(cube_name=cube_name, view_name=view_name, private=False)

if isinstance(view, MDXView):
mdx = view.MDX
return self.execute_mdx_dataframe_shaped(
mdx=mdx,
sandbox_name=sandbox_name,
use_blob=True,
display_attribute=False)

return self.execute_view_dataframe(
cube_name=cube_name,
view_name=view_name,
private=private,
shaped=True,
sandbox_name=sandbox_name,
# use blob for public views
use_blob=True)
use_blob=True,
**kwargs)

@require_pandas
def execute_view_dataframe_pivot(self, cube_name: str, view_name: str, private: bool = False, dropna: bool = False,
Expand Down Expand Up @@ -2197,7 +2225,7 @@ def execute_view_dataframe(self, cube_name: str, view_name: str, private: bool =
value_separator="~",
use_blob=True,
**kwargs)
return build_dataframe_from_csv(raw_csv, sep='~', shaped=shaped, **kwargs)
return build_dataframe_from_csv(raw_csv, sep='~', skip_zeros=skip_zeros, shaped=shaped, **kwargs)

cellset_id = self.create_cellset_from_view(cube_name=cube_name, view_name=view_name, private=private,
sandbox_name=sandbox_name, **kwargs)
Expand Down Expand Up @@ -3167,7 +3195,7 @@ def extract_cellset_dataframe(
value_separator='~', sandbox_name=sandbox_name, include_attributes=include_attributes,
use_compact_json=use_compact_json, **kwargs)

return build_dataframe_from_csv(raw_csv, sep="~", shaped=shaped, **kwargs)
return build_dataframe_from_csv(raw_csv, sep="~", skip_zeros=skip_zeros, shaped=shaped, **kwargs)

@tidy_cellset
@require_pandas
Expand Down Expand Up @@ -3562,7 +3590,7 @@ def _execute_view_csv_use_blob(self, cube_name: str, view_name: str, top: int, s
# alter native-view to assure only one element per dimension in title
native_view = view_service.get_native_view(cube_name=cube_name, view_name=view_name, private=False)
for title in native_view.titles:
title.subset = AnonymousSubset(
title._subset = AnonymousSubset(
dimension_name=title.dimension_name,
elements=[title.selected])
native_view.name = view_name = unique_name
Expand Down
16 changes: 9 additions & 7 deletions TM1py/Services/ElementService.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ def get_elements_dataframe(self, dimension_name: str = None, hierarchy_name: str

if not isinstance(elements, str):
if isinstance(elements, Iterable):
elements = "{" + ",".join(f"[{dimension_name}].[{hierarchy_name}].[{member}]" for member in elements) + "}"
elements = "{" + ",".join(
f"[{dimension_name}].[{hierarchy_name}].[{member}]" for member in elements) + "}"
else:
raise ValueError("Argument 'element_selection' must be None or str")

Expand Down Expand Up @@ -534,10 +535,11 @@ def get_levels_count(self, dimension_name: str, hierarchy_name: str, **kwargs) -
def get_element_types(self, dimension_name: str, hierarchy_name: str,
skip_consolidations: bool = False, **kwargs) -> CaseAndSpaceInsensitiveDict:
url = format_url(
"/api/v1/Dimensions('{}')/Hierarchies('{}')/Elements?$select=Name,Type{}",
"/api/v1/Dimensions('{}')/Hierarchies('{}')/Elements?$select=Name,Type",
dimension_name,
hierarchy_name,
"&$filter=Type ne 3" if skip_consolidations else "")
hierarchy_name)
if skip_consolidations:
url += "&$filter=Type ne 3"
response = self._rest.GET(url, **kwargs)

result = CaseAndSpaceInsensitiveDict()
Expand All @@ -548,9 +550,9 @@ def get_element_types(self, dimension_name: str, hierarchy_name: str,
def get_element_types_from_all_hierarchies(
self, dimension_name: str, skip_consolidations: bool = False, **kwargs) -> CaseAndSpaceInsensitiveDict:
url = format_url(
"/api/v1/Dimensions('{}')?$expand=Hierarchies($select=Elements;$expand=Elements($select=Name,Type{}",
dimension_name,
";$filter=Type ne 3))" if skip_consolidations else "))")
"/api/v1/Dimensions('{}')?$expand=Hierarchies($select=Elements;$expand=Elements($select=Name,Type",
dimension_name)
url += ";$filter=Type ne 3))" if skip_consolidations else "))"
response = self._rest.GET(url, **kwargs)

result = CaseAndSpaceInsensitiveDict()
Expand Down
3 changes: 2 additions & 1 deletion TM1py/Services/PowerBiService.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def execute_mdx(self, mdx, **kwargs) -> 'pd.DataFrame':
return self.cells.execute_mdx_dataframe_shaped(mdx, **kwargs)

@require_pandas
def execute_view(self, cube_name, view_name, private, use_iterative_json=False, use_blob=False, **kwargs) -> 'pd.DataFrame':
def execute_view(self, cube_name: str, view_name: str, private: bool, use_iterative_json=False, use_blob=False,
**kwargs) -> 'pd.DataFrame':
return self.cells.execute_view_dataframe_shaped(
cube_name,
view_name,
Expand Down
13 changes: 10 additions & 3 deletions TM1py/Utils/Utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,24 +425,31 @@ def build_csv_from_cellset_dict(
return csv_content.getvalue().strip()


def build_dataframe_from_csv(raw_csv, sep='~', skip_zeros: bool = True, shaped: bool = False,
                             **kwargs) -> 'pd.DataFrame':
    """ Build a pandas DataFrame from a raw CSV string produced by a cellset extraction.

    :param raw_csv: CSV payload as a single string; falsy input yields an empty DataFrame
    :param sep: column separator used in raw_csv
    :param skip_zeros: passed as 'dropna' to pivot_table when shaping; drops all-NA entries
    :param shaped: if True, pivot the frame to preserve the shape of the view/MDX query
    :param kwargs: forwarded to pandas.read_csv (e.g. a custom 'dtype' mapping)
    :return: pd.DataFrame
    """
    if not raw_csv:
        return pd.DataFrame()

    # make sure all element names are strings and values column is derived from data
    if 'dtype' not in kwargs:
        kwargs['dtype'] = {'Value': None, **{col: str for col in range(999)}}
    try:
        df = pd.read_csv(StringIO(raw_csv), sep=sep, na_values=["", None], keep_default_na=False, **kwargs)
    except ValueError:
        # retry with dtype 'str' for results with a mixed value column
        kwargs['dtype'] = {'Value': str, **{col: str for col in range(999)}}
        df = pd.read_csv(StringIO(raw_csv), sep=sep, na_values=["", None], keep_default_na=False, **kwargs)

    if not shaped:
        return df

    # due to csv creation logic, last column is bottom dimension from the column selection
    return df.pivot_table(
        index=tuple(df.columns[:-2]),
        aggfunc="sum",
        columns=df.columns[-2],
        values=df.columns[-1],
        dropna=skip_zeros,
        sort=False).reset_index()


Expand Down
Loading

0 comments on commit dac6867

Please sign in to comment.