From dac6867bdb1b470da21a167196c3ae678e3d2af4 Mon Sep 17 00:00:00 2001 From: MariusWirtz Date: Tue, 11 Apr 2023 20:06:24 +0200 Subject: [PATCH] Finish up shaped arg for _dataframe functions --- TM1py/Services/CellService.py | 82 +++++++++++------ TM1py/Services/ElementService.py | 16 ++-- TM1py/Services/PowerBiService.py | 3 +- TM1py/Utils/Utils.py | 13 ++- Tests/PowerBiService_test.py | 146 ++++++++++++++++++++++++++----- Tests/ServerService_test.py | 2 + 6 files changed, 201 insertions(+), 61 deletions(-) diff --git a/TM1py/Services/CellService.py b/TM1py/Services/CellService.py index 788fbfdf..2c60ef99 100644 --- a/TM1py/Services/CellService.py +++ b/TM1py/Services/CellService.py @@ -1964,6 +1964,7 @@ def execute_mdx_dataframe(self, mdx: Union[str, MdxBuilder], top: int = None, sk Comes at a cost of 3-5% performance. :param use_compact_json: bool :param use_blob: Has better performance on datasets > 1M cells and lower memory footprint in any case. + :param shaped: preserve shape of view/mdx in data frame :return: Pandas Dataframe """ if use_blob: @@ -1980,7 +1981,7 @@ def execute_mdx_dataframe(self, mdx: Union[str, MdxBuilder], top: int = None, sk value_separator="~", use_blob=use_blob) - return build_dataframe_from_csv(raw_csv, sep='~', shaped=shaped, **kwargs) + return build_dataframe_from_csv(raw_csv, sep='~', skip_zeros=skip_zeros, shaped=shaped, **kwargs) cellset_id = self.create_cellset(mdx, sandbox_name=sandbox_name, **kwargs) return self.extract_cellset_dataframe(cellset_id, top=top, skip=skip, skip_zeros=skip_zeros, @@ -1993,25 +1994,59 @@ def execute_mdx_dataframe(self, mdx: Union[str, MdxBuilder], top: int = None, sk @require_pandas def execute_mdx_dataframe_shaped(self, mdx: str, sandbox_name: str = None, display_attribute: bool = False, + use_iterative_json: bool = False, use_blob: bool = False, **kwargs) -> 'pd.DataFrame': """ Retrieves data from cube in the shape of the query. Dimensions on rows can be stacked. One dimension must be placed on columns. Title selections are ignored. :param mdx: :param sandbox_name: str + :param use_blob + :param use_iterative_json :param display_attribute: bool, show element name or first attribute from MDX PROPERTIES clause :param kwargs: :return: """ - cellset_id = self.create_cellset(mdx, sandbox_name=sandbox_name) - return self.extract_cellset_dataframe_shaped(cellset_id, delete_cellset=True, sandbox_name=sandbox_name, - display_attribute=display_attribute, **kwargs) + if display_attribute and any([use_blob, use_iterative_json]): + raise ValueError("When 'use_blob' or 'use_iterative_json' is True, 'display_attribute' must be False") + + # default case + if not any([use_blob, use_iterative_json]): + cellset_id = self.create_cellset( + mdx=mdx, + sandbox_name=sandbox_name) + return self.extract_cellset_dataframe_shaped( + cellset_id=cellset_id, + delete_cellset=True, + sandbox_name=sandbox_name, + display_attribute=display_attribute, + **kwargs) + + if all([use_blob, use_iterative_json]): + raise ValueError("'use_blob' and 'use_iterative_json' must not be used together") + + # ijson approach + if use_iterative_json: + return self.execute_mdx_dataframe( + mdx=mdx, + shaped=True, + sandbox_name=sandbox_name, + use_iterative_json=use_iterative_json, + use_blob=False, + **kwargs) + + # blob approach + return self.execute_mdx_dataframe( + mdx=mdx, + shaped=True, + sandbox_name=sandbox_name, + use_blob=True, + **kwargs) @require_pandas def execute_view_dataframe_shaped(self, cube_name: str, view_name: str, private: bool = False, sandbox_name: str = None, use_iterative_json: bool = False, - use_blob: bool = False, - **kwargs) -> 'pd.DataFrame': + use_blob: bool = False, **kwargs) -> 'pd.DataFrame': """ Retrieves data from cube in the shape of the query. Dimensions on rows can be stacked. One dimension must be placed on columns. Title selections are ignored. @@ -2020,10 +2055,13 @@ def execute_view_dataframe_shaped(self, cube_name: str, view_name: str, private: :param private: :param sandbox_name: str :param use_blob + :param use_iterative_json :param kwargs: :return: """ - if not use_blob and not use_iterative_json: + + # default approach + if not any([use_blob, use_iterative_json]): cellset_id = self.create_cellset_from_view( cube_name=cube_name, view_name=view_name, @@ -2035,9 +2073,10 @@ def execute_view_dataframe_shaped(self, cube_name: str, view_name: str, private: sandbox_name=sandbox_name, **kwargs) - if use_blob and use_iterative_json: + if all([use_blob, use_iterative_json]): raise ValueError("'use_blob' and 'use_iterative_json' must not be used together") + # ijson approach if use_iterative_json: return self.execute_view_dataframe( cube_name=cube_name, @@ -2046,32 +2085,21 @@ def execute_view_dataframe_shaped(self, cube_name: str, view_name: str, private: shaped=True, sandbox_name=sandbox_name, use_iterative_json=use_iterative_json, - # use blob for public views - use_blob=False) + use_blob=False, + **kwargs) - # case use_blob + # blob approach if private: raise ValueError("view must be public when 'use_blob' argument is True") - view_service = ViewService(self._rest) - view = view_service.get(cube_name=cube_name, view_name=view_name, private=False) - - if isinstance(view, MDXView): - mdx = view.MDX - return self.execute_mdx_dataframe_shaped( - mdx=mdx, - sandbox_name=sandbox_name, - use_blob=True, - display_attribute=False) - return self.execute_view_dataframe( cube_name=cube_name, view_name=view_name, private=private, shaped=True, sandbox_name=sandbox_name, - # use blob for public views - use_blob=True) + use_blob=True, + **kwargs) @require_pandas def execute_view_dataframe_pivot(self, cube_name: str, view_name: str, private: bool = False, dropna: bool = False, @@ -2197,7 +2225,7 @@ def execute_view_dataframe(self, cube_name: str, view_name: str, private: bool = value_separator="~", use_blob=True, **kwargs) - return build_dataframe_from_csv(raw_csv, sep='~', shaped=shaped, **kwargs) + return build_dataframe_from_csv(raw_csv, sep='~', skip_zeros=skip_zeros, shaped=shaped, **kwargs) cellset_id = self.create_cellset_from_view(cube_name=cube_name, view_name=view_name, private=private, sandbox_name=sandbox_name, **kwargs) @@ -3167,7 +3195,7 @@ def extract_cellset_dataframe( value_separator='~', sandbox_name=sandbox_name, include_attributes=include_attributes, use_compact_json=use_compact_json, **kwargs) - return build_dataframe_from_csv(raw_csv, sep="~", shaped=shaped, **kwargs) + return build_dataframe_from_csv(raw_csv, sep="~", skip_zeros=skip_zeros, shaped=shaped, **kwargs) @tidy_cellset @require_pandas @@ -3562,7 +3590,7 @@ def _execute_view_csv_use_blob(self, cube_name: str, view_name: str, top: int, s # alter native-view to assure only one element per dimension in title native_view = view_service.get_native_view(cube_name=cube_name, view_name=view_name, private=False) for title in native_view.titles: - title.subset = AnonymousSubset( + title._subset = AnonymousSubset( dimension_name=title.dimension_name, elements=[title.selected]) native_view.name = view_name = unique_name diff --git a/TM1py/Services/ElementService.py b/TM1py/Services/ElementService.py index 080cd255..4edce868 100644 --- a/TM1py/Services/ElementService.py +++ b/TM1py/Services/ElementService.py @@ -124,7 +124,8 @@ def get_elements_dataframe(self, dimension_name: str = None, hierarchy_name: str if not isinstance(elements, str): if isinstance(elements, Iterable): - elements = "{" + ",".join(f"[{dimension_name}].[{hierarchy_name}].[{member}]" for member in elements) + "}" + elements = "{" + ",".join( + f"[{dimension_name}].[{hierarchy_name}].[{member}]" for member in elements) + "}" else: raise ValueError("Argument 'element_selection' must be None or str") @@ -534,10 +535,11 @@ def get_levels_count(self, dimension_name: str, hierarchy_name: str, **kwargs) - def get_element_types(self, dimension_name: str, hierarchy_name: str, skip_consolidations: bool = False, **kwargs) -> CaseAndSpaceInsensitiveDict: url = format_url( - "/api/v1/Dimensions('{}')/Hierarchies('{}')/Elements?$select=Name,Type{}", + "/api/v1/Dimensions('{}')/Hierarchies('{}')/Elements?$select=Name,Type", dimension_name, - hierarchy_name, - "&$filter=Type ne 3" if skip_consolidations else "") + hierarchy_name) + if skip_consolidations: + url += "&$filter=Type ne 3" response = self._rest.GET(url, **kwargs) result = CaseAndSpaceInsensitiveDict() @@ -548,9 +550,9 @@ def get_element_types(self, dimension_name: str, hierarchy_name: str, def get_element_types_from_all_hierarchies( self, dimension_name: str, skip_consolidations: bool = False, **kwargs) -> CaseAndSpaceInsensitiveDict: url = format_url( - "/api/v1/Dimensions('{}')?$expand=Hierarchies($select=Elements;$expand=Elements($select=Name,Type{}", - dimension_name, - ";$filter=Type ne 3))" if skip_consolidations else "))") + "/api/v1/Dimensions('{}')?$expand=Hierarchies($select=Elements;$expand=Elements($select=Name,Type", + dimension_name) + url += ";$filter=Type ne 3))" if skip_consolidations else "))" response = self._rest.GET(url, **kwargs) result = CaseAndSpaceInsensitiveDict() diff --git a/TM1py/Services/PowerBiService.py b/TM1py/Services/PowerBiService.py index e53b1408..97ef3e83 100644 --- a/TM1py/Services/PowerBiService.py +++ b/TM1py/Services/PowerBiService.py @@ -27,7 +27,8 @@ def execute_mdx(self, mdx, **kwargs) -> 'pd.DataFrame': return self.cells.execute_mdx_dataframe_shaped(mdx, **kwargs) @require_pandas - def execute_view(self, cube_name, view_name, private, use_iterative_json=False, use_blob=False, **kwargs) -> 'pd.DataFrame': + def execute_view(self, cube_name: str, view_name: str, private: bool, use_iterative_json=False, use_blob=False, + **kwargs) -> 'pd.DataFrame': return self.cells.execute_view_dataframe_shaped( cube_name, view_name, diff --git a/TM1py/Utils/Utils.py b/TM1py/Utils/Utils.py index 1a6c1af0..d800ca9a 100644 --- a/TM1py/Utils/Utils.py +++ b/TM1py/Utils/Utils.py @@ -425,24 +425,31 @@ def build_csv_from_cellset_dict( return csv_content.getvalue().strip() -def build_dataframe_from_csv(raw_csv, sep='~', shaped: bool = False, **kwargs) -> 'pd.DataFrame': +def build_dataframe_from_csv(raw_csv, sep='~', skip_zeros: bool = True, shaped: bool = False, + **kwargs) -> 'pd.DataFrame': if not raw_csv: return pd.DataFrame() - memory_file = StringIO(raw_csv) # make sure all element names are strings and values column is derived from data if 'dtype' not in kwargs: kwargs['dtype'] = {'Value': None, **{col: str for col in range(999)}} + try: + df = pd.read_csv(StringIO(raw_csv), sep=sep, na_values=["", None], keep_default_na=False, **kwargs) + except ValueError: + # retry with dtype 'str' for results with a mixed value column + kwargs['dtype'] = {'Value': str, **{col: str for col in range(999)}} + df = pd.read_csv(StringIO(raw_csv), sep=sep, na_values=["", None], keep_default_na=False, **kwargs) - df = pd.read_csv(memory_file, sep=sep, na_values=["", None], keep_default_na=False, **kwargs) if not shaped: return df # due to csv creation logic, last column is bottom dimension from the column selection return df.pivot_table( index=tuple(df.columns[:-2]), + aggfunc="sum", columns=df.columns[-2], values=df.columns[-1], + dropna=skip_zeros, sort=False).reset_index() diff --git a/Tests/PowerBiService_test.py b/Tests/PowerBiService_test.py index 1de6900c..92e46b5e 100644 --- a/Tests/PowerBiService_test.py +++ b/Tests/PowerBiService_test.py @@ -1,11 +1,9 @@ import configparser +import math import unittest -from io import StringIO from math import nan from pathlib import Path -import numpy as np -import pandas as pd from pandas import DataFrame from TM1py import MDXView, NativeView, AnonymousSubset @@ -18,27 +16,13 @@ class TestPowerBiService(unittest.TestCase): tm1: TM1Service prefix = 'TM1py_Tests_PowerBiService_' cube_name = prefix + "Cube" - view_name = prefix + "View" + mdx_view_name = prefix + "MDXView" native_view_name = prefix + "NativeView" dimension_name = prefix + "Dimension" dimension_names = [ prefix + 'Dimension1', prefix + 'Dimension2', prefix + 'Dimension3'] - string_cube_name = prefix + "StringCube" - string_dimension_names = [ - prefix + 'StringDimension1', - prefix + 'StringDimension2', - prefix + 'StringDimension3'] - cells_in_string_cube = { - ('d1e1', 'd2e1', 'd3e1'): 'String1', - ('d1e2', 'd2e2', 'd3e2'): 'String2', - ('d1e3', 'd2e3', 'd3e3'): 'String3'} - - cube_name_rps1 = prefix + "Cube" + "_RPS1" - cube_name_rps2 = prefix + "Cube" + "_RPS2" - dimension_name_rps1 = prefix + "Dimension" + "_RPS1" - dimension_name_rps2 = prefix + "Dimension" + "_RPS2" MDX_TEMPLATE = """ SELECT @@ -176,6 +160,18 @@ def setUpClass(cls): cls.tm1.views.update_or_create(view, False) + mdx = cls.MDX_TEMPLATE.format( + rows="{[" + cls.dimension_names[0] + "].[Element1], [" + cls.dimension_names[0] + "].[Element2]}", + columns="{[" + cls.dimension_names[1] + "].[Element1], [" + cls.dimension_names[1] + "].[Element2]}", + cube=cls.cube_name, + where="[" + cls.dimension_names[2] + "].[Element1]") + cls.tm1.cubes.views.update_or_create( + MDXView( + cls.cube_name, + cls.mdx_view_name, + mdx), + private=False) + def add_unbalanced_hierarchy(self, hierarchy_name): dimension = self.tm1.dimensions.get(self.dimension_name) # other hierarchy @@ -213,7 +209,7 @@ def test_execute_mdx(self): @skip_if_no_pandas def test_execute_native_view(self): - df = self.tm1.power_bi.execute_view(self.cube_name, self.native_view_name, use_blob=True, private=False) + df = self.tm1.power_bi.execute_view(self.cube_name, self.native_view_name, use_blob=False, private=False) expected_df = DataFrame( {'TM1py_Tests_PowerBiService_Dimension1': {0: 'Element 1', 1: 'Element 1', 2: 'Element 2', 3: 'Element 2'}, @@ -225,15 +221,61 @@ def test_execute_native_view(self): @skip_if_no_pandas def test_execute_view(self): + df = self.tm1.power_bi.execute_view(self.cube_name, self.mdx_view_name, private=False) + + self.assertEqual(len(df), 2) + + self.assertEqual( + tuple(df.columns), + (self.dimension_names[0], "Element 1", "Element 2")) + + element1 = df.loc[df[self.dimension_names[0]] == "Element 1"] + self.assertEqual( + tuple(element1.values[0]), + ("Element 1", "1.0", None)) + + @skip_if_no_pandas + def test_execute_mdx_use_blob(self): mdx = self.MDX_TEMPLATE.format( rows="{[" + self.dimension_names[0] + "].[Element1], [" + self.dimension_names[0] + "].[Element2]}", columns="{[" + self.dimension_names[1] + "].[Element1], [" + self.dimension_names[1] + "].[Element2]}", cube=self.cube_name, where="[" + self.dimension_names[2] + "].[Element1]") + df = self.tm1.power_bi.execute_mdx(mdx, use_blob=True, skip_zeros=False) + + self.assertEqual(len(df), 2) + + self.assertEqual( + tuple(df.columns), + (self.dimension_names[0], "Element 1", "Element 2")) - self.tm1.cubes.views.create(MDXView(self.cube_name, self.view_name, mdx), private=False) + element1 = df.loc[df[self.dimension_names[0]] == "Element 1"] + self.assertEqual( + ("Element 1", 1.0, 0.0), + tuple(element1.values[0]) + ) - df = self.tm1.power_bi.execute_view(self.cube_name, self.view_name, private=False) + @skip_if_no_pandas + def test_execute_native_view_use_blob(self): + df = self.tm1.power_bi.execute_view(self.cube_name, self.native_view_name, + use_blob=True, skip_zeros=False, private=False) + + expected_df = DataFrame( + {'TM1py_Tests_PowerBiService_Dimension1': {0: 'Element 1', 1: 'Element 1', 2: 'Element 2', 3: 'Element 2'}, + 'TM1py_Tests_PowerBiService_Dimension2': {0: 'Element 1', 1: 'Element 2', 2: 'Element 1', 3: 'Element 2'}, + 'Element 1': {0: 1.0, 1: 0, 2: 0, 3: 0}, + 'Element 2': {0: 0, 1: 0, 2: 0, 3: 1.0}}) + + self.assertEqual(expected_df.to_markdown(), df.to_markdown()) + + @skip_if_no_pandas + def test_execute_view_use_blob(self): + df = self.tm1.power_bi.execute_view( + cube_name=self.cube_name, + view_name=self.mdx_view_name, + private=False, + use_blob=True, + skip_zeros=False) self.assertEqual(len(df), 2) @@ -243,8 +285,66 @@ def test_execute_view(self): element1 = df.loc[df[self.dimension_names[0]] == "Element 1"] self.assertEqual( - tuple(element1.values[0]), - ("Element 1", "1.0", None)) + ("Element 1", 1.0, 0), + tuple(element1.values[0]) + ) + + @skip_if_no_pandas + def test_execute_mdx_use_iterative_json(self): + mdx = self.MDX_TEMPLATE.format( + rows="{[" + self.dimension_names[0] + "].[Element1], [" + self.dimension_names[0] + "].[Element2]}", + columns="{[" + self.dimension_names[1] + "].[Element1], [" + self.dimension_names[1] + "].[Element2]}", + cube=self.cube_name, + where="[" + self.dimension_names[2] + "].[Element1]") + df = self.tm1.power_bi.execute_mdx(mdx, skip_zeros=False, use_iterative_json=True) + + self.assertEqual(len(df), 2) + + self.assertEqual( + tuple(df.columns), + (self.dimension_names[0], "Element 1", "Element 2")) + + row1 = df.loc[df[self.dimension_names[0]] == "Element 1"] + self.assertEqual( + ("Element 1", 1.0, 0.0), + tuple(row1.values[0]) + ) + + @skip_if_no_pandas + def test_execute_native_view_use_iterative_json(self): + df = self.tm1.power_bi.execute_view(self.cube_name, self.native_view_name, use_iterative_json=True, + private=False, skip_zeros=False) + + expected_df = DataFrame( + {'TM1py_Tests_PowerBiService_Dimension1': {0: 'Element 1', 1: 'Element 1', 2: 'Element 2', 3: 'Element 2'}, + 'TM1py_Tests_PowerBiService_Dimension2': {0: 'Element 1', 1: 'Element 2', 2: 'Element 1', 3: 'Element 2'}, + 'Element 1': {0: '1.0', 1: 0, 2: 0, 3: 0}, + 'Element 2': {0: 0, 1: 0, 2: 0, 3: '1.0'}}) + + self.assertEqual(expected_df.to_markdown(), df.to_markdown()) + + @skip_if_no_pandas + def test_execute_view_use_iterative_json(self): + df = self.tm1.power_bi.execute_view( + cube_name=self.cube_name, + view_name=self.mdx_view_name, + private=False, + skip_zeros=False, + use_iterative_json=True) + + self.assertEqual(len(df), 2) + + self.assertEqual( + tuple(df.columns), + (self.dimension_names[0], "Element 1", "Element 2")) + + row1 = df.loc[df[self.dimension_names[0]] == "Element 1"] + row1 = df.loc[df[self.dimension_names[0]] == "Element 1"] + self.assertEqual( + ("Element 1", 1.0, 0.0), + tuple(row1.values[0]) + ) + @skip_if_no_pandas def test_get_member_properties_default(self): diff --git a/Tests/ServerService_test.py b/Tests/ServerService_test.py index 87128234..84da7dad 100644 --- a/Tests/ServerService_test.py +++ b/Tests/ServerService_test.py @@ -7,6 +7,7 @@ from datetime import timedelta from pathlib import Path +import pytest from dateutil import parser from TM1py.Exceptions import TM1pyRestException @@ -14,6 +15,7 @@ from TM1py.Services import TM1Service +@pytest.mark.skip(reason="Too slow for regular tests. Only run before releases") class TestServerService(unittest.TestCase): tm1: TM1Service