cubewise-code · MariusWirtz · Sep 19, 2023 · Jul 26, 2023 · Sep 19, 2023 · Sep 19, 2023
diff --git a/TM1py/Services/ElementService.py b/TM1py/Services/ElementService.py
@@ -899,7 +899,7 @@ def add_edges(self, dimension_name: str, hierarchy_name: str = None, edges: Dict
 
         return self._rest.POST(url=url, data=json.dumps(body), **kwargs)
 
-    def add_elements(self, dimension_name: str, hierarchy_name: str, elements: List[Element], **kwargs):
+    def add_elements(self, dimension_name: str, hierarchy_name: str, elements: Iterable[Element], **kwargs):
         """ Add elements to hierarchy. Fails if one element already exists.
 
         :param dimension_name:

diff --git a/TM1py/Services/HierarchyService.py b/TM1py/Services/HierarchyService.py
@@ -1,16 +1,27 @@
 # -*- coding: utf-8 -*-
+
+try:
+    import pandas as pd
+
+    _has_pandas = True
+except ImportError:
+    _has_pandas = False
+
 import json
+import math
 from typing import Dict, Tuple, List, Optional
 
+import networkx as nx
 from requests import Response
 
-from TM1py.Objects import Hierarchy, Element, ElementAttribute
+from TM1py.Exceptions import TM1pyRestException
+from TM1py.Objects import Hierarchy, Element, ElementAttribute, Dimension
 from TM1py.Services.ElementService import ElementService
 from TM1py.Services.ObjectService import ObjectService
 from TM1py.Services.RestService import RestService
 from TM1py.Services.SubsetService import SubsetService
 from TM1py.Utils.Utils import case_and_space_insensitive_equals, format_url, CaseAndSpaceInsensitiveDict, \
-    CaseAndSpaceInsensitiveSet
+    CaseAndSpaceInsensitiveSet, CaseAndSpaceInsensitiveTuplesDict, require_pandas, require_admin
 
 
 class HierarchyService(ObjectService):
@@ -27,6 +38,46 @@ def __init__(self, rest: RestService):
         self.subsets = SubsetService(rest)
         self.elements = ElementService(rest)
 
+    @staticmethod
+    def _validate_edges(df: 'pd.DataFrame'):
+        graph = nx.DiGraph()
+        for _, *record in df.itertuples():
+            child = record[0]
+            for parent in record[1:]:
+                if not parent:
+                    continue
+
+                graph.add_edge(child, parent)
+                child = parent
+
+        cycles = list(nx.simple_cycles(graph))
+        if cycles:
+            raise ValueError(f"Circular reference{'s' if len(cycles) > 1 else ''} found in edges: {cycles}")
+
+    @staticmethod
+    def _validate_alias_uniqueness(df: 'pd.DataFrame'):
+        # map alias values against their principal element name
+        seen_values = CaseAndSpaceInsensitiveDict()
+
+        for row in df.itertuples(index=False):
+            normalized_row = tuple(col.strip().lower() for col in row)
+            element_name, *alias_values = normalized_row
+            # Register e.g. 'Deutschand' -> 'Deutschand'
+            seen_values[element_name] = element_name
+
+            for value in alias_values:
+                if not value:
+                    continue
+
+                if value in seen_values:
+                    # Duplicate entries
+                    if case_and_space_insensitive_equals(element_name, seen_values[value]):
+                        continue
+
+                    raise ValueError(f"Invalid alias value found in record {tuple(row)}")
+                # Register e.g. 'Deutschand' -> 'Germany'
+                seen_values[value] = element_name
+
     def create(self, hierarchy: Hierarchy, **kwargs):
         """ Create a hierarchy in an existing dimension
 
@@ -110,7 +161,7 @@ def update(self, hierarchy: Hierarchy, keep_existing_attributes=False, **kwargs)
     def update_or_create(self, hierarchy: Hierarchy, **kwargs):
         """ update if exists else create
 
-        :param Hierarchy:
+        :param hierarchy:
         :return:
         """
         if self.exists(dimension_name=hierarchy.dimension_name, hierarchy_name=hierarchy.name, **kwargs):
@@ -331,3 +382,255 @@ def is_balanced(self, dimension_name: str, hierarchy_name: str, **kwargs):
             return False
         else:
             raise RuntimeError(f"Unexpected return value from TM1 API request: {str(structure)}")
+
+    @require_pandas
+    @require_admin
+    def update_or_create_hierarchy_from_dataframe(
+            self,
+            dimension_name: str,
+            hierarchy_name: str,
+            df: pd.DataFrame,
+            element_column: str = None,
+            verify_unique_elements: bool = False,
+            verify_edges: bool = True,
+            element_type_column: str = 'ElementType',
+            unwind: bool = False):
+        """ Update or Create a hierarchy based on a dataframe, while never deleting existing elements.
+
+        :param dimension_name:
+            Name of the dimension
+        :param hierarchy_name:
+            Name of the hierarchy
+        :param df: pd.DataFrame the data frame. Example:
+            |    | Region  | ElementType | Alias:a     | Currency:s | population:n | level001 | level000 | level001_weight | level000_weight |
+            |---:|:--------|:------------|:------------|:-----------|-------------:|:---------|:---------|----------------:|----------------:|
+            |  0 | France  | Numeric     | Frankreich  | EUR        |     60000000 | Europe   | World    |               1 |               1 |
+            |  1 | Belgium | Numeric     | Schweiz     | CHF        |      9000000 | Europe   | World    |               1 |               1 |
+            |  2 | Germany | Numeric     | Deutschland | EUR        |     84000000 | Europe   | World    |               1 |               1 |
+
+            Names for the parent columns (level001, level000) are not configurable and `level000` is the top node.
+            All columns except for the element_column, element_type_colums and parent columns are attribute columns.
+            On attribute columns, you specify the type as a suffix. If no type is provided string attributes are created
+
+        :param element_type_column: str
+            The column name in the df which specifies which element is which type.
+            If None, all will be considered N level.
+        :param element_column: str
+            The column name of the element ID. If None, assumes first column is the element ID.
+        :param verify_unique_elements:
+            Abort early if element names are not unique
+        :param verify_edges:
+            Abort early if edges have circular reference
+        :param unwind: bool
+            Unwind hierarch before creating new edges
+        :return:
+
+        """
+        # element ID is in first column if not specified.
+        element_column = df.columns[0] if not element_column else element_column
+        df[element_column] = df[element_column].astype(str)
+
+        # assume all Numeric if no type is provided
+        if element_type_column not in df.columns:
+            df[element_type_column] = "Numeric"
+
+        # verify uniqueness of element names
+        if verify_unique_elements:
+            unique_element_names = len(set(df[element_column].str.lower().str.replace(' ', '')))
+            if df.shape[0] != unique_element_names:
+                raise ValueError("There must be no duplicates in the element column")
+
+        # verify alias uniqueness
+        alias_columns = tuple([col for col in df.columns if col.lower().endswith((":a", ":alias"))])
+        if len(alias_columns) > 0:
+            self._validate_alias_uniqueness(df=df[[element_column, *alias_columns]])
+
+        # identify level columns
+        level_columns = []
+        level_weight_columns = []
+        # sort to assure right order of levels
+        for column in sorted(df.columns, reverse=True):
+            if column.lower().startswith('level') and column[5:8].isdigit():
+                if len(column) == 8:
+                    level_columns.append(column)
+                elif len(column) == 15 and column.lower().endswith('_weight'):
+                    level_weight_columns.append(column)
+
+        if not len(level_columns) == len(level_weight_columns):
+            raise ValueError("Number of level columns must be equal to number of level weight columns")
+
+        if verify_edges:
+            self._validate_edges(df=df[[element_column, *level_columns]])
+
+        hierarchy_exists = self.exists(dimension_name, hierarchy_name)
+
+        if not hierarchy_exists:
+            existing_element_identifiers = set()
+        else:
+            existing_element_identifiers = self.elements.get_all_element_identifiers(
+                dimension_name=dimension_name,
+                hierarchy_name=hierarchy_name)
+
+        if not hierarchy_exists:
+            hierarchy = Hierarchy(name=hierarchy_name, dimension_name=dimension_name)
+            dimension_service = self.get_dimension_service()
+            if not dimension_service.exists(dimension_name):
+                dimension = Dimension(name=dimension_name, hierarchies=[hierarchy])
+                dimension_service.create(dimension)
+            else:
+                hierarchy = Hierarchy(name=hierarchy_name, dimension_name=dimension_name)
+                self.create(hierarchy)
+
+        # determine new elements based on Element Name column
+        elements_to_create = CaseAndSpaceInsensitiveDict({
+            element_name: Element.Types(element_type)
+            for element_name, element_type
+            in df.loc[
+                ~df[element_column].isin(existing_element_identifiers),
+                (element_column, element_type_column)
+            ].itertuples(index=False)
+        })
+
+        # determine new consolidations based on level columns
+        for element_name in df[[*level_columns]].stack().unique():
+            if not element_name:
+                continue
+            if element_name in existing_element_identifiers:
+                continue
+            if element_name in elements_to_create and elements_to_create[element_name] != Element.Types.CONSOLIDATED:
+                raise ValueError(f"Inconsistent Type for element: '{element_name}' in hierarchy '{hierarchy_name}'")
+            elements_to_create[element_name] = Element.Types.CONSOLIDATED
+
+        if elements_to_create:
+            # add these elements to hierarchy in tm1
+            self.elements.add_elements(
+                dimension_name=dimension_name,
+                hierarchy_name=hierarchy_name,
+                elements=(
+                    Element(element_name, element_type)
+                    for element_name, element_type in
+                    elements_to_create.items()))
+
+        # define the attribute columns in df. Applies to all elements in df, not only new ones.
+        attribute_columns = df.columns.drop(
+            labels=[element_column] + [element_type_column] + level_columns + level_weight_columns,
+            errors='ignore')
+
+        # new attributes are created as strings if no type is provided
+        try:
+            existing_attributes = self.elements.get_all_element_identifiers(
+                dimension_name='}ElementAttributes_' + dimension_name,
+                hierarchy_name='}ElementAttributes_' + hierarchy_name)
+        except TM1pyRestException as ex:
+            if ex.status_code == 404:
+                existing_attributes = set()
+            else:
+                raise ex
+
+        new_attributes = []
+        for attribute_column in attribute_columns:
+            if ':' in attribute_column:
+                attribute_name, attribute_type = attribute_column.rsplit(":", maxsplit=1)
+                attribute_type = self._attribute_type_from_code(attribute_type)
+
+            else:
+                attribute_name = attribute_column
+                attribute_type = ElementAttribute.Types.STRING
+
+            if attribute_name not in existing_attributes:
+                new_attributes.append(ElementAttribute(attribute_name, attribute_type))
+
+        self.elements.add_element_attributes(
+            dimension_name=dimension_name,
+            hierarchy_name=hierarchy_name,
+            element_attributes=new_attributes)
+
+        # define attributes df with ID + attribute columns.
+        id_attribute_cols = [element_column] + list(attribute_columns.values)
+        attributes_df: pd.DataFrame = df.loc[:, id_attribute_cols]
+
+        # melt for write structure (ID, Attribute) : Attribute_value
+        attributes_df = attributes_df.melt(
+            id_vars=element_column,
+            value_vars=attribute_columns,
+            var_name='}ElementAttributes_' + dimension_name,
+            value_name='attribute_value', )
+        attributes_df.fillna('', inplace=True)
+
+        # drop ':' suffix in attribute column
+        attribute_column = '}ElementAttributes_' + dimension_name
+        attributes_df[attribute_column] = attributes_df[attribute_column].apply(lambda x: x.rsplit(':', 1)[0])
+
+        # write attributes to cube
+        cell_service = self.get_cell_service()
+        cell_service.write_dataframe(
+            cube_name='}ElementAttributes_' + dimension_name,
+            data=attributes_df,
+            sum_numeric_duplicates=False,
+            use_blob=True)
+
+        if unwind:
+            self.remove_all_edges(dimension_name, hierarchy_name)
+
+        edges = CaseAndSpaceInsensitiveTuplesDict()
+        for element_name, *record in df[[element_column, *level_columns, *level_weight_columns]].itertuples(
+                index=False):
+            levels = record[:len(level_columns)]
+            level_weights = record[len(level_columns):]
+
+            previous_level = element_name
+            for level, weight in zip(levels, level_weights):
+                if not level:
+                    continue
+                if not isinstance(level, str) and math.isnan(level):
+                    continue
+                if level == previous_level:
+                    continue
+
+                edges[level, previous_level] = weight
+                previous_level = level
+
+        if edges:
+            try:
+                current_edges = self.elements.get_edges(
+                    dimension_name=dimension_name,
+                    hierarchy_name=hierarchy_name)
+            except TM1pyRestException as ex:
+                if ex.status_code == 404:
+                    current_edges = CaseAndSpaceInsensitiveTuplesDict()
+                else:
+                    raise ex
+
+            new_edges = {
+                (k, v): w
+                for (k, v), w
+                in edges.items()
+                if (k, v) not in current_edges or w != current_edges[(k, v)]}
+            self.elements.add_edges(dimension_name=dimension_name, hierarchy_name=hierarchy_name, edges=new_edges)
+
+    def get_dimension_service(self):
+        from TM1py import DimensionService
+        return DimensionService(self._rest)
+
+    def get_cell_service(self):
+        from TM1py import CellService
+        return CellService(self._rest)
+
+    @staticmethod
+    def _attribute_type_from_code(attribute_type: str) -> ElementAttribute.Types:
+        attribute_type = attribute_type.lower()
+        if attribute_type not in ["a", "s", "n"] and attribute_type not in ElementAttribute.Types:
+            raise ValueError(f"Attribute Type '{attribute_type}' is not a valid "
+                             f"value: 'a', 's', 'n', 'alias', 'string', 'numeric'")
+
+        if attribute_type == 'a':
+            return ElementAttribute.Types.ALIAS
+
+        if attribute_type == 's':
+            return ElementAttribute.Types.STRING
+
+        if attribute_type == 'n':
+            return ElementAttribute.Types.NUMERIC
+
+        else:
+            return ElementAttribute.Types(attribute_type)