Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add update_or_create_hierarchy_from_dataframe Function #944

Merged
merged 3 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion TM1py/Services/ElementService.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ def add_edges(self, dimension_name: str, hierarchy_name: str = None, edges: Dict

return self._rest.POST(url=url, data=json.dumps(body), **kwargs)

def add_elements(self, dimension_name: str, hierarchy_name: str, elements: List[Element], **kwargs):
def add_elements(self, dimension_name: str, hierarchy_name: str, elements: Iterable[Element], **kwargs):
""" Add elements to hierarchy. Fails if one element already exists.

:param dimension_name:
Expand Down
309 changes: 306 additions & 3 deletions TM1py/Services/HierarchyService.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,27 @@
# -*- coding: utf-8 -*-

try:
import pandas as pd

_has_pandas = True
except ImportError:
_has_pandas = False

import json
import math
from typing import Dict, Tuple, List, Optional

import networkx as nx
from requests import Response

from TM1py.Objects import Hierarchy, Element, ElementAttribute
from TM1py.Exceptions import TM1pyRestException
from TM1py.Objects import Hierarchy, Element, ElementAttribute, Dimension
from TM1py.Services.ElementService import ElementService
from TM1py.Services.ObjectService import ObjectService
from TM1py.Services.RestService import RestService
from TM1py.Services.SubsetService import SubsetService
from TM1py.Utils.Utils import case_and_space_insensitive_equals, format_url, CaseAndSpaceInsensitiveDict, \
CaseAndSpaceInsensitiveSet
CaseAndSpaceInsensitiveSet, CaseAndSpaceInsensitiveTuplesDict, require_pandas, require_admin


class HierarchyService(ObjectService):
Expand All @@ -27,6 +38,46 @@ def __init__(self, rest: RestService):
self.subsets = SubsetService(rest)
self.elements = ElementService(rest)

@staticmethod
def _validate_edges(df: 'pd.DataFrame'):
graph = nx.DiGraph()
for _, *record in df.itertuples():
child = record[0]
for parent in record[1:]:
if not parent:
continue

graph.add_edge(child, parent)
child = parent

cycles = list(nx.simple_cycles(graph))
if cycles:
raise ValueError(f"Circular reference{'s' if len(cycles) > 1 else ''} found in edges: {cycles}")

@staticmethod
def _validate_alias_uniqueness(df: 'pd.DataFrame'):
# map alias values against their principal element name
seen_values = CaseAndSpaceInsensitiveDict()

for row in df.itertuples(index=False):
normalized_row = tuple(col.strip().lower() for col in row)
element_name, *alias_values = normalized_row
# Register e.g. 'Deutschand' -> 'Deutschand'
seen_values[element_name] = element_name

for value in alias_values:
if not value:
continue

if value in seen_values:
# Duplicate entries
if case_and_space_insensitive_equals(element_name, seen_values[value]):
continue

raise ValueError(f"Invalid alias value found in record {tuple(row)}")
# Register e.g. 'Deutschand' -> 'Germany'
seen_values[value] = element_name

def create(self, hierarchy: Hierarchy, **kwargs):
""" Create a hierarchy in an existing dimension

Expand Down Expand Up @@ -110,7 +161,7 @@ def update(self, hierarchy: Hierarchy, keep_existing_attributes=False, **kwargs)
def update_or_create(self, hierarchy: Hierarchy, **kwargs):
""" update if exists else create

:param Hierarchy:
:param hierarchy:
:return:
"""
if self.exists(dimension_name=hierarchy.dimension_name, hierarchy_name=hierarchy.name, **kwargs):
Expand Down Expand Up @@ -331,3 +382,255 @@ def is_balanced(self, dimension_name: str, hierarchy_name: str, **kwargs):
return False
else:
raise RuntimeError(f"Unexpected return value from TM1 API request: {str(structure)}")

@require_pandas
@require_admin
def update_or_create_hierarchy_from_dataframe(
self,
dimension_name: str,
hierarchy_name: str,
df: pd.DataFrame,
element_column: str = None,
verify_unique_elements: bool = False,
verify_edges: bool = True,
element_type_column: str = 'ElementType',
unwind: bool = False):
""" Update or Create a hierarchy based on a dataframe, while never deleting existing elements.

:param dimension_name:
Name of the dimension
:param hierarchy_name:
Name of the hierarchy
:param df: pd.DataFrame the data frame. Example:
| | Region | ElementType | Alias:a | Currency:s | population:n | level001 | level000 | level001_weight | level000_weight |
|---:|:--------|:------------|:------------|:-----------|-------------:|:---------|:---------|----------------:|----------------:|
| 0 | France | Numeric | Frankreich | EUR | 60000000 | Europe | World | 1 | 1 |
| 1 | Belgium | Numeric | Schweiz | CHF | 9000000 | Europe | World | 1 | 1 |
| 2 | Germany | Numeric | Deutschland | EUR | 84000000 | Europe | World | 1 | 1 |

Names for the parent columns (level001, level000) are not configurable and `level000` is the top node.
All columns except for the element_column, element_type_colums and parent columns are attribute columns.
On attribute columns, you specify the type as a suffix. If no type is provided string attributes are created

:param element_type_column: str
The column name in the df which specifies which element is which type.
If None, all will be considered N level.
:param element_column: str
The column name of the element ID. If None, assumes first column is the element ID.
:param verify_unique_elements:
Abort early if element names are not unique
:param verify_edges:
Abort early if edges have circular reference
:param unwind: bool
Unwind hierarch before creating new edges
:return:

"""
# element ID is in first column if not specified.
element_column = df.columns[0] if not element_column else element_column
df[element_column] = df[element_column].astype(str)

# assume all Numeric if no type is provided
if element_type_column not in df.columns:
df[element_type_column] = "Numeric"

# verify uniqueness of element names
if verify_unique_elements:
unique_element_names = len(set(df[element_column].str.lower().str.replace(' ', '')))
if df.shape[0] != unique_element_names:
raise ValueError("There must be no duplicates in the element column")

# verify alias uniqueness
alias_columns = tuple([col for col in df.columns if col.lower().endswith((":a", ":alias"))])
if len(alias_columns) > 0:
self._validate_alias_uniqueness(df=df[[element_column, *alias_columns]])

# identify level columns
level_columns = []
level_weight_columns = []
# sort to assure right order of levels
for column in sorted(df.columns, reverse=True):
if column.lower().startswith('level') and column[5:8].isdigit():
if len(column) == 8:
level_columns.append(column)
elif len(column) == 15 and column.lower().endswith('_weight'):
level_weight_columns.append(column)

if not len(level_columns) == len(level_weight_columns):
raise ValueError("Number of level columns must be equal to number of level weight columns")

if verify_edges:
self._validate_edges(df=df[[element_column, *level_columns]])

hierarchy_exists = self.exists(dimension_name, hierarchy_name)

if not hierarchy_exists:
existing_element_identifiers = set()
else:
existing_element_identifiers = self.elements.get_all_element_identifiers(
dimension_name=dimension_name,
hierarchy_name=hierarchy_name)

if not hierarchy_exists:
hierarchy = Hierarchy(name=hierarchy_name, dimension_name=dimension_name)
dimension_service = self.get_dimension_service()
if not dimension_service.exists(dimension_name):
dimension = Dimension(name=dimension_name, hierarchies=[hierarchy])
dimension_service.create(dimension)
else:
hierarchy = Hierarchy(name=hierarchy_name, dimension_name=dimension_name)
self.create(hierarchy)

# determine new elements based on Element Name column
elements_to_create = CaseAndSpaceInsensitiveDict({
element_name: Element.Types(element_type)
for element_name, element_type
in df.loc[
~df[element_column].isin(existing_element_identifiers),
(element_column, element_type_column)
].itertuples(index=False)
})

# determine new consolidations based on level columns
for element_name in df[[*level_columns]].stack().unique():
if not element_name:
continue
if element_name in existing_element_identifiers:
continue
if element_name in elements_to_create and elements_to_create[element_name] != Element.Types.CONSOLIDATED:
raise ValueError(f"Inconsistent Type for element: '{element_name}' in hierarchy '{hierarchy_name}'")
elements_to_create[element_name] = Element.Types.CONSOLIDATED

if elements_to_create:
# add these elements to hierarchy in tm1
self.elements.add_elements(
dimension_name=dimension_name,
hierarchy_name=hierarchy_name,
elements=(
Element(element_name, element_type)
for element_name, element_type in
elements_to_create.items()))

# define the attribute columns in df. Applies to all elements in df, not only new ones.
attribute_columns = df.columns.drop(
labels=[element_column] + [element_type_column] + level_columns + level_weight_columns,
errors='ignore')

# new attributes are created as strings if no type is provided
try:
existing_attributes = self.elements.get_all_element_identifiers(
dimension_name='}ElementAttributes_' + dimension_name,
hierarchy_name='}ElementAttributes_' + hierarchy_name)
except TM1pyRestException as ex:
if ex.status_code == 404:
existing_attributes = set()
else:
raise ex

new_attributes = []
for attribute_column in attribute_columns:
if ':' in attribute_column:
attribute_name, attribute_type = attribute_column.rsplit(":", maxsplit=1)
attribute_type = self._attribute_type_from_code(attribute_type)

else:
attribute_name = attribute_column
attribute_type = ElementAttribute.Types.STRING

if attribute_name not in existing_attributes:
new_attributes.append(ElementAttribute(attribute_name, attribute_type))

self.elements.add_element_attributes(
dimension_name=dimension_name,
hierarchy_name=hierarchy_name,
element_attributes=new_attributes)

# define attributes df with ID + attribute columns.
id_attribute_cols = [element_column] + list(attribute_columns.values)
attributes_df: pd.DataFrame = df.loc[:, id_attribute_cols]

# melt for write structure (ID, Attribute) : Attribute_value
attributes_df = attributes_df.melt(
id_vars=element_column,
value_vars=attribute_columns,
var_name='}ElementAttributes_' + dimension_name,
value_name='attribute_value', )
attributes_df.fillna('', inplace=True)

# drop ':' suffix in attribute column
attribute_column = '}ElementAttributes_' + dimension_name
attributes_df[attribute_column] = attributes_df[attribute_column].apply(lambda x: x.rsplit(':', 1)[0])

# write attributes to cube
cell_service = self.get_cell_service()
cell_service.write_dataframe(
cube_name='}ElementAttributes_' + dimension_name,
data=attributes_df,
sum_numeric_duplicates=False,
use_blob=True)

if unwind:
self.remove_all_edges(dimension_name, hierarchy_name)

edges = CaseAndSpaceInsensitiveTuplesDict()
for element_name, *record in df[[element_column, *level_columns, *level_weight_columns]].itertuples(
index=False):
levels = record[:len(level_columns)]
level_weights = record[len(level_columns):]

previous_level = element_name
for level, weight in zip(levels, level_weights):
if not level:
continue
if not isinstance(level, str) and math.isnan(level):
continue
if level == previous_level:
continue

edges[level, previous_level] = weight
previous_level = level

if edges:
try:
current_edges = self.elements.get_edges(
dimension_name=dimension_name,
hierarchy_name=hierarchy_name)
except TM1pyRestException as ex:
if ex.status_code == 404:
current_edges = CaseAndSpaceInsensitiveTuplesDict()
else:
raise ex

new_edges = {
MariusWirtz marked this conversation as resolved.
Show resolved Hide resolved
(k, v): w
for (k, v), w
in edges.items()
if (k, v) not in current_edges or w != current_edges[(k, v)]}
self.elements.add_edges(dimension_name=dimension_name, hierarchy_name=hierarchy_name, edges=new_edges)

def get_dimension_service(self):
from TM1py import DimensionService
return DimensionService(self._rest)

def get_cell_service(self):
from TM1py import CellService
return CellService(self._rest)

@staticmethod
def _attribute_type_from_code(attribute_type: str) -> ElementAttribute.Types:
attribute_type = attribute_type.lower()
if attribute_type not in ["a", "s", "n"] and attribute_type not in ElementAttribute.Types:
raise ValueError(f"Attribute Type '{attribute_type}' is not a valid "
f"value: 'a', 's', 'n', 'alias', 'string', 'numeric'")

if attribute_type == 'a':
return ElementAttribute.Types.ALIAS

if attribute_type == 's':
return ElementAttribute.Types.STRING

if attribute_type == 'n':
return ElementAttribute.Types.NUMERIC

else:
return ElementAttribute.Types(attribute_type)
Loading