Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add update_or_create_hierarchy_from_dataframe Function #944

Merged
merged 3 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion TM1py/Services/ElementService.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ def add_edges(self, dimension_name: str, hierarchy_name: str = None, edges: Dict

return self._rest.POST(url=url, data=json.dumps(body), **kwargs)

def add_elements(self, dimension_name: str, hierarchy_name: str, elements: List[Element], **kwargs):
def add_elements(self, dimension_name: str, hierarchy_name: str, elements: Iterable[Element], **kwargs):
""" Add elements to hierarchy. Fails if one element already exists.

:param dimension_name:
Expand Down
285 changes: 282 additions & 3 deletions TM1py/Services/HierarchyService.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
# -*- coding: utf-8 -*-

try:
import pandas as pd

_has_pandas = True
except ImportError:
_has_pandas = False

import json
import math
from typing import Dict, Tuple, List, Optional

from requests import Response

from TM1py.Objects import Hierarchy, Element, ElementAttribute
from TM1py.Exceptions import TM1pyRestException
from TM1py.Objects import Hierarchy, Element, ElementAttribute, Dimension
from TM1py.Services.ElementService import ElementService
from TM1py.Services.ObjectService import ObjectService
from TM1py.Services.RestService import RestService
from TM1py.Services.SubsetService import SubsetService
from TM1py.Utils.Utils import case_and_space_insensitive_equals, format_url, CaseAndSpaceInsensitiveDict, \
CaseAndSpaceInsensitiveSet
CaseAndSpaceInsensitiveSet, CaseAndSpaceInsensitiveTuplesDict, require_pandas, require_admin


class HierarchyService(ObjectService):
Expand All @@ -27,6 +37,30 @@ def __init__(self, rest: RestService):
self.subsets = SubsetService(rest)
self.elements = ElementService(rest)

@staticmethod
def _validate_alias_uniqueness(df: 'pd.DataFrame'):
# map alias values against their principal element name
seen_values = CaseAndSpaceInsensitiveDict()

for row in df.itertuples(index=False):
normalized_row = tuple(col.strip().lower() for col in row)
element_name, *alias_values = normalized_row
# Register e.g. 'Deutschand' -> 'Deutschand'
seen_values[element_name] = element_name

for value in alias_values:
if not value:
continue

if value in seen_values:
# Duplicate entries
if case_and_space_insensitive_equals(element_name, seen_values[value]):
continue

raise ValueError(f"Invalid alias value found in record {tuple(row)}")
# Register e.g. 'Deutschand' -> 'Germany'
seen_values[value] = element_name

def create(self, hierarchy: Hierarchy, **kwargs):
""" Create a hierarchy in an existing dimension

Expand Down Expand Up @@ -110,7 +144,7 @@ def update(self, hierarchy: Hierarchy, keep_existing_attributes=False, **kwargs)
def update_or_create(self, hierarchy: Hierarchy, **kwargs):
""" update if exists else create

:param Hierarchy:
:param hierarchy:
:return:
"""
if self.exists(dimension_name=hierarchy.dimension_name, hierarchy_name=hierarchy.name, **kwargs):
Expand Down Expand Up @@ -331,3 +365,248 @@ def is_balanced(self, dimension_name: str, hierarchy_name: str, **kwargs):
return False
else:
raise RuntimeError(f"Unexpected return value from TM1 API request: {str(structure)}")

@require_pandas
@require_admin
def update_or_create_hierarchy_from_dataframe(
self,
dimension_name: str,
hierarchy_name: str,
df: pd.DataFrame,
element_column: str = None,
verify_unique_elements: bool = False,
element_types_column: str = 'ElementType',
unwind: bool = False):
"""

:param dimension_name:
:param hierarchy_name:
:param df: pd.DataFrame
MariusWirtz marked this conversation as resolved.
Show resolved Hide resolved
:param element_types_column: str
The column name in the df which specifies which element is which type. If None, all will be considered N level.
:param element_column: str
The column name of the element ID. If None, assumes first column is the element ID.
:param unwind: bool
Unwind hierarch before creating new edges
:return:

"""
# element ID is in first column if not specified.
element_column = df.columns[0] if not element_column else element_column
df[element_column] = df[element_column].astype(str)

# assume all Numeric if no type is provided
if element_types_column not in df.columns:
df[element_types_column] = "Numeric"

# verify uniqueness of element names
if verify_unique_elements:
unique_element_names = len(set(df[element_column].str.lower().str.replace(' ', '')))
if df.shape[0] != unique_element_names:
raise ValueError("There must be no duplicates in the element column")

# verify alias uniqueness
alias_columns = tuple([col for col in df.columns if col.lower().endswith((":a", ":alias"))])
if len(alias_columns) > 0:
self._validate_alias_uniqueness(df=df[[element_column, *alias_columns]])

# identify level columns
level_columns = []
level_weight_columns = []
# sort to assure right order of levels
for column in sorted(df.columns, reverse=True):
if column.lower().startswith('level') and column[5:8].isdigit():
if len(column) == 8:
level_columns.append(column)
elif len(column) == 15 and column.lower().endswith('_weight'):
level_weight_columns.append(column)

if not len(level_columns) == len(level_weight_columns):
raise ValueError("Number of level columns must be equal to number of level weight columns")

hierarchy_exists = self.exists(dimension_name, hierarchy_name)

if not hierarchy_exists:
existing_element_identifiers = set()
else:
existing_element_identifiers = self.elements.get_all_element_identifiers(
dimension_name=dimension_name,
hierarchy_name=hierarchy_name)

if not hierarchy_exists:
hierarchy = Hierarchy(name=hierarchy_name, dimension_name=dimension_name)
dimension_service = self.get_dimension_service()
if not dimension_service.exists(dimension_name):
dimension = Dimension(name=dimension_name, hierarchies=[hierarchy])
dimension_service.create(dimension)
else:
hierarchy = Hierarchy(name=hierarchy_name, dimension_name=dimension_name)
self.create(hierarchy)

# determine new elements based on Element Name column
elements_to_create = CaseAndSpaceInsensitiveDict({
element_name: Element.Types(element_type)
for element_name, element_type
in df.loc[
~df[element_column].isin(existing_element_identifiers),
(element_column, element_types_column)
].itertuples(index=False)
})

# determine new consolidations based on level columns
for element_name in df[[*level_columns]].stack().unique():
if not element_name:
continue
if element_name in existing_element_identifiers:
continue
if element_name in elements_to_create and elements_to_create[element_name] != Element.Types.CONSOLIDATED:
raise ValueError(f"Inconsistent Type for element: '{element_name}' in hierarchy '{hierarchy_name}'")
elements_to_create[element_name] = Element.Types.CONSOLIDATED

if elements_to_create:
# add these elements to hierarchy in tm1
self.elements.add_elements(
dimension_name=dimension_name,
hierarchy_name=hierarchy_name,
elements=(
Element(element_name, element_type)
for element_name, element_type in
elements_to_create.items()))

# identify level columns
MariusWirtz marked this conversation as resolved.
Show resolved Hide resolved
level_columns = []
level_weight_columns = []
# sort to assure right order of levels
for column in sorted(df.columns, reverse=True):
if column.lower().startswith('level') and column[5:8].isdigit():
if len(column) == 8:
level_columns.append(column)
elif len(column) == 15 and column.lower().endswith('_weight'):
level_weight_columns.append(column)

if not len(level_columns) == len(level_weight_columns):
raise ValueError("Number of level columns must be equal to number of level weight columns")

# define the attribute columns in df. Applies to all elements in df, not only new ones.
attribute_columns = df.columns.drop(
labels=[element_column] + [element_types_column] + level_columns + level_weight_columns,
errors='ignore')

# new attributes are created as strings if no type is provided
try:
existing_attributes = self.elements.get_all_element_identifiers(
dimension_name='}ElementAttributes_' + dimension_name,
hierarchy_name='}ElementAttributes_' + hierarchy_name)
except TM1pyRestException as ex:
if ex.status_code == 404:
existing_attributes = set()
else:
raise ex

new_attributes = []
for attribute_column in attribute_columns:
if ':' in attribute_column:
attribute_name, attribute_type = attribute_column.rsplit(":", maxsplit=1)
attribute_type = self._attribute_type_from_code(attribute_type)

else:
attribute_name = attribute_column
attribute_type = ElementAttribute.Types.STRING

if attribute_name not in existing_attributes:
new_attributes.append(ElementAttribute(attribute_name, attribute_type))

self.elements.add_element_attributes(
dimension_name=dimension_name,
hierarchy_name=hierarchy_name,
element_attributes=new_attributes)

# define attributes df with ID + attribute columns.
id_attribute_cols = [element_column] + list(attribute_columns.values)
attributes_df: pd.DataFrame = df.loc[:, id_attribute_cols]

# melt for write structure (ID, Attribute) : Attribute_value
attributes_df = attributes_df.melt(
id_vars=element_column,
value_vars=attribute_columns,
var_name='}ElementAttributes_' + dimension_name,
value_name='attribute_value',)
attributes_df.fillna('', inplace=True)

# drop ':' suffix in attribute column
attribute_column = '}ElementAttributes_' + dimension_name
attributes_df[attribute_column] = attributes_df[attribute_column].apply(lambda x: x.rsplit(':', 1)[0])

# write attributes to cube
cell_service = self.get_cell_service()
cell_service.write_dataframe(
cube_name='}ElementAttributes_' + dimension_name,
data=attributes_df,
sum_numeric_duplicates=False,
use_blob=True)

if unwind:
self.remove_all_edges(dimension_name, hierarchy_name)

edges = CaseAndSpaceInsensitiveTuplesDict()
for element_name, *record in df[[element_column, *level_columns, *level_weight_columns]].itertuples(
index=False):
levels = record[:len(level_columns)]
level_weights = record[len(level_columns):]

previous_level = element_name
for level, weight in zip(levels, level_weights):
if not level:
continue
if not isinstance(level, str) and math.isnan(level):
continue
if level == previous_level:
continue

edges[level, previous_level] = weight
previous_level = level

if edges:
try:
current_edges = self.elements.get_edges(
dimension_name=dimension_name,
hierarchy_name=hierarchy_name)
except TM1pyRestException as ex:
if ex.status_code == 404:
current_edges = CaseAndSpaceInsensitiveTuplesDict()
else:
raise ex

new_edges = {
MariusWirtz marked this conversation as resolved.
Show resolved Hide resolved
(k, v): w
for (k, v), w
in edges.items()
if (k, v) not in current_edges or w != current_edges[(k, v)]}
self.elements.add_edges(dimension_name=dimension_name, hierarchy_name=hierarchy_name, edges=new_edges)

def get_dimension_service(self):
from TM1py import DimensionService
return DimensionService(self._rest)

def get_cell_service(self):
from TM1py import CellService
return CellService(self._rest)

@staticmethod
def _attribute_type_from_code(attribute_type: str) -> ElementAttribute.Types:
attribute_type = attribute_type.lower()
if attribute_type not in ["a", "s", "n"] and attribute_type not in ElementAttribute.Types:
raise ValueError(f"Attribute Type '{attribute_type}' is not a valid "
f"value: 'a', 's', 'n', 'alias', 'string', 'numeric'")

if attribute_type == 'a':
return ElementAttribute.Types.ALIAS

if attribute_type == 's':
return ElementAttribute.Types.STRING

if attribute_type == 'n':
return ElementAttribute.Types.NUMERIC

else:
return ElementAttribute.Types(attribute_type)
Loading