From a0af7ab78ac964c0b2d9d76913cb775ea5ce69ef Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Aug 2021 14:51:03 +0200 Subject: [PATCH] Implement hierarchical aggregation for Tabulator (#2624) * Implement hierarchical parameter for Tabulator * Implement aggregators and add docs * Update panel/widgets/tables.py * Fix table test --- examples/reference/widgets/Tabulator.ipynb | 26 +++++ panel/models/tabulator.py | 4 + panel/models/tabulator.ts | 109 +++++++++++++++++++-- panel/tests/widgets/test_tables.py | 3 +- panel/widgets/tables.py | 32 +++--- 5 files changed, 150 insertions(+), 24 deletions(-) diff --git a/examples/reference/widgets/Tabulator.ipynb b/examples/reference/widgets/Tabulator.ipynb index d79d2e7fb1..5f6174ad78 100644 --- a/examples/reference/widgets/Tabulator.ipynb +++ b/examples/reference/widgets/Tabulator.ipynb @@ -28,10 +28,12 @@ "\n", "##### Core\n", "\n", + "* **``aggregators``** (``dict``): A dictionary mapping from index name to an aggregator to be used for `hierarchical` multi-indexes (valid aggregators include 'min', 'max', 'mean' and 'sum'). 
If separate aggregators for different columns are required, the dictionary may be nested as `{index_name: {column_name: aggregator}}`\n", "* **``configuration``** (``dict``): A dictionary mapping used to specify tabulator options not explicitly exposed by panel.\n", "* **``editors``** (``dict``): A dictionary mapping from column name to a bokeh `CellEditor` instance or tabulator editor specification.\n", "* **``formatters``** (``dict``): A dictionary mapping from column name to a bokeh `CellFormatter` instance or tabulator formatter specification.\n", "* **``groupby``** (`list`): Groups rows in the table by one or more columns.\n", + "* **``hierarchical``** (boolean, default=False): Whether to render multi-indexes as a hierarchical index (note that `hierarchical` must be enabled during instantiation and cannot be modified later)\n", "* **``hidden_columns``** (`list`): List of columns to hide.\n", "* **``layout``** (str): Describes the column layout mode with one of the following options `'fit_columns'`, `'fit_data'`, `'fit_data_stretch'`, `'fit_data_fill'`, `'fit_data_table'`. \n", "* **``frozen_columns``** (`list`): List of columns to freeze, preventing them from scrolling out of frame. Column can be specified by name or index.\n", @@ -555,6 +557,30 @@ "pn.widgets.Tabulator(autompg, groupby=['yr', 'origin'], height=240)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hierarchical Multi-index\n", + "\n", + "The `Tabulator` widget can also render a hierarchical multi-index and aggregate over specific categories. If a DataFrame with a hierarchical multi-index is supplied and the `hierarchical` option is enabled, the widget will group data by the categories in the order they are defined in. Additionally, for each group in the multi-index an aggregator may be provided which will aggregate over the values in that category.\n", + "\n", + "For example, we may load population data for locations around the world broken down by sex and age-group. 
If we specify aggregators over the 'AgeGrp' and 'Sex' indexes, we can see the aggregated values for each of those groups (note that we do not have to specify an aggregator for the outer index, since the aggregators are specified over the subgroups, in this case 'AgeGrp' and 'Sex'):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from bokeh.sampledata.population import data as population_data \n", + "\n", + "pop_df = population_data[population_data.Year == 2020].set_index(['Location', 'AgeGrp', 'Sex'])[['Value']]\n", + "\n", + "pn.widgets.Tabulator(value=pop_df, hierarchical=True, aggregators={'Sex': 'sum', 'AgeGrp': 'sum'}, height=400)" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/panel/models/tabulator.py b/panel/models/tabulator.py index 3be7e3d253..4376c1042d 100644 --- a/panel/models/tabulator.py +++ b/panel/models/tabulator.py @@ -55,6 +55,8 @@ class DataTabulator(HTMLBox): See http://tabulator.info/ """ + aggregators = Dict(String, String) + configuration = Dict(String, Any) columns = List(Instance(TableColumn), help=""" @@ -75,6 +77,8 @@ class DataTabulator(HTMLBox): hidden_columns = List(String) + indexes = List(String) + layout = Enum('fit_data', 'fit_data_fill', 'fit_data_stretch', 'fit_data_table', 'fit_columns', default="fit_data") source = Instance(ColumnDataSource) diff --git a/panel/models/tabulator.ts b/panel/models/tabulator.ts index adf38e7348..ec67c1d3c9 100644 --- a/panel/models/tabulator.ts +++ b/panel/models/tabulator.ts @@ -12,6 +12,86 @@ import {PanelHTMLBoxView, set_size} from "./layout" declare const Tabulator: any; +function find_group(key: any, value: string, records: any[]): any { + for (const record of records) { + if (record[key] == value) + return record + } + return null +} + +function summarize(grouped: any[], columns: any[], aggregators: string[], depth: number = 0): any { + const summary: any = {} + if (grouped.length == 0) + return summary + const agg = 
aggregators[depth] + for (const group of grouped) { + const subsummary = summarize(group._children, columns, aggregators, depth+1) + for (const col in subsummary) { + if (isArray(subsummary[col])) + group[col] = subsummary[col].reduce((a: any, b: any) => a + b, 0) / subsummary[col].length + else + group[col] = subsummary[col] + } + for (const column of columns.slice(1)) { + const val = group[column.field] + if (column.field in summary) { + const old_val = summary[column.field] + if (agg === 'min') + summary[column.field] = Math.min(val, old_val) + else if (agg === 'max') + summary[column.field] = Math.max(val, old_val) + else if (agg === 'sum') + summary[column.field] = val + old_val + else if (agg === 'mean') { + if (isArray(summary[column.field])) + summary[column.field].push(val) + else + summary[column.field] = [old_val, val] + } + } else + summary[column.field] = val + } + } + return summary +} + +function group_data(records: any[], columns: any[], indexes: string[], aggregators: any): any[] { + const grouped = [] + const index_field = columns[0].field + for (const record of records) { + const value = record[indexes[0]] + let group = find_group(index_field, value, grouped) + if (group == null) { + group = {_children: []} + group[index_field] = value + grouped.push(group) + } + let subgroup = group + const groups: any = {} + for (const index of indexes.slice(1)) { + subgroup = find_group(index_field, record[index], subgroup._children) + if (subgroup == null) { + subgroup = {_children: []} + subgroup[index_field] = record[index] + group._children.push(subgroup) + } + groups[index] = group + for (const column of columns.slice(1)) + subgroup[column.field] = record[column.field] + group = subgroup + } + for (const column of columns.slice(1)) + subgroup[column.field] = record[column.field] + } + const aggs = [] + for (const index of indexes) + aggs.push((index in aggregators) ? 
aggregators[index] : 'sum') + summarize(grouped, columns, aggs) + return grouped +} + + // The view of the Bokeh extension/ HTML element // Here you can define how to render the model as well as react to model changes or View events. export class DataTabulatorView extends PanelHTMLBoxView { @@ -120,10 +200,10 @@ export class DataTabulatorView extends PanelHTMLBoxView { requestPage(page: number, sorters: any[]): Promise { return new Promise((resolve: any, reject: any) => { try { - if (page != null && sorters != null) { + if (page != null && sorters != null) { this.model.page = page || 1 this.model.sorters = sorters - } + } resolve([]) } catch(err) { reject(err) @@ -182,8 +262,8 @@ export class DataTabulatorView extends PanelHTMLBoxView { paginationSize: this.model.page_size, paginationInitialPage: 1, selectableCheck: (row: any) => { - const selectable = this.model.selectable_rows - return (selectable == null) || (selectable.indexOf(row._row.data._index) >= 0) + const selectable = this.model.selectable_rows + return (selectable == null) || (selectable.indexOf(row._row.data._index) >= 0) }, tooltips: (cell: any) => { return cell.getColumn().getField() + ": " + cell.getValue(); @@ -199,6 +279,8 @@ export class DataTabulatorView extends PanelHTMLBoxView { data = [] else data = transform_cds_to_records(cds, true) + if (configuration.dataTree) + data = group_data(data, this.model.columns, this.model.indexes, this.model.aggregators) return { ...configuration, "data": data, @@ -250,7 +332,10 @@ export class DataTabulatorView extends PanelHTMLBoxView { tab_column.formatter = "tickCross" else { tab_column.formatter = (cell: any) => { - return column.formatter.doFormat(cell.getRow(), cell, cell.getValue(), null, null) + const formatted = column.formatter.doFormat(cell.getRow(), cell, cell.getValue(), null, null) + const node = div() + node.innerHTML = formatted + return node.children[0].innerHTML } } } @@ -318,7 +403,9 @@ export class DataTabulatorView extends PanelHTMLBoxView 
{ // Update table setData(): void { - const data = transform_cds_to_records(this.model.source, true); + let data = transform_cds_to_records(this.model.source, true); + if (this.model.configuration.dataTree) + data = group_data(data, this.model.columns, this.model.indexes, this.model.aggregators) if (this.model.pagination != null) this.tabulator.rowManager.setData(data, true, false) else @@ -407,7 +494,7 @@ export class DataTabulatorView extends PanelHTMLBoxView { this._styled_cells.push(element) element.cssText = "" for (const s of style) { - let prop, value + let prop, value if (isArray(s)) [prop, value] = s else if (!s.includes(':')) @@ -519,8 +606,8 @@ export class DataTabulatorView extends PanelHTMLBoxView { const filtered = [] for (const ind of indices) { if (this.model.selectable_rows == null || - this.model.selectable_rows.indexOf(ind) >= 0) - filtered.push(ind) + this.model.selectable_rows.indexOf(ind) >= 0) + filtered.push(ind) } return filtered } @@ -550,6 +637,7 @@ export const TableLayout = Enum("fit_data", "fit_data_fill", "fit_data_stretch", export namespace DataTabulator { export type Attrs = p.AttrsOf export type Props = HTMLBox.Props & { + aggregators: p.Property columns: p.Property configuration: p.Property download: p.Property @@ -559,6 +647,7 @@ export namespace DataTabulator { frozen_rows: p.Property groupby: p.Property hidden_columns: p.Property + indexes: p.Property layout: p.Property max_page: p.Property page: p.Property @@ -590,6 +679,7 @@ export class DataTabulator extends HTMLBox { this.prototype.default_view = DataTabulatorView; this.define(({Any, Array, Boolean, Nullable, Number, Ref, String}) => ({ + aggregators: [ Any, {} ], configuration: [ Any, {} ], columns: [ Array(Ref(TableColumn)), [] ], download: [ Boolean, true ], @@ -599,6 +689,7 @@ export class DataTabulator extends HTMLBox { frozen_rows: [ Array(Number), [] ], groupby: [ Array(String), [] ], hidden_columns: [ Array(String), [] ], + indexes: [ Array(String), [] ], layout: [ 
TableLayout, "fit_data" ], max_page: [ Number, 0 ], pagination: [ Nullable(String), null ], diff --git a/panel/tests/widgets/test_tables.py b/panel/tests/widgets/test_tables.py index 875235065d..94b245335d 100644 --- a/panel/tests/widgets/test_tables.py +++ b/panel/tests/widgets/test_tables.py @@ -929,7 +929,8 @@ def test_tabulator_dataframe_replace_data(document, comm): assert c3.field == 'C_l0_g1' assert model.configuration == { 'columns': [{'field': 'R0'}, {'field': 'C_l0_g0'}, {'field': 'C_l0_g1'}], - 'selectable': True + 'selectable': True, + 'dataTree': False } expected = { 'C_l0_g0': np.array(['R0C0', 'R1C0'], dtype=object), diff --git a/panel/widgets/tables.py b/panel/widgets/tables.py index 122387e3f0..ac750b3caf 100644 --- a/panel/widgets/tables.py +++ b/panel/widgets/tables.py @@ -28,6 +28,13 @@ class BaseTable(ReactiveData, Widget): + aggregators = param.Dict(default={}, doc=""" + A dictionary mapping from index name to an aggregator to + be used for hierarchical multi-indexes (valid aggregators + include 'min', 'max', 'mean' and 'sum'). 
If separate + aggregators for different columns are required the dictionary + may be nested as `{index_name: {column_name: aggregator}}`""") + editors = param.Dict(default={}, doc=""" Bokeh CellEditor to use for a particular column (overrides the default chosen based on the type).""") @@ -36,6 +43,9 @@ class BaseTable(ReactiveData, Widget): Bokeh CellFormatter to use for a particular column (overrides the default chosen based on the type).""") + hierarchical = param.Boolean(default=False, constant=True, doc=""" + Whether to generate a hierarchical index.""") + row_height = param.Integer(default=40, doc=""" The height of each table row.""") @@ -91,7 +101,7 @@ def _get_columns(self): indexes = self.indexes col_names = list(self.value.columns) - if len(indexes) == 1: + if not self.hierarchical or len(indexes) == 1: col_names = indexes + col_names else: col_names = indexes[-1:] + col_names @@ -541,13 +551,6 @@ def selected_dataframe(self): class DataFrame(BaseTable): - aggregators = param.Dict(default={}, doc=""" - A dictionary mapping from index name to an aggregator to - be used for hierarchical multi-indexes (valid aggregators - include 'min', 'max', 'mean' and 'sum'). If separate - aggregators for different columns are required the dictionary - may be nested as `{index_name: {column_name: aggregator}}`""") - auto_edit = param.Boolean(default=False, doc=""" Whether clicking on a table cell automatically starts edit mode.""") @@ -575,9 +578,6 @@ class DataFrame(BaseTable): ``"none"`` Do not automatically compute column widths.""") - hierarchical = param.Boolean(default=False, constant=True, doc=""" - Whether to generate a hierachical index.""") - fit_columns = param.Boolean(default=None, doc=""" Whether columns should expand to the available width. 
This results in no horizontal scrollbar showing up, but data can @@ -765,11 +765,13 @@ class Tabulator(BaseTable): _data_params = ['value', 'page', 'page_size', 'pagination', 'sorters'] - _config_params = ['frozen_columns', 'groups', 'selectable'] + _config_params = ['frozen_columns', 'groups', 'selectable', 'hierarchical'] _manual_params = BaseTable._manual_params + _config_params - _rename = {'disabled': 'editable', 'selection': None, 'selectable': 'select_mode'} + _rename = { + 'disabled': 'editable', 'selection': None, 'selectable': 'select_mode' + } def __init__(self, value=None, **params): configuration = params.pop('configuration', {}) @@ -1016,6 +1018,7 @@ def _get_properties(self, source): else: selectable = self.selectable props.update({ + 'aggregators': self.aggregators, 'source': source, 'styles': self._get_style_data(), 'columns': columns, @@ -1035,6 +1038,7 @@ def _get_properties(self, source): if self.pagination: length = 0 if self._processed is None else len(self._processed) props['max_page'] = length//self.page_size + bool(length%self.page_size) + props['indexes'] = self.indexes return props def _get_model(self, doc, root=None, parent=None, comm=None): @@ -1091,7 +1095,6 @@ def _config_columns(self, column_objs): if column.field in group_cols ] col_dict = {'field': column.field} - if isinstance(self.text_align, str): col_dict['hozAlign'] = self.text_align elif column.field in self.text_align: @@ -1140,6 +1143,7 @@ def _get_configuration(self, columns): raise ValueError("Groups must be defined either explicitly " "or via the configuration, not both.") configuration['columns'] = self._config_columns(columns) + configuration['dataTree'] = self.hierarchical return configuration def download(self, filename='table.csv'):