From ef3bc6de044a85757cb178636339f7e5062b1724 Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Fri, 10 Jun 2022 15:30:35 +0800 Subject: [PATCH 1/3] feat: treemap migration --- ...dd_advanced_data_types_to_column_models.py | 4 +- ...4_c747c78868b6_migrating_legacy_treemap.py | 104 +++++++++++++++ superset/utils/migrate_viz.py | 125 ++++++++++++++++++ tests/unit_tests/conftest.py | 5 +- .../viz_migration/treemap_migration_test.py | 93 +++++++++++++ 5 files changed, 329 insertions(+), 2 deletions(-) create mode 100644 superset/migrations/versions/2022-06-09_22-04_c747c78868b6_migrating_legacy_treemap.py create mode 100644 superset/utils/migrate_viz.py create mode 100644 tests/unit_tests/utils/viz_migration/treemap_migration_test.py diff --git a/superset/migrations/versions/2021-05-27_16-10_6f139c533bea_add_advanced_data_types_to_column_models.py b/superset/migrations/versions/2021-05-27_16-10_6f139c533bea_add_advanced_data_types_to_column_models.py index bbbee47fad239..114716c3dace6 100644 --- a/superset/migrations/versions/2021-05-27_16-10_6f139c533bea_add_advanced_data_types_to_column_models.py +++ b/superset/migrations/versions/2021-05-27_16-10_6f139c533bea_add_advanced_data_types_to_column_models.py @@ -14,10 +14,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""adding_advanced_data_type.py +"""adding advanced data type to column models + Revision ID: 6f139c533bea Revises: cbe71abde154 Create Date: 2021-05-27 16:10:59.567684 + """ import sqlalchemy as sa diff --git a/superset/migrations/versions/2022-06-09_22-04_c747c78868b6_migrating_legacy_treemap.py b/superset/migrations/versions/2022-06-09_22-04_c747c78868b6_migrating_legacy_treemap.py new file mode 100644 index 0000000000000..ceab4e89ff594 --- /dev/null +++ b/superset/migrations/versions/2022-06-09_22-04_c747c78868b6_migrating_legacy_treemap.py @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Migrating legacy TreeMap + +Revision ID: c747c78868b6 +Revises: e786798587de +Create Date: 2022-06-09 22:04:17.686635 + +""" + +# revision identifiers, used by Alembic. + +revision = "c747c78868b6" +down_revision = "e786798587de" + +from alembic import op +from sqlalchemy import and_, Column, Integer, String, Text +from sqlalchemy.ext.declarative import declarative_base + +from superset import db +from superset.utils.migrate_viz import get_migrate_class, MigrateVizEnum + +treemap_processor = get_migrate_class[MigrateVizEnum.treemap] + +Base = declarative_base() + + +class Slice(Base): + __tablename__ = "slices" + + id = Column(Integer, primary_key=True) + slice_name = Column(String(250)) + viz_type = Column(String(250)) + params = Column(Text) + query_context = Column(Text) + + +def upgrade(): + bind = op.get_bind() + session = db.Session(bind=bind) + + slices = session.query(Slice).filter( + Slice.viz_type == treemap_processor.source_viz_type + ) + total = slices.count() + idx = 0 + for slc in slices.yield_per(100): + try: + idx += 1 + print(f"Upgrading ({idx}/{total}): {slc.slice_name}#{slc.id}") + new_viz = treemap_processor.upgrade(slc) + session.merge(new_viz) + session.commit() + except Exception as exc: + print( + "Error while processing migration: '{}'\nError: {}\n".format( + slc.slice_name, str(exc) + ) + ) + + session.close() + + +def downgrade(): + bind = op.get_bind() + session = db.Session(bind=bind) + + slices = session.query(Slice).filter( + and_( + Slice.viz_type == treemap_processor.target_viz_type, + Slice.params.like("%form_data_bak%"), + ) + ) + total = slices.count() + idx = 0 + for slc in slices.yield_per(100): + try: + idx += 1 + print(f"Downgrading ({idx}/{total}): {slc.slice_name}#{slc.id}") + new_viz = treemap_processor.downgrade(slc) + session.merge(new_viz) + session.commit() + except Exception as exc: + print( + "Error while processing migration: '{}'\nError: {}\n".format( + slc.slice_name, str(exc) + ) + ) + + session.close() diff --git a/superset/utils/migrate_viz.py b/superset/utils/migrate_viz.py new file mode 100644 index 0000000000000..1d279c79a4575 --- /dev/null +++ b/superset/utils/migrate_viz.py @@ -0,0 +1,125 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import json +from enum import Enum +from typing import Dict, Set, Type, TYPE_CHECKING + +if TYPE_CHECKING: + from superset.models.slice import Slice + + +# pylint: disable=invalid-name +class MigrateVizEnum(str, Enum): + # the Enum member name is viz_type in database + treemap = "treemap" + + +class MigrateViz: + remove_keys: Set[str] = set() + + mapping_keys: Dict[str, str] = {} + + source_viz_type: str + + target_viz_type: str + + def __init__(self, form_data: str) -> None: + self.data = json.loads(form_data) + + def _pre_action(self) -> None: + """some actions before migrate""" + + def _migrate(self) -> None: + if self.data.get("viz_type") != self.source_viz_type: + return + + if "viz_type" in self.data: + self.data["viz_type"] = self.target_viz_type + + rv_data = {} + for (key, value) in self.data.items(): + if key in self.mapping_keys and self.mapping_keys[key] in rv_data: + raise ValueError("Duplicate key in target viz") + + if key in self.mapping_keys: + rv_data[self.mapping_keys[key]] = value + + if key in self.remove_keys: + continue + + rv_data[key] = value + + self.data = rv_data + + def _post_action(self) -> None: + """some actions after migrate""" + + @classmethod + def upgrade(cls, slc: Slice) -> Slice: + clz = cls(slc.params) + slc.viz_type = cls.target_viz_type + form_data_bak = clz.data.copy() + + clz._pre_action() + clz._migrate() + clz._post_action() + + # only backup params + slc.params = json.dumps({**clz.data, "form_data_bak": form_data_bak}) + + query_context = json.loads(slc.query_context or "{}") + if "form_data" in query_context: + query_context["form_data"] = clz.data + slc.query_context = json.dumps(query_context) + return slc + + @classmethod + def downgrade(cls, slc: Slice) -> Slice: + form_data = json.loads(slc.params) + if "form_data_bak" in form_data and "viz_type" in form_data.get( + "form_data_bak" + ): + form_data_bak = form_data["form_data_bak"] + slc.params = json.dumps(form_data_bak) + slc.viz_type = form_data_bak.get("viz_type") + + query_context = json.loads(slc.query_context or "{}") + if "form_data" in query_context: + query_context["form_data"] = form_data_bak + slc.query_context = json.dumps(query_context) + return slc + + +class MigrateTreeMap(MigrateViz): + source_viz_type = "treemap" + target_viz_type = "treemap_v2" + remove_keys = {"metrics"} + + def _pre_action(self) -> None: + if ( + "metrics" in self.data + and isinstance(self.data["metrics"], list) + and len(self.data["metrics"]) > 0 + ): + self.data["metric"] = self.data["metrics"][0] + + +get_migrate_class: Dict[MigrateVizEnum, Type[MigrateViz]] = { + MigrateVizEnum.treemap: MigrateTreeMap, +} diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py index 4560617d4bcb8..1403e31249402 100644 --- a/tests/unit_tests/conftest.py +++ b/tests/unit_tests/conftest.py @@ -17,6 +17,7 @@ # pylint: disable=redefined-outer-name, import-outside-toplevel import importlib +import os from typing import Any, Callable, Iterator import pytest @@ -69,7 +70,9 @@ def app() -> Iterator[SupersetApp]: app = SupersetApp(__name__) app.config.from_object("superset.config") - app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite://" + app.config["SQLALCHEMY_DATABASE_URI"] = ( + os.environ.get("SUPERSET__SQLALCHEMY_DATABASE_URI") or "sqlite://" + ) app.config["WTF_CSRF_ENABLED"] = False app.config["PREVENT_UNSAFE_DB_CONNECTIONS"] = False app.config["TESTING"] = True diff --git a/tests/unit_tests/utils/viz_migration/treemap_migration_test.py b/tests/unit_tests/utils/viz_migration/treemap_migration_test.py new file mode 100644 index 0000000000000..4bec5dec830fa --- /dev/null +++ b/tests/unit_tests/utils/viz_migration/treemap_migration_test.py @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import json + +from superset.app import SupersetApp +from superset.utils.migrate_viz import get_migrate_class, MigrateVizEnum + +treemap_form_data = """{ + "adhoc_filters": [ + { + "clause": "WHERE", + "comparator": [ + "Edward" + ], + "expressionType": "SIMPLE", + "filterOptionName": "filter_xhbus6irfa_r10k9nwmwy", + "isExtra": false, + "isNew": false, + "operator": "IN", + "operatorId": "IN", + "sqlExpression": null, + "subject": "name" + } + ], + "color_scheme": "bnbColors", + "datasource": "2__table", + "extra_form_data": {}, + "granularity_sqla": "ds", + "groupby": [ + "state", + "gender" + ], + "metrics": [ + "sum__num" + ], + "number_format": ",d", + "order_desc": true, + "row_limit": 10, + "time_range": "No filter", + "timeseries_limit_metric": "sum__num", + "treemap_ratio": 1.618033988749895, + "viz_type": "treemap" +} +""" + +treemap_processor = get_migrate_class[MigrateVizEnum.treemap] + + +def test_treemap_migrate(app_context: SupersetApp) -> None: + from superset.models.slice import Slice + + slc = Slice( + viz_type="treemap", + datasource_type="table", + params=treemap_form_data, + query_context=f'{{"form_data": {treemap_form_data}}}', + ) + + slc = treemap_processor.upgrade(slc) + assert slc.viz_type == treemap_processor.target_viz_type + # verify form_data + new_form_data = json.loads(slc.params) + assert new_form_data["metric"] == "sum__num" + assert new_form_data["viz_type"] == "treemap_v2" + assert "metrics" not in new_form_data + assert json.dumps(new_form_data["form_data_bak"], sort_keys=True) == json.dumps( + json.loads(treemap_form_data), sort_keys=True + ) + + # verify query_context + new_query_context = json.loads(slc.query_context) + assert new_query_context["form_data"]["viz_type"] == "treemap_v2" + + # downgrade + slc = treemap_processor.downgrade(slc) + assert slc.viz_type == treemap_processor.source_viz_type + assert json.dumps(json.loads(slc.params), sort_keys=True) == json.dumps( + json.loads(treemap_form_data), sort_keys=True + ) From 0d10fb3da6c09f8137688ce1c1cfebb0bcb7accd Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 30 Jun 2022 10:37:28 +0800 Subject: [PATCH 2/3] refine migration --- ...2022-06-30_22-04_c747c78868b6_migrating_legacy_treemap.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename superset/migrations/versions/{2022-06-09_22-04_c747c78868b6_migrating_legacy_treemap.py => 2022-06-30_22-04_c747c78868b6_migrating_legacy_treemap.py} (97%) diff --git a/superset/migrations/versions/2022-06-09_22-04_c747c78868b6_migrating_legacy_treemap.py b/superset/migrations/versions/2022-06-30_22-04_c747c78868b6_migrating_legacy_treemap.py similarity index 97% rename from superset/migrations/versions/2022-06-09_22-04_c747c78868b6_migrating_legacy_treemap.py rename to superset/migrations/versions/2022-06-30_22-04_c747c78868b6_migrating_legacy_treemap.py index ceab4e89ff594..8ccd122258144 100644 --- a/superset/migrations/versions/2022-06-09_22-04_c747c78868b6_migrating_legacy_treemap.py +++ b/superset/migrations/versions/2022-06-30_22-04_c747c78868b6_migrating_legacy_treemap.py @@ -18,14 +18,14 @@ Revision ID: c747c78868b6 Revises: e786798587de -Create Date: 2022-06-09 22:04:17.686635 +Create Date: 2022-06-30 22:04:17.686635 """ # revision identifiers, used by Alembic. revision = "c747c78868b6" -down_revision = "e786798587de" +down_revision = "7fb8bca906d2" from alembic import op from sqlalchemy import and_, Column, Integer, String, Text From b1d10bd30d6369d4919fba9a50c77bf76bc156d1 Mon Sep 17 00:00:00 2001 From: Yongjie Zhao Date: Thu, 7 Jul 2022 10:15:19 +0800 Subject: [PATCH 3/3] addressing comments --- ...6-30_22-04_c747c78868b6_migrating_legacy_treemap.py | 10 ++++------ superset/utils/migrate_viz.py | 5 +---- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/superset/migrations/versions/2022-06-30_22-04_c747c78868b6_migrating_legacy_treemap.py b/superset/migrations/versions/2022-06-30_22-04_c747c78868b6_migrating_legacy_treemap.py index 8ccd122258144..c420af5fca120 100644 --- a/superset/migrations/versions/2022-06-30_22-04_c747c78868b6_migrating_legacy_treemap.py +++ b/superset/migrations/versions/2022-06-30_22-04_c747c78868b6_migrating_legacy_treemap.py @@ -58,20 +58,19 @@ def upgrade(): ) total = slices.count() idx = 0 - for slc in slices.yield_per(100): + for slc in slices.yield_per(1000): try: idx += 1 print(f"Upgrading ({idx}/{total}): {slc.slice_name}#{slc.id}") new_viz = treemap_processor.upgrade(slc) session.merge(new_viz) - session.commit() except Exception as exc: print( "Error while processing migration: '{}'\nError: {}\n".format( slc.slice_name, str(exc) ) ) - + session.commit() session.close() @@ -87,18 +86,17 @@ def downgrade(): ) total = slices.count() idx = 0 - for slc in slices.yield_per(100): + for slc in slices.yield_per(1000): try: idx += 1 print(f"Downgrading ({idx}/{total}): {slc.slice_name}#{slc.id}") new_viz = treemap_processor.downgrade(slc) session.merge(new_viz) - session.commit() except Exception as exc: print( "Error while processing migration: '{}'\nError: {}\n".format( slc.slice_name, str(exc) ) ) - + session.commit() session.close() diff --git a/superset/utils/migrate_viz.py b/superset/utils/migrate_viz.py index 1d279c79a4575..65ae467cb803f 100644 --- a/superset/utils/migrate_viz.py +++ b/superset/utils/migrate_viz.py @@ -32,11 +32,8 @@ class MigrateVizEnum(str, Enum): class MigrateViz: remove_keys: Set[str] = set() - mapping_keys: Dict[str, str] = {} - source_viz_type: str - target_viz_type: str def __init__(self, form_data: str) -> None: @@ -53,7 +50,7 @@ def _migrate(self) -> None: self.data["viz_type"] = self.target_viz_type rv_data = {} - for (key, value) in self.data.items(): + for key, value in self.data.items(): if key in self.mapping_keys and self.mapping_keys[key] in rv_data: raise ValueError("Duplicate key in target viz")