From b2ffe148de7971a8ca9587c4a935c652b67c26c8 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 26 Oct 2020 17:51:57 -0700 Subject: [PATCH 01/39] add black to travis --- .travis.yml | 1 + requirements.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index c087fe4d..c999278c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ install: #- npm i lux-widget # command to run tests script: + - black --check . - python -m pytest tests/*.py - pytest --cov-report term --cov=lux tests/ after_success: diff --git a/requirements.txt b/requirements.txt index 722203d1..25e680e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,7 @@ scikit-learn>=0.22 Sphinx>=3.0.2 sphinx-rtd-theme>=0.4.3 lux-widget==0.1.0 +black>=20.8b1 # Install only to use SQLExecutor # psycopg2>=2.8.5 # psycopg2-binary>=2.8.5 From 6715ebe52306367b4c3f7b39419994cc854f382b Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 26 Oct 2020 18:29:57 -0700 Subject: [PATCH 02/39] reformat all code and adjust test --- .idea/lux.iml | 12 + .idea/misc.xml | 4 + .idea/modules.xml | 8 + .idea/vcs.xml | 6 + .idea/workspace.xml | 237 ++++ doc/conf.py | 50 +- lux/__init__.py | 2 +- lux/_config/config.py | 269 ++-- lux/_version.py | 2 +- lux/action/__init__.py | 3 +- lux/action/column_group.py | 48 +- lux/action/correlation.py | 116 +- lux/action/custom.py | 34 +- lux/action/enhance.py | 96 +- lux/action/filter.py | 175 +-- lux/action/generalize.py | 138 ++- lux/action/row_group.py | 57 +- lux/action/similarity.py | 90 +- lux/action/univariate.py | 123 +- lux/core/__init__.py | 11 +- lux/core/frame.py | 1576 +++++++++++++----------- lux/core/series.py | 38 +- lux/executor/Executor.py | 15 +- lux/executor/PandasExecutor.py | 383 +++--- lux/executor/SQLExecutor.py | 189 ++- lux/executor/__init__.py | 3 +- lux/history/__init__.py | 3 +- lux/history/event.py | 31 +- lux/history/history.py | 53 +- lux/interestingness/__init__.py | 3 +- 
lux/interestingness/interestingness.py | 573 +++++---- lux/processor/Compiler.py | 867 +++++++------ lux/processor/Parser.py | 185 +-- lux/processor/Validator.py | 128 +- lux/processor/__init__.py | 3 +- lux/utils/__init__.py | 3 +- lux/utils/date_utils.py | 220 ++-- lux/utils/message.py | 26 +- lux/utils/utils.py | 116 +- lux/vis/Clause.py | 240 ++-- lux/vis/Vis.py | 578 +++++---- lux/vis/VisList.py | 609 ++++----- lux/vis/__init__.py | 4 +- lux/vislib/__init__.py | 3 +- lux/vislib/altair/AltairChart.py | 161 ++- lux/vislib/altair/AltairRenderer.py | 185 +-- lux/vislib/altair/BarChart.py | 184 +-- lux/vislib/altair/Heatmap.py | 101 +- lux/vislib/altair/Histogram.py | 121 +- lux/vislib/altair/LineChart.py | 95 +- lux/vislib/altair/ScatterChart.py | 96 +- lux/vislib/altair/__init__.py | 3 +- setup.py | 50 +- tests/__init__.py | 3 +- tests/context.py | 7 +- tests/test_action.py | 243 ++-- tests/test_compiler.py | 458 ++++--- tests/test_config.py | 235 ++-- tests/test_dates.py | 127 +- tests/test_display.py | 17 +- tests/test_error_warning.py | 38 +- tests/test_executor.py | 202 ++- tests/test_interestingness.py | 232 ++-- tests/test_maintainence.py | 62 +- tests/test_nan.py | 16 +- tests/test_pandas.py | 17 +- tests/test_pandas_coverage.py | 298 +++-- tests/test_parser.py | 105 +- tests/test_performance.py | 65 +- tests/test_type.py | 168 +-- tests/test_vis.py | 205 ++- 71 files changed, 6380 insertions(+), 4444 deletions(-) create mode 100644 .idea/lux.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml diff --git a/.idea/lux.iml b/.idea/lux.iml new file mode 100644 index 00000000..7c9d48f0 --- /dev/null +++ b/.idea/lux.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 00000000..65531ca9 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git 
a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..e15faf5f --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..94a25f7f --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 00000000..1424ff2b --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1602172290063 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file From d3b0320f8a0048c1c7a345010ceaa8c7fdb7f51a Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Wed, 28 Oct 2020 19:06:56 -0700 Subject: [PATCH 04/39] fix contributing doc --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8f068c4f..638212f6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -47,7 +47,7 @@ python -m pytest tests/*.py # Submitting a Pull Request -You can commit your code and push to your forked repo. Once all of your local changes have been tested and is working, you are ready to submit a PR. For Lux, we use the "Squash and Merge" strategy to merge in PR, which means that even if you make a lot of small commits in your PR, they will all get squashed into a single commit associated with the PR. Please make sure that comments and unnecessary file changes are not committed as part of the PR by looking at the "File Changes" diff view on the pull request page. 
+Before submitting a PR, please make sure you have formatted your code using the command `black .`. You can commit your code and push to your forked repo. Once all of your local changes have been tested and is working, you are ready to submit a PR. For Lux, we use the "Squash and Merge" strategy to merge in PR, which means that even if you make a lot of small commits in your PR, they will all get squashed into a single commit associated with the PR. Please make sure that comments and unnecessary file changes are not committed as part of the PR by looking at the "File Changes" diff view on the pull request page. Once the pull request is submitted, the maintainer will get notified and review your pull request. They may ask for additional changes or comment on the PR. You can always make updates to your pull request after submitting it. From 60b8eff9af0018b4cc6ba0e2b70df6e743bc7030 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Thu, 29 Oct 2020 00:17:58 -0700 Subject: [PATCH 05/39] small change in contributing --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index accec953..ac05767b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -50,7 +50,7 @@ In order to keep our codebase clean and readible, we are using PEP8 guidelines. # Submitting a Pull Request - You can commit your code and push to your forked repo. Once all of your local changes have been tested and is working, you are ready to submit a PR. For Lux, we use the "Squash and Merge" strategy to merge in PR, which means that even if you make a lot of small commits in your PR, they will all get squashed into a single commit associated with the PR. Please make sure that comments and unnecessary file changes are not committed as part of the PR by looking at the "File Changes" diff view on the pull request page. + You can commit your code and push to your forked repo. 
Once all of your local changes have been tested and formatted, you are ready to submit a PR. For Lux, we use the "Squash and Merge" strategy to merge in PR, which means that even if you make a lot of small commits in your PR, they will all get squashed into a single commit associated with the PR. Please make sure that comments and unnecessary file changes are not committed as part of the PR by looking at the "File Changes" diff view on the pull request page. Once the pull request is submitted, the maintainer will get notified and review your pull request. They may ask for additional changes or comment on the PR. You can always make updates to your pull request after submitting it. From 824dd185ce8e16824c5b35a9b4a3fa748f8910ab Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Thu, 29 Oct 2020 00:32:38 -0700 Subject: [PATCH 06/39] update --- lux/core/frame.py | 1572 ++++++++++++++++++++++++-------------------- lux/vis/VisList.py | 515 ++++++++------- 2 files changed, 1145 insertions(+), 942 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 798d7c1a..c31226d4 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -26,711 +26,867 @@ class LuxDataFrame(pd.DataFrame): - ''' - A subclass of pd.DataFrame that supports all dataframe operations while housing other variables and functions for generating visual recommendations. - ''' - # MUST register here for new properties!! 
- _metadata = ['_intent','data_type_lookup','data_type', - 'data_model_lookup','data_model','unique_values','cardinality','_rec_info', '_pandas_only', - '_min_max','plot_config', '_current_vis','_widget', '_recommendation','_prev','_history', '_saved_export'] - - def __init__(self,*args, **kw): - from lux.executor.PandasExecutor import PandasExecutor - self._history = History() - self._intent = [] - self._recommendation = {} - self._saved_export = None - self._current_vis = [] - self._prev = None - super(LuxDataFrame, self).__init__(*args, **kw) - - self.executor_type = "Pandas" - self.executor = PandasExecutor() - self.SQLconnection = "" - self.table_name = "" - - self._sampled = None - self._default_pandas_display = True - self._toggle_pandas_display = True - self._plot_config = None - self._message = Message() - self._pandas_only=False - # Metadata - self.data_type_lookup = None - self.data_type = None - self.data_model_lookup = None - self.data_model = None - self.unique_values = None - self.cardinality = None - self._min_max = None - self.pre_aggregated = None - - @property - def _constructor(self): - return LuxDataFrame - # @property - # def _constructor_sliced(self): - # def f(*args, **kwargs): - # # adapted from https://github.com/pandas-dev/pandas/issues/13208#issuecomment-326556232 - # return LuxSeries(*args, **kwargs).__finalize__(self, method='inherit') - # return f - @property - def history(self): - return self._history - def maintain_metadata(self): - if (not hasattr(self,"_metadata_fresh") or not self._metadata_fresh ): # Check that metadata has not yet been computed - if (len(self)>0): #only compute metadata information if the dataframe is non-empty - self.executor.compute_stats(self) - self.executor.compute_dataset_metadata(self) - self._infer_structure() - self._metadata_fresh = True - def expire_recs(self): - self._recs_fresh = False - self.recommendation = {} - self.current_vis = None - self._widget = None - self._rec_info = None - self._sampled 
= None - def expire_metadata(self): - # Set metadata as null - self._metadata_fresh = False - self.data_type_lookup = None - self.data_type = None - self.data_model_lookup = None - self.data_model = None - self.unique_values = None - self.cardinality = None - self._min_max = None - self.pre_aggregated = None - - ##################### - ## Override Pandas ## - ##################### - def __getattr__(self, name): - ret_value = super(LuxDataFrame, self).__getattr__(name) - self.expire_metadata() - self.expire_recs() - return ret_value - def _set_axis(self, axis, labels): - super(LuxDataFrame, self)._set_axis(axis, labels) - self.expire_metadata() - self.expire_recs() - def _update_inplace(self,*args,**kwargs): - super(LuxDataFrame, self)._update_inplace(*args,**kwargs) - self.expire_metadata() - self.expire_recs() - def _set_item(self, key, value): - super(LuxDataFrame, self)._set_item(key, value) - self.expire_metadata() - self.expire_recs() - def _infer_structure(self): - # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data - is_multi_index_flag = self.index.nlevels !=1 - not_int_index_flag = self.index.dtype !='int64' - small_df_flag = len(self)<100 - self.pre_aggregated = (is_multi_index_flag or not_int_index_flag) and small_df_flag - if ("Number of Records" in self.columns): - self.pre_aggregated = True - very_small_df_flag = len(self)<=10 - if (very_small_df_flag): - self.pre_aggregated = True - def set_executor_type(self, exe): - if (exe =="SQL"): - import pkgutil - if (pkgutil.find_loader("psycopg2") is None): - raise ImportError("psycopg2 is not installed. 
Run `pip install psycopg2' to install psycopg2 to enable the Postgres connection.") - else: - import psycopg2 - from lux.executor.SQLExecutor import SQLExecutor - self.executor = SQLExecutor - else: - from lux.executor.PandasExecutor import PandasExecutor - self.executor = PandasExecutor() - self.executor_type = exe - @property - def plot_config(self): - return self._plot_config - @plot_config.setter - def plot_config(self,config_func:Callable): - """ - Modify plot aesthetic settings to all visualizations in the dataframe display - Currently only supported for Altair visualizations - Parameters - ---------- - config_func : Callable - A function that takes in an AltairChart (https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html) as input and returns an AltairChart as output - - Example - ---------- - Changing the color of marks and adding a title for all charts displayed for this dataframe - >>> df = pd.read_csv("lux/data/car.csv") - >>> def changeColorAddTitle(chart): - chart = chart.configure_mark(color="red") # change mark color to red - chart.title = "Custom Title" # add title to chart - return chart - >>> df.plot_config = changeColorAddTitle - >>> df - Change the opacity of all scatterplots displayed for this dataframe - >>> df = pd.read_csv("lux/data/olympic.csv") - >>> def changeOpacityScatterOnly(chart): - if chart.mark=='circle': - chart = chart.configure_mark(opacity=0.1) # lower opacity - return chart - >>> df.plot_config = changeOpacityScatterOnly - >>> df - """ - self._plot_config = config_func - self._recs_fresh=False - def clear_plot_config(self): - self._plot_config = None - self._recs_fresh=False - - @property - def intent(self): - return self._intent - @intent.setter - def intent(self, intent_input:Union[List[Union[str, Clause]],Vis]): - is_list_input = isinstance(intent_input,list) - is_vis_input = isinstance(intent_input,Vis) - if not (is_list_input or is_vis_input): - raise TypeError("Input intent must be either a list (of 
strings or lux.Clause) or a lux.Vis object." - "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html" - ) - if is_list_input: - self.set_intent(intent_input) - elif is_vis_input: - self.set_intent_as_vis(intent_input) - def clear_intent(self): - self.intent = [] - def set_intent(self, intent:List[Union[str, Clause]]): - """ - Main function to set the intent of the dataframe. - The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. - - Parameters - ---------- - intent : List[str,Clause] - intent list, can be a mix of string shorthand or a lux.Clause object - - Notes - ----- - :doc:`../guide/clause` - """ - self.expire_recs() - self._intent = intent - self._parse_validate_compile_intent() - def _parse_validate_compile_intent(self): - from lux.processor.Parser import Parser - from lux.processor.Validator import Validator - self._intent = Parser.parse(self._intent) - Validator.validate_intent(self._intent,self) - self.maintain_metadata() - from lux.processor.Compiler import Compiler - self.current_vis = Compiler.compile_intent(self, self._intent) - - def copy_intent(self): - #creates a true copy of the dataframe's intent - output = [] - for clause in self._intent: - temp_clause = clause.copy_clause() - output.append(temp_clause) - return(output) - - def set_intent_as_vis(self,vis:Vis): - """ - Set intent of the dataframe as the Vis - - Parameters - ---------- - vis : Vis - """ - self.expire_recs() - self._intent = vis._inferred_intent - self._parse_validate_compile_intent() - - def to_pandas(self): - import lux.core - return lux.core.originalDF(self,copy=False) - - @property - def recommendation(self): - return self._recommendation - @recommendation.setter - def recommendation(self,recommendation:Dict): - self._recommendation = recommendation - @property - def current_vis(self): - return self._current_vis - @current_vis.setter - def current_vis(self,current_vis:Dict): - self._current_vis = 
current_vis - def __repr__(self): - # TODO: _repr_ gets called from _repr_html, need to get rid of this call - return "" - - ####################################################### - ########## SQL Metadata, type, model schema ########### - ####################################################### - - def set_SQL_connection(self, connection, t_name): - self.SQLconnection = connection - self.table_name = t_name - self.compute_SQL_dataset_metadata() - self.set_executor_type("SQL") - - def compute_SQL_dataset_metadata(self): - self.get_SQL_attributes() - for attr in list(self.columns): - self[attr] = None - self.data_type_lookup = {} - self.data_type = {} - #####NOTE: since we aren't expecting users to do much data processing with the SQL database, should we just keep this - ##### in the initialization and do it just once - self.compute_SQL_data_type() - self.compute_SQL_stats() - self.data_model_lookup = {} - self.data_model = {} - self.compute_data_model() - - def compute_SQL_stats(self): - # precompute statistics - self.unique_values = {} - self._min_max = {} - - self.get_SQL_unique_values() - #self.get_SQL_cardinality() - for attribute in self.columns: - if self.data_type_lookup[attribute] == 'quantitative': - self._min_max[attribute] = (self[attribute].min(), self[attribute].max()) - - def get_SQL_attributes(self): - if "." 
in self.table_name: - table_name = self.table_name[self.table_name.index(".")+1:] - else: - table_name = self.table_name - attr_query = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{}'".format(table_name) - attributes = list(pd.read_sql(attr_query, self.SQLconnection)['column_name']) - for attr in attributes: - self[attr] = None - - def get_SQL_cardinality(self): - cardinality = {} - for attr in list(self.columns): - card_query = pd.read_sql("SELECT Count(Distinct({})) FROM {}".format(attr, self.table_name), self.SQLconnection) - cardinality[attr] = list(card_query["count"])[0] - self.cardinality = cardinality - - def get_SQL_unique_values(self): - unique_vals = {} - for attr in list(self.columns): - unique_query = pd.read_sql("SELECT Distinct({}) FROM {}".format(attr, self.table_name), self.SQLconnection) - unique_vals[attr] = list(unique_query[attr]) - self.unique_values = unique_vals - - def compute_SQL_data_type(self): - data_type_lookup = {} - sql_dtypes = {} - self.get_SQL_cardinality() - if "." 
in self.table_name: - table_name = self.table_name[self.table_name.index(".")+1:] - else: - table_name = self.table_name - #get the data types of the attributes in the SQL table - for attr in list(self.columns): - datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format(table_name, attr) - datatype = list(pd.read_sql(datatype_query, self.SQLconnection)['data_type'])[0] - sql_dtypes[attr] = datatype - - data_type = {"quantitative":[], "nominal":[], "temporal":[]} - for attr in list(self.columns): - if str(attr).lower() in ["month", "year"]: - data_type_lookup[attr] = "temporal" - data_type["temporal"].append(attr) - elif sql_dtypes[attr] in ["character", "character varying", "boolean", "uuid", "text"]: - data_type_lookup[attr] = "nominal" - data_type["nominal"].append(attr) - elif sql_dtypes[attr] in ["integer", "real", "smallint", "smallserial", "serial"]: - if self.cardinality[attr] < 13: - data_type_lookup[attr] = "nominal" - data_type["nominal"].append(attr) - else: - data_type_lookup[attr] = "quantitative" - data_type["quantitative"].append(attr) - elif "time" in sql_dtypes[attr] or "date" in sql_dtypes[attr]: - data_type_lookup[attr] = "temporal" - data_type["temporal"].append(attr) - self.data_type_lookup = data_type_lookup - self.data_type = data_type - def _append_rec(self,rec_infolist,recommendations:Dict): - if (recommendations["collection"] is not None and len(recommendations["collection"])>0): - rec_infolist.append(recommendations) - def maintain_recs(self): - # `rec_df` is the dataframe to generate the recommendations on - # check to see if globally defined actions have been registered/removed - if (lux.update_actions["flag"] == True): - self._recs_fresh = False - show_prev = False # flag indicating whether rec_df is showing previous df or current self - if self._prev is not None: - rec_df = self._prev - rec_df._message = Message() - rec_df.maintain_metadata() # the prev dataframe may not 
have been printed before - last_event = self.history._events[-1].name - rec_df._message.add(f"Lux is visualizing the previous version of the dataframe before you applied {last_event}.") - show_prev = True - else: - rec_df = self - rec_df._message = Message() - # Add warning message if there exist ID fields - id_fields_str = "" - if (len(rec_df.data_type["id"])>0): - for id_field in rec_df.data_type["id"]: id_fields_str += f"{id_field}, " - id_fields_str = id_fields_str[:-2] - rec_df._message.add(f"{id_fields_str} is not visualized since it resembles an ID field.") - rec_df._prev = None # reset _prev - - if (not hasattr(rec_df,"_recs_fresh") or not rec_df._recs_fresh ): # Check that recs has not yet been computed - rec_infolist = [] - from lux.action.custom import custom - from lux.action.custom import custom_actions - from lux.action.correlation import correlation - from lux.action.univariate import univariate - from lux.action.enhance import enhance - from lux.action.filter import filter - from lux.action.generalize import generalize - from lux.action.row_group import row_group - from lux.action.column_group import column_group - if (rec_df.pre_aggregated): - if (rec_df.columns.name is not None): - rec_df._append_rec(rec_infolist, row_group(rec_df)) - if (rec_df.index.name is not None): - rec_df._append_rec(rec_infolist, column_group(rec_df)) - else: - if self.recommendation == {}: - # display conditions for default actions - no_vis = lambda ldf: (ldf.current_vis is None) or (ldf.current_vis is not None and len(ldf.current_vis) == 0) - one_current_vis = lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) == 1 - multiple_current_vis = lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) > 1 - - # globally register default actions - lux.register_action("correlation", correlation, no_vis) - lux.register_action("distribution", univariate, no_vis, "quantitative") - lux.register_action("occurrence", univariate, no_vis, "nominal") - 
lux.register_action("temporal", univariate, no_vis, "temporal") - - lux.register_action("enhance", enhance, one_current_vis) - lux.register_action("filter", filter, one_current_vis) - lux.register_action("generalize", generalize, one_current_vis) - - lux.register_action("custom", custom, multiple_current_vis) - - # generate vis from globally registered actions and append to dataframe - custom_action_collection = custom_actions(rec_df) - for rec in custom_action_collection: - rec_df._append_rec(rec_infolist, rec) - lux.update_actions["flag"] = False - - # Store _rec_info into a more user-friendly dictionary form - rec_df.recommendation = {} - for rec_info in rec_infolist: - action_type = rec_info["action"] - vlist = rec_info["collection"] - if (rec_df._plot_config): - for vis in rec_df.current_vis: vis._plot_config = rec_df.plot_config - for vis in vlist: vis._plot_config = rec_df.plot_config - if (len(vlist)>0): - rec_df.recommendation[action_type] = vlist - rec_df._rec_info = rec_infolist - self._widget = rec_df.render_widget() - elif (show_prev): # re-render widget for the current dataframe if previous rec is not recomputed - self._widget = rec_df.render_widget() - self._recs_fresh = True - - - ####################################################### - ############## LuxWidget Result Display ############### - ####################################################### - @property - def widget(self): - if(self._widget): - return self._widget - @property - def exported(self) -> Union[Dict[str,VisList], VisList]: - """ - Get selected visualizations as exported Vis List - - Notes - ----- - Convert the _selectedVisIdxs dictionary into a programmable VisList - Example _selectedVisIdxs : - {'Correlation': [0, 2], 'Occurrence': [1]} - indicating the 0th and 2nd vis from the `Correlation` tab is selected, and the 1st vis from the `Occurrence` tab is selected. 
- - Returns - ------- - Union[Dict[str,VisList], VisList] - When there are no exported vis, return empty list -> [] - When all the exported vis is from the same tab, return a VisList of selected visualizations. -> VisList(v1, v2...) - When the exported vis is from the different tabs, return a dictionary with the action name as key and selected visualizations in the VisList. -> {"Enhance": VisList(v1, v2...), "Filter": VisList(v5, v7...), ..} - """ - if not hasattr(self,"_widget"): - warnings.warn( - "\nNo widget attached to the dataframe." - "Please assign dataframe to an output variable.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - , stacklevel=2) - return [] - exported_vis_lst = self._widget._selectedVisIdxs - exported_vis = [] - if (exported_vis_lst=={}): - if self._saved_export: - return self._saved_export - warnings.warn( - "\nNo visualization selected to export.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - ,stacklevel=2) - return [] - if len(exported_vis_lst) == 1 and "currentVis" in exported_vis_lst: - return self.current_vis - elif len(exported_vis_lst) > 1: - exported_vis = {} - if ("currentVis" in exported_vis_lst): - exported_vis["Current Vis"] = self.current_vis - for export_action in exported_vis_lst: - if (export_action != "currentVis"): - exported_vis[export_action] = VisList(list(map(self.recommendation[export_action].__getitem__, exported_vis_lst[export_action]))) - return exported_vis - elif len(exported_vis_lst) == 1 and ("currentVis" not in exported_vis_lst): - export_action = list(exported_vis_lst.keys())[0] - exported_vis = VisList(list(map(self.recommendation[export_action].__getitem__, exported_vis_lst[export_action]))) - self._saved_export = exported_vis - return exported_vis - else: - warnings.warn( - "\nNo visualization selected to export.\n" - "See more: 
https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - ,stacklevel=2) - return [] - - def remove_deleted_recs(self, change): - for action in self._widget.deletedIndices: - deletedSoFar = 0 - for index in self._widget.deletedIndices[action]: - self.recommendation[action].remove_index(index - deletedSoFar) - deletedSoFar += 1 - - def set_intent_on_click(self, change): - from IPython.display import display, clear_output - from lux.processor.Compiler import Compiler - - intent_action = list(self._widget.selectedIntentIndex.keys())[0] - vis = self.recommendation[intent_action][self._widget.selectedIntentIndex[intent_action][0]] - self.set_intent_as_vis(vis) - - self.maintain_metadata() - self.current_vis = Compiler.compile_intent(self, self._intent) - self.maintain_recs() - - with self.output: - clear_output() - display(self._widget) - - self._widget.observe(self.remove_deleted_recs, names='deletedIndices') - self._widget.observe(self.set_intent_on_click, names='selectedIntentIndex') - - def _repr_html_(self): - from IPython.display import display - from IPython.display import clear_output - import ipywidgets as widgets - - try: - if (self._pandas_only): - display(self.display_pandas()) - self._pandas_only=False - else: - if(self.index.nlevels>=2 or self.columns.nlevels >= 2): - warnings.warn( - "\nLux does not currently support dataframes " - "with hierarchical indexes.\n" - "Please convert the dataframe into a flat " - "table via `pandas.DataFrame.reset_index`.\n", - stacklevel=2, - ) - display(self.display_pandas()) - return - - if (len(self)<=0): - warnings.warn("\nLux can not operate on an empty dataframe.\nPlease check your input again.\n",stacklevel=2) - display(self.display_pandas()) - return - if (len(self.columns)<=1): - warnings.warn("\nLux defaults to Pandas when there is only a single column.",stacklevel=2) - display(self.display_pandas()) - return - self.maintain_metadata() - - if (self._intent!=[] and (not 
hasattr(self,"_compiled") or not self._compiled)): - from lux.processor.Compiler import Compiler - self.current_vis = Compiler.compile_intent(self, self._intent) - - if (lux.config.default_display == "lux"): - self._toggle_pandas_display = False - else: - self._toggle_pandas_display = True - - # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) - self.maintain_recs() - - #Observers(callback_function, listen_to_this_variable) - self._widget.observe(self.remove_deleted_recs, names='deletedIndices') - self._widget.observe(self.set_intent_on_click, names='selectedIntentIndex') - - if len(self.recommendation) > 0: - # box = widgets.Box(layout=widgets.Layout(display='inline')) - button = widgets.Button(description="Toggle Pandas/Lux",layout=widgets.Layout(width='140px',top='5px')) - self.output = widgets.Output() - # box.children = [button,output] - # output.children = [button] - # display(box) - display(button, self.output) - def on_button_clicked(b): - with self.output: - if (b): - self._toggle_pandas_display = not self._toggle_pandas_display - clear_output() - if (self._toggle_pandas_display): - display(self.display_pandas()) - else: - # b.layout.display = "none" - display(self._widget) - # b.layout.display = "inline-block" - button.on_click(on_button_clicked) - on_button_clicked(None) - else: - warnings.warn("\nLux defaults to Pandas when there are no valid actions defined.",stacklevel=2) - display(self.display_pandas()) - - except(KeyboardInterrupt,SystemExit): - raise - except: - warnings.warn( - "\nUnexpected error in rendering Lux widget and recommendations. 
" - "Falling back to Pandas display.\n\n" - "Please report this issue on Github: https://github.com/lux-org/lux/issues " - ,stacklevel=2) - display(self.display_pandas()) - def display_pandas(self): - return self.to_pandas() - def render_widget(self, renderer:str ="altair", input_current_vis=""): - """ - Generate a LuxWidget based on the LuxDataFrame - - Structure of widgetJSON: - { - 'current_vis': {}, - 'recommendation': [ - { - 'action': 'Correlation', - 'description': "some description", - 'vspec': [ - {Vega-Lite spec for vis 1}, - {Vega-Lite spec for vis 2}, - ... - ] - }, - ... repeat for other actions - ] - } - Parameters - ---------- - renderer : str, optional - Choice of visualization rendering library, by default "altair" - input_current_vis : lux.LuxDataFrame, optional - User-specified current vis to override default Current Vis, by default - """ - check_import_lux_widget() - import luxwidget - widgetJSON = self.to_JSON(self._rec_info, input_current_vis=input_current_vis) - return luxwidget.LuxWidget( - currentVis=widgetJSON["current_vis"], - recommendations=widgetJSON["recommendation"], - intent=LuxDataFrame.intent_to_string(self._intent), - message = self._message.to_html() - ) - @staticmethod - def intent_to_JSON(intent): - from lux.utils import utils - - filter_specs = utils.get_filter_specs(intent) - attrs_specs = utils.get_attrs_specs(intent) - - intent = {} - intent['attributes'] = [clause.attribute for clause in attrs_specs] - intent['filters'] = [clause.attribute for clause in filter_specs] - return intent - @staticmethod - def intent_to_string(intent): - if (intent): - return ", ".join([clause.to_string() for clause in intent]) - else: - return "" - - def to_JSON(self, rec_infolist, input_current_vis=""): - widget_spec = {} - if (self.current_vis): - self.executor.execute(self.current_vis, self) - widget_spec["current_vis"] = LuxDataFrame.current_vis_to_JSON(self.current_vis, input_current_vis) - else: - widget_spec["current_vis"] = {} - 
widget_spec["recommendation"] = [] - - # Recommended Collection - recCollection = LuxDataFrame.rec_to_JSON(rec_infolist) - widget_spec["recommendation"].extend(recCollection) - return widget_spec - - @staticmethod - def current_vis_to_JSON(vlist, input_current_vis=""): - current_vis_spec = {} - numVC = len(vlist) #number of visualizations in the vis list - if (numVC==1): - current_vis_spec = vlist[0].render_VSpec() - elif (numVC>1): - pass - return current_vis_spec - - @staticmethod - def rec_to_JSON(recs): - rec_lst = [] - import copy - rec_copy = copy.deepcopy(recs) - for idx,rec in enumerate(rec_copy): - if (len(rec["collection"])>0): - rec["vspec"] = [] - for vis in rec["collection"]: - chart = vis.render_VSpec() - rec["vspec"].append(chart) - rec_lst.append(rec) - # delete DataObjectCollection since not JSON serializable - del rec_lst[idx]["collection"] - return rec_lst - - # Overridden Pandas Functions - def head(self, n: int = 5): - self._prev = self - self._history.append_event("head", n=5) - return super(LuxDataFrame, self).head(n) - - def tail(self, n: int = 5): - self._prev = self - self._history.append_event("tail", n=5) - return super(LuxDataFrame, self).tail(n) - - def info(self, *args, **kwargs): - self._pandas_only=True - self._history.append_event("info",*args, **kwargs) - return super(LuxDataFrame, self).info(*args, **kwargs) - - def describe(self, *args, **kwargs): - self._pandas_only=True - self._history.append_event("describe",*args, **kwargs) - return super(LuxDataFrame, self).describe(*args, **kwargs) + """ + A subclass of pd.DataFrame that supports all dataframe operations while housing other variables and functions for generating visual recommendations. + """ + + # MUST register here for new properties!! 
+ _metadata = [ + "_intent", + "data_type_lookup", + "data_type", + "data_model_lookup", + "data_model", + "unique_values", + "cardinality", + "_rec_info", + "_pandas_only", + "_min_max", + "plot_config", + "_current_vis", + "_widget", + "_recommendation", + "_prev", + "_history", + "_saved_export", + ] + + def __init__(self, *args, **kw): + from lux.executor.PandasExecutor import PandasExecutor + + self._history = History() + self._intent = [] + self._recommendation = {} + self._saved_export = None + self._current_vis = [] + self._prev = None + super(LuxDataFrame, self).__init__(*args, **kw) + + self.executor_type = "Pandas" + self.executor = PandasExecutor() + self.SQLconnection = "" + self.table_name = "" + + self._sampled = None + self._default_pandas_display = True + self._toggle_pandas_display = True + self._plot_config = None + self._message = Message() + self._pandas_only = False + # Metadata + self.data_type_lookup = None + self.data_type = None + self.data_model_lookup = None + self.data_model = None + self.unique_values = None + self.cardinality = None + self._min_max = None + self.pre_aggregated = None + + @property + def _constructor(self): + return LuxDataFrame + + # @property + # def _constructor_sliced(self): + # def f(*args, **kwargs): + # # adapted from https://github.com/pandas-dev/pandas/issues/13208#issuecomment-326556232 + # return LuxSeries(*args, **kwargs).__finalize__(self, method='inherit') + # return f + @property + def history(self): + return self._history + + def maintain_metadata(self): + if ( + not hasattr(self, "_metadata_fresh") or not self._metadata_fresh + ): # Check that metadata has not yet been computed + if ( + len(self) > 0 + ): # only compute metadata information if the dataframe is non-empty + self.executor.compute_stats(self) + self.executor.compute_dataset_metadata(self) + self._infer_structure() + self._metadata_fresh = True + + def expire_recs(self): + self._recs_fresh = False + self.recommendation = {} + 
self.current_vis = None + self._widget = None + self._rec_info = None + self._sampled = None + + def expire_metadata(self): + # Set metadata as null + self._metadata_fresh = False + self.data_type_lookup = None + self.data_type = None + self.data_model_lookup = None + self.data_model = None + self.unique_values = None + self.cardinality = None + self._min_max = None + self.pre_aggregated = None + + ##################### + ## Override Pandas ## + ##################### + def __getattr__(self, name): + ret_value = super(LuxDataFrame, self).__getattr__(name) + self.expire_metadata() + self.expire_recs() + return ret_value + + def _set_axis(self, axis, labels): + super(LuxDataFrame, self)._set_axis(axis, labels) + self.expire_metadata() + self.expire_recs() + + def _update_inplace(self, *args, **kwargs): + super(LuxDataFrame, self)._update_inplace(*args, **kwargs) + self.expire_metadata() + self.expire_recs() + + def _set_item(self, key, value): + super(LuxDataFrame, self)._set_item(key, value) + self.expire_metadata() + self.expire_recs() + + def _infer_structure(self): + # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data + is_multi_index_flag = self.index.nlevels != 1 + not_int_index_flag = self.index.dtype != "int64" + small_df_flag = len(self) < 100 + self.pre_aggregated = ( + is_multi_index_flag or not_int_index_flag + ) and small_df_flag + if "Number of Records" in self.columns: + self.pre_aggregated = True + very_small_df_flag = len(self) <= 10 + if very_small_df_flag: + self.pre_aggregated = True + + def set_executor_type(self, exe): + if exe == "SQL": + import pkgutil + + if pkgutil.find_loader("psycopg2") is None: + raise ImportError( + "psycopg2 is not installed. Run `pip install psycopg2' to install psycopg2 to enable the Postgres connection." 
+ ) + else: + import psycopg2 + from lux.executor.SQLExecutor import SQLExecutor + + self.executor = SQLExecutor + else: + from lux.executor.PandasExecutor import PandasExecutor + + self.executor = PandasExecutor() + self.executor_type = exe + + @property + def plot_config(self): + return self._plot_config + + @plot_config.setter + def plot_config(self, config_func: Callable): + """ + Modify plot aesthetic settings to all visualizations in the dataframe display + Currently only supported for Altair visualizations + Parameters + ---------- + config_func : Callable + A function that takes in an AltairChart (https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html) as input and returns an AltairChart as output + + Example + ---------- + Changing the color of marks and adding a title for all charts displayed for this dataframe + >>> df = pd.read_csv("lux/data/car.csv") + >>> def changeColorAddTitle(chart): + chart = chart.configure_mark(color="red") # change mark color to red + chart.title = "Custom Title" # add title to chart + return chart + >>> df.plot_config = changeColorAddTitle + >>> df + Change the opacity of all scatterplots displayed for this dataframe + >>> df = pd.read_csv("lux/data/olympic.csv") + >>> def changeOpacityScatterOnly(chart): + if chart.mark=='circle': + chart = chart.configure_mark(opacity=0.1) # lower opacity + return chart + >>> df.plot_config = changeOpacityScatterOnly + >>> df + """ + self._plot_config = config_func + self._recs_fresh = False + + def clear_plot_config(self): + self._plot_config = None + self._recs_fresh = False + + @property + def intent(self): + return self._intent + + @intent.setter + def intent(self, intent_input: Union[List[Union[str, Clause]], Vis]): + is_list_input = isinstance(intent_input, list) + is_vis_input = isinstance(intent_input, Vis) + if not (is_list_input or is_vis_input): + raise TypeError( + "Input intent must be either a list (of strings or lux.Clause) or a lux.Vis object." 
+ "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html" + ) + if is_list_input: + self.set_intent(intent_input) + elif is_vis_input: + self.set_intent_as_vis(intent_input) + + def clear_intent(self): + self.intent = [] + + def set_intent(self, intent: List[Union[str, Clause]]): + """ + Main function to set the intent of the dataframe. + The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. + + Parameters + ---------- + intent : List[str,Clause] + intent list, can be a mix of string shorthand or a lux.Clause object + + Notes + ----- + :doc:`../guide/clause` + """ + self.expire_recs() + self._intent = intent + self._parse_validate_compile_intent() + + def _parse_validate_compile_intent(self): + from lux.processor.Parser import Parser + from lux.processor.Validator import Validator + + self._intent = Parser.parse(self._intent) + Validator.validate_intent(self._intent, self) + self.maintain_metadata() + from lux.processor.Compiler import Compiler + + self.current_vis = Compiler.compile_intent(self, self._intent) + + def copy_intent(self): + # creates a true copy of the dataframe's intent + output = [] + for clause in self._intent: + temp_clause = clause.copy_clause() + output.append(temp_clause) + return output + + def set_intent_as_vis(self, vis: Vis): + """ + Set intent of the dataframe as the Vis + + Parameters + ---------- + vis : Vis + """ + self.expire_recs() + self._intent = vis._inferred_intent + self._parse_validate_compile_intent() + + def to_pandas(self): + import lux.core + + return lux.core.originalDF(self, copy=False) + + @property + def recommendation(self): + return self._recommendation + + @recommendation.setter + def recommendation(self, recommendation: Dict): + self._recommendation = recommendation + + @property + def current_vis(self): + return self._current_vis + + @current_vis.setter + def current_vis(self, current_vis: Dict): + self._current_vis = current_vis + + def 
__repr__(self): + # TODO: _repr_ gets called from _repr_html, need to get rid of this call + return "" + + ####################################################### + ########## SQL Metadata, type, model schema ########### + ####################################################### + + def set_SQL_connection(self, connection, t_name): + self.SQLconnection = connection + self.table_name = t_name + self.compute_SQL_dataset_metadata() + self.set_executor_type("SQL") + + def compute_SQL_dataset_metadata(self): + self.get_SQL_attributes() + for attr in list(self.columns): + self[attr] = None + self.data_type_lookup = {} + self.data_type = {} + #####NOTE: since we aren't expecting users to do much data processing with the SQL database, should we just keep this + ##### in the initialization and do it just once + self.compute_SQL_data_type() + self.compute_SQL_stats() + self.data_model_lookup = {} + self.data_model = {} + self.compute_data_model() + + def compute_SQL_stats(self): + # precompute statistics + self.unique_values = {} + self._min_max = {} + + self.get_SQL_unique_values() + # self.get_SQL_cardinality() + for attribute in self.columns: + if self.data_type_lookup[attribute] == "quantitative": + self._min_max[attribute] = ( + self[attribute].min(), + self[attribute].max(), + ) + + def get_SQL_attributes(self): + if "." 
in self.table_name: + table_name = self.table_name[self.table_name.index(".") + 1 :] + else: + table_name = self.table_name + attr_query = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{}'".format( + table_name + ) + attributes = list(pd.read_sql(attr_query, self.SQLconnection)["column_name"]) + for attr in attributes: + self[attr] = None + + def get_SQL_cardinality(self): + cardinality = {} + for attr in list(self.columns): + card_query = pd.read_sql( + "SELECT Count(Distinct({})) FROM {}".format(attr, self.table_name), + self.SQLconnection, + ) + cardinality[attr] = list(card_query["count"])[0] + self.cardinality = cardinality + + def get_SQL_unique_values(self): + unique_vals = {} + for attr in list(self.columns): + unique_query = pd.read_sql( + "SELECT Distinct({}) FROM {}".format(attr, self.table_name), + self.SQLconnection, + ) + unique_vals[attr] = list(unique_query[attr]) + self.unique_values = unique_vals + + def compute_SQL_data_type(self): + data_type_lookup = {} + sql_dtypes = {} + self.get_SQL_cardinality() + if "." 
in self.table_name: + table_name = self.table_name[self.table_name.index(".") + 1 :] + else: + table_name = self.table_name + # get the data types of the attributes in the SQL table + for attr in list(self.columns): + datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format( + table_name, attr + ) + datatype = list( + pd.read_sql(datatype_query, self.SQLconnection)["data_type"] + )[0] + sql_dtypes[attr] = datatype + + data_type = {"quantitative": [], "nominal": [], "temporal": []} + for attr in list(self.columns): + if str(attr).lower() in ["month", "year"]: + data_type_lookup[attr] = "temporal" + data_type["temporal"].append(attr) + elif sql_dtypes[attr] in [ + "character", + "character varying", + "boolean", + "uuid", + "text", + ]: + data_type_lookup[attr] = "nominal" + data_type["nominal"].append(attr) + elif sql_dtypes[attr] in [ + "integer", + "real", + "smallint", + "smallserial", + "serial", + ]: + if self.cardinality[attr] < 13: + data_type_lookup[attr] = "nominal" + data_type["nominal"].append(attr) + else: + data_type_lookup[attr] = "quantitative" + data_type["quantitative"].append(attr) + elif "time" in sql_dtypes[attr] or "date" in sql_dtypes[attr]: + data_type_lookup[attr] = "temporal" + data_type["temporal"].append(attr) + self.data_type_lookup = data_type_lookup + self.data_type = data_type + + def _append_rec(self, rec_infolist, recommendations: Dict): + if ( + recommendations["collection"] is not None + and len(recommendations["collection"]) > 0 + ): + rec_infolist.append(recommendations) + + def maintain_recs(self): + # `rec_df` is the dataframe to generate the recommendations on + # check to see if globally defined actions have been registered/removed + if lux.update_actions["flag"] == True: + self._recs_fresh = False + show_prev = False # flag indicating whether rec_df is showing previous df or current self + if self._prev is not None: + rec_df = self._prev + rec_df._message = 
Message() + rec_df.maintain_metadata() # the prev dataframe may not have been printed before + last_event = self.history._events[-1].name + rec_df._message.add( + f"Lux is visualizing the previous version of the dataframe before you applied {last_event}." + ) + show_prev = True + else: + rec_df = self + rec_df._message = Message() + # Add warning message if there exist ID fields + id_fields_str = "" + if len(rec_df.data_type["id"]) > 0: + for id_field in rec_df.data_type["id"]: + id_fields_str += f"{id_field}, " + id_fields_str = id_fields_str[:-2] + rec_df._message.add( + f"{id_fields_str} is not visualized since it resembles an ID field." + ) + rec_df._prev = None # reset _prev + + if ( + not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh + ): # Check that recs has not yet been computed + rec_infolist = [] + from lux.action.custom import custom + from lux.action.custom import custom_actions + from lux.action.correlation import correlation + from lux.action.univariate import univariate + from lux.action.enhance import enhance + from lux.action.filter import filter + from lux.action.generalize import generalize + from lux.action.row_group import row_group + from lux.action.column_group import column_group + + if rec_df.pre_aggregated: + if rec_df.columns.name is not None: + rec_df._append_rec(rec_infolist, row_group(rec_df)) + if rec_df.index.name is not None: + rec_df._append_rec(rec_infolist, column_group(rec_df)) + else: + if self.recommendation == {}: + # display conditions for default actions + no_vis = lambda ldf: (ldf.current_vis is None) or ( + ldf.current_vis is not None and len(ldf.current_vis) == 0 + ) + one_current_vis = ( + lambda ldf: ldf.current_vis is not None + and len(ldf.current_vis) == 1 + ) + multiple_current_vis = ( + lambda ldf: ldf.current_vis is not None + and len(ldf.current_vis) > 1 + ) + + # globally register default actions + lux.register_action("correlation", correlation, no_vis) + lux.register_action( + "distribution", 
univariate, no_vis, "quantitative" + ) + lux.register_action("occurrence", univariate, no_vis, "nominal") + lux.register_action("temporal", univariate, no_vis, "temporal") + + lux.register_action("enhance", enhance, one_current_vis) + lux.register_action("filter", filter, one_current_vis) + lux.register_action("generalize", generalize, one_current_vis) + + lux.register_action("custom", custom, multiple_current_vis) + + # generate vis from globally registered actions and append to dataframe + custom_action_collection = custom_actions(rec_df) + for rec in custom_action_collection: + rec_df._append_rec(rec_infolist, rec) + lux.update_actions["flag"] = False + + # Store _rec_info into a more user-friendly dictionary form + rec_df.recommendation = {} + for rec_info in rec_infolist: + action_type = rec_info["action"] + vlist = rec_info["collection"] + if rec_df._plot_config: + for vis in rec_df.current_vis: + vis._plot_config = rec_df.plot_config + for vis in vlist: + vis._plot_config = rec_df.plot_config + if len(vlist) > 0: + rec_df.recommendation[action_type] = vlist + rec_df._rec_info = rec_infolist + self._widget = rec_df.render_widget() + elif ( + show_prev + ): # re-render widget for the current dataframe if previous rec is not recomputed + self._widget = rec_df.render_widget() + self._recs_fresh = True + + ####################################################### + ############## LuxWidget Result Display ############### + ####################################################### + @property + def widget(self): + if self._widget: + return self._widget + + @property + def exported(self) -> Union[Dict[str, VisList], VisList]: + """ + Get selected visualizations as exported Vis List + + Notes + ----- + Convert the _selectedVisIdxs dictionary into a programmable VisList + Example _selectedVisIdxs : + {'Correlation': [0, 2], 'Occurrence': [1]} + indicating the 0th and 2nd vis from the `Correlation` tab is selected, and the 1st vis from the `Occurrence` tab is selected. 
+ + Returns + ------- + Union[Dict[str,VisList], VisList] + When there are no exported vis, return empty list -> [] + When all the exported vis is from the same tab, return a VisList of selected visualizations. -> VisList(v1, v2...) + When the exported vis is from the different tabs, return a dictionary with the action name as key and selected visualizations in the VisList. -> {"Enhance": VisList(v1, v2...), "Filter": VisList(v5, v7...), ..} + """ + if not hasattr(self, "_widget"): + warnings.warn( + "\nNo widget attached to the dataframe." + "Please assign dataframe to an output variable.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + exported_vis_lst = self._widget._selectedVisIdxs + exported_vis = [] + if exported_vis_lst == {}: + if self._saved_export: + return self._saved_export + warnings.warn( + "\nNo visualization selected to export.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + if len(exported_vis_lst) == 1 and "currentVis" in exported_vis_lst: + return self.current_vis + elif len(exported_vis_lst) > 1: + exported_vis = {} + if "currentVis" in exported_vis_lst: + exported_vis["Current Vis"] = self.current_vis + for export_action in exported_vis_lst: + if export_action != "currentVis": + exported_vis[export_action] = VisList( + list( + map( + self.recommendation[export_action].__getitem__, + exported_vis_lst[export_action], + ) + ) + ) + return exported_vis + elif len(exported_vis_lst) == 1 and ("currentVis" not in exported_vis_lst): + export_action = list(exported_vis_lst.keys())[0] + exported_vis = VisList( + list( + map( + self.recommendation[export_action].__getitem__, + exported_vis_lst[export_action], + ) + ) + ) + self._saved_export = exported_vis + return exported_vis + else: + warnings.warn( + "\nNo visualization selected to export.\n" + "See more: 
https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + + def remove_deleted_recs(self, change): + for action in self._widget.deletedIndices: + deletedSoFar = 0 + for index in self._widget.deletedIndices[action]: + self.recommendation[action].remove_index(index - deletedSoFar) + deletedSoFar += 1 + + def set_intent_on_click(self, change): + from IPython.display import display, clear_output + from lux.processor.Compiler import Compiler + + intent_action = list(self._widget.selectedIntentIndex.keys())[0] + vis = self.recommendation[intent_action][ + self._widget.selectedIntentIndex[intent_action][0] + ] + self.set_intent_as_vis(vis) + + self.maintain_metadata() + self.current_vis = Compiler.compile_intent(self, self._intent) + self.maintain_recs() + + with self.output: + clear_output() + display(self._widget) + + self._widget.observe(self.remove_deleted_recs, names="deletedIndices") + self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") + + def _repr_html_(self): + from IPython.display import display + from IPython.display import clear_output + import ipywidgets as widgets + + try: + if self._pandas_only: + display(self.display_pandas()) + self._pandas_only = False + else: + if self.index.nlevels >= 2 or self.columns.nlevels >= 2: + warnings.warn( + "\nLux does not currently support dataframes " + "with hierarchical indexes.\n" + "Please convert the dataframe into a flat " + "table via `pandas.DataFrame.reset_index`.\n", + stacklevel=2, + ) + display(self.display_pandas()) + return + + if len(self) <= 0: + warnings.warn( + "\nLux can not operate on an empty dataframe.\nPlease check your input again.\n", + stacklevel=2, + ) + display(self.display_pandas()) + return + if len(self.columns) <= 1: + warnings.warn( + "\nLux defaults to Pandas when there is only a single column.", + stacklevel=2, + ) + display(self.display_pandas()) + return + self.maintain_metadata() + + if self._intent 
!= [] and ( + not hasattr(self, "_compiled") or not self._compiled + ): + from lux.processor.Compiler import Compiler + + self.current_vis = Compiler.compile_intent(self, self._intent) + + if lux.config.default_display == "lux": + self._toggle_pandas_display = False + else: + self._toggle_pandas_display = True + + # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) + self.maintain_recs() + + # Observers(callback_function, listen_to_this_variable) + self._widget.observe(self.remove_deleted_recs, names="deletedIndices") + self._widget.observe( + self.set_intent_on_click, names="selectedIntentIndex" + ) + + if len(self.recommendation) > 0: + # box = widgets.Box(layout=widgets.Layout(display='inline')) + button = widgets.Button( + description="Toggle Pandas/Lux", + layout=widgets.Layout(width="140px", top="5px"), + ) + self.output = widgets.Output() + # box.children = [button,output] + # output.children = [button] + # display(box) + display(button, self.output) + + def on_button_clicked(b): + with self.output: + if b: + self._toggle_pandas_display = ( + not self._toggle_pandas_display + ) + clear_output() + if self._toggle_pandas_display: + display(self.display_pandas()) + else: + # b.layout.display = "none" + display(self._widget) + # b.layout.display = "inline-block" + + button.on_click(on_button_clicked) + on_button_clicked(None) + else: + warnings.warn( + "\nLux defaults to Pandas when there are no valid actions defined.", + stacklevel=2, + ) + display(self.display_pandas()) + + except (KeyboardInterrupt, SystemExit): + raise + except: + warnings.warn( + "\nUnexpected error in rendering Lux widget and recommendations. 
" + "Falling back to Pandas display.\n\n" + "Please report this issue on Github: https://github.com/lux-org/lux/issues ", + stacklevel=2, + ) + display(self.display_pandas()) + + def display_pandas(self): + return self.to_pandas() + + def render_widget(self, renderer: str = "altair", input_current_vis=""): + """ + Generate a LuxWidget based on the LuxDataFrame + + Structure of widgetJSON: + { + 'current_vis': {}, + 'recommendation': [ + { + 'action': 'Correlation', + 'description': "some description", + 'vspec': [ + {Vega-Lite spec for vis 1}, + {Vega-Lite spec for vis 2}, + ... + ] + }, + ... repeat for other actions + ] + } + Parameters + ---------- + renderer : str, optional + Choice of visualization rendering library, by default "altair" + input_current_vis : lux.LuxDataFrame, optional + User-specified current vis to override default Current Vis, by default + """ + check_import_lux_widget() + import luxwidget + + widgetJSON = self.to_JSON(self._rec_info, input_current_vis=input_current_vis) + return luxwidget.LuxWidget( + currentVis=widgetJSON["current_vis"], + recommendations=widgetJSON["recommendation"], + intent=LuxDataFrame.intent_to_string(self._intent), + message=self._message.to_html(), + ) + + @staticmethod + def intent_to_JSON(intent): + from lux.utils import utils + + filter_specs = utils.get_filter_specs(intent) + attrs_specs = utils.get_attrs_specs(intent) + + intent = {} + intent["attributes"] = [clause.attribute for clause in attrs_specs] + intent["filters"] = [clause.attribute for clause in filter_specs] + return intent + + @staticmethod + def intent_to_string(intent): + if intent: + return ", ".join([clause.to_string() for clause in intent]) + else: + return "" + + def to_JSON(self, rec_infolist, input_current_vis=""): + widget_spec = {} + if self.current_vis: + self.executor.execute(self.current_vis, self) + widget_spec["current_vis"] = LuxDataFrame.current_vis_to_JSON( + self.current_vis, input_current_vis + ) + else: + 
widget_spec["current_vis"] = {} + widget_spec["recommendation"] = [] + + # Recommended Collection + recCollection = LuxDataFrame.rec_to_JSON(rec_infolist) + widget_spec["recommendation"].extend(recCollection) + return widget_spec + + @staticmethod + def current_vis_to_JSON(vlist, input_current_vis=""): + current_vis_spec = {} + numVC = len(vlist) # number of visualizations in the vis list + if numVC == 1: + current_vis_spec = vlist[0].render_VSpec() + elif numVC > 1: + pass + return current_vis_spec + + @staticmethod + def rec_to_JSON(recs): + rec_lst = [] + import copy + + rec_copy = copy.deepcopy(recs) + for idx, rec in enumerate(rec_copy): + if len(rec["collection"]) > 0: + rec["vspec"] = [] + for vis in rec["collection"]: + chart = vis.render_VSpec() + rec["vspec"].append(chart) + rec_lst.append(rec) + # delete DataObjectCollection since not JSON serializable + del rec_lst[idx]["collection"] + return rec_lst + + # Overridden Pandas Functions + def head(self, n: int = 5): + self._prev = self + self._history.append_event("head", n=5) + return super(LuxDataFrame, self).head(n) + + def tail(self, n: int = 5): + self._prev = self + self._history.append_event("tail", n=5) + return super(LuxDataFrame, self).tail(n) + + def info(self, *args, **kwargs): + self._pandas_only = True + self._history.append_event("info", *args, **kwargs) + return super(LuxDataFrame, self).info(*args, **kwargs) + + def describe(self, *args, **kwargs): + self._pandas_only = True + self._history.append_event("describe", *args, **kwargs) + return super(LuxDataFrame, self).describe(*args, **kwargs) diff --git a/lux/vis/VisList.py b/lux/vis/VisList.py index bde1d0c3..86ccf1a1 100644 --- a/lux/vis/VisList.py +++ b/lux/vis/VisList.py @@ -52,7 +52,6 @@ def intent(self, intent: List[Clause]) -> None: def set_intent(self, intent: List[Clause]) -> None: """ Sets the intent of the VisList and refresh the source based on the new clause - Parameters ---------- intent : List[Clause] @@ -65,238 +64,286 @@ 
def set_intent(self, intent: List[Clause]) -> None: def exported(self) -> VisList: """ Get selected visualizations as exported Vis List - Notes ----- - Convert the _selectedVisIdxs dictionary into a programmable VisList - Example _selectedVisIdxs : - {'Vis List': [0, 2]} - - Returns - ------- - VisList - return a VisList of selected visualizations. -> VisList(v1, v2...) - """ - if not hasattr(self,"widget"): - warnings.warn( - "\nNo widget attached to the VisList." - "Please assign VisList to an output variable.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - , stacklevel=2) - return [] - exported_vis_lst =self._widget._selectedVisIdxs - if (exported_vis_lst=={}): - warnings.warn( - "\nNo visualization selected to export.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - ,stacklevel=2) - return [] - else: - exported_vis = VisList(list(map(self.__getitem__, exported_vis_lst["Vis List"]))) - return exported_vis - def remove_duplicates(self) -> None: - """ - Removes duplicate visualizations in Vis List - """ - self._collection = list(set(self._collection)) - - def remove_index(self, index): - self._collection.pop(index) - - def _is_vis_input(self): - if (type(self._input_lst[0])==Vis): - return True - elif (type(self._input_lst[0])==Clause): - return False - def __getitem__(self, key): - return self._collection[key] - def __setitem__(self, key, value): - self._collection[key] = value - def __len__(self): - return len(self._collection) - def __repr__(self): - if len(self._collection) == 0: - return str(self._input_lst) - x_channel = "" - y_channel = "" - largest_mark = 0 - largest_filter = 0 - for vis in self._collection: #finds longest x attribute among all visualizations - filter_intents = None - for clause in vis._inferred_intent: - if clause.value != "": - filter_intents = clause - - if (clause.aggregation != "" and clause.aggregation is not None): - 
attribute = clause._aggregation_name.upper() + "(" + clause.attribute + ")" - elif clause.bin_size > 0: - attribute = "BIN(" + clause.attribute + ")" - else: - attribute = clause.attribute - - if clause.channel == "x" and len(x_channel) < len(attribute): - x_channel = attribute - if clause.channel == "y" and len(y_channel) < len(attribute): - y_channel = attribute - if len(vis.mark) > largest_mark: - largest_mark = len(vis.mark) - if filter_intents and len(str(filter_intents.value)) + len(filter_intents.attribute) > largest_filter: - largest_filter = len(str(filter_intents.value)) + len(filter_intents.attribute) - vis_repr = [] - largest_x_length = len(x_channel) - largest_y_length = len(y_channel) - for vis in self._collection: #pads the shorter visualizations with spaces before the y attribute - filter_intents = None - x_channel = "" - y_channel = "" - additional_channels = [] - for clause in vis._inferred_intent: - if clause.value != "": - filter_intents = clause - - if (clause.aggregation != "" and clause.aggregation is not None and vis.mark!='scatter'): - attribute = clause._aggregation_name.upper() + "(" + clause.attribute + ")" - elif clause.bin_size > 0: - attribute = "BIN(" + clause.attribute + ")" - else: - attribute = clause.attribute - - if clause.channel == "x": - x_channel = attribute.ljust(largest_x_length) - elif clause.channel == "y": - y_channel = attribute - elif clause.channel != "": - additional_channels.append([clause.channel, attribute]) - if filter_intents: - y_channel = y_channel.ljust(largest_y_length) - elif largest_filter != 0: - y_channel = y_channel.ljust(largest_y_length + largest_filter + 9) - else: - y_channel = y_channel.ljust(largest_y_length + largest_filter) - if x_channel != "": - x_channel = "x: " + x_channel + ", " - if y_channel != "": - y_channel = "y: " + y_channel - aligned_mark = vis.mark.ljust(largest_mark) - str_additional_channels = "" - for channel in additional_channels: - str_additional_channels += ", " + 
channel[0] + ": " + channel[1] - if filter_intents: - aligned_filter = " -- [" + filter_intents.attribute + filter_intents.filter_op + str(filter_intents.value) + "]" - aligned_filter = aligned_filter.ljust(largest_filter + 8) - vis_repr.append(f" ") - else: - vis_repr.append(f" ") - return '['+',\n'.join(vis_repr)[1:]+']' - def map(self,function): - # generalized way of applying a function to each element - return map(function, self._collection) - - def get(self,field_name): - # Get the value of the field for all objects in the collection - def get_field(d_obj): - field_val = getattr(d_obj,field_name) - # Might want to write catch error if key not in field - return field_val - return self.map(get_field) - - def set(self,field_name,field_val): - return NotImplemented - def set_plot_config(self,config_func:Callable): - """ - Modify plot aesthetic settings to the Vis List - Currently only supported for Altair visualizations - - Parameters - ---------- - config_func : typing.Callable - A function that takes in an AltairChart (https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html) as input and returns an AltairChart as output - """ - for vis in self._collection: - vis.plot_config = config_func - def clear_plot_config(self): - for vis in self._collection: - vis.plot_config = None - def sort(self, remove_invalid=True, descending = True): - # remove the items that have invalid (-1) score - if (remove_invalid): self._collection = list(filter(lambda x: x.score!=-1,self._collection)) - # sort in-place by “score” by default if available, otherwise user-specified field to sort by - self._collection.sort(key=lambda x: x.score, reverse=descending) - - def topK(self,k): - #sort and truncate list to first K items - self.sort(remove_invalid=True) - return VisList(self._collection[:k]) - def bottomK(self,k): - #sort and truncate list to first K items - self.sort(descending=False,remove_invalid=True) - return VisList(self._collection[:k]) - def 
normalize_score(self, invert_order = False): - max_score = max(list(self.get("score"))) - for dobj in self._collection: - dobj.score = dobj.score/max_score - if (invert_order): dobj.score = 1 - dobj.score - def _repr_html_(self): - self._widget = None - from IPython.display import display - from lux.core.frame import LuxDataFrame - recommendation = {"action": "Vis List", - "description": "Shows a vis list defined by the intent"} - recommendation["collection"] = self._collection - - check_import_lux_widget() - import luxwidget - recJSON = LuxDataFrame.rec_to_JSON([recommendation]) - self._widget = luxwidget.LuxWidget( - currentVis={}, - recommendations=recJSON, - intent="", - message = "" - ) - display(self._widget) - - def refresh_source(self, ldf) : - """ - Loading the source into the visualizations in the VisList, then populating each visualization - based on the new source data, effectively "materializing" the visualization collection. - - Parameters - ---------- - ldf : LuxDataframe - Input Dataframe to be attached to the VisList - - Returns - ------- - VisList - Complete VisList with fully-specified fields - - See Also - -------- - lux.vis.Vis.refresh_source - - Note - ---- - Function derives a new _inferred_intent by instantiating the intent specification on the new data - """ - if (ldf is not None): - from lux.processor.Parser import Parser - from lux.processor.Validator import Validator - from lux.processor.Compiler import Compiler - self._source = ldf - self._source.maintain_metadata() - if len(self._input_lst)>0: - if (self._is_vis_input()): - compiled_collection = [] - for vis in self._collection: - vis._inferred_intent = Parser.parse(vis._intent) - Validator.validate_intent(vis._inferred_intent,ldf) - vislist = Compiler.compile_vis(ldf,vis) - if (len(vislist)>0): - vis = vislist[0] - compiled_collection.append(vis) - self._collection = compiled_collection - else: - self._inferred_intent = Parser.parse(self._intent) - 
Validator.validate_intent(self._inferred_intent,ldf) - self._collection = Compiler.compile_intent(ldf,self._inferred_intent) - ldf.executor.execute(self._collection,ldf) + Convert the _selectedVisIdxs dictionary into a programmable VisList + Example _selectedVisIdxs : + {'Vis List': [0, 2]} + + Returns + ------- + VisList + return a VisList of selected visualizations. -> VisList(v1, v2...) + """ + if not hasattr(self, "widget"): + warnings.warn( + "\nNo widget attached to the VisList." + "Please assign VisList to an output variable.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + exported_vis_lst = self._widget._selectedVisIdxs + if exported_vis_lst == {}: + warnings.warn( + "\nNo visualization selected to export.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + else: + exported_vis = VisList( + list(map(self.__getitem__, exported_vis_lst["Vis List"])) + ) + return exported_vis + + def remove_duplicates(self) -> None: + """ + Removes duplicate visualizations in Vis List + """ + self._collection = list(set(self._collection)) + + def remove_index(self, index): + self._collection.pop(index) + + def _is_vis_input(self): + if type(self._input_lst[0]) == Vis: + return True + elif type(self._input_lst[0]) == Clause: + return False + + def __getitem__(self, key): + return self._collection[key] + + def __setitem__(self, key, value): + self._collection[key] = value + + def __len__(self): + return len(self._collection) + + def __repr__(self): + if len(self._collection) == 0: + return str(self._input_lst) + x_channel = "" + y_channel = "" + largest_mark = 0 + largest_filter = 0 + for ( + vis + ) in self._collection: # finds longest x attribute among all visualizations + filter_intents = None + for clause in vis._inferred_intent: + if clause.value != "": + filter_intents = clause + + if clause.aggregation 
!= "" and clause.aggregation is not None: + attribute = ( + clause._aggregation_name.upper() + "(" + clause.attribute + ")" + ) + elif clause.bin_size > 0: + attribute = "BIN(" + clause.attribute + ")" + else: + attribute = clause.attribute + + if clause.channel == "x" and len(x_channel) < len(attribute): + x_channel = attribute + if clause.channel == "y" and len(y_channel) < len(attribute): + y_channel = attribute + if len(vis.mark) > largest_mark: + largest_mark = len(vis.mark) + if ( + filter_intents + and len(str(filter_intents.value)) + len(filter_intents.attribute) + > largest_filter + ): + largest_filter = len(str(filter_intents.value)) + len( + filter_intents.attribute + ) + vis_repr = [] + largest_x_length = len(x_channel) + largest_y_length = len(y_channel) + for ( + vis + ) in ( + self._collection + ): # pads the shorter visualizations with spaces before the y attribute + filter_intents = None + x_channel = "" + y_channel = "" + additional_channels = [] + for clause in vis._inferred_intent: + if clause.value != "": + filter_intents = clause + + if ( + clause.aggregation != "" + and clause.aggregation is not None + and vis.mark != "scatter" + ): + attribute = ( + clause._aggregation_name.upper() + "(" + clause.attribute + ")" + ) + elif clause.bin_size > 0: + attribute = "BIN(" + clause.attribute + ")" + else: + attribute = clause.attribute + + if clause.channel == "x": + x_channel = attribute.ljust(largest_x_length) + elif clause.channel == "y": + y_channel = attribute + elif clause.channel != "": + additional_channels.append([clause.channel, attribute]) + if filter_intents: + y_channel = y_channel.ljust(largest_y_length) + elif largest_filter != 0: + y_channel = y_channel.ljust(largest_y_length + largest_filter + 9) + else: + y_channel = y_channel.ljust(largest_y_length + largest_filter) + if x_channel != "": + x_channel = "x: " + x_channel + ", " + if y_channel != "": + y_channel = "y: " + y_channel + aligned_mark = vis.mark.ljust(largest_mark) + 
str_additional_channels = "" + for channel in additional_channels: + str_additional_channels += ", " + channel[0] + ": " + channel[1] + if filter_intents: + aligned_filter = ( + " -- [" + + filter_intents.attribute + + filter_intents.filter_op + + str(filter_intents.value) + + "]" + ) + aligned_filter = aligned_filter.ljust(largest_filter + 8) + vis_repr.append( + f" " + ) + else: + vis_repr.append( + f" " + ) + return "[" + ",\n".join(vis_repr)[1:] + "]" + + def map(self, function): + # generalized way of applying a function to each element + return map(function, self._collection) + + def get(self, field_name): + # Get the value of the field for all objects in the collection + def get_field(d_obj): + field_val = getattr(d_obj, field_name) + # Might want to write catch error if key not in field + return field_val + + return self.map(get_field) + + def set(self, field_name, field_val): + return NotImplemented + + def set_plot_config(self, config_func: Callable): + """ + Modify plot aesthetic settings to the Vis List + Currently only supported for Altair visualizations + Parameters + ---------- + config_func : typing.Callable + A function that takes in an AltairChart (https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html) as input and returns an AltairChart as output + """ + for vis in self._collection: + vis.plot_config = config_func + + def clear_plot_config(self): + for vis in self._collection: + vis.plot_config = None + + def sort(self, remove_invalid=True, descending=True): + # remove the items that have invalid (-1) score + if remove_invalid: + self._collection = list(filter(lambda x: x.score != -1, self._collection)) + # sort in-place by “score” by default if available, otherwise user-specified field to sort by + self._collection.sort(key=lambda x: x.score, reverse=descending) + + def topK(self, k): + # sort and truncate list to first K items + self.sort(remove_invalid=True) + return VisList(self._collection[:k]) + + def bottomK(self, 
k): + # sort and truncate list to first K items + self.sort(descending=False, remove_invalid=True) + return VisList(self._collection[:k]) + + def normalize_score(self, invert_order=False): + max_score = max(list(self.get("score"))) + for dobj in self._collection: + dobj.score = dobj.score / max_score + if invert_order: + dobj.score = 1 - dobj.score + + def _repr_html_(self): + self._widget = None + from IPython.display import display + from lux.core.frame import LuxDataFrame + + recommendation = { + "action": "Vis List", + "description": "Shows a vis list defined by the intent", + } + recommendation["collection"] = self._collection + + check_import_lux_widget() + import luxwidget + + recJSON = LuxDataFrame.rec_to_JSON([recommendation]) + self._widget = luxwidget.LuxWidget( + currentVis={}, recommendations=recJSON, intent="", message="" + ) + display(self._widget) + + def refresh_source(self, ldf): + """ + Loading the source into the visualizations in the VisList, then populating each visualization + based on the new source data, effectively "materializing" the visualization collection. 
+ Parameters + ---------- + ldf : LuxDataframe + Input Dataframe to be attached to the VisList + Returns + ------- + VisList + Complete VisList with fully-specified fields + + See Also + -------- + lux.vis.Vis.refresh_source + Note + ---- + Function derives a new _inferred_intent by instantiating the intent specification on the new data + """ + if ldf is not None: + from lux.processor.Parser import Parser + from lux.processor.Validator import Validator + from lux.processor.Compiler import Compiler + + self._source = ldf + self._source.maintain_metadata() + if len(self._input_lst) > 0: + if self._is_vis_input(): + compiled_collection = [] + for vis in self._collection: + vis._inferred_intent = Parser.parse(vis._intent) + Validator.validate_intent(vis._inferred_intent, ldf) + vislist = Compiler.compile_vis(ldf, vis) + if len(vislist) > 0: + vis = vislist[0] + compiled_collection.append(vis) + self._collection = compiled_collection + else: + self._inferred_intent = Parser.parse(self._intent) + Validator.validate_intent(self._inferred_intent, ldf) + self._collection = Compiler.compile_intent( + ldf, self._inferred_intent + ) + ldf.executor.execute(self._collection, ldf) From 4aa4bd40a2800684b7795f49a9beca81469c4018 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sun, 1 Nov 2020 18:03:12 -0800 Subject: [PATCH 07/39] reformat, update command to fix version --- .travis.yml | 2 +- lux/core/frame.py | 1574 +++++++++++++++------------ lux/vislib/altair/AltairRenderer.py | 24 +- tests/test_config.py | 53 +- 4 files changed, 903 insertions(+), 750 deletions(-) diff --git a/.travis.yml b/.travis.yml index d03743d2..98bde1cf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ install: - pip install git+https://github.com/lux-org/lux-widget # command to run tests script: - - black --check . + - black --target-version py37 --check . 
- python -m pytest tests/*.py - pytest --cov-report term --cov=lux tests/ after_success: diff --git a/lux/core/frame.py b/lux/core/frame.py index 60ba4304..8deb29e7 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -26,712 +26,868 @@ class LuxDataFrame(pd.DataFrame): - ''' - A subclass of pd.DataFrame that supports all dataframe operations while housing other variables and functions for generating visual recommendations. - ''' - # MUST register here for new properties!! - _metadata = ['_intent','data_type_lookup','data_type', - 'data_model_lookup','data_model','unique_values','cardinality','_rec_info', '_pandas_only', - '_min_max','plot_config', '_current_vis','_widget', '_recommendation','_prev','_history', '_saved_export'] - - def __init__(self,*args, **kw): - from lux.executor.PandasExecutor import PandasExecutor - self._history = History() - self._intent = [] - self._recommendation = {} - self._saved_export = None - self._current_vis = [] - self._prev = None - super(LuxDataFrame, self).__init__(*args, **kw) - - self.executor_type = "Pandas" - self.executor = PandasExecutor() - self.SQLconnection = "" - self.table_name = "" - - self._sampled = None - self._default_pandas_display = True - self._toggle_pandas_display = True - self._plot_config = None - self._message = Message() - self._pandas_only=False - # Metadata - self.data_type_lookup = None - self.data_type = None - self.data_model_lookup = None - self.data_model = None - self.unique_values = None - self.cardinality = None - self._min_max = None - self.pre_aggregated = None - - @property - def _constructor(self): - return LuxDataFrame - # @property - # def _constructor_sliced(self): - # def f(*args, **kwargs): - # # adapted from https://github.com/pandas-dev/pandas/issues/13208#issuecomment-326556232 - # return LuxSeries(*args, **kwargs).__finalize__(self, method='inherit') - # return f - @property - def history(self): - return self._history - def maintain_metadata(self): - if (not 
hasattr(self,"_metadata_fresh") or not self._metadata_fresh ): # Check that metadata has not yet been computed - if (len(self)>0): #only compute metadata information if the dataframe is non-empty - self.executor.compute_stats(self) - self.executor.compute_dataset_metadata(self) - self._infer_structure() - self._metadata_fresh = True - def expire_recs(self): - self._recs_fresh = False - self.recommendation = {} - self.current_vis = None - self._widget = None - self._rec_info = None - self._sampled = None - def expire_metadata(self): - # Set metadata as null - self._metadata_fresh = False - self.data_type_lookup = None - self.data_type = None - self.data_model_lookup = None - self.data_model = None - self.unique_values = None - self.cardinality = None - self._min_max = None - self.pre_aggregated = None - - ##################### - ## Override Pandas ## - ##################### - def __getattr__(self, name): - ret_value = super(LuxDataFrame, self).__getattr__(name) - self.expire_metadata() - self.expire_recs() - return ret_value - def _set_axis(self, axis, labels): - super(LuxDataFrame, self)._set_axis(axis, labels) - self.expire_metadata() - self.expire_recs() - def _update_inplace(self,*args,**kwargs): - super(LuxDataFrame, self)._update_inplace(*args,**kwargs) - self.expire_metadata() - self.expire_recs() - def _set_item(self, key, value): - super(LuxDataFrame, self)._set_item(key, value) - self.expire_metadata() - self.expire_recs() - def _infer_structure(self): - # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data - is_multi_index_flag = self.index.nlevels !=1 - not_int_index_flag = self.index.dtype !='int64' - small_df_flag = len(self)<100 - self.pre_aggregated = (is_multi_index_flag or not_int_index_flag) and small_df_flag - if ("Number of Records" in self.columns): - self.pre_aggregated = True - very_small_df_flag = len(self)<=10 - if (very_small_df_flag): - self.pre_aggregated = True - 
def set_executor_type(self, exe): - if (exe =="SQL"): - import pkgutil - if (pkgutil.find_loader("psycopg2") is None): - raise ImportError("psycopg2 is not installed. Run `pip install psycopg2' to install psycopg2 to enable the Postgres connection.") - else: - import psycopg2 - from lux.executor.SQLExecutor import SQLExecutor - self.executor = SQLExecutor - else: - from lux.executor.PandasExecutor import PandasExecutor - self.executor = PandasExecutor() - self.executor_type = exe - @property - def plot_config(self): - return self._plot_config - @plot_config.setter - def plot_config(self,config_func:Callable): - """ - Modify plot aesthetic settings to all visualizations in the dataframe display - Currently only supported for Altair visualizations - Parameters - ---------- - config_func : Callable - A function that takes in an AltairChart (https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html) as input and returns an AltairChart as output - - Example - ---------- - Changing the color of marks and adding a title for all charts displayed for this dataframe - >>> df = pd.read_csv("lux/data/car.csv") - >>> def changeColorAddTitle(chart): - chart = chart.configure_mark(color="red") # change mark color to red - chart.title = "Custom Title" # add title to chart - return chart - >>> df.plot_config = changeColorAddTitle - >>> df - Change the opacity of all scatterplots displayed for this dataframe - >>> df = pd.read_csv("lux/data/olympic.csv") - >>> def changeOpacityScatterOnly(chart): - if chart.mark=='circle': - chart = chart.configure_mark(opacity=0.1) # lower opacity - return chart - >>> df.plot_config = changeOpacityScatterOnly - >>> df - """ - self._plot_config = config_func - self._recs_fresh=False - def clear_plot_config(self): - self._plot_config = None - self._recs_fresh=False - - @property - def intent(self): - return self._intent - @intent.setter - def intent(self, intent_input:Union[List[Union[str, Clause]],Vis]): - is_list_input = 
isinstance(intent_input,list) - is_vis_input = isinstance(intent_input,Vis) - if not (is_list_input or is_vis_input): - raise TypeError("Input intent must be either a list (of strings or lux.Clause) or a lux.Vis object." - "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html" - ) - if is_list_input: - self.set_intent(intent_input) - elif is_vis_input: - self.set_intent_as_vis(intent_input) - def clear_intent(self): - self.intent = [] - def set_intent(self, intent:List[Union[str, Clause]]): - """ - Main function to set the intent of the dataframe. - The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. - - Parameters - ---------- - intent : List[str,Clause] - intent list, can be a mix of string shorthand or a lux.Clause object - - Notes - ----- - :doc:`../guide/clause` - """ - self.expire_recs() - self._intent = intent - self._parse_validate_compile_intent() - def _parse_validate_compile_intent(self): - from lux.processor.Parser import Parser - from lux.processor.Validator import Validator - self._intent = Parser.parse(self._intent) - Validator.validate_intent(self._intent,self) - self.maintain_metadata() - from lux.processor.Compiler import Compiler - self.current_vis = Compiler.compile_intent(self, self._intent) - - def copy_intent(self): - #creates a true copy of the dataframe's intent - output = [] - for clause in self._intent: - temp_clause = clause.copy_clause() - output.append(temp_clause) - return(output) - - def set_intent_as_vis(self,vis:Vis): - """ - Set intent of the dataframe as the Vis - - Parameters - ---------- - vis : Vis - """ - self.expire_recs() - self._intent = vis._inferred_intent - self._parse_validate_compile_intent() - - def to_pandas(self): - import lux.core - return lux.core.originalDF(self,copy=False) - - @property - def recommendation(self): - return self._recommendation - @recommendation.setter - def recommendation(self,recommendation:Dict): - 
self._recommendation = recommendation - @property - def current_vis(self): - return self._current_vis - @current_vis.setter - def current_vis(self,current_vis:Dict): - self._current_vis = current_vis - def __repr__(self): - # TODO: _repr_ gets called from _repr_html, need to get rid of this call - return "" - - ####################################################### - ########## SQL Metadata, type, model schema ########### - ####################################################### - - def set_SQL_connection(self, connection, t_name): - self.SQLconnection = connection - self.table_name = t_name - self.compute_SQL_dataset_metadata() - self.set_executor_type("SQL") - - def compute_SQL_dataset_metadata(self): - self.get_SQL_attributes() - for attr in list(self.columns): - self[attr] = None - self.data_type_lookup = {} - self.data_type = {} - #####NOTE: since we aren't expecting users to do much data processing with the SQL database, should we just keep this - ##### in the initialization and do it just once - self.compute_SQL_data_type() - self.compute_SQL_stats() - self.data_model_lookup = {} - self.data_model = {} - self.compute_data_model() - - def compute_SQL_stats(self): - # precompute statistics - self.unique_values = {} - self._min_max = {} - - self.get_SQL_unique_values() - #self.get_SQL_cardinality() - for attribute in self.columns: - if self.data_type_lookup[attribute] == 'quantitative': - self._min_max[attribute] = (self[attribute].min(), self[attribute].max()) - - def get_SQL_attributes(self): - if "." 
in self.table_name: - table_name = self.table_name[self.table_name.index(".")+1:] - else: - table_name = self.table_name - attr_query = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{}'".format(table_name) - attributes = list(pd.read_sql(attr_query, self.SQLconnection)['column_name']) - for attr in attributes: - self[attr] = None - - def get_SQL_cardinality(self): - cardinality = {} - for attr in list(self.columns): - card_query = pd.read_sql("SELECT Count(Distinct({})) FROM {}".format(attr, self.table_name), self.SQLconnection) - cardinality[attr] = list(card_query["count"])[0] - self.cardinality = cardinality - - def get_SQL_unique_values(self): - unique_vals = {} - for attr in list(self.columns): - unique_query = pd.read_sql("SELECT Distinct({}) FROM {}".format(attr, self.table_name), self.SQLconnection) - unique_vals[attr] = list(unique_query[attr]) - self.unique_values = unique_vals - - def compute_SQL_data_type(self): - data_type_lookup = {} - sql_dtypes = {} - self.get_SQL_cardinality() - if "." 
in self.table_name: - table_name = self.table_name[self.table_name.index(".")+1:] - else: - table_name = self.table_name - #get the data types of the attributes in the SQL table - for attr in list(self.columns): - datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format(table_name, attr) - datatype = list(pd.read_sql(datatype_query, self.SQLconnection)['data_type'])[0] - sql_dtypes[attr] = datatype - - data_type = {"quantitative":[], "nominal":[], "temporal":[]} - for attr in list(self.columns): - if str(attr).lower() in ["month", "year"]: - data_type_lookup[attr] = "temporal" - data_type["temporal"].append(attr) - elif sql_dtypes[attr] in ["character", "character varying", "boolean", "uuid", "text"]: - data_type_lookup[attr] = "nominal" - data_type["nominal"].append(attr) - elif sql_dtypes[attr] in ["integer", "real", "smallint", "smallserial", "serial"]: - if self.cardinality[attr] < 13: - data_type_lookup[attr] = "nominal" - data_type["nominal"].append(attr) - else: - data_type_lookup[attr] = "quantitative" - data_type["quantitative"].append(attr) - elif "time" in sql_dtypes[attr] or "date" in sql_dtypes[attr]: - data_type_lookup[attr] = "temporal" - data_type["temporal"].append(attr) - self.data_type_lookup = data_type_lookup - self.data_type = data_type - def _append_rec(self,rec_infolist,recommendations:Dict): - if (recommendations["collection"] is not None and len(recommendations["collection"])>0): - rec_infolist.append(recommendations) - def maintain_recs(self): - # `rec_df` is the dataframe to generate the recommendations on - # check to see if globally defined actions have been registered/removed - if (lux.update_actions["flag"] == True): - self._recs_fresh = False - show_prev = False # flag indicating whether rec_df is showing previous df or current self - if self._prev is not None: - rec_df = self._prev - rec_df._message = Message() - rec_df.maintain_metadata() # the prev dataframe may not 
have been printed before - last_event = self.history._events[-1].name - rec_df._message.add(f"Lux is visualizing the previous version of the dataframe before you applied {last_event}.") - show_prev = True - else: - rec_df = self - rec_df._message = Message() - # Add warning message if there exist ID fields - id_fields_str = "" - if (len(rec_df.data_type["id"])>0): - for id_field in rec_df.data_type["id"]: id_fields_str += f"{id_field}, " - id_fields_str = id_fields_str[:-2] - rec_df._message.add(f"{id_fields_str} is not visualized since it resembles an ID field.") - rec_df._prev = None # reset _prev - - if (not hasattr(rec_df,"_recs_fresh") or not rec_df._recs_fresh ): # Check that recs has not yet been computed - rec_infolist = [] - from lux.action.custom import custom - from lux.action.custom import custom_actions - from lux.action.correlation import correlation - from lux.action.univariate import univariate - from lux.action.enhance import enhance - from lux.action.filter import filter - from lux.action.generalize import generalize - from lux.action.row_group import row_group - from lux.action.column_group import column_group - if (rec_df.pre_aggregated): - if (rec_df.columns.name is not None): - rec_df._append_rec(rec_infolist, row_group(rec_df)) - if (rec_df.index.name is not None): - rec_df._append_rec(rec_infolist, column_group(rec_df)) - else: - if self.recommendation == {}: - # display conditions for default actions - no_vis = lambda ldf: (ldf.current_vis is None) or (ldf.current_vis is not None and len(ldf.current_vis) == 0) - one_current_vis = lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) == 1 - multiple_current_vis = lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) > 1 - - # globally register default actions - lux.register_action("correlation", correlation, no_vis) - lux.register_action("distribution", univariate, no_vis, "quantitative") - lux.register_action("occurrence", univariate, no_vis, "nominal") - 
lux.register_action("temporal", univariate, no_vis, "temporal") - - lux.register_action("enhance", enhance, one_current_vis) - lux.register_action("filter", filter, one_current_vis) - lux.register_action("generalize", generalize, one_current_vis) - - lux.register_action("custom", custom, multiple_current_vis) - - # generate vis from globally registered actions and append to dataframe - custom_action_collection = custom_actions(rec_df) - for rec in custom_action_collection: - rec_df._append_rec(rec_infolist, rec) - lux.update_actions["flag"] = False - - # Store _rec_info into a more user-friendly dictionary form - rec_df.recommendation = {} - for rec_info in rec_infolist: - action_type = rec_info["action"] - vlist = rec_info["collection"] - if (rec_df._plot_config): - if (rec_df.current_vis): - for vis in rec_df.current_vis: vis._plot_config = rec_df.plot_config - for vis in vlist: vis._plot_config = rec_df.plot_config - if (len(vlist)>0): - rec_df.recommendation[action_type] = vlist - rec_df._rec_info = rec_infolist - self._widget = rec_df.render_widget() - elif (show_prev): # re-render widget for the current dataframe if previous rec is not recomputed - self._widget = rec_df.render_widget() - self._recs_fresh = True - - - ####################################################### - ############## LuxWidget Result Display ############### - ####################################################### - @property - def widget(self): - if(self._widget): - return self._widget - @property - def exported(self) -> Union[Dict[str,VisList], VisList]: - """ - Get selected visualizations as exported Vis List - - Notes - ----- - Convert the _selectedVisIdxs dictionary into a programmable VisList - Example _selectedVisIdxs : - {'Correlation': [0, 2], 'Occurrence': [1]} - indicating the 0th and 2nd vis from the `Correlation` tab is selected, and the 1st vis from the `Occurrence` tab is selected. 
- - Returns - ------- - Union[Dict[str,VisList], VisList] - When there are no exported vis, return empty list -> [] - When all the exported vis is from the same tab, return a VisList of selected visualizations. -> VisList(v1, v2...) - When the exported vis is from the different tabs, return a dictionary with the action name as key and selected visualizations in the VisList. -> {"Enhance": VisList(v1, v2...), "Filter": VisList(v5, v7...), ..} - """ - if not hasattr(self,"_widget"): - warnings.warn( - "\nNo widget attached to the dataframe." - "Please assign dataframe to an output variable.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - , stacklevel=2) - return [] - exported_vis_lst = self._widget._selectedVisIdxs - exported_vis = [] - if (exported_vis_lst=={}): - if self._saved_export: - return self._saved_export - warnings.warn( - "\nNo visualization selected to export.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - ,stacklevel=2) - return [] - if len(exported_vis_lst) == 1 and "currentVis" in exported_vis_lst: - return self.current_vis - elif len(exported_vis_lst) > 1: - exported_vis = {} - if ("currentVis" in exported_vis_lst): - exported_vis["Current Vis"] = self.current_vis - for export_action in exported_vis_lst: - if (export_action != "currentVis"): - exported_vis[export_action] = VisList(list(map(self.recommendation[export_action].__getitem__, exported_vis_lst[export_action]))) - return exported_vis - elif len(exported_vis_lst) == 1 and ("currentVis" not in exported_vis_lst): - export_action = list(exported_vis_lst.keys())[0] - exported_vis = VisList(list(map(self.recommendation[export_action].__getitem__, exported_vis_lst[export_action]))) - self._saved_export = exported_vis - return exported_vis - else: - warnings.warn( - "\nNo visualization selected to export.\n" - "See more: 
https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - ,stacklevel=2) - return [] - - def remove_deleted_recs(self, change): - for action in self._widget.deletedIndices: - deletedSoFar = 0 - for index in self._widget.deletedIndices[action]: - self.recommendation[action].remove_index(index - deletedSoFar) - deletedSoFar += 1 - - def set_intent_on_click(self, change): - from IPython.display import display, clear_output - from lux.processor.Compiler import Compiler - - intent_action = list(self._widget.selectedIntentIndex.keys())[0] - vis = self.recommendation[intent_action][self._widget.selectedIntentIndex[intent_action][0]] - self.set_intent_as_vis(vis) - - self.maintain_metadata() - self.current_vis = Compiler.compile_intent(self, self._intent) - self.maintain_recs() - - with self.output: - clear_output() - display(self._widget) - - self._widget.observe(self.remove_deleted_recs, names='deletedIndices') - self._widget.observe(self.set_intent_on_click, names='selectedIntentIndex') - - def _repr_html_(self): - from IPython.display import display - from IPython.display import clear_output - import ipywidgets as widgets - - try: - if (self._pandas_only): - display(self.display_pandas()) - self._pandas_only=False - else: - if(self.index.nlevels>=2 or self.columns.nlevels >= 2): - warnings.warn( - "\nLux does not currently support dataframes " - "with hierarchical indexes.\n" - "Please convert the dataframe into a flat " - "table via `pandas.DataFrame.reset_index`.\n", - stacklevel=2, - ) - display(self.display_pandas()) - return - - if (len(self)<=0): - warnings.warn("\nLux can not operate on an empty dataframe.\nPlease check your input again.\n",stacklevel=2) - display(self.display_pandas()) - return - if (len(self.columns)<=1): - warnings.warn("\nLux defaults to Pandas when there is only a single column.",stacklevel=2) - display(self.display_pandas()) - return - self.maintain_metadata() - - if (self._intent!=[] and (not 
hasattr(self,"_compiled") or not self._compiled)): - from lux.processor.Compiler import Compiler - self.current_vis = Compiler.compile_intent(self, self._intent) - - if (lux.config.default_display == "lux"): - self._toggle_pandas_display = False - else: - self._toggle_pandas_display = True - - # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) - self.maintain_recs() - - #Observers(callback_function, listen_to_this_variable) - self._widget.observe(self.remove_deleted_recs, names='deletedIndices') - self._widget.observe(self.set_intent_on_click, names='selectedIntentIndex') - - if len(self.recommendation) > 0: - # box = widgets.Box(layout=widgets.Layout(display='inline')) - button = widgets.Button(description="Toggle Pandas/Lux",layout=widgets.Layout(width='140px',top='5px')) - self.output = widgets.Output() - # box.children = [button,output] - # output.children = [button] - # display(box) - display(button, self.output) - def on_button_clicked(b): - with self.output: - if (b): - self._toggle_pandas_display = not self._toggle_pandas_display - clear_output() - if (self._toggle_pandas_display): - display(self.display_pandas()) - else: - # b.layout.display = "none" - display(self._widget) - # b.layout.display = "inline-block" - button.on_click(on_button_clicked) - on_button_clicked(None) - else: - warnings.warn("\nLux defaults to Pandas when there are no valid actions defined.",stacklevel=2) - display(self.display_pandas()) - - except(KeyboardInterrupt,SystemExit): - raise - except: - warnings.warn( - "\nUnexpected error in rendering Lux widget and recommendations. 
" - "Falling back to Pandas display.\n\n" - "Please report this issue on Github: https://github.com/lux-org/lux/issues " - ,stacklevel=2) - display(self.display_pandas()) - def display_pandas(self): - return self.to_pandas() - def render_widget(self, renderer:str ="altair", input_current_vis=""): - """ - Generate a LuxWidget based on the LuxDataFrame - - Structure of widgetJSON: - { - 'current_vis': {}, - 'recommendation': [ - { - 'action': 'Correlation', - 'description': "some description", - 'vspec': [ - {Vega-Lite spec for vis 1}, - {Vega-Lite spec for vis 2}, - ... - ] - }, - ... repeat for other actions - ] - } - Parameters - ---------- - renderer : str, optional - Choice of visualization rendering library, by default "altair" - input_current_vis : lux.LuxDataFrame, optional - User-specified current vis to override default Current Vis, by default - """ - check_import_lux_widget() - import luxwidget - widgetJSON = self.to_JSON(self._rec_info, input_current_vis=input_current_vis) - return luxwidget.LuxWidget( - currentVis=widgetJSON["current_vis"], - recommendations=widgetJSON["recommendation"], - intent=LuxDataFrame.intent_to_string(self._intent), - message = self._message.to_html() - ) - @staticmethod - def intent_to_JSON(intent): - from lux.utils import utils - - filter_specs = utils.get_filter_specs(intent) - attrs_specs = utils.get_attrs_specs(intent) - - intent = {} - intent['attributes'] = [clause.attribute for clause in attrs_specs] - intent['filters'] = [clause.attribute for clause in filter_specs] - return intent - @staticmethod - def intent_to_string(intent): - if (intent): - return ", ".join([clause.to_string() for clause in intent]) - else: - return "" - - def to_JSON(self, rec_infolist, input_current_vis=""): - widget_spec = {} - if (self.current_vis): - self.executor.execute(self.current_vis, self) - widget_spec["current_vis"] = LuxDataFrame.current_vis_to_JSON(self.current_vis, input_current_vis) - else: - widget_spec["current_vis"] = {} - 
widget_spec["recommendation"] = [] - - # Recommended Collection - recCollection = LuxDataFrame.rec_to_JSON(rec_infolist) - widget_spec["recommendation"].extend(recCollection) - return widget_spec - - @staticmethod - def current_vis_to_JSON(vlist, input_current_vis=""): - current_vis_spec = {} - numVC = len(vlist) #number of visualizations in the vis list - if (numVC==1): - current_vis_spec = vlist[0].render_VSpec() - elif (numVC>1): - pass - return current_vis_spec - - @staticmethod - def rec_to_JSON(recs): - rec_lst = [] - import copy - rec_copy = copy.deepcopy(recs) - for idx,rec in enumerate(rec_copy): - if (len(rec["collection"])>0): - rec["vspec"] = [] - for vis in rec["collection"]: - chart = vis.render_VSpec() - rec["vspec"].append(chart) - rec_lst.append(rec) - # delete DataObjectCollection since not JSON serializable - del rec_lst[idx]["collection"] - return rec_lst - - # Overridden Pandas Functions - def head(self, n: int = 5): - self._prev = self - self._history.append_event("head", n=5) - return super(LuxDataFrame, self).head(n) - - def tail(self, n: int = 5): - self._prev = self - self._history.append_event("tail", n=5) - return super(LuxDataFrame, self).tail(n) - - def info(self, *args, **kwargs): - self._pandas_only=True - self._history.append_event("info",*args, **kwargs) - return super(LuxDataFrame, self).info(*args, **kwargs) - - def describe(self, *args, **kwargs): - self._pandas_only=True - self._history.append_event("describe",*args, **kwargs) - return super(LuxDataFrame, self).describe(*args, **kwargs) + """ + A subclass of pd.DataFrame that supports all dataframe operations while housing other variables and functions for generating visual recommendations. + """ + + # MUST register here for new properties!! 
+ _metadata = [ + "_intent", + "data_type_lookup", + "data_type", + "data_model_lookup", + "data_model", + "unique_values", + "cardinality", + "_rec_info", + "_pandas_only", + "_min_max", + "plot_config", + "_current_vis", + "_widget", + "_recommendation", + "_prev", + "_history", + "_saved_export", + ] + + def __init__(self, *args, **kw): + from lux.executor.PandasExecutor import PandasExecutor + + self._history = History() + self._intent = [] + self._recommendation = {} + self._saved_export = None + self._current_vis = [] + self._prev = None + super(LuxDataFrame, self).__init__(*args, **kw) + + self.executor_type = "Pandas" + self.executor = PandasExecutor() + self.SQLconnection = "" + self.table_name = "" + + self._sampled = None + self._default_pandas_display = True + self._toggle_pandas_display = True + self._plot_config = None + self._message = Message() + self._pandas_only = False + # Metadata + self.data_type_lookup = None + self.data_type = None + self.data_model_lookup = None + self.data_model = None + self.unique_values = None + self.cardinality = None + self._min_max = None + self.pre_aggregated = None + + @property + def _constructor(self): + return LuxDataFrame + + # @property + # def _constructor_sliced(self): + # def f(*args, **kwargs): + # # adapted from https://github.com/pandas-dev/pandas/issues/13208#issuecomment-326556232 + # return LuxSeries(*args, **kwargs).__finalize__(self, method='inherit') + # return f + @property + def history(self): + return self._history + + def maintain_metadata(self): + if ( + not hasattr(self, "_metadata_fresh") or not self._metadata_fresh + ): # Check that metadata has not yet been computed + if ( + len(self) > 0 + ): # only compute metadata information if the dataframe is non-empty + self.executor.compute_stats(self) + self.executor.compute_dataset_metadata(self) + self._infer_structure() + self._metadata_fresh = True + + def expire_recs(self): + self._recs_fresh = False + self.recommendation = {} + 
self.current_vis = None + self._widget = None + self._rec_info = None + self._sampled = None + + def expire_metadata(self): + # Set metadata as null + self._metadata_fresh = False + self.data_type_lookup = None + self.data_type = None + self.data_model_lookup = None + self.data_model = None + self.unique_values = None + self.cardinality = None + self._min_max = None + self.pre_aggregated = None + + ##################### + ## Override Pandas ## + ##################### + def __getattr__(self, name): + ret_value = super(LuxDataFrame, self).__getattr__(name) + self.expire_metadata() + self.expire_recs() + return ret_value + + def _set_axis(self, axis, labels): + super(LuxDataFrame, self)._set_axis(axis, labels) + self.expire_metadata() + self.expire_recs() + + def _update_inplace(self, *args, **kwargs): + super(LuxDataFrame, self)._update_inplace(*args, **kwargs) + self.expire_metadata() + self.expire_recs() + + def _set_item(self, key, value): + super(LuxDataFrame, self)._set_item(key, value) + self.expire_metadata() + self.expire_recs() + + def _infer_structure(self): + # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data + is_multi_index_flag = self.index.nlevels != 1 + not_int_index_flag = self.index.dtype != "int64" + small_df_flag = len(self) < 100 + self.pre_aggregated = ( + is_multi_index_flag or not_int_index_flag + ) and small_df_flag + if "Number of Records" in self.columns: + self.pre_aggregated = True + very_small_df_flag = len(self) <= 10 + if very_small_df_flag: + self.pre_aggregated = True + + def set_executor_type(self, exe): + if exe == "SQL": + import pkgutil + + if pkgutil.find_loader("psycopg2") is None: + raise ImportError( + "psycopg2 is not installed. Run `pip install psycopg2' to install psycopg2 to enable the Postgres connection." 
+ ) + else: + import psycopg2 + from lux.executor.SQLExecutor import SQLExecutor + + self.executor = SQLExecutor + else: + from lux.executor.PandasExecutor import PandasExecutor + + self.executor = PandasExecutor() + self.executor_type = exe + + @property + def plot_config(self): + return self._plot_config + + @plot_config.setter + def plot_config(self, config_func: Callable): + """ + Modify plot aesthetic settings to all visualizations in the dataframe display + Currently only supported for Altair visualizations + Parameters + ---------- + config_func : Callable + A function that takes in an AltairChart (https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html) as input and returns an AltairChart as output + + Example + ---------- + Changing the color of marks and adding a title for all charts displayed for this dataframe + >>> df = pd.read_csv("lux/data/car.csv") + >>> def changeColorAddTitle(chart): + chart = chart.configure_mark(color="red") # change mark color to red + chart.title = "Custom Title" # add title to chart + return chart + >>> df.plot_config = changeColorAddTitle + >>> df + Change the opacity of all scatterplots displayed for this dataframe + >>> df = pd.read_csv("lux/data/olympic.csv") + >>> def changeOpacityScatterOnly(chart): + if chart.mark=='circle': + chart = chart.configure_mark(opacity=0.1) # lower opacity + return chart + >>> df.plot_config = changeOpacityScatterOnly + >>> df + """ + self._plot_config = config_func + self._recs_fresh = False + + def clear_plot_config(self): + self._plot_config = None + self._recs_fresh = False + + @property + def intent(self): + return self._intent + + @intent.setter + def intent(self, intent_input: Union[List[Union[str, Clause]], Vis]): + is_list_input = isinstance(intent_input, list) + is_vis_input = isinstance(intent_input, Vis) + if not (is_list_input or is_vis_input): + raise TypeError( + "Input intent must be either a list (of strings or lux.Clause) or a lux.Vis object." 
+ "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html" + ) + if is_list_input: + self.set_intent(intent_input) + elif is_vis_input: + self.set_intent_as_vis(intent_input) + + def clear_intent(self): + self.intent = [] + + def set_intent(self, intent: List[Union[str, Clause]]): + """ + Main function to set the intent of the dataframe. + The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. + + Parameters + ---------- + intent : List[str,Clause] + intent list, can be a mix of string shorthand or a lux.Clause object + + Notes + ----- + :doc:`../guide/clause` + """ + self.expire_recs() + self._intent = intent + self._parse_validate_compile_intent() + + def _parse_validate_compile_intent(self): + from lux.processor.Parser import Parser + from lux.processor.Validator import Validator + + self._intent = Parser.parse(self._intent) + Validator.validate_intent(self._intent, self) + self.maintain_metadata() + from lux.processor.Compiler import Compiler + + self.current_vis = Compiler.compile_intent(self, self._intent) + + def copy_intent(self): + # creates a true copy of the dataframe's intent + output = [] + for clause in self._intent: + temp_clause = clause.copy_clause() + output.append(temp_clause) + return output + + def set_intent_as_vis(self, vis: Vis): + """ + Set intent of the dataframe as the Vis + + Parameters + ---------- + vis : Vis + """ + self.expire_recs() + self._intent = vis._inferred_intent + self._parse_validate_compile_intent() + + def to_pandas(self): + import lux.core + + return lux.core.originalDF(self, copy=False) + + @property + def recommendation(self): + return self._recommendation + + @recommendation.setter + def recommendation(self, recommendation: Dict): + self._recommendation = recommendation + + @property + def current_vis(self): + return self._current_vis + + @current_vis.setter + def current_vis(self, current_vis: Dict): + self._current_vis = current_vis + + def 
__repr__(self): + # TODO: _repr_ gets called from _repr_html, need to get rid of this call + return "" + + ####################################################### + ########## SQL Metadata, type, model schema ########### + ####################################################### + + def set_SQL_connection(self, connection, t_name): + self.SQLconnection = connection + self.table_name = t_name + self.compute_SQL_dataset_metadata() + self.set_executor_type("SQL") + + def compute_SQL_dataset_metadata(self): + self.get_SQL_attributes() + for attr in list(self.columns): + self[attr] = None + self.data_type_lookup = {} + self.data_type = {} + #####NOTE: since we aren't expecting users to do much data processing with the SQL database, should we just keep this + ##### in the initialization and do it just once + self.compute_SQL_data_type() + self.compute_SQL_stats() + self.data_model_lookup = {} + self.data_model = {} + self.compute_data_model() + + def compute_SQL_stats(self): + # precompute statistics + self.unique_values = {} + self._min_max = {} + + self.get_SQL_unique_values() + # self.get_SQL_cardinality() + for attribute in self.columns: + if self.data_type_lookup[attribute] == "quantitative": + self._min_max[attribute] = ( + self[attribute].min(), + self[attribute].max(), + ) + + def get_SQL_attributes(self): + if "." 
in self.table_name: + table_name = self.table_name[self.table_name.index(".") + 1 :] + else: + table_name = self.table_name + attr_query = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{}'".format( + table_name + ) + attributes = list(pd.read_sql(attr_query, self.SQLconnection)["column_name"]) + for attr in attributes: + self[attr] = None + + def get_SQL_cardinality(self): + cardinality = {} + for attr in list(self.columns): + card_query = pd.read_sql( + "SELECT Count(Distinct({})) FROM {}".format(attr, self.table_name), + self.SQLconnection, + ) + cardinality[attr] = list(card_query["count"])[0] + self.cardinality = cardinality + + def get_SQL_unique_values(self): + unique_vals = {} + for attr in list(self.columns): + unique_query = pd.read_sql( + "SELECT Distinct({}) FROM {}".format(attr, self.table_name), + self.SQLconnection, + ) + unique_vals[attr] = list(unique_query[attr]) + self.unique_values = unique_vals + + def compute_SQL_data_type(self): + data_type_lookup = {} + sql_dtypes = {} + self.get_SQL_cardinality() + if "." 
in self.table_name: + table_name = self.table_name[self.table_name.index(".") + 1 :] + else: + table_name = self.table_name + # get the data types of the attributes in the SQL table + for attr in list(self.columns): + datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format( + table_name, attr + ) + datatype = list( + pd.read_sql(datatype_query, self.SQLconnection)["data_type"] + )[0] + sql_dtypes[attr] = datatype + + data_type = {"quantitative": [], "nominal": [], "temporal": []} + for attr in list(self.columns): + if str(attr).lower() in ["month", "year"]: + data_type_lookup[attr] = "temporal" + data_type["temporal"].append(attr) + elif sql_dtypes[attr] in [ + "character", + "character varying", + "boolean", + "uuid", + "text", + ]: + data_type_lookup[attr] = "nominal" + data_type["nominal"].append(attr) + elif sql_dtypes[attr] in [ + "integer", + "real", + "smallint", + "smallserial", + "serial", + ]: + if self.cardinality[attr] < 13: + data_type_lookup[attr] = "nominal" + data_type["nominal"].append(attr) + else: + data_type_lookup[attr] = "quantitative" + data_type["quantitative"].append(attr) + elif "time" in sql_dtypes[attr] or "date" in sql_dtypes[attr]: + data_type_lookup[attr] = "temporal" + data_type["temporal"].append(attr) + self.data_type_lookup = data_type_lookup + self.data_type = data_type + + def _append_rec(self, rec_infolist, recommendations: Dict): + if ( + recommendations["collection"] is not None + and len(recommendations["collection"]) > 0 + ): + rec_infolist.append(recommendations) + + def maintain_recs(self): + # `rec_df` is the dataframe to generate the recommendations on + # check to see if globally defined actions have been registered/removed + if lux.update_actions["flag"] == True: + self._recs_fresh = False + show_prev = False # flag indicating whether rec_df is showing previous df or current self + if self._prev is not None: + rec_df = self._prev + rec_df._message = 
Message() + rec_df.maintain_metadata() # the prev dataframe may not have been printed before + last_event = self.history._events[-1].name + rec_df._message.add( + f"Lux is visualizing the previous version of the dataframe before you applied {last_event}." + ) + show_prev = True + else: + rec_df = self + rec_df._message = Message() + # Add warning message if there exist ID fields + id_fields_str = "" + if len(rec_df.data_type["id"]) > 0: + for id_field in rec_df.data_type["id"]: + id_fields_str += f"{id_field}, " + id_fields_str = id_fields_str[:-2] + rec_df._message.add( + f"{id_fields_str} is not visualized since it resembles an ID field." + ) + rec_df._prev = None # reset _prev + + if ( + not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh + ): # Check that recs has not yet been computed + rec_infolist = [] + from lux.action.custom import custom + from lux.action.custom import custom_actions + from lux.action.correlation import correlation + from lux.action.univariate import univariate + from lux.action.enhance import enhance + from lux.action.filter import filter + from lux.action.generalize import generalize + from lux.action.row_group import row_group + from lux.action.column_group import column_group + + if rec_df.pre_aggregated: + if rec_df.columns.name is not None: + rec_df._append_rec(rec_infolist, row_group(rec_df)) + if rec_df.index.name is not None: + rec_df._append_rec(rec_infolist, column_group(rec_df)) + else: + if self.recommendation == {}: + # display conditions for default actions + no_vis = lambda ldf: (ldf.current_vis is None) or ( + ldf.current_vis is not None and len(ldf.current_vis) == 0 + ) + one_current_vis = ( + lambda ldf: ldf.current_vis is not None + and len(ldf.current_vis) == 1 + ) + multiple_current_vis = ( + lambda ldf: ldf.current_vis is not None + and len(ldf.current_vis) > 1 + ) + + # globally register default actions + lux.register_action("correlation", correlation, no_vis) + lux.register_action( + "distribution", 
univariate, no_vis, "quantitative" + ) + lux.register_action("occurrence", univariate, no_vis, "nominal") + lux.register_action("temporal", univariate, no_vis, "temporal") + + lux.register_action("enhance", enhance, one_current_vis) + lux.register_action("filter", filter, one_current_vis) + lux.register_action("generalize", generalize, one_current_vis) + + lux.register_action("custom", custom, multiple_current_vis) + + # generate vis from globally registered actions and append to dataframe + custom_action_collection = custom_actions(rec_df) + for rec in custom_action_collection: + rec_df._append_rec(rec_infolist, rec) + lux.update_actions["flag"] = False + + # Store _rec_info into a more user-friendly dictionary form + rec_df.recommendation = {} + for rec_info in rec_infolist: + action_type = rec_info["action"] + vlist = rec_info["collection"] + if rec_df._plot_config: + if rec_df.current_vis: + for vis in rec_df.current_vis: + vis._plot_config = rec_df.plot_config + for vis in vlist: + vis._plot_config = rec_df.plot_config + if len(vlist) > 0: + rec_df.recommendation[action_type] = vlist + rec_df._rec_info = rec_infolist + self._widget = rec_df.render_widget() + elif ( + show_prev + ): # re-render widget for the current dataframe if previous rec is not recomputed + self._widget = rec_df.render_widget() + self._recs_fresh = True + + ####################################################### + ############## LuxWidget Result Display ############### + ####################################################### + @property + def widget(self): + if self._widget: + return self._widget + + @property + def exported(self) -> Union[Dict[str, VisList], VisList]: + """ + Get selected visualizations as exported Vis List + + Notes + ----- + Convert the _selectedVisIdxs dictionary into a programmable VisList + Example _selectedVisIdxs : + {'Correlation': [0, 2], 'Occurrence': [1]} + indicating the 0th and 2nd vis from the `Correlation` tab is selected, and the 1st vis from the 
`Occurrence` tab is selected. + + Returns + ------- + Union[Dict[str,VisList], VisList] + When there are no exported vis, return empty list -> [] + When all the exported vis is from the same tab, return a VisList of selected visualizations. -> VisList(v1, v2...) + When the exported vis is from the different tabs, return a dictionary with the action name as key and selected visualizations in the VisList. -> {"Enhance": VisList(v1, v2...), "Filter": VisList(v5, v7...), ..} + """ + if not hasattr(self, "_widget"): + warnings.warn( + "\nNo widget attached to the dataframe." + "Please assign dataframe to an output variable.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + exported_vis_lst = self._widget._selectedVisIdxs + exported_vis = [] + if exported_vis_lst == {}: + if self._saved_export: + return self._saved_export + warnings.warn( + "\nNo visualization selected to export.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + if len(exported_vis_lst) == 1 and "currentVis" in exported_vis_lst: + return self.current_vis + elif len(exported_vis_lst) > 1: + exported_vis = {} + if "currentVis" in exported_vis_lst: + exported_vis["Current Vis"] = self.current_vis + for export_action in exported_vis_lst: + if export_action != "currentVis": + exported_vis[export_action] = VisList( + list( + map( + self.recommendation[export_action].__getitem__, + exported_vis_lst[export_action], + ) + ) + ) + return exported_vis + elif len(exported_vis_lst) == 1 and ("currentVis" not in exported_vis_lst): + export_action = list(exported_vis_lst.keys())[0] + exported_vis = VisList( + list( + map( + self.recommendation[export_action].__getitem__, + exported_vis_lst[export_action], + ) + ) + ) + self._saved_export = exported_vis + return exported_vis + else: + warnings.warn( + "\nNo visualization selected to export.\n" + "See 
more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + + def remove_deleted_recs(self, change): + for action in self._widget.deletedIndices: + deletedSoFar = 0 + for index in self._widget.deletedIndices[action]: + self.recommendation[action].remove_index(index - deletedSoFar) + deletedSoFar += 1 + + def set_intent_on_click(self, change): + from IPython.display import display, clear_output + from lux.processor.Compiler import Compiler + + intent_action = list(self._widget.selectedIntentIndex.keys())[0] + vis = self.recommendation[intent_action][ + self._widget.selectedIntentIndex[intent_action][0] + ] + self.set_intent_as_vis(vis) + + self.maintain_metadata() + self.current_vis = Compiler.compile_intent(self, self._intent) + self.maintain_recs() + + with self.output: + clear_output() + display(self._widget) + + self._widget.observe(self.remove_deleted_recs, names="deletedIndices") + self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") + + def _repr_html_(self): + from IPython.display import display + from IPython.display import clear_output + import ipywidgets as widgets + + try: + if self._pandas_only: + display(self.display_pandas()) + self._pandas_only = False + else: + if self.index.nlevels >= 2 or self.columns.nlevels >= 2: + warnings.warn( + "\nLux does not currently support dataframes " + "with hierarchical indexes.\n" + "Please convert the dataframe into a flat " + "table via `pandas.DataFrame.reset_index`.\n", + stacklevel=2, + ) + display(self.display_pandas()) + return + + if len(self) <= 0: + warnings.warn( + "\nLux can not operate on an empty dataframe.\nPlease check your input again.\n", + stacklevel=2, + ) + display(self.display_pandas()) + return + if len(self.columns) <= 1: + warnings.warn( + "\nLux defaults to Pandas when there is only a single column.", + stacklevel=2, + ) + display(self.display_pandas()) + return + self.maintain_metadata() + + if 
self._intent != [] and ( + not hasattr(self, "_compiled") or not self._compiled + ): + from lux.processor.Compiler import Compiler + + self.current_vis = Compiler.compile_intent(self, self._intent) + + if lux.config.default_display == "lux": + self._toggle_pandas_display = False + else: + self._toggle_pandas_display = True + + # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) + self.maintain_recs() + + # Observers(callback_function, listen_to_this_variable) + self._widget.observe(self.remove_deleted_recs, names="deletedIndices") + self._widget.observe( + self.set_intent_on_click, names="selectedIntentIndex" + ) + + if len(self.recommendation) > 0: + # box = widgets.Box(layout=widgets.Layout(display='inline')) + button = widgets.Button( + description="Toggle Pandas/Lux", + layout=widgets.Layout(width="140px", top="5px"), + ) + self.output = widgets.Output() + # box.children = [button,output] + # output.children = [button] + # display(box) + display(button, self.output) + + def on_button_clicked(b): + with self.output: + if b: + self._toggle_pandas_display = ( + not self._toggle_pandas_display + ) + clear_output() + if self._toggle_pandas_display: + display(self.display_pandas()) + else: + # b.layout.display = "none" + display(self._widget) + # b.layout.display = "inline-block" + + button.on_click(on_button_clicked) + on_button_clicked(None) + else: + warnings.warn( + "\nLux defaults to Pandas when there are no valid actions defined.", + stacklevel=2, + ) + display(self.display_pandas()) + + except (KeyboardInterrupt, SystemExit): + raise + except: + warnings.warn( + "\nUnexpected error in rendering Lux widget and recommendations. 
" + "Falling back to Pandas display.\n\n" + "Please report this issue on Github: https://github.com/lux-org/lux/issues ", + stacklevel=2, + ) + display(self.display_pandas()) + + def display_pandas(self): + return self.to_pandas() + + def render_widget(self, renderer: str = "altair", input_current_vis=""): + """ + Generate a LuxWidget based on the LuxDataFrame + + Structure of widgetJSON: + { + 'current_vis': {}, + 'recommendation': [ + { + 'action': 'Correlation', + 'description': "some description", + 'vspec': [ + {Vega-Lite spec for vis 1}, + {Vega-Lite spec for vis 2}, + ... + ] + }, + ... repeat for other actions + ] + } + Parameters + ---------- + renderer : str, optional + Choice of visualization rendering library, by default "altair" + input_current_vis : lux.LuxDataFrame, optional + User-specified current vis to override default Current Vis, by default + """ + check_import_lux_widget() + import luxwidget + + widgetJSON = self.to_JSON(self._rec_info, input_current_vis=input_current_vis) + return luxwidget.LuxWidget( + currentVis=widgetJSON["current_vis"], + recommendations=widgetJSON["recommendation"], + intent=LuxDataFrame.intent_to_string(self._intent), + message=self._message.to_html(), + ) + + @staticmethod + def intent_to_JSON(intent): + from lux.utils import utils + + filter_specs = utils.get_filter_specs(intent) + attrs_specs = utils.get_attrs_specs(intent) + + intent = {} + intent["attributes"] = [clause.attribute for clause in attrs_specs] + intent["filters"] = [clause.attribute for clause in filter_specs] + return intent + + @staticmethod + def intent_to_string(intent): + if intent: + return ", ".join([clause.to_string() for clause in intent]) + else: + return "" + + def to_JSON(self, rec_infolist, input_current_vis=""): + widget_spec = {} + if self.current_vis: + self.executor.execute(self.current_vis, self) + widget_spec["current_vis"] = LuxDataFrame.current_vis_to_JSON( + self.current_vis, input_current_vis + ) + else: + 
widget_spec["current_vis"] = {} + widget_spec["recommendation"] = [] + + # Recommended Collection + recCollection = LuxDataFrame.rec_to_JSON(rec_infolist) + widget_spec["recommendation"].extend(recCollection) + return widget_spec + + @staticmethod + def current_vis_to_JSON(vlist, input_current_vis=""): + current_vis_spec = {} + numVC = len(vlist) # number of visualizations in the vis list + if numVC == 1: + current_vis_spec = vlist[0].render_VSpec() + elif numVC > 1: + pass + return current_vis_spec + + @staticmethod + def rec_to_JSON(recs): + rec_lst = [] + import copy + + rec_copy = copy.deepcopy(recs) + for idx, rec in enumerate(rec_copy): + if len(rec["collection"]) > 0: + rec["vspec"] = [] + for vis in rec["collection"]: + chart = vis.render_VSpec() + rec["vspec"].append(chart) + rec_lst.append(rec) + # delete DataObjectCollection since not JSON serializable + del rec_lst[idx]["collection"] + return rec_lst + + # Overridden Pandas Functions + def head(self, n: int = 5): + self._prev = self + self._history.append_event("head", n=5) + return super(LuxDataFrame, self).head(n) + + def tail(self, n: int = 5): + self._prev = self + self._history.append_event("tail", n=5) + return super(LuxDataFrame, self).tail(n) + + def info(self, *args, **kwargs): + self._pandas_only = True + self._history.append_event("info", *args, **kwargs) + return super(LuxDataFrame, self).info(*args, **kwargs) + + def describe(self, *args, **kwargs): + self._pandas_only = True + self._history.append_event("describe", *args, **kwargs) + return super(LuxDataFrame, self).describe(*args, **kwargs) diff --git a/lux/vislib/altair/AltairRenderer.py b/lux/vislib/altair/AltairRenderer.py index 3068d286..2692f72e 100644 --- a/lux/vislib/altair/AltairRenderer.py +++ b/lux/vislib/altair/AltairRenderer.py @@ -80,9 +80,9 @@ def create_vis(self, vis, standalone=True): chart = None if chart: + if vis.plot_config: + chart.chart = vis.plot_config(chart.chart) if self.output_type == "VegaLite": - if 
vis.plot_config: - chart.chart = vis.plot_config(chart.chart) chart_dict = chart.chart.to_dict() # this is a bit of a work around because altair must take a pandas dataframe and we can only generate a luxDataFrame # chart["data"] = { "values": vis.data.to_dict(orient='records') } @@ -92,20 +92,12 @@ def create_vis(self, vis, standalone=True): elif self.output_type == "Altair": import inspect - if (chart): - if (vis.plot_config): chart.chart = vis.plot_config(chart.chart) - if (self.output_type=="VegaLite"): - chart_dict = chart.chart.to_dict() - # this is a bit of a work around because altair must take a pandas dataframe and we can only generate a luxDataFrame - # chart["data"] = { "values": vis.data.to_dict(orient='records') } - # chart_dict["width"] = 160 - # chart_dict["height"] = 150 - return chart_dict - elif (self.output_type=="Altair"): - import inspect - if (vis.plot_config): chart.code +='\n'.join(inspect.getsource(vis.plot_config).split('\n ')[1:-1]) - chart.code +="\nchart" - chart.code = chart.code.replace('\n\t\t','\n') + if vis.plot_config: + chart.code += "\n".join( + inspect.getsource(vis.plot_config).split("\n ")[1:-1] + ) + chart.code += "\nchart" + chart.code = chart.code.replace("\n\t\t", "\n") var = vis._source if var is not None: diff --git a/tests/test_config.py b/tests/test_config.py index 09fb1522..adfd2655 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -143,39 +143,44 @@ def test_remove_invalid_action(): def test_remove_default_actions(): - df = pd.read_csv("lux/data/car.csv") - df._repr_html_() + df = pd.read_csv("lux/data/car.csv") + df._repr_html_() - lux.remove_action("Distribution") - df._repr_html_() - assert("Distribution" not in df.recommendation) + lux.remove_action("Distribution") + df._repr_html_() + assert "Distribution" not in df.recommendation - lux.remove_action("Occurrence") - df._repr_html_() - assert("Occurrence" not in df.recommendation) + lux.remove_action("Occurrence") + df._repr_html_() + assert 
"Occurrence" not in df.recommendation - lux.remove_action("Temporal") - df._repr_html_() - assert("Temporal" not in df.recommendation) + lux.remove_action("Temporal") + df._repr_html_() + assert "Temporal" not in df.recommendation - lux.remove_action("Correlation") - df._repr_html_() - assert("Correlation" not in df.recommendation) + lux.remove_action("Correlation") + df._repr_html_() + assert "Correlation" not in df.recommendation + + assert ( + len(df.recommendation) == 0, + "Default actions should not be rendered after it has been removed.", + ) - assert(len(df.recommendation) == 0, - "Default actions should not be rendered after it has been removed.") + df = register_new_action() + df.set_intent(["Acceleration", "Horsepower"]) + df._repr_html_() + assert ( + "bars" in df.recommendation, + "Bars should be rendered after it has been registered with correct intent.", + ) + assert len(df.recommendation["bars"]) > 0 - df = register_new_action() - df.set_intent(["Acceleration", "Horsepower"]) - df._repr_html_() - assert("bars" in df.recommendation, - "Bars should be rendered after it has been registered with correct intent.") - assert(len(df.recommendation["bars"]) > 0) -# TODO: This test does not pass in pytest but is working in Jupyter notebook. +# TODO: This test does not pass in pytest but is working in Jupyter notebook. 
# def test_plot_setting(): # df = pd.read_csv("lux/data/car.csv") -# df["Year"] = pd.to_datetime(df["Year"], format='%Y') +# df["Year"] = pd.to_datetime(df["Year"], format='%Y') # def change_color_add_title(chart): # chart = chart.configure_mark(color="green") # change mark color to green # chart.title = "Custom Title" # add title to chart From 4b51dd47a6e0104c3947e38576737659e80ad94a Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sun, 1 Nov 2020 18:13:18 -0800 Subject: [PATCH 08/39] remove dev dependencies --- requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3370b7c0..b23c6009 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,8 +2,6 @@ scipy>=1.3.3 altair>=4.0.0 pandas>=1.1.0 scikit-learn>=0.22 -Sphinx>=3.0.2 -sphinx-rtd-theme>=0.4.3 # Install only to use SQLExecutor # psycopg2>=2.8.5 # psycopg2-binary>=2.8.5 From 700a0bc5f3378b80cfd6197d959f4eb34c388c3a Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Tue, 10 Nov 2020 23:09:38 -0800 Subject: [PATCH 09/39] first pass -- inline comments --- lux/action/column_group.py | 6 +- lux/action/correlation.py | 5 +- lux/action/enhance.py | 5 +- lux/action/filter.py | 4 +- lux/action/univariate.py | 10 ++-- lux/core/frame.py | 10 ++-- lux/executor/PandasExecutor.py | 12 ++-- lux/processor/Compiler.py | 63 +++++++++------------ lux/processor/Parser.py | 10 ++-- lux/processor/Validator.py | 5 +- lux/utils/date_utils.py | 18 +++--- lux/utils/utils.py | 5 +- lux/vis/Vis.py | 12 +--- lux/vis/VisList.py | 26 ++------- lux/vislib/altair/BarChart.py | 10 ++-- lux/vislib/altair/Heatmap.py | 5 +- lux/vislib/altair/ScatterChart.py | 5 +- tests/test_compiler.py | 35 +++++------- tests/test_dates.py | 9 +-- tests/test_error_warning.py | 3 - tests/test_executor.py | 18 +++--- tests/test_interestingness.py | 23 -------- tests/test_maintainence.py | 11 +--- tests/test_pandas_coverage.py | 91 +------------------------------ tests/test_vis.py | 10 ++-- 25 
files changed, 111 insertions(+), 300 deletions(-) diff --git a/lux/action/column_group.py b/lux/action/column_group.py index 710cea95..049da68a 100644 --- a/lux/action/column_group.py +++ b/lux/action/column_group.py @@ -31,9 +31,9 @@ def column_group(ldf): ldf_flat = ldf if isinstance(ldf.columns, pd.DatetimeIndex): ldf_flat.columns = ldf_flat.columns.format() - ldf_flat = ( - ldf_flat.reset_index() - ) # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis + + # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis + ldf_flat = ldf_flat.reset_index() if ldf.index.nlevels == 1: if ldf.index.name: index_column_name = ldf.index.name diff --git a/lux/action/correlation.py b/lux/action/correlation.py index 5d51ba01..1a999e48 100644 --- a/lux/action/correlation.py +++ b/lux/action/correlation.py @@ -53,9 +53,8 @@ def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True): "description": "Show relationships between two

quantitative

attributes.", } ignore_rec_flag = False - if ( - len(ldf) < 5 - ): # Doesn't make sense to compute correlation if less than 4 data values + # Doesn't make sense to compute correlation if less than 4 data values + if len(ldf) < 5: ignore_rec_flag = True # Then use the data populated in the vis list to compute score for vis in vlist: diff --git a/lux/action/enhance.py b/lux/action/enhance.py index ffdc2423..fb889b11 100644 --- a/lux/action/enhance.py +++ b/lux/action/enhance.py @@ -53,9 +53,8 @@ def enhance(ldf): "action": "Enhance", "description": f"Further breaking down current {intended_attrs} intent by additional attribute.", } - elif ( - len(attr_specs) > 2 - ): # if there are too many column attributes, return don't generate Enhance recommendations + # if there are too many column attributes, return don't generate Enhance recommendations + elif len(attr_specs) > 2: recommendation = {"action": "Enhance"} recommendation["collection"] = [] return recommendation diff --git a/lux/action/filter.py b/lux/action/filter.py index f0972722..0f2c6037 100644 --- a/lux/action/filter.py +++ b/lux/action/filter.py @@ -86,8 +86,8 @@ def get_complementary_ops(fltr_op): new_spec.append(new_filter) temp_vis = Vis(new_spec, score=1) output.append(temp_vis) - - else: # if no existing filters, create filters using unique values from all categorical variables in the dataset + # if no existing filters, create filters using unique values from all categorical variables in the dataset + else: intended_attrs = ", ".join( [ clause.attribute diff --git a/lux/action/univariate.py b/lux/action/univariate.py index 4eb0157e..8f8cd1ac 100644 --- a/lux/action/univariate.py +++ b/lux/action/univariate.py @@ -58,9 +58,8 @@ def univariate(ldf, *args): "action": "Distribution", "description": "Show univariate histograms of

quantitative

attributes.", } - if ( - len(ldf) < 5 - ): # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated) + # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated) + if len(ldf) < 5: ignore_rec_flag = True elif data_type_constraint == "nominal": intent = [lux.Clause("?", data_type="nominal")] @@ -76,9 +75,8 @@ def univariate(ldf, *args): "action": "Temporal", "description": "Show trends over

time-related

attributes.", } - if ( - len(ldf) < 3 - ): # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated) + # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated) + if len(ldf) < 3: ignore_rec_flag = True if ignore_rec_flag: recommendation["collection"] = [] diff --git a/lux/core/frame.py b/lux/core/frame.py index 6e45621b..1dfa0d04 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -482,9 +482,8 @@ def maintain_recs(self): ) rec_df._prev = None # reset _prev - if ( - not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh - ): # Check that recs has not yet been computed + # Check that recs has not yet been computed + if not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh: rec_infolist = [] from lux.action.custom import custom from lux.action.custom import custom_actions @@ -550,9 +549,8 @@ def maintain_recs(self): rec_df.recommendation[action_type] = vlist rec_df._rec_info = rec_infolist self._widget = rec_df.render_widget() - elif ( - show_prev - ): # re-render widget for the current dataframe if previous rec is not recomputed + # re-render widget for the current dataframe if previous rec is not recomputed + elif show_prev: self._widget = rec_df.render_widget() self._recs_fresh = True diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index 64fa2e54..d168cdd9 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -80,9 +80,8 @@ def execute(vislist: VisList, ldf: LuxDataFrame): """ PandasExecutor.execute_sampling(ldf) for vis in vislist: - vis._vis_data = ( - ldf._sampled - ) # The vis data starts off being original or sampled dataframe + # The vis data starts off being original or sampled dataframe + vis._vis_data = ldf._sampled filter_executed = PandasExecutor.execute_filter(vis) # Select relevant data based on attribute information attributes = set([]) @@ -220,9 +219,10 @@ def execute_aggregate(vis: Vis, 
isFiltered=True): ) == N_unique_vals * len( color_attr_vals ), f"Aggregated data missing values compared to original range of values of `{groupby_attr.attribute, color_attr.attribute}`." - vis._vis_data = vis.data.iloc[ - :, :3 - ] # Keep only the three relevant columns not the *_right columns resulting from merge + + # Keep only the three relevant columns not the *_right columns resulting from merge + vis._vis_data = vis.data.iloc[:, :3] + else: df = pd.DataFrame({columns[0]: attr_unique_vals}) diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py index 0635f2de..cf04e741 100644 --- a/lux/processor/Compiler.py +++ b/lux/processor/Compiler.py @@ -37,16 +37,13 @@ def __repr__(self): @staticmethod def compile_vis(ldf: LuxDataFrame, vis: Vis) -> VisList: if vis: - vis_collection = Compiler.populate_data_type_model( - ldf, [vis] - ) # autofill data type/model information - vis_collection = Compiler.remove_all_invalid( - vis_collection - ) # remove invalid visualizations from collection + # autofill data type/model information + vis_collection = Compiler.populate_data_type_model(ldf, [vis]) + # remove invalid visualizations from collection + vis_collection = Compiler.remove_all_invalid(vis_collection) for vis in vis_collection: - Compiler.determine_encoding( - ldf, vis - ) # autofill viz related information + # autofill viz related information + Compiler.determine_encoding(ldf, vis) ldf._compiled = True return vis_collection @@ -72,17 +69,14 @@ def compile_intent(ldf: LuxDataFrame, _inferred_intent: List[Clause]) -> VisList """ if _inferred_intent: vis_collection = Compiler.enumerate_collection(_inferred_intent, ldf) - vis_collection = Compiler.populate_data_type_model( - ldf, vis_collection - ) # autofill data type/model information + # autofill data type/model information + vis_collection = Compiler.populate_data_type_model(ldf, vis_collection) + # remove invalid visualizations from collection if len(vis_collection) >= 1: - vis_collection = 
Compiler.remove_all_invalid( - vis_collection - ) # remove invalid visualizations from collection + vis_collection = Compiler.remove_all_invalid(vis_collection) for vis in vis_collection: - Compiler.determine_encoding( - ldf, vis - ) # autofill viz related information + # autofill viz related information + Compiler.determine_encoding(ldf, vis) ldf._compiled = True return vis_collection @@ -121,9 +115,8 @@ def combine(col_attrs, accum): for i in range(n): column_list = copy.deepcopy(accum + [col_attrs[0][i]]) if last: - if ( - len(filters) > 0 - ): # if we have filters, generate combinations for each row. + # if we have filters, generate combinations for each row. + if len(filters) > 0: for row in filters: _inferred_intent = copy.deepcopy(column_list + [row]) vis = Vis(_inferred_intent) @@ -164,9 +157,8 @@ def populate_data_type_model(ldf, vis_collection) -> VisList: if clause.description == "?": clause.description = "" # TODO: Note that "and not is_datetime_string(clause.attribute))" is a temporary hack and breaks the `test_row_column_group` example - if ( - clause.attribute != "" and clause.attribute != "Record" - ): # and not is_datetime_string(clause.attribute): + # and not is_datetime_string(clause.attribute): + if clause.attribute != "" and clause.attribute != "Record": if clause.data_type == "": clause.data_type = ldf.data_type_lookup[clause.attribute] if clause.data_type == "id": @@ -174,9 +166,8 @@ def populate_data_type_model(ldf, vis_collection) -> VisList: if clause.data_model == "": clause.data_model = ldf.data_model_lookup[clause.attribute] if clause.value != "": - if ( - vis.title == "" - ): # If user provided title for Vis, then don't override. + # If user provided title for Vis, then don't override. 
+ if vis.title == "": if isinstance(clause.value, np.datetime64): chart_title = date_utils.date_formatter(clause.value, ldf) else: @@ -303,10 +294,9 @@ def line_or_bar(ldf, dimension: Clause, measure: Clause): dimension = d2 color_attr = d1 else: + # if same attribute then remove_column_from_spec will remove both dims, we only want to remove one if d1.attribute == d2.attribute: - vis._inferred_intent.pop( - 0 - ) # if same attribute then remove_column_from_spec will remove both dims, we only want to remove one + vis._inferred_intent.pop(0) else: vis.remove_column_from_spec(d2.attribute) dimension = d1 @@ -380,12 +370,10 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]): ValueError Ensures no more than one attribute is placed in the same channel. """ - result_dict = ( - {} - ) # result of enforcing specified channel will be stored in result_dict - specified_dict = ( - {} - ) # specified_dict={"x":[],"y":[list of Dobj with y specified as channel]} + # result of enforcing specified channel will be stored in result_dict + result_dict = {} + # specified_dict={"x":[],"y":[list of Dobj with y specified as channel]} + specified_dict = {} # create a dictionary of specified channels in the given dobj for val in auto_channel.keys(): specified_dict[val] = vis.get_attr_by_channel(val) @@ -395,9 +383,10 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]): if len(sAttr) == 1: # if specified in dobj # remove the specified channel from auto_channel (matching by value, since channel key may not be same) for i in list(auto_channel.keys()): + # need to ensure that the channel is the same (edge case when duplicate Cols with same attribute name) if (auto_channel[i].attribute == sAttr[0].attribute) and ( auto_channel[i].channel == sVal - ): # need to ensure that the channel is the same (edge case when duplicate Cols with same attribute name) + ): auto_channel.pop(i) break sAttr[0].channel = sVal diff --git a/lux/processor/Parser.py 
b/lux/processor/Parser.py index c1852021..2e205704 100644 --- a/lux/processor/Parser.py +++ b/lux/processor/Parser.py @@ -54,9 +54,8 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]: if isinstance(clause, list): valid_values = [] for v in clause: - if ( - type(v) is str - ): # and v in list(ldf.columns): #TODO: Move validation check to Validator + # and v in list(ldf.columns): #TODO: Move validation check to Validator + if type(v) is str: valid_values.append(v) temp_spec = Clause(attribute=valid_values) new_context.append(temp_spec) @@ -95,9 +94,8 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]: if clause.description: # TODO: Move validation check to Validator # if ((clause.description in list(ldf.columns)) or clause.description == "?"):# if clause.description in the list of attributes - if any( - ext in [">", "<", "=", "!="] for ext in clause.description - ): # clause.description contain ">","<". or "=" + # clause.description contain ">","<". or "=" + if any(ext in [">", "<", "=", "!="] for ext in clause.description): # then parse it and assign to clause.attribute, clause.filter_op, clause.values clause.filter_op = re.findall( r"/.*/|>|=|<|>=|<=|!=", clause.description diff --git a/lux/processor/Validator.py b/lux/processor/Validator.py index 688a5f05..f01e7d42 100644 --- a/lux/processor/Validator.py +++ b/lux/processor/Validator.py @@ -85,9 +85,8 @@ def validate_clause(clause): else: vals = [clause.value] for val in vals: - if ( - val not in series.values - ): # (not series.str.contains(val).any()): + # (not series.str.contains(val).any()): + if val not in series.values: warnings.warn( f"The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame." 
) diff --git a/lux/utils/date_utils.py b/lux/utils/date_utils.py index eb067ea6..817e1ea8 100644 --- a/lux/utils/date_utils.py +++ b/lux/utils/date_utils.py @@ -40,9 +40,9 @@ def date_formatter(time_stamp, ldf): """ datetime = pd.to_datetime(time_stamp) if ldf.data_type["temporal"]: - date_column = ldf[ - ldf.data_type["temporal"][0] - ] # assumes only one temporal column, may need to change this function to recieve multiple temporal columns in the future + # assumes only one temporal column, may need to change this function to recieve multiple temporal columns in the future + date_column = ldf[ldf.data_type["temporal"][0]] + granularity = compute_date_granularity(date_column) date_str = "" if granularity == "year": @@ -80,16 +80,12 @@ def compute_date_granularity(date_column: pd.core.series.Series): field: str A str specifying the granularity of dates for the inspected temporal column """ - date_fields = [ - "day", - "month", - "year", - ] # supporting a limited set of Vega-Lite TimeUnit (https://vega.github.io/vega-lite/docs/timeunit.html) + # supporting a limited set of Vega-Lite TimeUnit (https://vega.github.io/vega-lite/docs/timeunit.html) + date_fields = ["day", "month", "year"] date_index = pd.DatetimeIndex(date_column) for field in date_fields: - if ( - hasattr(date_index, field) and len(getattr(date_index, field).unique()) != 1 - ): # can be changed to sum(getattr(date_index, field)) != 0 + # can be changed to sum(getattr(date_index, field)) != 0 + if hasattr(date_index, field) and len(getattr(date_index, field).unique()) != 1: return field return "year" # if none, then return year by default diff --git a/lux/utils/utils.py b/lux/utils/utils.py index 148509db..0c246597 100644 --- a/lux/utils/utils.py +++ b/lux/utils/utils.py @@ -69,9 +69,8 @@ def check_if_id_like(df, attribute): import re # Strong signals - high_cardinality = ( - df.cardinality[attribute] > 500 - ) # so that aggregated reset_index fields don't get misclassified + # so that aggregated 
reset_index fields don't get misclassified + high_cardinality = df.cardinality[attribute] > 500 attribute_contain_id = re.search(r"id", str(attribute)) is not None almost_all_vals_unique = df.cardinality[attribute] >= 0.98 * len(df) is_string = pd.api.types.is_string_dtype(df[attribute]) diff --git a/lux/vis/Vis.py b/lux/vis/Vis.py index ca73346c..a7883068 100644 --- a/lux/vis/Vis.py +++ b/lux/vis/Vis.py @@ -322,15 +322,12 @@ def refresh_source(self, ldf): # -> Vis: from lux.processor.Parser import Parser from lux.processor.Validator import Validator from lux.processor.Compiler import Compiler - from lux.executor.PandasExecutor import ( - PandasExecutor, - ) # TODO: temporary (generalize to executor) + from lux.executor.PandasExecutor import PandasExecutor + + # TODO: temporary (generalize to executor) -<<<<<<< HEAD -======= self.check_not_vislist_intent() ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 ldf.maintain_metadata() self._source = ldf self._inferred_intent = Parser.parse(self._intent) @@ -345,8 +342,6 @@ def refresh_source(self, ldf): # -> Vis: self._inferred_intent = vis._inferred_intent self._vis_data = vis.data self._min_max = vis._min_max -<<<<<<< HEAD -======= def check_not_vislist_intent(self): if len(self._intent) > 2 or "?" in self._intent: @@ -359,4 +354,3 @@ def check_not_vislist_intent(self): "The intent that you specified corresponds to more than one visualization. Please replace the Vis constructor with VisList to generate a list of visualizations. 
" + "For more information, see: https://lux-api.readthedocs.io/en/latest/source/guide/vis.html#working-with-collections-of-visualization-with-vislist" ) ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 diff --git a/lux/vis/VisList.py b/lux/vis/VisList.py index 0f15252a..5fcfbd4d 100644 --- a/lux/vis/VisList.py +++ b/lux/vis/VisList.py @@ -63,19 +63,6 @@ def set_intent(self, intent: List[Clause]) -> None: @property def exported(self) -> VisList: """ -<<<<<<< HEAD - Get selected visualizations as exported Vis List - Notes - ----- - Convert the _selectedVisIdxs dictionary into a programmable VisList - Example _selectedVisIdxs : - {'Vis List': [0, 2]} - - Returns - ------- - VisList - return a VisList of selected visualizations. -> VisList(v1, v2...) -======= Get selected visualizations as exported Vis List Notes @@ -88,7 +75,6 @@ def exported(self) -> VisList: ------- VisList return a VisList of selected visualizations. -> VisList(v1, v2...) ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 """ if not hasattr(self, "widget"): warnings.warn( @@ -143,9 +129,8 @@ def __repr__(self): y_channel = "" largest_mark = 0 largest_filter = 0 - for ( - vis - ) in self._collection: # finds longest x attribute among all visualizations + # finds longest x attribute among all visualizations + for vis in self._collection: filter_intents = None for clause in vis._inferred_intent: if clause.value != "": @@ -177,11 +162,8 @@ def __repr__(self): vis_repr = [] largest_x_length = len(x_channel) largest_y_length = len(y_channel) - for ( - vis - ) in ( - self._collection - ): # pads the shorter visualizations with spaces before the y attribute + # pads the shorter visualizations with spaces before the y attribute + for vis in self._collection: filter_intents = None x_channel = "" y_channel = "" diff --git a/lux/vislib/altair/BarChart.py b/lux/vislib/altair/BarChart.py index 561a23d4..5b7ecb57 100644 --- a/lux/vislib/altair/BarChart.py +++ b/lux/vislib/altair/BarChart.py @@ -120,14 +120,12 @@ 
def add_text(self): self.chart = self.chart + self.text self.code += self._topkcode - def encode_color( - self, - ): # override encode_color in AltairChart to enforce add_text occurs afterwards + # override encode_color in AltairChart to enforce add_text occurs afterwards + def encode_color(self): AltairChart.encode_color(self) self.add_text() - self.chart = self.chart.configure_mark( - tooltip=alt.TooltipContent("encoding") - ) # Setting tooltip as non-null + # Setting tooltip as non-null + self.chart = self.chart.configure_mark(tooltip=alt.TooltipContent("encoding")) self.code += ( f"""chart = chart.configure_mark(tooltip=alt.TooltipContent('encoding'))""" ) diff --git a/lux/vislib/altair/Heatmap.py b/lux/vislib/altair/Heatmap.py index 56ae7276..87c97b13 100644 --- a/lux/vislib/altair/Heatmap.py +++ b/lux/vislib/altair/Heatmap.py @@ -66,9 +66,8 @@ def initialize_chart(self): ) ) chart = chart.configure_scale(minOpacity=0.1, maxOpacity=1) - chart = chart.configure_mark( - tooltip=alt.TooltipContent("encoding") - ) # Setting tooltip as non-null + # Setting tooltip as non-null + chart = chart.configure_mark(tooltip=alt.TooltipContent("encoding")) chart = chart.interactive() # Enable Zooming and Panning #################################### diff --git a/lux/vislib/altair/ScatterChart.py b/lux/vislib/altair/ScatterChart.py index a6463041..583291d0 100644 --- a/lux/vislib/altair/ScatterChart.py +++ b/lux/vislib/altair/ScatterChart.py @@ -59,9 +59,8 @@ def initialize_chart(self): ), ) ) - chart = chart.configure_mark( - tooltip=alt.TooltipContent("encoding") - ) # Setting tooltip as non-null + # Setting tooltip as non-null + chart = chart.configure_mark(tooltip=alt.TooltipContent("encoding")) chart = chart.interactive() # Enable Zooming and Panning ##################################### diff --git a/tests/test_compiler.py b/tests/test_compiler.py index e2079ab6..760c742c 100644 --- a/tests/test_compiler.py +++ b/tests/test_compiler.py @@ -156,9 +156,8 @@ def 
test_sort_bar(): def test_specified_vis_collection(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") vlst = VisList( [ @@ -188,9 +187,8 @@ def test_specified_vis_collection(): def test_specified_channel_enforced_vis_collection(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") visList = VisList( [lux.Clause(attribute="?"), lux.Clause(attribute="MilesPerGal", channel="x")], df, @@ -202,9 +200,8 @@ def test_specified_channel_enforced_vis_collection(): def test_autoencoding_scatter(): # No channel specified df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") vis = Vis([lux.Clause(attribute="MilesPerGal"), lux.Clause(attribute="Weight")], df) check_attribute_on_channel(vis, "MilesPerGal", "x") check_attribute_on_channel(vis, "Weight", "y") @@ -244,9 +241,8 @@ def test_autoencoding_scatter(): def test_autoencoding_histogram(): # No channel specified df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") vis = Vis([lux.Clause(attribute="MilesPerGal", channel="y")], df) check_attribute_on_channel(vis, "MilesPerGal", "y") @@ -257,9 +253,8 @@ def test_autoencoding_histogram(): def test_autoencoding_line_chart(): df = 
pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") vis = Vis([lux.Clause(attribute="Year"), lux.Clause(attribute="Acceleration")], df) check_attribute_on_channel(vis, "Year", "x") check_attribute_on_channel(vis, "Acceleration", "y") @@ -298,9 +293,8 @@ def test_autoencoding_line_chart(): def test_autoencoding_color_line_chart(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") intent = [ lux.Clause(attribute="Year"), lux.Clause(attribute="Acceleration"), @@ -314,9 +308,8 @@ def test_autoencoding_color_line_chart(): def test_autoencoding_color_scatter_chart(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") vis = Vis( [ lux.Clause(attribute="Horsepower"), diff --git a/tests/test_dates.py b/tests/test_dates.py index 28cdcc3a..4b87f7a6 100644 --- a/tests/test_dates.py +++ b/tests/test_dates.py @@ -22,9 +22,8 @@ def test_dateformatter(): ldf = pd.read_csv("lux/data/car.csv") - ldf["Year"] = pd.to_datetime( - ldf["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + ldf["Year"] = pd.to_datetime(ldf["Year"], format="%Y") timestamp = np.datetime64("2019-08-26") ldf.maintain_metadata() assert date_utils.date_formatter(timestamp, ldf) == "2019" @@ -93,11 +92,7 @@ def test_period_to_altair(): exported_code = df.recommendation["Filter"][2].to_Altair() 
-<<<<<<< HEAD - assert "Year = 1971" in exported_code -======= assert "Year = 1972" in exported_code ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 def test_refresh_inplace(): diff --git a/tests/test_error_warning.py b/tests/test_error_warning.py index 238b7549..d5fe49ff 100644 --- a/tests/test_error_warning.py +++ b/tests/test_error_warning.py @@ -36,8 +36,6 @@ def test_bad_filter(): df[df["Region"] == "asdfgh"]._repr_html_() -<<<<<<< HEAD -======= def test_multi_vis(): df = pd.read_csv("lux/data/college.csv") with pytest.raises( @@ -61,7 +59,6 @@ def test_multi_vis(): )._repr_html_() ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 # Test Properties with Private Variables Readable but not Writable def test_vis_private_properties(): from lux.vis.Vis import Vis diff --git a/tests/test_executor.py b/tests/test_executor.py index 2dababb0..d1a18a6b 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -35,9 +35,8 @@ def test_lazy_execution(): def test_selection(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") intent = [ lux.Clause(attribute=["Horsepower", "Weight", "Acceleration"]), lux.Clause(attribute="Year"), @@ -102,9 +101,8 @@ def test_colored_line_chart(): from lux.vis.Vis import Clause df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") x_clause = Clause(attribute="Year", channel="x") y_clause = Clause(attribute="MilesPerGal", channel="y") @@ -116,19 +114,19 @@ def test_colored_line_chart(): color_cardinality = len(df.unique_values["Cylinders"]) group_by_cardinality = len(df.unique_values["Year"]) assert 
len(new_vis.data.columns) == 3 + # Not color_cardinality*group_by_cardinality since some combinations have 0 values assert ( len(new_vis.data) == 60 > group_by_cardinality < color_cardinality * group_by_cardinality - ) # Not color_cardinality*group_by_cardinality since some combinations have 0 values + ) def test_filter(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") intent = [ lux.Clause(attribute="Horsepower"), lux.Clause(attribute="Year"), diff --git a/tests/test_interestingness.py b/tests/test_interestingness.py index 48d04961..a42766ce 100644 --- a/tests/test_interestingness.py +++ b/tests/test_interestingness.py @@ -51,15 +51,9 @@ def test_interestingness_1_0_0(): if int(vis._inferred_intent[2].value) == 8: rank1 = f if int(vis._inferred_intent[2].value) == 6: -<<<<<<< HEAD - rank2 = f - if "1972" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): - rank3 = f -======= rank3 = f if "ford" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): rank2 = f ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3 @@ -118,11 +112,7 @@ def test_interestingness_0_1_0(): rank1 = f if str(df.recommendation["Filter"][f]._inferred_intent[2].value) == "Europe": rank2 = f -<<<<<<< HEAD - if "1971" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): -======= if "1970" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 rank3 = f assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3 @@ -161,14 +151,7 @@ def test_interestingness_1_1_0(): if len(vis.get_attr_by_attr_name("Cylinders")) > 0: if int(vis._inferred_intent[2].value) == 6: rank1 = f -<<<<<<< HEAD - if int(vis._inferred_intent[2].value) == 5: - 
rank3 = f - if len(vis.get_attr_by_attr_name("Origin")) > 0: - if str(vis._inferred_intent[2].value) == "Europe": -======= if int(vis._inferred_intent[2].value) == 8: ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 rank2 = f if len(vis.get_attr_by_attr_name("Origin")) > 0: if str(vis._inferred_intent[2].value) == "Europe": @@ -278,15 +261,9 @@ def test_interestingness_0_2_0(): for f in range(0, len(df.recommendation["Filter"])): if "1973" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): rank1 = f -<<<<<<< HEAD - if "1976" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): - rank2 = f - if str(df.recommendation["Filter"][f]._inferred_intent[2].value) == "Europe": -======= if "ford" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): rank2 = f if str(df.recommendation["Filter"][f]._inferred_intent[2].value) == "USA": ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 rank3 = f assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3 diff --git a/tests/test_maintainence.py b/tests/test_maintainence.py index adb3697e..35f4ec71 100644 --- a/tests/test_maintainence.py +++ b/tests/test_maintainence.py @@ -62,7 +62,8 @@ def test_metadata_column_group_reset_df(): assert hasattr(df, "_metadata_fresh") result = df.groupby("Cylinders").mean() assert not hasattr(result, "_metadata_fresh") - result._repr_html_() # Note that this should trigger two compute metadata (one for df, and one for an intermediate df.reset_index used to feed inside created Vis) + # Note that this should trigger two compute metadata (one for df, and one for an intermediate df.reset_index used to feed inside created Vis) + result._repr_html_() assert ( result._metadata_fresh == True ), "Failed to maintain metadata after display df" @@ -77,20 +78,12 @@ def test_recs_inplace_operation(): df = pd.read_csv("lux/data/car.csv") df._repr_html_() assert df._recs_fresh == True, "Failed to maintain recommendation after display df" -<<<<<<< HEAD - assert 
len(df.recommendation["Occurrence"]) == 3 -======= assert len(df.recommendation["Occurrence"]) == 4 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 df.drop(columns=["Name"], inplace=True) assert "Name" not in df.columns, "Failed to perform `drop` operation in-place" assert ( df._recs_fresh == False ), "Failed to maintain recommendation after in-place Pandas operation" df._repr_html_() -<<<<<<< HEAD - assert len(df.recommendation["Occurrence"]) == 2 -======= assert len(df.recommendation["Occurrence"]) == 3 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 assert df._recs_fresh == True, "Failed to maintain recommendation after display df" diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index 561d086a..ad5008de 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -38,10 +38,8 @@ def test_rename_inplace(): df.rename(columns={"Name": "Car Name"}, inplace=True) df._repr_html_() new_df._repr_html_() - new_df, df = ( - df, - new_df, - ) # new_df is the old dataframe (df) with the new column name changed inplace + # new_df is the old dataframe (df) with the new column name changed inplace + new_df, df = df, new_df assert df.data_type_lookup != new_df.data_type_lookup @@ -111,10 +109,7 @@ def test_rename3(): "col7", "col8", "col9", -<<<<<<< HEAD -======= "col10", ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 ] df._repr_html_() assert list(df.recommendation.keys()) == [ @@ -123,11 +118,7 @@ def test_rename3(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(df.cardinality) == 9 -======= assert len(df.cardinality) == 10 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 assert "col2" in list(df.cardinality.keys()) @@ -202,13 +193,8 @@ def test_query(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(new_df.cardinality) == 9 -======= assert len(new_df.cardinality) == 10 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_pop(): df = pd.read_csv("lux/data/car.csv") @@ -221,13 +207,8 @@ def 
test_pop(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(df.cardinality) == 8 -======= assert len(df.cardinality) == 9 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_transform(): df = pd.read_csv("lux/data/car.csv") @@ -235,13 +216,8 @@ def test_transform(): new_df = df.iloc[:, 1:].groupby("Origin").transform(sum) new_df._repr_html_() assert list(new_df.recommendation.keys()) == ["Correlation", "Occurrence"] -<<<<<<< HEAD - assert len(new_df.cardinality) == 6 -======= assert len(new_df.cardinality) == 7 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_get_group(): df = pd.read_csv("lux/data/car.csv") @@ -255,13 +231,8 @@ def test_get_group(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(new_df.cardinality) == 9 -======= assert len(new_df.cardinality) == 10 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_applymap(): df = pd.read_csv("lux/data/car.csv") @@ -275,22 +246,11 @@ def test_applymap(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(df.cardinality) == 9 -======= assert len(df.cardinality) == 10 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_strcat(): -<<<<<<< HEAD - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true" - ) -======= df = pd.read_csv("lux/data/car.csv") ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 df["Year"] = pd.to_datetime(df["Year"], format="%Y") df["combined"] = df["Origin"].str.cat(df["Brand"], sep=", ") df._repr_html_() @@ -304,13 +264,7 @@ def test_strcat(): def test_named_agg(): -<<<<<<< HEAD - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true" - ) -======= df = pd.read_csv("lux/data/car.csv") ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.groupby("Brand").agg( avg_weight=("Weight", "mean"), @@ -333,13 +287,8 @@ def test_change_dtype(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert 
len(df.data_type_lookup) == 9 -======= assert len(df.data_type_lookup) == 10 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_get_dummies(): df = pd.read_csv("lux/data/car.csv") @@ -352,13 +301,8 @@ def test_get_dummies(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(new_df.data_type_lookup) == 310 -======= assert len(new_df.data_type_lookup) == 339 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_drop(): df = pd.read_csv("lux/data/car.csv") @@ -372,13 +316,8 @@ def test_drop(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(new_df2.cardinality) == 6 -======= assert len(new_df2.cardinality) == 7 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_merge(): df = pd.read_csv("lux/data/car.csv") @@ -392,13 +331,8 @@ def test_merge(): "Occurrence", "Temporal", ] # TODO once bug is fixed -<<<<<<< HEAD - assert len(new_df2.cardinality) == 10 -======= assert len(new_df2.cardinality) == 11 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_prefix(): df = pd.read_csv("lux/data/car.csv") @@ -411,22 +345,12 @@ def test_prefix(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(new_df.cardinality) == 9 -======= assert len(new_df.cardinality) == 10 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 assert new_df.cardinality["1_Name"] == 300 def test_loc(): -<<<<<<< HEAD - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true" - ) -======= df = pd.read_csv("lux/data/car.csv") ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.loc[:, "Displacement":"Origin"] new_df._repr_html_() @@ -457,13 +381,7 @@ def test_loc(): def test_iloc(): -<<<<<<< HEAD - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true" - ) -======= df = pd.read_csv("lux/data/car.csv") ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.iloc[:, 
3:9] new_df._repr_html_() @@ -636,12 +554,7 @@ def test_value_counts(): def test_str_replace(): -<<<<<<< HEAD - url = "https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true" - df = pd.read_csv(url) -======= df = pd.read_csv("lux/data/car.csv") ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 df._repr_html_() # compute metadata assert df.cardinality is not None series = df["Brand"].str.replace("chevrolet", "chevy") diff --git a/tests/test_vis.py b/tests/test_vis.py index 0f6f9eec..ff3b6f63 100644 --- a/tests/test_vis.py +++ b/tests/test_vis.py @@ -49,9 +49,8 @@ def test_vis_collection(): filter(lambda x: x.get_attr_by_attr_name("Year") != [], vlist) )[0] assert vis_with_year.get_attr_by_channel("x")[0].attribute == "Year" - assert ( - len(vlist) == len(df.columns) - 1 - 1 - ) # remove 1 for vis with same filter attribute and remove 1 vis with for same attribute + # remove 1 for vis with same filter attribute and remove 1 vis with for same attribute + assert len(vlist) == len(df.columns) - 1 - 1 vlist = VisList(["Height", "?"], df) assert len(vlist) == len(df.columns) - 1 # remove 1 for vis with for same attribute @@ -141,9 +140,8 @@ def test_vis_collection_via_list_of_vis(): "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" ) df = pd.read_csv(url) - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") from lux.vis.VisList import VisList from lux.vis.Vis import Vis From c8f2db54c3a984f5e2b60094b77047d53ffac2fa Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 13:26:36 -0800 Subject: [PATCH 10/39] _config/config.py --- lux/_config/config.py | 4 +- test.ipynb | 258 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 259 insertions(+), 3 deletions(-) create mode 100644 test.ipynb diff --git a/lux/_config/config.py 
b/lux/_config/config.py index 0c1e967f..809d89b1 100644 --- a/lux/_config/config.py +++ b/lux/_config/config.py @@ -115,9 +115,7 @@ def register_action( update_actions["flag"] = True -def remove_action( - name: str = "", -) -> None: +def remove_action(name: str = "") -> None: """ Removes the provided action globally in lux diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 00000000..27184612 --- /dev/null +++ b/test.ipynb @@ -0,0 +1,258 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import lux" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ujjaini/anaconda3/lib/python3.7/site-packages/IPython/core/formatters.py:345: UserWarning: \n", + "Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.\n", + "\n", + "Please report this issue on Github: https://github.com/lux-org/lux/issues \n", + " return method()\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame([])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(0, 0)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ujjaini/anaconda3/lib/python3.7/site-packages/IPython/core/formatters.py:345: UserWarning: \n", + "Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.\n", + "\n", + "Please report this issue on Github: https://github.com/lux-org/lux/issues \n", + " return method()\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(df.shape)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "df.intent = []" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ujjaini/anaconda3/lib/python3.7/site-packages/IPython/core/formatters.py:345: UserWarning: \n", + "Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.\n", + "\n", + "Please report this issue on Github: https://github.com/lux-org/lux/issues \n", + " return method()\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 4a361e151299f5c9587e89f8de477ae0e5793d23 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 13:27:20 -0800 Subject: [PATCH 11/39] delete test notebook --- test.ipynb | 258 ----------------------------------------------------- 1 file changed, 258 deletions(-) delete mode 100644 test.ipynb diff --git a/test.ipynb b/test.ipynb deleted file mode 100644 index 27184612..00000000 --- a/test.ipynb +++ /dev/null @@ -1,258 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import lux" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/ujjaini/anaconda3/lib/python3.7/site-packages/IPython/core/formatters.py:345: UserWarning: \n", - "Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.\n", - "\n", - "Please report this issue on Github: https://github.com/lux-org/lux/issues \n", - " return method()\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.DataFrame([])\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(0, 0)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/ujjaini/anaconda3/lib/python3.7/site-packages/IPython/core/formatters.py:345: UserWarning: \n", - "Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.\n", - "\n", - "Please report this issue on Github: https://github.com/lux-org/lux/issues \n", - " return method()\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print(df.shape)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "df.intent = []" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/ujjaini/anaconda3/lib/python3.7/site-packages/IPython/core/formatters.py:345: UserWarning: \n", - "Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.\n", - "\n", - "Please report this issue on Github: https://github.com/lux-org/lux/issues \n", - " return method()\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 3aceabcbe4f15263f828d34d419c5cc2c2adfbfc Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 13:39:28 -0800 Subject: [PATCH 12/39] action --- lux/action/column_group.py | 4 +--- lux/action/correlation.py | 3 +-- lux/action/custom.py | 8 ++------ lux/action/enhance.py | 8 ++------ lux/action/filter.py | 4 +--- lux/action/generalize.py | 8 ++------ lux/action/similarity.py | 10 ++-------- 7 files changed, 11 insertions(+), 34 deletions(-) diff --git a/lux/action/column_group.py b/lux/action/column_group.py index 049da68a..29d33d92 100644 --- a/lux/action/column_group.py +++ b/lux/action/column_group.py @@ -51,9 +51,7 @@ def column_group(ldf): data_model="dimension", aggregation=None, ), - lux.Clause( - str(attribute), data_type="quantitative", aggregation=None - ), + lux.Clause(str(attribute), data_type="quantitative", aggregation=None), ] ) collection.append(vis) diff --git a/lux/action/correlation.py b/lux/action/correlation.py index 1a999e48..53cc8540 100644 --- a/lux/action/correlation.py +++ b/lux/action/correlation.py @@ -85,8 +85,7 @@ def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True): def 
check_transpose_not_computed(vlist: VisList, a: str, b: str): transpose_exist = list( filter( - lambda x: (x._inferred_intent[0].attribute == b) - and (x._inferred_intent[1].attribute == a), + lambda x: (x._inferred_intent[0].attribute == b) and (x._inferred_intent[1].attribute == a), vlist, ) ) diff --git a/lux/action/custom.py b/lux/action/custom.py index c709d34b..72ece683 100644 --- a/lux/action/custom.py +++ b/lux/action/custom.py @@ -67,14 +67,10 @@ def custom_actions(ldf): recommendations = [] for action_name in lux.actions.__dir__(): display_condition = lux.actions.__getattr__(action_name).display_condition - if display_condition is None or ( - display_condition is not None and display_condition(ldf) - ): + if display_condition is None or (display_condition is not None and display_condition(ldf)): args = lux.actions.__getattr__(action_name).args if args: - recommendation = lux.actions.__getattr__(action_name).action( - ldf, args - ) + recommendation = lux.actions.__getattr__(action_name).action(ldf, args) else: recommendation = lux.actions.__getattr__(action_name).action(ldf) recommendations.append(recommendation) diff --git a/lux/action/enhance.py b/lux/action/enhance.py index fb889b11..a74bd452 100644 --- a/lux/action/enhance.py +++ b/lux/action/enhance.py @@ -35,14 +35,10 @@ def enhance(ldf): filters = utils.get_filter_specs(ldf._intent) # Collect variables that already exist in the intent - attr_specs = list( - filter(lambda x: x.value == "" and x.attribute != "Record", ldf._intent) - ) + attr_specs = list(filter(lambda x: x.value == "" and x.attribute != "Record", ldf._intent)) fltr_str = [fltr.attribute + fltr.filter_op + str(fltr.value) for fltr in filters] attr_str = [clause.attribute for clause in attr_specs] - intended_attrs = ( - '

' + ", ".join(attr_str + fltr_str) + "

" - ) + intended_attrs = '

' + ", ".join(attr_str + fltr_str) + "

" if len(attr_specs) == 1: recommendation = { "action": "Enhance", diff --git a/lux/action/filter.py b/lux/action/filter.py index 0f2c6037..891ad909 100644 --- a/lux/action/filter.py +++ b/lux/action/filter.py @@ -108,9 +108,7 @@ def get_complementary_ops(fltr_op): unique_values = ldf.unique_values[cat] for i in range(0, len(unique_values)): new_spec = column_spec.copy() - new_filter = lux.Clause( - attribute=cat, filter_op="=", value=unique_values[i] - ) + new_filter = lux.Clause(attribute=cat, filter_op="=", value=unique_values[i]) new_spec.append(new_filter) temp_vis = Vis(new_spec) output.append(temp_vis) diff --git a/lux/action/generalize.py b/lux/action/generalize.py index c6096cc0..d95bcb26 100644 --- a/lux/action/generalize.py +++ b/lux/action/generalize.py @@ -38,16 +38,12 @@ def generalize(ldf): output = [] excluded_columns = [] - attributes = list( - filter(lambda x: x.value == "" and x.attribute != "Record", ldf._intent) - ) + attributes = list(filter(lambda x: x.value == "" and x.attribute != "Record", ldf._intent)) filters = utils.get_filter_specs(ldf._intent) fltr_str = [fltr.attribute + fltr.filter_op + str(fltr.value) for fltr in filters] attr_str = [clause.attribute for clause in attributes] - intended_attrs = ( - '

' + ", ".join(attr_str + fltr_str) + "

" - ) + intended_attrs = '

' + ", ".join(attr_str + fltr_str) + "

" recommendation = { "action": "Generalize", diff --git a/lux/action/similarity.py b/lux/action/similarity.py index c9871cbc..174a4d43 100644 --- a/lux/action/similarity.py +++ b/lux/action/similarity.py @@ -80,12 +80,7 @@ def aggregate(vis): xAxis = vis.get_attr_by_channel("x")[0].attribute yAxis = vis.get_attr_by_channel("y")[0].attribute - vis.data = ( - vis.data[[xAxis, yAxis]] - .groupby(xAxis, as_index=False) - .agg({yAxis: "mean"}) - .copy() - ) + vis.data = vis.data[[xAxis, yAxis]].groupby(xAxis, as_index=False).agg({yAxis: "mean"}).copy() def interpolate(vis, length): @@ -133,8 +128,7 @@ def interpolate(vis, length): x_diff = xVals[count] - xVals[count - 1] yDiff = yVals[count] - yVals[count - 1] interpolated_y_vals[i] = ( - yVals[count - 1] - + (interpolated_x - xVals[count - 1]) / x_diff * yDiff + yVals[count - 1] + (interpolated_x - xVals[count - 1]) / x_diff * yDiff ) vis.data = pd.DataFrame( list(zip(interpolated_x_vals, interpolated_y_vals)), From 1e7e03b2a84b2fea1bca4de1ed616d354cc68139 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 13:49:45 -0800 Subject: [PATCH 13/39] line length 105 --- lux/core/frame.py | 53 ++++++--------------- lux/executor/PandasExecutor.py | 57 ++++++----------------- lux/executor/SQLExecutor.py | 64 ++++++++++---------------- lux/interestingness/interestingness.py | 28 +++-------- lux/processor/Compiler.py | 28 +++-------- lux/processor/Parser.py | 4 +- lux/processor/Validator.py | 13 ++---- lux/utils/date_utils.py | 8 +--- lux/utils/message.py | 4 +- lux/utils/utils.py | 4 +- lux/vis/Vis.py | 31 ++++--------- lux/vis/VisList.py | 33 ++++--------- lux/vislib/altair/AltairChart.py | 22 ++++----- lux/vislib/altair/AltairRenderer.py | 12 ++--- lux/vislib/altair/BarChart.py | 20 ++++---- lux/vislib/altair/Histogram.py | 5 +- lux/vislib/altair/LineChart.py | 20 ++++---- tests/test_action.py | 11 ++--- tests/test_compiler.py | 8 +--- tests/test_dates.py | 20 ++------ tests/test_error_warning.py | 4 +- 
tests/test_executor.py | 31 +++---------- tests/test_interestingness.py | 25 ++++------ tests/test_maintainence.py | 12 ++--- tests/test_pandas_coverage.py | 52 ++++++--------------- tests/test_type.py | 14 ++---- tests/test_vis.py | 39 ++++------------ 27 files changed, 179 insertions(+), 443 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 1dfa0d04..080c0294 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -102,12 +102,10 @@ def history(self): return self._history def maintain_metadata(self): - if ( - not hasattr(self, "_metadata_fresh") or not self._metadata_fresh - ): # Check that metadata has not yet been computed - if ( - len(self) > 0 - ): # only compute metadata information if the dataframe is non-empty + # Check that metadata has not yet been computed + if not hasattr(self, "_metadata_fresh") or not self._metadata_fresh: + # only compute metadata information if the dataframe is non-empty + if len(self) > 0: self.executor.compute_stats(self) self.executor.compute_dataset_metadata(self) self._infer_structure() @@ -162,9 +160,7 @@ def _infer_structure(self): is_multi_index_flag = self.index.nlevels != 1 not_int_index_flag = self.index.dtype != "int64" small_df_flag = len(self) < 100 - self.pre_aggregated = ( - is_multi_index_flag or not_int_index_flag - ) and small_df_flag + self.pre_aggregated = (is_multi_index_flag or not_int_index_flag) and small_df_flag if "Number of Records" in self.columns: self.pre_aggregated = True very_small_df_flag = len(self) <= 10 @@ -408,9 +404,7 @@ def compute_SQL_data_type(self): datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format( table_name, attr ) - datatype = list( - pd.read_sql(datatype_query, self.SQLconnection)["data_type"] - )[0] + datatype = list(pd.read_sql(datatype_query, self.SQLconnection)["data_type"])[0] sql_dtypes[attr] = datatype data_type = {"quantitative": [], "nominal": [], "temporal": []} @@ -447,10 +441,7 @@ 
def compute_SQL_data_type(self): self.data_type = data_type def _append_rec(self, rec_infolist, recommendations: Dict): - if ( - recommendations["collection"] is not None - and len(recommendations["collection"]) > 0 - ): + if recommendations["collection"] is not None and len(recommendations["collection"]) > 0: rec_infolist.append(recommendations) def maintain_recs(self): @@ -477,9 +468,7 @@ def maintain_recs(self): for id_field in rec_df.data_type["id"]: id_fields_str += f"{id_field}, " id_fields_str = id_fields_str[:-2] - rec_df._message.add( - f"{id_fields_str} is not visualized since it resembles an ID field." - ) + rec_df._message.add(f"{id_fields_str} is not visualized since it resembles an ID field.") rec_df._prev = None # reset _prev # Check that recs has not yet been computed @@ -506,19 +495,15 @@ def maintain_recs(self): ldf.current_vis is not None and len(ldf.current_vis) == 0 ) one_current_vis = ( - lambda ldf: ldf.current_vis is not None - and len(ldf.current_vis) == 1 + lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) == 1 ) multiple_current_vis = ( - lambda ldf: ldf.current_vis is not None - and len(ldf.current_vis) > 1 + lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) > 1 ) # globally register default actions lux.register_action("correlation", correlation, no_vis) - lux.register_action( - "distribution", univariate, no_vis, "quantitative" - ) + lux.register_action("distribution", univariate, no_vis, "quantitative") lux.register_action("occurrence", univariate, no_vis, "nominal") lux.register_action("temporal", univariate, no_vis, "temporal") @@ -649,9 +634,7 @@ def set_intent_on_click(self, change): from lux.processor.Compiler import Compiler intent_action = list(self._widget.selectedIntentIndex.keys())[0] - vis = self.recommendation[intent_action][ - self._widget.selectedIntentIndex[intent_action][0] - ] + vis = self.recommendation[intent_action][self._widget.selectedIntentIndex[intent_action][0]] 
self.set_intent_as_vis(vis) self.maintain_metadata() @@ -702,9 +685,7 @@ def _repr_html_(self): return self.maintain_metadata() - if self._intent != [] and ( - not hasattr(self, "_compiled") or not self._compiled - ): + if self._intent != [] and (not hasattr(self, "_compiled") or not self._compiled): from lux.processor.Compiler import Compiler self.current_vis = Compiler.compile_intent(self, self._intent) @@ -719,9 +700,7 @@ def _repr_html_(self): # Observers(callback_function, listen_to_this_variable) self._widget.observe(self.remove_deleted_recs, names="deletedIndices") - self._widget.observe( - self.set_intent_on_click, names="selectedIntentIndex" - ) + self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") if len(self.recommendation) > 0: # box = widgets.Box(layout=widgets.Layout(display='inline')) @@ -738,9 +717,7 @@ def _repr_html_(self): def on_button_clicked(b): with self.output: if b: - self._toggle_pandas_display = ( - not self._toggle_pandas_display - ) + self._toggle_pandas_display = not self._toggle_pandas_display clear_output() if self._toggle_pandas_display: display(self.display_pandas()) diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index d168cdd9..97bed87c 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -162,21 +162,15 @@ def execute_aggregate(vis: Vis, isFiltered=True): .reset_index() ) vis._vis_data = vis.data.rename(columns={"index": "Record"}) - vis._vis_data = vis.data[ - [groupby_attr.attribute, color_attr.attribute, "Record"] - ] + vis._vis_data = vis.data[[groupby_attr.attribute, color_attr.attribute, "Record"]] else: - vis._vis_data = ( - vis.data.groupby(groupby_attr.attribute).count().reset_index() - ) + vis._vis_data = vis.data.groupby(groupby_attr.attribute).count().reset_index() vis._vis_data = vis.data.rename(columns={"index": "Record"}) vis._vis_data = vis.data[[groupby_attr.attribute, "Record"]] else: # if color is specified, need to group by 
groupby_attr and color_attr if has_color: - groupby_result = vis.data.groupby( - [groupby_attr.attribute, color_attr.attribute] - ) + groupby_result = vis.data.groupby([groupby_attr.attribute, color_attr.attribute]) else: groupby_result = vis.data.groupby(groupby_attr.attribute) groupby_result = groupby_result.agg(agg_func) @@ -199,9 +193,7 @@ def execute_aggregate(vis: Vis, isFiltered=True): df = pd.DataFrame( { columns[0]: attr_unique_vals * color_cardinality, - columns[1]: pd.Series(color_attr_vals).repeat( - N_unique_vals - ), + columns[1]: pd.Series(color_attr_vals).repeat(N_unique_vals), } ) vis._vis_data = vis.data.merge( @@ -211,12 +203,8 @@ def execute_aggregate(vis: Vis, isFiltered=True): suffixes=["", "_right"], ) for col in columns[2:]: - vis.data[col] = vis.data[col].fillna( - 0 - ) # Triggers __setitem__ - assert len( - list(vis.data[groupby_attr.attribute]) - ) == N_unique_vals * len( + vis.data[col] = vis.data[col].fillna(0) # Triggers __setitem__ + assert len(list(vis.data[groupby_attr.attribute])) == N_unique_vals * len( color_attr_vals ), f"Aggregated data missing values compared to original range of values of `{groupby_attr.attribute, color_attr.attribute}`." @@ -235,9 +223,7 @@ def execute_aggregate(vis: Vis, isFiltered=True): assert ( len(list(vis.data[groupby_attr.attribute])) == N_unique_vals ), f"Aggregated data missing values compared to original range of values of `{groupby_attr.attribute}`." 
- vis._vis_data = vis.data.sort_values( - by=groupby_attr.attribute, ascending=True - ) + vis._vis_data = vis.data.sort_values(by=groupby_attr.attribute, ascending=True) vis._vis_data = vis.data.reset_index() vis._vis_data = vis.data.drop(columns="index") @@ -292,9 +278,7 @@ def execute_filter(vis: Vis): return False @staticmethod - def apply_filter( - df: pd.DataFrame, attribute: str, op: str, val: object - ) -> pd.DataFrame: + def apply_filter(df: pd.DataFrame, attribute: str, op: str, val: object) -> pd.DataFrame: """ Helper function for applying filter to a dataframe @@ -335,12 +319,8 @@ def execute_2D_binning(vis: Vis): x_attr = vis.get_attr_by_channel("x")[0] y_attr = vis.get_attr_by_channel("y")[0] - vis._vis_data.loc[:, "xBin"] = pd.cut( - vis._vis_data[x_attr.attribute], bins=40 - ) - vis._vis_data.loc[:, "yBin"] = pd.cut( - vis._vis_data[y_attr.attribute], bins=40 - ) + vis._vis_data.loc[:, "xBin"] = pd.cut(vis._vis_data[x_attr.attribute], bins=40) + vis._vis_data.loc[:, "yBin"] = pd.cut(vis._vis_data[y_attr.attribute], bins=40) color_attr = vis.get_attr_by_channel("color") if len(color_attr) > 0: @@ -369,14 +349,10 @@ def execute_2D_binning(vis: Vis): result = result[result["count"] != 0] # convert type to facilitate weighted correlation interestingess calculation - result.loc[:, "xBinStart"] = ( - result["xBin"].apply(lambda x: x.left).astype("float") - ) + result.loc[:, "xBinStart"] = result["xBin"].apply(lambda x: x.left).astype("float") result.loc[:, "xBinEnd"] = result["xBin"].apply(lambda x: x.right) - result.loc[:, "yBinStart"] = ( - result["yBin"].apply(lambda x: x.left).astype("float") - ) + result.loc[:, "yBinStart"] = result["yBin"].apply(lambda x: x.left).astype("float") result.loc[:, "yBinEnd"] = result["yBin"].apply(lambda x: x.right) vis._vis_data = result.drop(columns=["xBin", "yBin"]) @@ -408,10 +384,7 @@ def compute_data_type(self, ldf: LuxDataFrame): if ldf.pre_aggregated: if ldf.cardinality[attr] == len(ldf): 
ldf.data_type_lookup[attr] = "nominal" - if ( - ldf.cardinality[attr] / len(ldf) < 0.4 - and ldf.cardinality[attr] < 20 - ): + if ldf.cardinality[attr] / len(ldf) < 0.4 and ldf.cardinality[attr] < 20: ldf.data_type_lookup[attr] = "nominal" else: ldf.data_type_lookup[attr] = "quantitative" @@ -463,9 +436,7 @@ def compute_data_type(self, ldf: LuxDataFrame): def compute_data_model(self, ldf: LuxDataFrame): ldf.data_model = { "measure": ldf.data_type["quantitative"], - "dimension": ldf.data_type["nominal"] - + ldf.data_type["temporal"] - + ldf.data_type["id"], + "dimension": ldf.data_type["nominal"] + ldf.data_type["temporal"] + ldf.data_type["id"], } ldf.data_model_lookup = self.reverseMapping(ldf.data_model) diff --git a/lux/executor/SQLExecutor.py b/lux/executor/SQLExecutor.py index 2cca392d..c3978975 100644 --- a/lux/executor/SQLExecutor.py +++ b/lux/executor/SQLExecutor.py @@ -60,9 +60,7 @@ def execute(vislist: VisList, ldf: LuxDataFrame): required_variables = ",".join(required_variables) row_count = list( pd.read_sql( - "SELECT COUNT(*) FROM {} {}".format( - ldf.table_name, where_clause - ), + "SELECT COUNT(*) FROM {} {}".format(ldf.table_name, where_clause), ldf.SQLconnection, )["count"] )[0] @@ -116,41 +114,35 @@ def execute_aggregate(vis: Vis, ldf: LuxDataFrame): else: where_clause, filterVars = SQLExecutor.execute_filter(vis) if agg_func == "mean": - mean_query = ( - "SELECT {}, AVG({}) as {} FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - measure_attr.attribute, - measure_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + mean_query = "SELECT {}, AVG({}) as {} FROM {} {} GROUP BY {}".format( + groupby_attr.attribute, + measure_attr.attribute, + measure_attr.attribute, + ldf.table_name, + where_clause, + groupby_attr.attribute, ) vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(vis.data) if agg_func == "sum": - mean_query = ( - "SELECT {}, SUM({}) as {} FROM {} {} GROUP 
BY {}".format( - groupby_attr.attribute, - measure_attr.attribute, - measure_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + mean_query = "SELECT {}, SUM({}) as {} FROM {} {} GROUP BY {}".format( + groupby_attr.attribute, + measure_attr.attribute, + measure_attr.attribute, + ldf.table_name, + where_clause, + groupby_attr.attribute, ) vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(vis.data) if agg_func == "max": - mean_query = ( - "SELECT {}, MAX({}) as {} FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - measure_attr.attribute, - measure_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + mean_query = "SELECT {}, MAX({}) as {} FROM {} {} GROUP BY {}".format( + groupby_attr.attribute, + measure_attr.attribute, + measure_attr.attribute, + ldf.table_name, + where_clause, + groupby_attr.attribute, ) vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(vis.data) @@ -162,9 +154,7 @@ def execute_aggregate(vis: Vis, ldf: LuxDataFrame): # For filtered aggregation that have missing groupby-attribute values, set these aggregated value as 0, since no datapoints for vals in all_attr_vals: if vals not in result_vals: - vis.data.loc[len(vis.data)] = [vals] + [0] * ( - len(vis.data.columns) - 1 - ) + vis.data.loc[len(vis.data)] = [vals] + [0] * (len(vis.data.columns) - 1) @staticmethod def execute_binning(vis: Vis, ldf: LuxDataFrame): @@ -200,9 +190,7 @@ def execute_binning(vis: Vis, ldf: LuxDataFrame): # binEdges of size N+1, so need to compute binCenter as the bin location upper_edges = [float(i) for i in upper_edges.split(",")] if attr_type == int: - bin_centers = np.array( - [math.ceil((attr_min + attr_min + bin_width) / 2)] - ) + bin_centers = np.array([math.ceil((attr_min + attr_min + bin_width) / 2)]) else: bin_centers = np.array([(attr_min + attr_min + bin_width) / 2]) bin_centers = np.append( @@ -215,9 +203,7 
@@ def execute_binning(vis: Vis, ldf: LuxDataFrame): math.ceil((upper_edges[len(upper_edges) - 1] + attr_max) / 2), ) else: - bin_centers = np.append( - bin_centers, (upper_edges[len(upper_edges) - 1] + attr_max) / 2 - ) + bin_centers = np.append(bin_centers, (upper_edges[len(upper_edges) - 1] + attr_max) / 2) if len(bin_centers) > len(bin_count_data): bucket_lables = bin_count_data["width_bucket"].unique() diff --git a/lux/interestingness/interestingness.py b/lux/interestingness/interestingness.py index 9d175583..f70a658b 100644 --- a/lux/interestingness/interestingness.py +++ b/lux/interestingness/interestingness.py @@ -75,9 +75,7 @@ def interestingness(vis: Vis, ldf: LuxDataFrame) -> int: if n_filter == 0: return unevenness(vis, ldf, measure_lst, dimension_lst) elif n_filter == 1: - return deviation_from_overall( - vis, ldf, filter_specs, measure_lst[0].attribute - ) + return deviation_from_overall(vis, ldf, filter_specs, measure_lst[0].attribute) # Histogram elif n_dim == 0 and n_msr == 1: if v_size < 2: @@ -94,9 +92,7 @@ def interestingness(vis: Vis, ldf: LuxDataFrame) -> int: if v_size < 10: return -1 if vis.mark == "heatmap": - return weighted_correlation( - vis.data["xBinStart"], vis.data["yBinStart"], vis.data["count"] - ) + return weighted_correlation(vis.data["xBinStart"], vis.data["yBinStart"], vis.data["count"]) if n_filter == 1: v_filter_size = get_filtered_size(filter_specs, vis.data) sig = v_filter_size / v_size @@ -139,9 +135,7 @@ def interestingness(vis: Vis, ldf: LuxDataFrame) -> int: groupby_unique_vals = ldf.unique_values[groupby_column] for c in range(0, groupby_cardinality): contingency_table.append( - vis.data[vis.data[groupby_column] == groupby_unique_vals[c]][ - measure_column - ] + vis.data[vis.data[groupby_column] == groupby_unique_vals[c]][measure_column] ) score = 0.12 # ValueError results if an entire column of the contingency table is 0, can happen if an applied filter results in @@ -186,14 +180,10 @@ def weighted_cov(x, y, w): def 
weighted_correlation(x, y, w): # Based on https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#Weighted_correlation_coefficient - return weighted_cov(x, y, w) / np.sqrt( - weighted_cov(x, x, w) * weighted_cov(y, y, w) - ) + return weighted_cov(x, y, w) / np.sqrt(weighted_cov(x, x, w) * weighted_cov(y, y, w)) -def deviation_from_overall( - vis: Vis, ldf: LuxDataFrame, filter_specs: list, msr_attribute: str -) -> int: +def deviation_from_overall(vis: Vis, ldf: LuxDataFrame, filter_specs: list, msr_attribute: str) -> int: """ Difference in bar chart/histogram shape from overall chart Note: this function assumes that the filtered vis.data is operating on the same range as the unfiltered vis.data. @@ -230,9 +220,7 @@ def deviation_from_overall( v = unfiltered_vis.data[msr_attribute] v = v / v.sum() - assert len(v) == len( - v_filter - ), "Data for filtered and unfiltered vis have unequal length." + assert len(v) == len(v_filter), "Data for filtered and unfiltered vis have unequal length." sig = v_filter_size / v_size # significance factor # Euclidean distance as L2 function @@ -257,9 +245,7 @@ def deviation_from_overall( return sig * rankSig * euclidean(v, v_filter) -def unevenness( - vis: Vis, ldf: LuxDataFrame, measure_lst: list, dimension_lst: list -) -> int: +def unevenness(vis: Vis, ldf: LuxDataFrame, measure_lst: list, dimension_lst: list) -> int: """ Measure the unevenness of a bar chart vis. If a bar chart is highly uneven across the possible values, then it may be interesting. 
(e.g., USA produces lots of cars compared to Japan and Europe) diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py index cf04e741..aa02af6d 100644 --- a/lux/processor/Compiler.py +++ b/lux/processor/Compiler.py @@ -81,9 +81,7 @@ def compile_intent(ldf: LuxDataFrame, _inferred_intent: List[Clause]) -> VisList return vis_collection @staticmethod - def enumerate_collection( - _inferred_intent: List[Clause], ldf: LuxDataFrame - ) -> VisList: + def enumerate_collection(_inferred_intent: List[Clause], ldf: LuxDataFrame) -> VisList: """ Given specifications that have been expanded thorught populateOptions, recursively iterate over the resulting list combinations to generate a vis list. @@ -172,9 +170,7 @@ def populate_data_type_model(ldf, vis_collection) -> VisList: chart_title = date_utils.date_formatter(clause.value, ldf) else: chart_title = clause.value - vis.title = ( - f"{clause.attribute} {clause.filter_op} {chart_title}" - ) + vis.title = f"{clause.attribute} {clause.filter_op} {chart_title}" return vlist @staticmethod @@ -335,9 +331,7 @@ def line_or_bar(ldf, dimension: Clause, measure: Clause): "y": vis._inferred_intent[1], "color": vis._inferred_intent[2], } - relevant_attributes = [ - auto_channel[channel].attribute for channel in auto_channel - ] + relevant_attributes = [auto_channel[channel].attribute for channel in auto_channel] relevant_min_max = dict( (attr, ldf._min_max[attr]) for attr in relevant_attributes @@ -399,9 +393,7 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]): # and the leftovers in the auto_channel specification, # step through them together and fill it automatically. 
leftover_channels = list(filter(lambda x: result_dict[x] == "", result_dict)) - for leftover_channel, leftover_encoding in zip( - leftover_channels, auto_channel.values() - ): + for leftover_channel, leftover_encoding in zip(leftover_channels, auto_channel.values()): leftover_encoding.channel = leftover_channel result_dict[leftover_channel] = leftover_encoding vis._inferred_intent = list(result_dict.values()) @@ -409,9 +401,7 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]): @staticmethod # def populate_wildcard_options(ldf: LuxDataFrame) -> dict: - def populate_wildcard_options( - _inferred_intent: List[Clause], ldf: LuxDataFrame - ) -> dict: + def populate_wildcard_options(_inferred_intent: List[Clause], ldf: LuxDataFrame) -> dict: """ Given wildcards and constraints in the LuxDataFrame's intent, return the list of available values that satisfies the data_type or data_model constraints. @@ -436,13 +426,9 @@ def populate_wildcard_options( if clause.attribute == "?": options = set(list(ldf.columns)) # all attributes if clause.data_type != "": - options = options.intersection( - set(ldf.data_type[clause.data_type]) - ) + options = options.intersection(set(ldf.data_type[clause.data_type])) if clause.data_model != "": - options = options.intersection( - set(ldf.data_model[clause.data_model]) - ) + options = options.intersection(set(ldf.data_model[clause.data_model])) options = list(options) else: options = convert_to_list(clause.attribute) diff --git a/lux/processor/Parser.py b/lux/processor/Parser.py index 2e205704..065d420e 100644 --- a/lux/processor/Parser.py +++ b/lux/processor/Parser.py @@ -97,9 +97,7 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]: # clause.description contain ">","<". 
or "=" if any(ext in [">", "<", "=", "!="] for ext in clause.description): # then parse it and assign to clause.attribute, clause.filter_op, clause.values - clause.filter_op = re.findall( - r"/.*/|>|=|<|>=|<=|!=", clause.description - )[0] + clause.filter_op = re.findall(r"/.*/|>|=|<|>=|<=|!=", clause.description)[0] split_description = clause.description.split(clause.filter_op) clause.attribute = split_description[0] clause.value = split_description[1] diff --git a/lux/processor/Validator.py b/lux/processor/Validator.py index f01e7d42..a497045a 100644 --- a/lux/processor/Validator.py +++ b/lux/processor/Validator.py @@ -54,8 +54,7 @@ def validate_intent(intent: List[Clause], ldf: LuxDataFrame) -> None: def validate_clause(clause): if not ( - (clause.attribute and clause.attribute == "?") - or (clause.value and clause.value == "?") + (clause.attribute and clause.attribute == "?") or (clause.value and clause.value == "?") ): if isinstance(clause.attribute, list): for attr in clause.attribute: @@ -66,18 +65,12 @@ def validate_clause(clause): else: if clause.attribute != "Record": # we don't value check datetime since datetime can take filter values that don't exactly match the exact TimeStamp representation - if clause.attribute and not is_datetime_string( - clause.attribute - ): + if clause.attribute and not is_datetime_string(clause.attribute): if not clause.attribute in list(ldf.columns): warnings.warn( f"The input attribute '{clause.attribute}' does not exist in the DataFrame." 
) - if ( - clause.value - and clause.attribute - and clause.filter_op == "=" - ): + if clause.value and clause.attribute and clause.filter_op == "=": series = ldf[clause.attribute] if not is_datetime_series(series): if isinstance(clause.value, list): diff --git a/lux/utils/date_utils.py b/lux/utils/date_utils.py index 817e1ea8..d3ed03ae 100644 --- a/lux/utils/date_utils.py +++ b/lux/utils/date_utils.py @@ -50,9 +50,7 @@ def date_formatter(time_stamp, ldf): elif granularity == "month": date_str += str(datetime.year) + "-" + str(datetime.month) elif granularity == "day": - date_str += ( - str(datetime.year) + "-" + str(datetime.month) + "-" + str(datetime.day) - ) + date_str += str(datetime.year) + "-" + str(datetime.month) + "-" + str(datetime.day) else: # non supported granularity return datetime.date() @@ -103,9 +101,7 @@ def is_datetime_series(series: pd.Series) -> bool: ------- is_date: bool """ - return pd.api.types.is_datetime64_any_dtype(series) or pd.api.types.is_period_dtype( - series - ) + return pd.api.types.is_datetime64_any_dtype(series) or pd.api.types.is_period_dtype(series) def is_datetime_string(string: str) -> bool: diff --git a/lux/utils/message.py b/lux/utils/message.py index 638fd581..04d1cc37 100644 --- a/lux/utils/message.py +++ b/lux/utils/message.py @@ -29,9 +29,7 @@ def to_html(self): if len(self.messages) == 0: return "" else: - sorted_msgs = sorted( - self.messages, key=lambda i: i["priority"], reverse=True - ) + sorted_msgs = sorted(self.messages, key=lambda i: i["priority"], reverse=True) html = "
    " for msg in sorted_msgs: msgTxt = msg["text"] diff --git a/lux/utils/utils.py b/lux/utils/utils.py index 0c246597..4c289b65 100644 --- a/lux/utils/utils.py +++ b/lux/utils/utils.py @@ -80,9 +80,7 @@ def check_if_id_like(df, attribute): sampled = df[attribute].sample(50, random_state=99) else: sampled = df[attribute] - str_length_uniformity = ( - sampled.apply(lambda x: type(x) == str and len(x)).std() < 3 - ) + str_length_uniformity = sampled.apply(lambda x: type(x) == str and len(x)).std() < 3 return ( high_cardinality and (attribute_contain_id or almost_all_vals_unique) diff --git a/lux/vis/Vis.py b/lux/vis/Vis.py index a7883068..66ef85f4 100644 --- a/lux/vis/Vis.py +++ b/lux/vis/Vis.py @@ -27,7 +27,9 @@ def __init__(self, intent, source=None, title="", score=0.0): self._intent = intent # This is the user's original intent to Vis self._inferred_intent = intent # This is the re-written, expanded version of user's original intent (include inferred vis info) self._source = source # This is the original data that is attached to the Vis - self._vis_data = None # This is the data that represents the Vis (e.g., selected, aggregated, binned) + self._vis_data = ( + None # This is the data that represents the Vis (e.g., selected, aggregated, binned) + ) self._code = None self._mark = "" self._min_max = {} @@ -39,9 +41,7 @@ def __init__(self, intent, source=None, title="", score=0.0): def __repr__(self): if self._source is None: - return ( - f"" - ) + return f"" filter_intents = None channels, additional_channels = [], [] for clause in self._inferred_intent: @@ -52,12 +52,7 @@ def __repr__(self): if hasattr(clause, "attribute"): if clause.attribute != "": if clause.aggregation != "" and clause.aggregation is not None: - attribute = ( - clause._aggregation_name.upper() - + "(" - + clause.attribute - + ")" - ) + attribute = clause._aggregation_name.upper() + "(" + clause.attribute + ")" elif clause.bin_size > 0: attribute = "BIN(" + clause.attribute + ")" else: @@ -77,9 
+72,7 @@ def __repr__(self): if filter_intents: return f"" else: - return ( - f"" - ) + return f"" @property def data(self): @@ -164,9 +157,7 @@ def get_attr_by_attr_name(self, attr_name): def get_attr_by_channel(self, channel): spec_obj = list( filter( - lambda x: x.channel == channel and x.value == "" - if hasattr(x, "channel") - else False, + lambda x: x.channel == channel and x.value == "" if hasattr(x, "channel") else False, self._inferred_intent, ) ) @@ -195,9 +186,7 @@ def get_attr_by_data_model(self, dmodel, exclude_record=False): def get_attr_by_data_type(self, dtype): return list( filter( - lambda x: x.data_type == dtype and x.value == "" - if hasattr(x, "data_type") - else False, + lambda x: x.data_type == dtype and x.value == "" if hasattr(x, "data_type") else False, self._inferred_intent, ) ) @@ -218,9 +207,7 @@ def remove_column_from_spec(self, attribute, remove_first: bool = False): Boolean flag to determine whether to remove all instances of the attribute or only one (first) instance, by default False """ if not remove_first: - new_inferred = list( - filter(lambda x: x.attribute != attribute, self._inferred_intent) - ) + new_inferred = list(filter(lambda x: x.attribute != attribute, self._inferred_intent)) self._inferred_intent = new_inferred self._intent = new_inferred elif remove_first: diff --git a/lux/vis/VisList.py b/lux/vis/VisList.py index 5fcfbd4d..c25495d9 100644 --- a/lux/vis/VisList.py +++ b/lux/vis/VisList.py @@ -93,9 +93,7 @@ def exported(self) -> VisList: ) return [] else: - exported_vis = VisList( - list(map(self.__getitem__, exported_vis_lst["Vis List"])) - ) + exported_vis = VisList(list(map(self.__getitem__, exported_vis_lst["Vis List"]))) return exported_vis def remove_duplicates(self) -> None: @@ -137,9 +135,7 @@ def __repr__(self): filter_intents = clause if clause.aggregation != "" and clause.aggregation is not None: - attribute = ( - clause._aggregation_name.upper() + "(" + clause.attribute + ")" - ) + attribute = 
clause._aggregation_name.upper() + "(" + clause.attribute + ")" elif clause.bin_size > 0: attribute = "BIN(" + clause.attribute + ")" else: @@ -153,12 +149,9 @@ def __repr__(self): largest_mark = len(vis.mark) if ( filter_intents - and len(str(filter_intents.value)) + len(filter_intents.attribute) - > largest_filter + and len(str(filter_intents.value)) + len(filter_intents.attribute) > largest_filter ): - largest_filter = len(str(filter_intents.value)) + len( - filter_intents.attribute - ) + largest_filter = len(str(filter_intents.value)) + len(filter_intents.attribute) vis_repr = [] largest_x_length = len(x_channel) largest_y_length = len(y_channel) @@ -172,14 +165,8 @@ def __repr__(self): if clause.value != "": filter_intents = clause - if ( - clause.aggregation != "" - and clause.aggregation is not None - and vis.mark != "scatter" - ): - attribute = ( - clause._aggregation_name.upper() + "(" + clause.attribute + ")" - ) + if clause.aggregation != "" and clause.aggregation is not None and vis.mark != "scatter": + attribute = clause._aggregation_name.upper() + "(" + clause.attribute + ")" elif clause.bin_size > 0: attribute = "BIN(" + clause.attribute + ")" else: @@ -294,9 +281,7 @@ def _repr_html_(self): import luxwidget recJSON = LuxDataFrame.rec_to_JSON([recommendation]) - self._widget = luxwidget.LuxWidget( - currentVis={}, recommendations=recJSON, intent="", message="" - ) + self._widget = luxwidget.LuxWidget(currentVis={}, recommendations=recJSON, intent="", message="") display(self._widget) def refresh_source(self, ldf): @@ -340,7 +325,5 @@ def refresh_source(self, ldf): else: self._inferred_intent = Parser.parse(self._intent) Validator.validate_intent(self._inferred_intent, ldf) - self._collection = Compiler.compile_intent( - ldf, self._inferred_intent - ) + self._collection = Compiler.compile_intent(ldf, self._inferred_intent) ldf.executor.execute(self._collection, ldf) diff --git a/lux/vislib/altair/AltairChart.py b/lux/vislib/altair/AltairChart.py index 
09a01013..f0ccb869 100644 --- a/lux/vislib/altair/AltairChart.py +++ b/lux/vislib/altair/AltairChart.py @@ -50,9 +50,7 @@ def add_tooltip(self): self.chart = self.chart.encode(tooltip=list(self.vis.data.columns)) def apply_default_config(self): - self.chart = self.chart.configure_title( - fontWeight=500, fontSize=13, font="Helvetica Neue" - ) + self.chart = self.chart.configure_title(fontWeight=500, fontSize=13, font="Helvetica Neue") self.chart = self.chart.configure_axis( titleFontWeight=500, titleFontSize=11, @@ -71,13 +69,15 @@ def apply_default_config(self): labelFont="Helvetica Neue", ) self.chart = self.chart.properties(width=160, height=150) - self.code += "\nchart = chart.configure_title(fontWeight=500,fontSize=13,font='Helvetica Neue')\n" + self.code += ( + "\nchart = chart.configure_title(fontWeight=500,fontSize=13,font='Helvetica Neue')\n" + ) self.code += "chart = chart.configure_axis(titleFontWeight=500,titleFontSize=11,titleFont='Helvetica Neue',\n" - self.code += " labelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue',labelColor='#505050')\n" - self.code += "chart = chart.configure_legend(titleFontWeight=500,titleFontSize=10,titleFont='Helvetica Neue',\n" self.code += ( - " labelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue')\n" + " labelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue',labelColor='#505050')\n" ) + self.code += "chart = chart.configure_legend(titleFontWeight=500,titleFontSize=10,titleFont='Helvetica Neue',\n" + self.code += " labelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue')\n" self.code += "chart = chart.properties(width=160,height=150)\n" def encode_color(self): @@ -97,9 +97,7 @@ def encode_color(self): ) self.code += f"chart = chart.encode(color=alt.Color('{color_attr_name}',type='{color_attr_type}',timeUnit='{timeUnit}',title='{color_attr_name}'))" else: - self.chart = self.chart.encode( - color=alt.Color(color_attr_name, type=color_attr_type) - ) + self.chart = 
self.chart.encode(color=alt.Color(color_attr_name, type=color_attr_type)) self.code += f"chart = chart.encode(color=alt.Color('{color_attr_name}',type='{color_attr_type}'))\n" elif len(color_attr) > 1: raise ValueError( @@ -111,9 +109,7 @@ def add_title(self): if chart_title: self.chart = self.chart.encode().properties(title=chart_title) if self.code != "": - self.code += ( - f"chart = chart.encode().properties(title = '{chart_title}')" - ) + self.code += f"chart = chart.encode().properties(title = '{chart_title}')" def initialize_chart(self): return NotImplemented diff --git a/lux/vislib/altair/AltairRenderer.py b/lux/vislib/altair/AltairRenderer.py index 2692f72e..1d10aeb0 100644 --- a/lux/vislib/altair/AltairRenderer.py +++ b/lux/vislib/altair/AltairRenderer.py @@ -93,9 +93,7 @@ def create_vis(self, vis, standalone=True): import inspect if vis.plot_config: - chart.code += "\n".join( - inspect.getsource(vis.plot_config).split("\n ")[1:-1] - ) + chart.code += "\n".join(inspect.getsource(vis.plot_config).split("\n ")[1:-1]) chart.code += "\nchart" chart.code = chart.code.replace("\n\t\t", "\n") @@ -107,15 +105,11 @@ def create_vis(self, vis, standalone=True): if local_vars: callers_local_vars = local_vars.f_locals.items() possible_vars = [ - var_name - for var_name, var_val in callers_local_vars - if var_val is var + var_name for var_name, var_val in callers_local_vars if var_val is var ] all_vars.extend(possible_vars) found_variable = [ - possible_var - for possible_var in all_vars - if possible_var[0] != "_" + possible_var for possible_var in all_vars if possible_var[0] != "_" ][0] else: # if vis._source was not set when the Vis was created found_variable = "df" diff --git a/lux/vislib/altair/BarChart.py b/lux/vislib/altair/BarChart.py index 5b7ecb57..0550e590 100644 --- a/lux/vislib/altair/BarChart.py +++ b/lux/vislib/altair/BarChart.py @@ -49,11 +49,11 @@ def initialize_chart(self): type=y_attr.data_type, axis=alt.Axis(labelOverlap=True), ) - x_attr_field = 
alt.X( - x_attr.attribute, type=x_attr.data_type, title=agg_title - ) + x_attr_field = alt.X(x_attr.attribute, type=x_attr.data_type, title=agg_title) y_attr_field_code = f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', axis=alt.Axis(labelOverlap=True))" - x_attr_field_code = f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', title='{agg_title}')" + x_attr_field_code = ( + f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', title='{agg_title}')" + ) if y_attr.sort == "ascending": y_attr_field.sort = "-x" @@ -67,11 +67,11 @@ def initialize_chart(self): type=x_attr.data_type, axis=alt.Axis(labelOverlap=True), ) - y_attr_field = alt.Y( - y_attr.attribute, type=y_attr.data_type, title=agg_title - ) + y_attr_field = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title) x_attr_field_code = f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', axis=alt.Axis(labelOverlap=True))" - y_attr_field_code = f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')" + y_attr_field_code = ( + f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')" + ) if x_attr.sort == "ascending": x_attr_field.sort = "-y" x_attr_field_code = f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', axis=alt.Axis(labelOverlap=True),sort='-y')" @@ -126,6 +126,4 @@ def encode_color(self): self.add_text() # Setting tooltip as non-null self.chart = self.chart.configure_mark(tooltip=alt.TooltipContent("encoding")) - self.code += ( - f"""chart = chart.configure_mark(tooltip=alt.TooltipContent('encoding'))""" - ) + self.code += f"""chart = chart.configure_mark(tooltip=alt.TooltipContent('encoding'))""" diff --git a/lux/vislib/altair/Histogram.py b/lux/vislib/altair/Histogram.py index b9d1da4a..fdcaaabc 100644 --- a/lux/vislib/altair/Histogram.py +++ b/lux/vislib/altair/Histogram.py @@ -41,10 +41,7 @@ def initialize_chart(self): x_min = self.vis.min_max[msr_attr.attribute][0] x_max = self.vis.min_max[msr_attr.attribute][1] - 
x_range = abs( - max(self.vis.data[msr_attr.attribute]) - - min(self.vis.data[msr_attr.attribute]) - ) + x_range = abs(max(self.vis.data[msr_attr.attribute]) - min(self.vis.data[msr_attr.attribute])) plot_range = abs(x_max - x_min) markbar = x_range / plot_range * 12 diff --git a/lux/vislib/altair/LineChart.py b/lux/vislib/altair/LineChart.py index 1e01eabf..002beefb 100644 --- a/lux/vislib/altair/LineChart.py +++ b/lux/vislib/altair/LineChart.py @@ -48,23 +48,19 @@ def initialize_chart(self): if y_attr.data_model == "measure": agg_title = get_agg_title(y_attr) x_attr_spec = alt.X(x_attr.attribute, type=x_attr.data_type) - y_attr_spec = alt.Y( - y_attr.attribute, type=y_attr.data_type, title=agg_title - ) - x_attr_field_code = ( - f"alt.X('{x_attr.attribute}', type = '{x_attr.data_type}')" + y_attr_spec = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title) + x_attr_field_code = f"alt.X('{x_attr.attribute}', type = '{x_attr.data_type}')" + y_attr_fieldCode = ( + f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')" ) - y_attr_fieldCode = f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')" else: agg_title = get_agg_title(x_attr) - x_attr_spec = alt.X( - x_attr.attribute, type=x_attr.data_type, title=agg_title - ) + x_attr_spec = alt.X(x_attr.attribute, type=x_attr.data_type, title=agg_title) y_attr_spec = alt.Y(y_attr.attribute, type=y_attr.data_type) - x_attr_field_code = f"alt.X('{x_attr.attribute}', type = '{x_attr.data_type}', title='{agg_title}')" - y_attr_fieldCode = ( - f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}')" + x_attr_field_code = ( + f"alt.X('{x_attr.attribute}', type = '{x_attr.data_type}', title='{agg_title}')" ) + y_attr_fieldCode = f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}')" chart = alt.Chart(self.data).mark_line().encode(x=x_attr_spec, y=y_attr_spec) chart = chart.interactive() # Enable Zooming and Panning diff --git a/tests/test_action.py 
b/tests/test_action.py index 3b3097ad..5775c614 100644 --- a/tests/test_action.py +++ b/tests/test_action.py @@ -20,9 +20,7 @@ def test_vary_filter_val(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) vis = Vis(["Height", "SportType=Ball"], df) df.set_intent_as_vis(vis) @@ -82,9 +80,7 @@ def test_row_column_group(): tseries = df.pivot(index="State", columns="Date", values="Value") # Interpolating missing values tseries[tseries.columns.min()] = tseries[tseries.columns.min()].fillna(0) - tseries[tseries.columns.max()] = tseries[tseries.columns.max()].fillna( - tseries.max(axis=1) - ) + tseries[tseries.columns.max()] = tseries[tseries.columns.max()].fillna(tseries.max(axis=1)) tseries = tseries.interpolate("zero", axis=1) tseries._repr_html_() assert list(tseries.recommendation.keys()) == ["Row Groups", "Column Groups"] @@ -183,8 +179,7 @@ def test_year_filter_value(): lambda vis: len( list( filter( - lambda clause: clause.value != "" - and clause.attribute == "Year", + lambda clause: clause.value != "" and clause.attribute == "Year", vis._intent, ) ) diff --git a/tests/test_compiler.py b/tests/test_compiler.py index 760c742c..037b7534 100644 --- a/tests/test_compiler.py +++ b/tests/test_compiler.py @@ -130,9 +130,7 @@ def test_sort_bar(): df = pd.read_csv("lux/data/car.csv") vis = Vis( [ - lux.Clause( - attribute="Acceleration", data_model="measure", data_type="quantitative" - ), + lux.Clause(attribute="Acceleration", data_model="measure", data_type="quantitative"), lux.Clause(attribute="Origin", data_model="dimension", data_type="nominal"), ], df, @@ -143,9 +141,7 @@ def test_sort_bar(): df = pd.read_csv("lux/data/car.csv") vis = Vis( [ - lux.Clause( - attribute="Acceleration", data_model="measure", data_type="quantitative" - ), + lux.Clause(attribute="Acceleration", data_model="measure", 
data_type="quantitative"), lux.Clause(attribute="Name", data_model="dimension", data_type="nominal"), ], df, diff --git a/tests/test_dates.py b/tests/test_dates.py index 4b87f7a6..8a5cc823 100644 --- a/tests/test_dates.py +++ b/tests/test_dates.py @@ -52,9 +52,7 @@ def test_period_selection(): PandasExecutor.execute(ldf.current_vis, ldf) - assert all( - [type(vlist.data) == lux.core.frame.LuxDataFrame for vlist in ldf.current_vis] - ) + assert all([type(vlist.data) == lux.core.frame.LuxDataFrame for vlist in ldf.current_vis]) assert all(ldf.current_vis[2].data.columns == ["Year", "Acceleration"]) @@ -64,16 +62,12 @@ def test_period_filter(): ldf["Year"] = pd.DatetimeIndex(ldf["Year"]).to_period(freq="A") - ldf.set_intent( - [lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")] - ) + ldf.set_intent([lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")]) PandasExecutor.execute(ldf.current_vis, ldf) ldf._repr_html_() - assert isinstance( - ldf.recommendation["Filter"][2]._inferred_intent[2].value, pd.Period - ) + assert isinstance(ldf.recommendation["Filter"][2]._inferred_intent[2].value, pd.Period) def test_period_to_altair(): @@ -83,9 +77,7 @@ def test_period_to_altair(): df["Year"] = pd.DatetimeIndex(df["Year"]).to_period(freq="A") - df.set_intent( - [lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")] - ) + df.set_intent([lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")]) PandasExecutor.execute(df.current_vis, df) df._repr_html_() @@ -102,9 +94,7 @@ def test_refresh_inplace(): "value": [10.5, 15.2, 20.3, 25.2], } ) - with pytest.warns( - UserWarning, match="Lux detects that the attribute 'date' may be temporal." 
- ): + with pytest.warns(UserWarning, match="Lux detects that the attribute 'date' may be temporal."): df._repr_html_() assert df.data_type_lookup["date"] == "temporal" diff --git a/tests/test_error_warning.py b/tests/test_error_warning.py index d5fe49ff..a34b349f 100644 --- a/tests/test_error_warning.py +++ b/tests/test_error_warning.py @@ -54,9 +54,7 @@ def test_multi_vis(): SyntaxError, match="The intent that you specified corresponds to more than one visualization.", ): - Vis( - ["SATAverage", "AverageCost", "Region=New England|Southeast"], df - )._repr_html_() + Vis(["SATAverage", "AverageCost", "Region=New England|Southeast"], df)._repr_html_() # Test Properties with Private Variables Readable but not Writable diff --git a/tests/test_executor.py b/tests/test_executor.py index d1a18a6b..d4a05d01 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -89,10 +89,7 @@ def test_colored_bar_chart(): group_by_cardinality = len(df.unique_values["Origin"]) assert len(new_vis.data.columns) == 3 assert ( - len(new_vis.data) - == 15 - > group_by_cardinality - < color_cardinality * group_by_cardinality + len(new_vis.data) == 15 > group_by_cardinality < color_cardinality * group_by_cardinality ) # Not color_cardinality*group_by_cardinality since some combinations have 0 values @@ -115,12 +112,7 @@ def test_colored_line_chart(): group_by_cardinality = len(df.unique_values["Year"]) assert len(new_vis.data.columns) == 3 # Not color_cardinality*group_by_cardinality since some combinations have 0 values - assert ( - len(new_vis.data) - == 60 - > group_by_cardinality - < color_cardinality * group_by_cardinality - ) + assert len(new_vis.data) == 60 > group_by_cardinality < color_cardinality * group_by_cardinality def test_filter(): @@ -188,23 +180,12 @@ def test_filter_aggregation_fillzero_aligned(): ] vis = Vis(intent, df) result = vis.data - externalValidation = ( - df[df["Origin"] == "Japan"].groupby("Cylinders").mean()["MilesPerGal"] - ) + externalValidation = 
df[df["Origin"] == "Japan"].groupby("Cylinders").mean()["MilesPerGal"] assert result[result["Cylinders"] == 5]["MilesPerGal"].values[0] == 0 assert result[result["Cylinders"] == 8]["MilesPerGal"].values[0] == 0 - assert ( - result[result["Cylinders"] == 3]["MilesPerGal"].values[0] - == externalValidation[3] - ) - assert ( - result[result["Cylinders"] == 4]["MilesPerGal"].values[0] - == externalValidation[4] - ) - assert ( - result[result["Cylinders"] == 6]["MilesPerGal"].values[0] - == externalValidation[6] - ) + assert result[result["Cylinders"] == 3]["MilesPerGal"].values[0] == externalValidation[3] + assert result[result["Cylinders"] == 4]["MilesPerGal"].values[0] == externalValidation[4] + assert result[result["Cylinders"] == 6]["MilesPerGal"].values[0] == externalValidation[6] def test_exclude_attribute(): diff --git a/tests/test_interestingness.py b/tests/test_interestingness.py index a42766ce..d62b4b40 100644 --- a/tests/test_interestingness.py +++ b/tests/test_interestingness.py @@ -85,14 +85,12 @@ def test_interestingness_0_1_0(): for f in range(0, len(df.recommendation["Enhance"])): if ( df.recommendation["Enhance"][f].mark == "scatter" - and df.recommendation["Enhance"][f]._inferred_intent[1].attribute - == "Weight" + and df.recommendation["Enhance"][f]._inferred_intent[1].attribute == "Weight" ): rank1 = f if ( df.recommendation["Enhance"][f].mark == "scatter" - and df.recommendation["Enhance"][f]._inferred_intent[1].attribute - == "Acceleration" + and df.recommendation["Enhance"][f]._inferred_intent[1].attribute == "Acceleration" ): rank2 = f if ( @@ -181,20 +179,17 @@ def test_interestingness_1_1_1(): for f in range(0, len(df.recommendation["Enhance"])): if ( str(df.recommendation["Enhance"][f]._inferred_intent[2].value) == "USA" - and str(df.recommendation["Enhance"][f]._inferred_intent[1].attribute) - == "Cylinders" + and str(df.recommendation["Enhance"][f]._inferred_intent[1].attribute) == "Cylinders" ): rank1 = f if ( 
str(df.recommendation["Enhance"][f]._inferred_intent[2].value) == "USA" - and str(df.recommendation["Enhance"][f]._inferred_intent[1].attribute) - == "Weight" + and str(df.recommendation["Enhance"][f]._inferred_intent[1].attribute) == "Weight" ): rank2 = f if ( str(df.recommendation["Enhance"][f]._inferred_intent[2].value) == "USA" - and str(df.recommendation["Enhance"][f]._inferred_intent[1].attribute) - == "Horsepower" + and str(df.recommendation["Enhance"][f]._inferred_intent[1].attribute) == "Horsepower" ): rank3 = f assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3 @@ -224,9 +219,7 @@ def test_interestingness_0_2_0(): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") - df.set_intent( - [lux.Clause(attribute="Horsepower"), lux.Clause(attribute="Acceleration")] - ) + df.set_intent([lux.Clause(attribute="Horsepower"), lux.Clause(attribute="Acceleration")]) df._repr_html_() # check that top recommended enhance graph score is not none and that ordering makes intuitive sense assert interestingness(df.recommendation["Enhance"][0], df) != None @@ -235,14 +228,12 @@ def test_interestingness_0_2_0(): rank3 = -1 for f in range(0, len(df.recommendation["Enhance"])): if ( - str(df.recommendation["Enhance"][f]._inferred_intent[2].attribute) - == "Origin" + str(df.recommendation["Enhance"][f]._inferred_intent[2].attribute) == "Origin" and str(df.recommendation["Enhance"][f].mark) == "scatter" ): rank1 = f if ( - str(df.recommendation["Enhance"][f]._inferred_intent[2].attribute) - == "Displacement" + str(df.recommendation["Enhance"][f]._inferred_intent[2].attribute) == "Displacement" and str(df.recommendation["Enhance"][f].mark) == "scatter" ): rank2 = f diff --git a/tests/test_maintainence.py b/tests/test_maintainence.py index 35f4ec71..1c2137ca 100644 --- a/tests/test_maintainence.py +++ b/tests/test_maintainence.py @@ -40,9 +40,7 @@ def test_metadata_inplace_operation(): df._repr_html_() assert df._metadata_fresh == True, 
"Failed to maintain metadata after display df" df.dropna(inplace=True) - assert ( - df._metadata_fresh == False - ), "Failed to expire metadata after in-place Pandas operation" + assert df._metadata_fresh == False, "Failed to expire metadata after in-place Pandas operation" def test_metadata_new_df_operation(): @@ -64,9 +62,7 @@ def test_metadata_column_group_reset_df(): assert not hasattr(result, "_metadata_fresh") # Note that this should trigger two compute metadata (one for df, and one for an intermediate df.reset_index used to feed inside created Vis) result._repr_html_() - assert ( - result._metadata_fresh == True - ), "Failed to maintain metadata after display df" + assert result._metadata_fresh == True, "Failed to maintain metadata after display df" colgroup_recs = result.recommendation["Column Groups"] assert len(colgroup_recs) == 5 @@ -81,9 +77,7 @@ def test_recs_inplace_operation(): assert len(df.recommendation["Occurrence"]) == 4 df.drop(columns=["Name"], inplace=True) assert "Name" not in df.columns, "Failed to perform `drop` operation in-place" - assert ( - df._recs_fresh == False - ), "Failed to maintain recommendation after in-place Pandas operation" + assert df._recs_fresh == False, "Failed to maintain recommendation after in-place Pandas operation" df._repr_html_() assert len(df.recommendation["Occurrence"]) == 3 assert df._recs_fresh == True, "Failed to maintain recommendation after display df" diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index ad5008de..d5ebfeb3 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -126,9 +126,7 @@ def test_concat(): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") - new_df = pd.concat( - [df.loc[:, "Name":"Cylinders"], df.loc[:, "Year":"Origin"]], axis="columns" - ) + new_df = pd.concat([df.loc[:, "Name":"Cylinders"], df.loc[:, "Year":"Origin"]], axis="columns") new_df._repr_html_() assert 
list(new_df.recommendation.keys()) == [ "Distribution", @@ -156,9 +154,7 @@ def test_qcut(): def test_cut(): df = pd.read_csv("lux/data/car.csv") - df["Weight"] = pd.cut( - df["Weight"], bins=[0, 2500, 7500, 10000], labels=["small", "medium", "large"] - ) + df["Weight"] = pd.cut(df["Weight"], bins=[0, 2500, 7500, 10000], labels=["small", "medium", "large"]) df._repr_html_() @@ -371,9 +367,7 @@ def test_loc(): assert len(new_df.cardinality) == 2 import numpy as np - inter_df = df.groupby("Brand")[["Acceleration", "Weight", "Horsepower"]].agg( - np.mean - ) + inter_df = df.groupby("Brand")[["Acceleration", "Weight", "Horsepower"]].agg(np.mean) new_df = inter_df.loc["chevrolet":"fiat", "Acceleration":"Weight"] new_df._repr_html_() assert list(new_df.recommendation.keys()) == ["Column Groups"] @@ -402,9 +396,7 @@ def test_iloc(): assert len(new_df.cardinality) == 2 import numpy as np - inter_df = df.groupby("Brand")[["Acceleration", "Weight", "Horsepower"]].agg( - np.mean - ) + inter_df = df.groupby("Brand")[["Acceleration", "Weight", "Horsepower"]].agg(np.mean) new_df = inter_df.iloc[5:10, 0:2] new_df._repr_html_() assert list(new_df.recommendation.keys()) == ["Column Groups"] @@ -486,9 +478,7 @@ def test_df_to_series(): df._repr_html_() # compute metadata assert df.cardinality is not None series = df["Weight"] - assert isinstance( - series, lux.core.series.LuxSeries - ), "Derived series is type LuxSeries." + assert isinstance(series, lux.core.series.LuxSeries), "Derived series is type LuxSeries." df["Weight"]._metadata assert df["Weight"]._metadata == [ "_intent", @@ -509,12 +499,8 @@ def test_df_to_series(): "_history", "_saved_export", ], "Metadata is lost when going from Dataframe to Series." - assert ( - df.cardinality is not None - ), "Metadata is lost when going from Dataframe to Series." - assert ( - series.name == "Weight" - ), "Pandas Series original `name` property not retained." 
+ assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series." + assert series.name == "Weight", "Pandas Series original `name` property not retained." def test_value_counts(): @@ -523,9 +509,7 @@ def test_value_counts(): assert df.cardinality is not None series = df["Weight"] series.value_counts() - assert isinstance( - series, lux.core.series.LuxSeries - ), "Derived series is type LuxSeries." + assert isinstance(series, lux.core.series.LuxSeries), "Derived series is type LuxSeries." assert df["Weight"]._metadata == [ "_intent", "data_type_lookup", @@ -545,12 +529,8 @@ def test_value_counts(): "_history", "_saved_export", ], "Metadata is lost when going from Dataframe to Series." - assert ( - df.cardinality is not None - ), "Metadata is lost when going from Dataframe to Series." - assert ( - series.name == "Weight" - ), "Pandas Series original `name` property not retained." + assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series." + assert series.name == "Weight", "Pandas Series original `name` property not retained." def test_str_replace(): @@ -558,9 +538,7 @@ def test_str_replace(): df._repr_html_() # compute metadata assert df.cardinality is not None series = df["Brand"].str.replace("chevrolet", "chevy") - assert isinstance( - series, lux.core.series.LuxSeries - ), "Derived series is type LuxSeries." + assert isinstance(series, lux.core.series.LuxSeries), "Derived series is type LuxSeries." assert df["Brand"]._metadata == [ "_intent", "data_type_lookup", @@ -580,9 +558,5 @@ def test_str_replace(): "_history", "_saved_export", ], "Metadata is lost when going from Dataframe to Series." - assert ( - df.cardinality is not None - ), "Metadata is lost when going from Dataframe to Series." - assert ( - series.name == "Brand" - ), "Pandas Series original `name` property not retained." + assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series." 
+ assert series.name == "Brand", "Pandas Series original `name` property not retained." diff --git a/tests/test_type.py b/tests/test_type.py index f71766c0..a531fe3a 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -45,9 +45,7 @@ def test_check_int_id(): def test_check_str_id(): - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/churn.csv?raw=true" - ) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/churn.csv?raw=true") df._repr_html_() assert ( "customerID is not visualized since it resembles an ID field." @@ -56,9 +54,9 @@ def test_check_str_id(): def test_check_hpi(): - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/hpi.csv?raw=true" - ).head(10) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/hpi.csv?raw=true").head( + 10 + ) df.maintain_metadata() @@ -80,9 +78,7 @@ def test_check_hpi(): def test_check_airbnb(): - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/airbnb_nyc.csv?raw=true" - ) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/airbnb_nyc.csv?raw=true") df.maintain_metadata() assert df.data_type_lookup == { "id": "id", diff --git a/tests/test_vis.py b/tests/test_vis.py index ff3b6f63..bf1879fd 100644 --- a/tests/test_vis.py +++ b/tests/test_vis.py @@ -20,9 +20,7 @@ def test_vis(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) vis = Vis(["Height", "SportType=Ball"], df) assert vis.get_attr_by_attr_name("Height")[0].bin_size != 0 @@ -30,9 +28,7 @@ def test_vis(): def test_vis_set_specs(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) vis = Vis(["Height", 
"SportType=Ball"], df) vis.set_intent(["Height", "SportType=Ice"]) @@ -40,14 +36,10 @@ def test_vis_set_specs(): def test_vis_collection(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) vlist = VisList(["Height", "SportType=Ball", "?"], df) - vis_with_year = list( - filter(lambda x: x.get_attr_by_attr_name("Year") != [], vlist) - )[0] + vis_with_year = list(filter(lambda x: x.get_attr_by_attr_name("Year") != [], vlist))[0] assert vis_with_year.get_attr_by_channel("x")[0].attribute == "Year" # remove 1 for vis with same filter attribute and remove 1 vis with for same attribute assert len(vlist) == len(df.columns) - 1 - 1 @@ -56,9 +48,7 @@ def test_vis_collection(): def test_vis_collection_set_intent(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) vlist = VisList(["Height", "SportType=Ice", "?"], df) vlist.set_intent(["Height", "SportType=Boat", "?"]) @@ -100,17 +90,13 @@ def test_remove_identity(): vis = Vis(["Horsepower", "Horsepower"], df) vis.remove_column_from_spec("Horsepower", remove_first=True) assert len(vis._inferred_intent) == 1, "Remove only 1 instances of Horsepower" - assert ( - vis._inferred_intent[0].attribute == "Horsepower" - ), "Remove only 1 instances of Horsepower" + assert vis._inferred_intent[0].attribute == "Horsepower", "Remove only 1 instances of Horsepower" def test_refresh_collection(): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") - df.set_intent( - [lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")] - ) + df.set_intent([lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")]) df._repr_html_() enhanceCollection = 
df.recommendation["Enhance"] enhanceCollection.refresh_source(df[df["Origin"] == "USA"]) @@ -136,9 +122,7 @@ def test_vis_custom_aggregation_as_numpy_func(): def test_vis_collection_via_list_of_vis(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") @@ -177,15 +161,12 @@ def test_vis_to_Altair_standalone(): assert ( "chart = alt.Chart(pd.DataFrame({'Weight': {0: 3504, 1: 3693, 2: 3436, 3: 3433, 4: 3449, 5: 43" in code - or "alt.Chart(pd.DataFrame({'Horsepower': {0: 130, 1: 165, 2: 150, 3: 150, 4: 140," - in code + or "alt.Chart(pd.DataFrame({'Horsepower': {0: 130, 1: 165, 2: 150, 3: 150, 4: 140," in code ) def test_vis_list_custom_title_override(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) df["Year"] = pd.to_datetime(df["Year"], format="%Y") From d43dab9e4b9d8abb9bb1091f91dac3fb1e90dc9b Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 14:04:01 -0800 Subject: [PATCH 14/39] executor --- lux/executor/PandasExecutor.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index 6c5c0da2..144ad3a7 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -247,9 +247,8 @@ def execute_binning(vis: Vis): bin_attribute = list(filter(lambda x: x.bin_size != 0, vis._inferred_intent))[0] if not np.isnan(vis.data[bin_attribute.attribute]).all(): - series = vis.data[ - bin_attribute.attribute - ].dropna() # np.histogram breaks if array contain NaN + # np.histogram breaks if array contain NaN + series = 
vis.data[bin_attribute.attribute].dropna() # TODO:binning runs for name attribte. Name attribute has datatype quantitative which is wrong. counts, bin_edges = np.histogram(series, bins=bin_attribute.bin_size) # bin_edges of size N+1, so need to compute bin_center as the bin location @@ -319,13 +318,8 @@ def execute_2D_binning(vis: Vis): x_attr = vis.get_attr_by_channel("x")[0].attribute y_attr = vis.get_attr_by_channel("y")[0].attribute -<<<<<<< HEAD - vis._vis_data.loc[:, "xBin"] = pd.cut(vis._vis_data[x_attr.attribute], bins=40) - vis._vis_data.loc[:, "yBin"] = pd.cut(vis._vis_data[y_attr.attribute], bins=40) -======= vis._vis_data["xBin"] = pd.cut(vis._vis_data[x_attr], bins=40) vis._vis_data["yBin"] = pd.cut(vis._vis_data[y_attr], bins=40) ->>>>>>> af0043a3619eac15e962a4270f86f47affa5f126 color_attr = vis.get_attr_by_channel("color") if len(color_attr) > 0: @@ -352,19 +346,11 @@ def execute_2D_binning(vis: Vis): result = result[result["count"] != 0] # convert type to facilitate weighted correlation interestingess calculation -<<<<<<< HEAD - result.loc[:, "xBinStart"] = result["xBin"].apply(lambda x: x.left).astype("float") - result.loc[:, "xBinEnd"] = result["xBin"].apply(lambda x: x.right) - - result.loc[:, "yBinStart"] = result["yBin"].apply(lambda x: x.left).astype("float") - result.loc[:, "yBinEnd"] = result["yBin"].apply(lambda x: x.right) -======= result["xBinStart"] = result["xBin"].apply(lambda x: x.left).astype("float") result["xBinEnd"] = result["xBin"].apply(lambda x: x.right) result["yBinStart"] = result["yBin"].apply(lambda x: x.left).astype("float") result["yBinEnd"] = result["yBin"].apply(lambda x: x.right) ->>>>>>> af0043a3619eac15e962a4270f86f47affa5f126 vis._vis_data = result.drop(columns=["xBin", "yBin"]) @@ -407,9 +393,8 @@ def compute_data_type(self, ldf: LuxDataFrame): ldf.data_type_lookup[attr] = "id" else: ldf.data_type_lookup[attr] = "nominal" - elif is_datetime_series( - ldf.dtypes[attr] - ): # check if attribute is any type of 
datetime dtype + # check if attribute is any type of datetime dtype + elif is_datetime_series(ldf.dtypes[attr]): ldf.data_type_lookup[attr] = "temporal" else: ldf.data_type_lookup[attr] = "nominal" From 104c365ab3351be821638e9ada71a9073ea48f6a Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 14:11:41 -0800 Subject: [PATCH 15/39] interestingness --- lux/interestingness/interestingness.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lux/interestingness/interestingness.py b/lux/interestingness/interestingness.py index f70a658b..bc6fcbb3 100644 --- a/lux/interestingness/interestingness.py +++ b/lux/interestingness/interestingness.py @@ -213,9 +213,8 @@ def deviation_from_overall(vis: Vis, ldf: LuxDataFrame, filter_specs: list, msr_ import copy unfiltered_vis = copy.copy(vis) - unfiltered_vis._inferred_intent = utils.get_attrs_specs( - vis._inferred_intent - ) # Remove filters, keep only attribute intent + # Remove filters, keep only attribute intent + unfiltered_vis._inferred_intent = utils.get_attrs_specs(vis._inferred_intent) ldf.executor.execute([unfiltered_vis], ldf) v = unfiltered_vis.data[msr_attribute] From 41306c3d59ccae110e2f8dbae0e440380bf0d3f3 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 14:30:07 -0800 Subject: [PATCH 16/39] processor --- lux/processor/Compiler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py index aa02af6d..d5558f34 100644 --- a/lux/processor/Compiler.py +++ b/lux/processor/Compiler.py @@ -378,8 +378,9 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]): # remove the specified channel from auto_channel (matching by value, since channel key may not be same) for i in list(auto_channel.keys()): # need to ensure that the channel is the same (edge case when duplicate Cols with same attribute name) - if (auto_channel[i].attribute == sAttr[0].attribute) and ( - 
auto_channel[i].channel == sVal + if ( + auto_channel[i].attribute == sAttr[0].attribute + and auto_channel[i].channel == sVal ): auto_channel.pop(i) break From a466a08ca0118a464020f66c0f37c63a0a69b172 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 14:44:35 -0800 Subject: [PATCH 17/39] vislib --- lux/vislib/altair/AltairRenderer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lux/vislib/altair/AltairRenderer.py b/lux/vislib/altair/AltairRenderer.py index 1d10aeb0..110ea0c8 100644 --- a/lux/vislib/altair/AltairRenderer.py +++ b/lux/vislib/altair/AltairRenderer.py @@ -119,7 +119,6 @@ def create_vis(self, vis, standalone=True): f"pd.DataFrame({str(vis.data.to_dict())})", ) else: - chart.code = chart.code.replace( - "placeholder_variable", found_variable - ) # TODO: Placeholder (need to read dynamically via locals()) + # TODO: Placeholder (need to read dynamically via locals()) + chart.code = chart.code.replace("placeholder_variable", found_variable) return chart.code From a702ab1ad74e01b8016e4d5e426fc2d73c0cacf1 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 16 Nov 2020 02:28:44 -0800 Subject: [PATCH 18/39] tests, travis, CONTRIBUTING --- .travis.yml | 2 +- CONTRIBUTING.md | 2 +- tests/test_executor.py | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 98bde1cf..6dfca243 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ install: - pip install git+https://github.com/lux-org/lux-widget # command to run tests script: - - black --target-version py37 --check . + - black --target-version py37 --line-length 105 --check . 
- python -m pytest tests/*.py - pytest --cov-report term --cov=lux tests/ after_success: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ac05767b..a241410a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -46,7 +46,7 @@ python -m pytest tests/*.py ``` # Code Formatting -In order to keep our codebase clean and readible, we are using PEP8 guidelines. To help us maintain and check code style, we are using [black](https://github.com/psf/black). Simply run `black .` before commiting. Failure to do so may fail the tests run on Travis. This package should have been installed for you. +In order to keep our codebase clean and readible, we are using PEP8 guidelines. To help us maintain and check code style, we are using [black](https://github.com/psf/black). Simply run `black --line-length 105 .` before commiting. Failure to do so may fail the tests run on Travis. This package should have been installed for you. # Submitting a Pull Request diff --git a/tests/test_executor.py b/tests/test_executor.py index d4a05d01..268243f0 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -88,9 +88,8 @@ def test_colored_bar_chart(): color_cardinality = len(df.unique_values["Cylinders"]) group_by_cardinality = len(df.unique_values["Origin"]) assert len(new_vis.data.columns) == 3 - assert ( - len(new_vis.data) == 15 > group_by_cardinality < color_cardinality * group_by_cardinality - ) # Not color_cardinality*group_by_cardinality since some combinations have 0 values + # Not color_cardinality*group_by_cardinality since some combinations have 0 values + assert len(new_vis.data) == 15 > group_by_cardinality < color_cardinality * group_by_cardinality def test_colored_line_chart(): From eccb8e4d05256792cdd392f2875c8bbb1a3995de Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 16 Nov 2020 03:02:51 -0800 Subject: [PATCH 19/39] .format () changed --- lux/core/frame.py | 12 ++++----- lux/executor/SQLExecutor.py | 49 ++++++------------------------------- 2 files 
changed, 14 insertions(+), 47 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 080c0294..47748a77 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -364,8 +364,8 @@ def get_SQL_attributes(self): table_name = self.table_name[self.table_name.index(".") + 1 :] else: table_name = self.table_name - attr_query = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{}'".format( - table_name + attr_query = ( + f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'" ) attributes = list(pd.read_sql(attr_query, self.SQLconnection)["column_name"]) for attr in attributes: @@ -375,7 +375,7 @@ def get_SQL_cardinality(self): cardinality = {} for attr in list(self.columns): card_query = pd.read_sql( - "SELECT Count(Distinct({})) FROM {}".format(attr, self.table_name), + f"SELECT Count(Distinct({attr})) FROM {self.table_name}", self.SQLconnection, ) cardinality[attr] = list(card_query["count"])[0] @@ -385,7 +385,7 @@ def get_SQL_unique_values(self): unique_vals = {} for attr in list(self.columns): unique_query = pd.read_sql( - "SELECT Distinct({}) FROM {}".format(attr, self.table_name), + f"SELECT Distinct({attr}) FROM {self.table_name}", self.SQLconnection, ) unique_vals[attr] = list(unique_query[attr]) @@ -401,8 +401,8 @@ def compute_SQL_data_type(self): table_name = self.table_name # get the data types of the attributes in the SQL table for attr in list(self.columns): - datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format( - table_name, attr + datatype_query = ( + f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'", ) datatype = list(pd.read_sql(datatype_query, self.SQLconnection)["data_type"])[0] sql_dtypes[attr] = datatype diff --git a/lux/executor/SQLExecutor.py b/lux/executor/SQLExecutor.py index c3978975..05c608d1 100644 --- a/lux/executor/SQLExecutor.py +++ 
b/lux/executor/SQLExecutor.py @@ -60,18 +60,14 @@ def execute(vislist: VisList, ldf: LuxDataFrame): required_variables = ",".join(required_variables) row_count = list( pd.read_sql( - "SELECT COUNT(*) FROM {} {}".format(ldf.table_name, where_clause), + f"SELECT COUNT(*) FROM {ldf.table_name} {where_clause}", ldf.SQLconnection, )["count"] )[0] if row_count > 10000: - query = "SELECT {} FROM {} {} ORDER BY random() LIMIT 10000".format( - required_variables, ldf.table_name, where_clause - ) + query = f"SELECT {required_variables} FROM {ldf.table_name} {where_clause} ORDER BY random() LIMIT 10000" else: - query = "SELECT {} FROM {} {}".format( - required_variables, ldf.table_name, where_clause - ) + query = f"SELECT {required_variables} FROM {ldf.table_name} {where_clause}" data = pd.read_sql(query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(data) if vis.mark == "bar" or vis.mark == "line": @@ -100,13 +96,7 @@ def execute_aggregate(vis: Vis, ldf: LuxDataFrame): # barchart case, need count data for each group if measure_attr.attribute == "Record": where_clause, filterVars = SQLExecutor.execute_filter(vis) - count_query = "SELECT {}, COUNT({}) FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - groupby_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + count_query = f"SELECT {groupby_attr.attribute}, COUNT({groupby_attr.attribute}) FROM {ldf.table_name} {where_clause} GROUP BY {groupby_attr.attribute}" vis._vis_data = pd.read_sql(count_query, ldf.SQLconnection) vis._vis_data = vis.data.rename(columns={"count": "Record"}) vis._vis_data = utils.pandas_to_lux(vis.data) @@ -114,36 +104,15 @@ def execute_aggregate(vis: Vis, ldf: LuxDataFrame): else: where_clause, filterVars = SQLExecutor.execute_filter(vis) if agg_func == "mean": - mean_query = "SELECT {}, AVG({}) as {} FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - measure_attr.attribute, - measure_attr.attribute, - ldf.table_name, - where_clause, - 
groupby_attr.attribute, - ) + mean_query = f"SELECT {groupby_attr.attribute}, AVG({measure_attr.attribute}) as {measure_attr.attribute} FROM {ldf.table_name} {where_clause} GROUP BY {groupby_attr.attribute}" vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(vis.data) if agg_func == "sum": - mean_query = "SELECT {}, SUM({}) as {} FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - measure_attr.attribute, - measure_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + mean_query = f"SELECT {groupby_attr.attribute}, SUM({measure_attr.attribute}) as {measure_attr.attribute} FROM {ldf.table_name} {where_clause} GROUP BY {groupby_attr.attribute}" vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(vis.data) if agg_func == "max": - mean_query = "SELECT {}, MAX({}) as {} FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - measure_attr.attribute, - measure_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + mean_query = f"SELECT {groupby_attr.attribute}, MAX({measure_attr.attribute}) as {measure_attr.attribute} FROM {ldf.table_name} {where_clause} GROUP BY {groupby_attr.attribute}" vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(vis.data) @@ -181,9 +150,7 @@ def execute_binning(vis: Vis, ldf: LuxDataFrame): upper_edges.append(str(curr_edge)) upper_edges = ",".join(upper_edges) vis_filter, filter_vars = SQLExecutor.execute_filter(vis) - bin_count_query = "SELECT width_bucket, COUNT(width_bucket) FROM (SELECT width_bucket({}, '{}') FROM {}) as Buckets GROUP BY width_bucket ORDER BY width_bucket".format( - bin_attribute.attribute, "{" + upper_edges + "}", ldf.table_name - ) + bin_count_query = f"SELECT width_bucket, COUNT(width_bucket) FROM (SELECT width_bucket({bin_attribute.attribute}, '{{{upper_edges}}}') FROM {ldf.table_name}) as Buckets GROUP BY width_bucket ORDER BY 
width_bucket" bin_count_data = pd.read_sql(bin_count_query, ldf.SQLconnection) # counts,binEdges = np.histogram(ldf[bin_attribute.attribute],bins=bin_attribute.bin_size) From 15963436414682b0638879743398c46d15f38a6e Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 16 Nov 2020 03:10:49 -0800 Subject: [PATCH 20/39] replace tabs with escape chars --- lux/vislib/altair/AltairChart.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lux/vislib/altair/AltairChart.py b/lux/vislib/altair/AltairChart.py index f0ccb869..a069efeb 100644 --- a/lux/vislib/altair/AltairChart.py +++ b/lux/vislib/altair/AltairChart.py @@ -74,10 +74,10 @@ def apply_default_config(self): ) self.code += "chart = chart.configure_axis(titleFontWeight=500,titleFontSize=11,titleFont='Helvetica Neue',\n" self.code += ( - " labelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue',labelColor='#505050')\n" + "\t\t\t\t\tlabelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue',labelColor='#505050')\n" ) self.code += "chart = chart.configure_legend(titleFontWeight=500,titleFontSize=10,titleFont='Helvetica Neue',\n" - self.code += " labelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue')\n" + self.code += "\t\t\t\t\tlabelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue')\n" self.code += "chart = chart.properties(width=160,height=150)\n" def encode_color(self): From 8c3b2c16d8c5e2b055a6da4444592bb24007b24f Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 16 Nov 2020 03:26:06 -0800 Subject: [PATCH 21/39] update using black --- lux/vislib/altair/AltairChart.py | 4 +--- tests/test_type.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/lux/vislib/altair/AltairChart.py b/lux/vislib/altair/AltairChart.py index a069efeb..de4830f7 100644 --- a/lux/vislib/altair/AltairChart.py +++ b/lux/vislib/altair/AltairChart.py @@ -73,9 +73,7 @@ def apply_default_config(self): "\nchart = 
chart.configure_title(fontWeight=500,fontSize=13,font='Helvetica Neue')\n" ) self.code += "chart = chart.configure_axis(titleFontWeight=500,titleFontSize=11,titleFont='Helvetica Neue',\n" - self.code += ( - "\t\t\t\t\tlabelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue',labelColor='#505050')\n" - ) + self.code += "\t\t\t\t\tlabelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue',labelColor='#505050')\n" self.code += "chart = chart.configure_legend(titleFontWeight=500,titleFontSize=10,titleFont='Helvetica Neue',\n" self.code += "\t\t\t\t\tlabelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue')\n" self.code += "chart = chart.properties(width=160,height=150)\n" diff --git a/tests/test_type.py b/tests/test_type.py index 1937b26f..aa1b3b53 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -127,9 +127,7 @@ def test_check_datetime(): def test_check_stock(): - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/stocks.csv?raw=true" - ) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/stocks.csv?raw=true") df.maintain_metadata() assert df.data_type_lookup == { "symbol": "nominal", From b468b07f4cd95fdac37199c7df59cdb08ff0c482 Mon Sep 17 00:00:00 2001 From: Doris Lee Date: Mon, 16 Nov 2020 21:00:30 +0800 Subject: [PATCH 22/39] more rewrites and merges into single line --- lux/core/frame.py | 12 ++++-------- lux/executor/PandasExecutor.py | 11 +++++------ lux/vislib/altair/BarChart.py | 2 +- tests/test_action.py | 3 +-- tests/test_performance.py | 3 +-- tests/test_vis.py | 15 +++++---------- 6 files changed, 17 insertions(+), 29 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 47748a77..3c6b3977 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -364,10 +364,8 @@ def get_SQL_attributes(self): table_name = self.table_name[self.table_name.index(".") + 1 :] else: table_name = self.table_name - attr_query = ( - f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS 
where TABLE_NAME = '{table_name}'" - ) - attributes = list(pd.read_sql(attr_query, self.SQLconnection)["column_name"]) + query = f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'" + attributes = list(pd.read_sql(query, self.SQLconnection)["column_name"]) for attr in attributes: self[attr] = None @@ -401,10 +399,8 @@ def compute_SQL_data_type(self): table_name = self.table_name # get the data types of the attributes in the SQL table for attr in list(self.columns): - datatype_query = ( - f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'", - ) - datatype = list(pd.read_sql(datatype_query, self.SQLconnection)["data_type"])[0] + query = f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'" + datatype = list(pd.read_sql(query, self.SQLconnection)["data_type"])[0] sql_dtypes[attr] = datatype data_type = {"quantitative": [], "nominal": [], "temporal": []} diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index 6a41379d..a73e607b 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -246,18 +246,17 @@ def execute_binning(vis: Vis): import numpy as np bin_attribute = list(filter(lambda x: x.bin_size != 0, vis._inferred_intent))[0] - if not np.isnan(vis.data[bin_attribute.attribute]).all(): + bin_attr = bin_attribute.attribute + if not np.isnan(vis.data[bin_attr]).all(): # np.histogram breaks if array contain NaN - series = vis.data[bin_attribute.attribute].dropna() + series = vis.data[bin_attr].dropna() # TODO:binning runs for name attribte. Name attribute has datatype quantitative which is wrong. 
counts, bin_edges = np.histogram(series, bins=bin_attribute.bin_size) # bin_edges of size N+1, so need to compute bin_center as the bin location bin_center = np.mean(np.vstack([bin_edges[0:-1], bin_edges[1:]]), axis=0) # TODO: Should vis.data be a LuxDataFrame or a Pandas DataFrame? - vis._vis_data = pd.DataFrame( - np.array([bin_center, counts]).T, - columns=[bin_attribute.attribute, "Number of Records"], - ) + binned_result = np.array([bin_center, counts]).T + vis._vis_data = pd.DataFrame(binned_result, columns=[bin_attr, "Number of Records"]) @staticmethod def execute_filter(vis: Vis): diff --git a/lux/vislib/altair/BarChart.py b/lux/vislib/altair/BarChart.py index 0550e590..99e9b1fd 100644 --- a/lux/vislib/altair/BarChart.py +++ b/lux/vislib/altair/BarChart.py @@ -67,8 +67,8 @@ def initialize_chart(self): type=x_attr.data_type, axis=alt.Axis(labelOverlap=True), ) - y_attr_field = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title) x_attr_field_code = f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', axis=alt.Axis(labelOverlap=True))" + y_attr_field = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title) y_attr_field_code = ( f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')" ) diff --git a/tests/test_action.py b/tests/test_action.py index 5775c614..44337181 100644 --- a/tests/test_action.py +++ b/tests/test_action.py @@ -20,8 +20,7 @@ def test_vary_filter_val(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vis = Vis(["Height", "SportType=Ball"], df) df.set_intent_as_vis(vis) df._repr_html_() diff --git a/tests/test_performance.py b/tests/test_performance.py index a30b4cd2..66a9bd6b 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -20,8 +20,7 @@ # To run the script and see the printed result, run: # python -m pytest 
-s tests/test_performance.py def test_q1_performance_census(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/census.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/census.csv?raw=true") tic = time.perf_counter() df._repr_html_() toc = time.perf_counter() diff --git a/tests/test_vis.py b/tests/test_vis.py index bf1879fd..122c1e3c 100644 --- a/tests/test_vis.py +++ b/tests/test_vis.py @@ -20,24 +20,21 @@ def test_vis(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vis = Vis(["Height", "SportType=Ball"], df) assert vis.get_attr_by_attr_name("Height")[0].bin_size != 0 assert vis.get_attr_by_attr_name("Record")[0].aggregation == "count" def test_vis_set_specs(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vis = Vis(["Height", "SportType=Ball"], df) vis.set_intent(["Height", "SportType=Ice"]) assert vis.get_attr_by_attr_name("SportType")[0].value == "Ice" def test_vis_collection(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vlist = VisList(["Height", "SportType=Ball", "?"], df) vis_with_year = list(filter(lambda x: x.get_attr_by_attr_name("Year") != [], vlist))[0] assert vis_with_year.get_attr_by_channel("x")[0].attribute == "Year" @@ -48,8 +45,7 @@ def test_vis_collection(): def test_vis_collection_set_intent(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = 
pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vlist = VisList(["Height", "SportType=Ice", "?"], df) vlist.set_intent(["Height", "SportType=Boat", "?"]) for v in vlist._collection: @@ -166,8 +162,7 @@ def test_vis_to_Altair_standalone(): def test_vis_list_custom_title_override(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") df["Year"] = pd.to_datetime(df["Year"], format="%Y") vcLst = [] From 11dedf7069953606fc5dbbe44e6418e35c4a7157 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 28 Nov 2020 12:15:34 -0800 Subject: [PATCH 23/39] update pyproject.toml and makefile --- Makefile | 1 + lux/action/filter.py | 4 ---- lux/core/frame.py | 4 ---- lux/processor/Compiler.py | 17 ----------------- lux/processor/Validator.py | 14 -------------- lux/vis/Vis.py | 6 ------ lux/vislib/altair/AltairRenderer.py | 5 ----- pyproject.toml | 2 ++ 8 files changed, 3 insertions(+), 50 deletions(-) create mode 100644 pyproject.toml diff --git a/Makefile b/Makefile index d7ee4e14..b3cb1f08 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ init: pip install -r requirements.txt test: + black --check . 
python -m pytest tests/ .PHONY: init test \ No newline at end of file diff --git a/lux/action/filter.py b/lux/action/filter.py index d1cf46b7..dde432fc 100644 --- a/lux/action/filter.py +++ b/lux/action/filter.py @@ -108,11 +108,7 @@ def get_complementary_ops(fltr_op): unique_values = ldf.unique_values[cat] for val in unique_values: new_spec = column_spec.copy() -<<<<<<< HEAD - new_filter = lux.Clause(attribute=cat, filter_op="=", value=unique_values[i]) -======= new_filter = lux.Clause(attribute=cat, filter_op="=", value=val) ->>>>>>> 8149e7222f218e100b79d114a81d27ccda129784 new_spec.append(new_filter) temp_vis = Vis(new_spec) output.append(temp_vis) diff --git a/lux/core/frame.py b/lux/core/frame.py index 0770e17f..1abb03ba 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -603,11 +603,7 @@ def set_intent_on_click(self, change): from lux.processor.Compiler import Compiler intent_action = list(self._widget.selectedIntentIndex.keys())[0] -<<<<<<< HEAD - vis = self.recommendation[intent_action][self._widget.selectedIntentIndex[intent_action][0]] -======= vis = self._recommendation[intent_action][self._widget.selectedIntentIndex[intent_action][0]] ->>>>>>> 8149e7222f218e100b79d114a81d27ccda129784 self.set_intent_as_vis(vis) self.maintain_metadata() diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py index ddbe7654..b07f52ce 100644 --- a/lux/processor/Compiler.py +++ b/lux/processor/Compiler.py @@ -53,20 +53,11 @@ def compile_vis(ldf: LuxDataFrame, vis: Vis) -> Vis: """ if vis: # autofill data type/model information -<<<<<<< HEAD - vis_collection = Compiler.populate_data_type_model(ldf, [vis]) - # remove invalid visualizations from collection - vis_collection = Compiler.remove_all_invalid(vis_collection) - for vis in vis_collection: - # autofill viz related information - Compiler.determine_encoding(ldf, vis) -======= Compiler.populate_data_type_model(ldf, [vis]) # remove invalid visualizations from collection Compiler.remove_all_invalid([vis]) # 
autofill viz related information Compiler.determine_encoding(ldf, vis) ->>>>>>> 8149e7222f218e100b79d114a81d27ccda129784 ldf._compiled = True return vis @@ -93,11 +84,7 @@ def compile_intent(ldf: LuxDataFrame, _inferred_intent: List[Clause]) -> VisList if _inferred_intent: vis_collection = Compiler.enumerate_collection(_inferred_intent, ldf) # autofill data type/model information -<<<<<<< HEAD - vis_collection = Compiler.populate_data_type_model(ldf, vis_collection) -======= Compiler.populate_data_type_model(ldf, vis_collection) ->>>>>>> 8149e7222f218e100b79d114a81d27ccda129784 # remove invalid visualizations from collection if len(vis_collection) >= 1: vis_collection = Compiler.remove_all_invalid(vis_collection) @@ -192,10 +179,6 @@ def populate_data_type_model(ldf, vlist): else: chart_title = clause.value vis.title = f"{clause.attribute} {clause.filter_op} {chart_title}" -<<<<<<< HEAD - return vlist -======= ->>>>>>> 8149e7222f218e100b79d114a81d27ccda129784 @staticmethod def remove_all_invalid(vis_collection: VisList) -> VisList: diff --git a/lux/processor/Validator.py b/lux/processor/Validator.py index ba93b102..9be2f5de 100644 --- a/lux/processor/Validator.py +++ b/lux/processor/Validator.py @@ -70,11 +70,6 @@ def validate_clause(clause): # we don't value check datetime since datetime can take filter values that don't exactly match the exact TimeStamp representation if clause.attribute and not is_datetime_string(clause.attribute): if not clause.attribute in list(ldf.columns): -<<<<<<< HEAD - warnings.warn( - f"The input attribute '{clause.attribute}' does not exist in the DataFrame." - ) -======= search_val = clause.attribute match_attr = False for attr, val_list in ldf.unique_values.items(): @@ -84,7 +79,6 @@ def validate_clause(clause): warn_msg = f"\n- The input '{search_val}' looks like a value that belongs to the '{match_attr}' attribute. \n Please specify the value fully, as something like {match_attr}={search_val}." 
else: warn_msg = f"\n- The input attribute '{clause.attribute}' does not exist in the DataFrame. \n Please check your input intent for typos." ->>>>>>> 8149e7222f218e100b79d114a81d27ccda129784 if clause.value and clause.attribute and clause.filter_op == "=": series = ldf[clause.attribute] if not is_datetime_series(series): @@ -93,17 +87,9 @@ def validate_clause(clause): else: vals = [clause.value] for val in vals: -<<<<<<< HEAD - # (not series.str.contains(val).any()): - if val not in series.values: - warnings.warn( - f"The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame." - ) -======= if val not in series.values: warn_msg = f"\n- The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame." return warn_msg ->>>>>>> 8149e7222f218e100b79d114a81d27ccda129784 warn_msg = "" for clause in intent: diff --git a/lux/vis/Vis.py b/lux/vis/Vis.py index 226056ac..3b747925 100644 --- a/lux/vis/Vis.py +++ b/lux/vis/Vis.py @@ -303,12 +303,6 @@ def refresh_source(self, ldf): # -> Vis: from lux.processor.Parser import Parser from lux.processor.Validator import Validator from lux.processor.Compiler import Compiler -<<<<<<< HEAD - from lux.executor.PandasExecutor import PandasExecutor - - # TODO: temporary (generalize to executor) -======= ->>>>>>> 8149e7222f218e100b79d114a81d27ccda129784 self.check_not_vislist_intent() diff --git a/lux/vislib/altair/AltairRenderer.py b/lux/vislib/altair/AltairRenderer.py index f8937e24..701f3c0e 100644 --- a/lux/vislib/altair/AltairRenderer.py +++ b/lux/vislib/altair/AltairRenderer.py @@ -92,15 +92,10 @@ def create_vis(self, vis, standalone=True): elif self.output_type == "Altair": import inspect -<<<<<<< HEAD - if vis.plot_config: - chart.code += "\n".join(inspect.getsource(vis.plot_config).split("\n ")[1:-1]) -======= if lux.config.plot_config: chart.code += "\n".join( inspect.getsource(lux.config.plot_config).split("\n ")[1:-1] ) ->>>>>>> 
8149e7222f218e100b79d114a81d27ccda129784 chart.code += "\nchart" chart.code = chart.code.replace("\n\t\t", "\n") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..f8f8a67b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.black] +line-length = 105 \ No newline at end of file From 2cef000db6fc0518d8957aabd1e7624963e7bbcc Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Tue, 22 Dec 2020 20:55:29 -0800 Subject: [PATCH 24/39] coalesce data_types into data_type_lookup --- lux/core/frame.py | 41 +++++++++++------------ lux/core/series.py | 3 -- lux/executor/Executor.py | 8 +++-- lux/executor/PandasExecutor.py | 12 ------- lux/processor/Compiler.py | 11 +++++-- lux/utils/date_utils.py | 6 ++-- tests/test_dates.py | 3 +- tests/test_pandas_coverage.py | 59 ++++++++++++++++++++-------------- tests/test_type.py | 4 ++- 9 files changed, 74 insertions(+), 73 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 7eab2ed1..1cce2baf 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -21,6 +21,7 @@ from lux.utils.message import Message from lux.utils.utils import check_import_lux_widget from typing import Dict, Union, List, Callable +from lux.executor.Executor import * import warnings import traceback import lux @@ -36,9 +37,6 @@ class LuxDataFrame(pd.DataFrame): "_intent", "_inferred_intent", "data_type_lookup", - "data_type", - "data_model_lookup", - "data_model", "unique_values", "cardinality", "_rec_info", @@ -78,9 +76,6 @@ def __init__(self, *args, **kw): self._pandas_only = False # Metadata self.data_type_lookup = None - self.data_type = None - self.data_model_lookup = None - self.data_model = None self.unique_values = None self.cardinality = None self._min_max = None @@ -127,14 +122,25 @@ def expire_metadata(self): # Set metadata as null self._metadata_fresh = False self.data_type_lookup = None - self.data_type = None - self.data_model_lookup = None - self.data_model = None self.unique_values = None 
self.cardinality = None self._min_max = None self.pre_aggregated = None + def compute_data_type_from_lookup(self): + return Executor.mapping(Executor, self.data_type_lookup) + + def compute_data_model(self): + data_type = self.compute_data_type_from_lookup() + data_model = { + "measure": data_type["quantitative"], + "dimension": data_type["nominal"] + data_type["temporal"] + data_type["id"], + } + return data_model + + def compute_data_model_lookup(self): + return Executor.reverseMapping(Executor, self.compute_data_model()) + ##################### ## Override Pandas ## ##################### @@ -295,14 +301,10 @@ def compute_SQL_dataset_metadata(self): for attr in list(self.columns): self[attr] = None self.data_type_lookup = {} - self.data_type = {} #####NOTE: since we aren't expecting users to do much data processing with the SQL database, should we just keep this ##### in the initialization and do it just once self.compute_SQL_data_type() self.compute_SQL_stats() - self.data_model_lookup = {} - self.data_model = {} - self.compute_data_model() def compute_SQL_stats(self): # precompute statistics @@ -362,11 +364,9 @@ def compute_SQL_data_type(self): datatype = list(pd.read_sql(query, lux.config.SQLconnection)["data_type"])[0] sql_dtypes[attr] = datatype - data_type = {"quantitative": [], "nominal": [], "temporal": []} for attr in list(self.columns): if str(attr).lower() in ["month", "year"]: data_type_lookup[attr] = "temporal" - data_type["temporal"].append(attr) elif sql_dtypes[attr] in [ "character", "character varying", @@ -375,7 +375,6 @@ def compute_SQL_data_type(self): "text", ]: data_type_lookup[attr] = "nominal" - data_type["nominal"].append(attr) elif sql_dtypes[attr] in [ "integer", "real", @@ -385,15 +384,11 @@ def compute_SQL_data_type(self): ]: if self.cardinality[attr] < 13: data_type_lookup[attr] = "nominal" - data_type["nominal"].append(attr) else: data_type_lookup[attr] = "quantitative" - data_type["quantitative"].append(attr) elif "time" in 
sql_dtypes[attr] or "date" in sql_dtypes[attr]: data_type_lookup[attr] = "temporal" - data_type["temporal"].append(attr) self.data_type_lookup = data_type_lookup - self.data_type = data_type def _append_rec(self, rec_infolist, recommendations: Dict): if recommendations["collection"] is not None and len(recommendations["collection"]) > 0: @@ -419,8 +414,9 @@ def maintain_recs(self): rec_df._message = Message() # Add warning message if there exist ID fields id_fields_str = "" - if len(rec_df.data_type["id"]) > 0: - for id_field in rec_df.data_type["id"]: + data_type = rec_df.compute_data_type_from_lookup() + if len(data_type["id"]) > 0: + for id_field in data_type["id"]: id_fields_str += f"{id_field}, " id_fields_str = id_fields_str[:-2] rec_df._message.add(f"{id_fields_str} is not visualized since it resembles an ID field.") @@ -877,7 +873,6 @@ def save_as_html(self, filename: str = "export.html") -> None: with open(filename, "w") as fp: fp.write(rendered_template) print(f"Saved HTML to {filename}") - # Overridden Pandas Functions def head(self, n: int = 5): self._prev = self diff --git a/lux/core/series.py b/lux/core/series.py index 44c05bf7..62aa1c35 100644 --- a/lux/core/series.py +++ b/lux/core/series.py @@ -21,9 +21,6 @@ class LuxSeries(pd.Series): _metadata = [ "_intent", "data_type_lookup", - "data_type", - "data_model_lookup", - "data_model", "unique_values", "cardinality", "_rec_info", diff --git a/lux/executor/Executor.py b/lux/executor/Executor.py index 972f6fb6..0564a61f 100644 --- a/lux/executor/Executor.py +++ b/lux/executor/Executor.py @@ -51,16 +51,18 @@ def compute_stats(self): def compute_data_type(self): return NotImplemented - @staticmethod - def compute_data_model(self): - return NotImplemented + # @staticmethod + # def compute_data_model(self): + # return NotImplemented + @staticmethod def mapping(self, rmap): group_map = {} for val in ["quantitative", "id", "nominal", "temporal"]: group_map[val] = list(filter(lambda x: rmap[x] == val, rmap)) 
return group_map + @staticmethod def reverseMapping(self, map): reverse_map = {} for valKey in map: diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index e0c10a90..36291798 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -360,11 +360,7 @@ def execute_2D_binning(vis: Vis): ####################################################### def compute_dataset_metadata(self, ldf: LuxDataFrame): ldf.data_type_lookup = {} - ldf.data_type = {} self.compute_data_type(ldf) - ldf.data_model_lookup = {} - ldf.data_model = {} - self.compute_data_model(ldf) def compute_data_type(self, ldf: LuxDataFrame): from pandas.api.types import is_datetime64_any_dtype as is_datetime @@ -407,7 +403,6 @@ def compute_data_type(self, ldf: LuxDataFrame): # if self.cardinality[attr]>50: if ldf.index.dtype != "int64" and ldf.index.name: ldf.data_type_lookup[ldf.index.name] = "nominal" - ldf.data_type = self.mapping(ldf.data_type_lookup) non_datetime_attrs = [] for attr in ldf.columns: @@ -455,13 +450,6 @@ def _is_datetime_string(self, series): return True return False - def compute_data_model(self, ldf: LuxDataFrame): - ldf.data_model = { - "measure": ldf.data_type["quantitative"], - "dimension": ldf.data_type["nominal"] + ldf.data_type["temporal"] + ldf.data_type["id"], - } - ldf.data_model_lookup = self.reverseMapping(ldf.data_model) - def compute_stats(self, ldf: LuxDataFrame): # precompute statistics ldf.unique_values = {} diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py index b07f52ce..69b1c749 100644 --- a/lux/processor/Compiler.py +++ b/lux/processor/Compiler.py @@ -158,6 +158,8 @@ def populate_data_type_model(ldf, vlist): # TODO: copy might not be neccesary from lux.utils.date_utils import is_datetime_string + data_model_lookup = ldf.compute_data_model_lookup() + for vis in vlist: for clause in vis._inferred_intent: if clause.description == "?": @@ -170,7 +172,7 @@ def populate_data_type_model(ldf, vlist): if 
clause.data_type == "id": clause.data_type = "nominal" if clause.data_model == "": - clause.data_model = ldf.data_model_lookup[clause.attribute] + clause.data_model = data_model_lookup[clause.attribute] if clause.value != "": # If user provided title for Vis, then don't override. if vis.title == "": @@ -427,6 +429,9 @@ def populate_wildcard_options(_inferred_intent: List[Clause], ldf: LuxDataFrame) import copy from lux.utils.utils import convert_to_list + data_type = ldf.compute_data_type_from_lookup() + data_model = ldf.compute_data_model() + intent = {"attributes": [], "filters": []} for clause in _inferred_intent: spec_options = [] @@ -434,9 +439,9 @@ def populate_wildcard_options(_inferred_intent: List[Clause], ldf: LuxDataFrame) if clause.attribute == "?": options = set(list(ldf.columns)) # all attributes if clause.data_type != "": - options = options.intersection(set(ldf.data_type[clause.data_type])) + options = options.intersection(set(data_type[clause.data_type])) if clause.data_model != "": - options = options.intersection(set(ldf.data_model[clause.data_model])) + options = options.intersection(set(data_model[clause.data_model])) options = list(options) else: options = convert_to_list(clause.attribute) diff --git a/lux/utils/date_utils.py b/lux/utils/date_utils.py index d3ed03ae..0428bec7 100644 --- a/lux/utils/date_utils.py +++ b/lux/utils/date_utils.py @@ -38,10 +38,12 @@ def date_formatter(time_stamp, ldf): date_str: str A reformatted version of the time_stamp according to granularity """ + data_type = ldf.compute_data_type_from_lookup() + # TODO: method for data_type_lookup to data_type datetime = pd.to_datetime(time_stamp) - if ldf.data_type["temporal"]: + if data_type["temporal"]: # assumes only one temporal column, may need to change this function to recieve multiple temporal columns in the future - date_column = ldf[ldf.data_type["temporal"][0]] + date_column = ldf[data_type["temporal"][0]] granularity = compute_date_granularity(date_column) 
date_str = "" diff --git a/tests/test_dates.py b/tests/test_dates.py index 6d4c6407..55a9a36c 100644 --- a/tests/test_dates.py +++ b/tests/test_dates.py @@ -104,7 +104,8 @@ def test_refresh_inplace(): df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d") df.maintain_metadata() - assert df.data_type["temporal"][0] == "date" + data_type = df.compute_data_type_from_lookup() + assert data_type["temporal"][0] == "date" vis.refresh_source(df) assert vis.mark == "line" diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index f5977da5..e8105a13 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -45,19 +45,28 @@ def test_rename_inplace(global_var): assert df.data_type_lookup["Name"] == new_df.data_type_lookup["Car Name"] - assert df.data_type != new_df.data_type + data_type = df.compute_data_type_from_lookup() + new_data_type = new_df.compute_data_type_from_lookup() - assert df.data_type["nominal"][0] == "Name" - assert new_df.data_type["nominal"][0] == "Car Name" + assert data_type != new_data_type - assert df.data_model_lookup != new_df.data_model_lookup + assert data_type["nominal"][0] == "Name" + assert new_data_type["nominal"][0] == "Car Name" - assert df.data_model_lookup["Name"] == new_df.data_model_lookup["Car Name"] + data_model_lookup = df.compute_data_model_lookup() + new_data_model_lookup = new_df.compute_data_model_lookup() - assert df.data_model != new_df.data_model + assert data_model_lookup != new_data_model_lookup - assert df.data_model["dimension"][0] == "Name" - assert new_df.data_model["dimension"][0] == "Car Name" + assert data_model_lookup["Name"] == new_data_model_lookup["Car Name"] + + data_model = df.compute_data_model() + new_data_model = new_df.compute_data_model() + + assert data_model != new_data_model + + assert data_model["dimension"][0] == "Name" + assert new_data_model["dimension"][0] == "Car Name" assert list(df.unique_values.values()) == list(new_df.unique_values.values()) assert 
list(df.cardinality.values()) == list(new_df.cardinality.values()) @@ -75,19 +84,28 @@ def test_rename(global_var): assert df.data_type_lookup["Name"] == new_df.data_type_lookup["Car Name"] - assert df.data_type != new_df.data_type + data_type = df.compute_data_type_from_lookup() + new_data_type = new_df.compute_data_type_from_lookup() + + assert data_type != new_data_type + + assert data_type["nominal"][0] == "Name" + assert new_data_type["nominal"][0] == "Car Name" + + data_model_lookup = df.compute_data_model_lookup() + new_data_model_lookup = new_df.compute_data_model_lookup() - assert df.data_type["nominal"][0] == "Name" - assert new_df.data_type["nominal"][0] == "Car Name" + assert data_model_lookup != new_data_model_lookup - assert df.data_model_lookup != new_df.data_model_lookup + assert data_model_lookup["Name"] == new_data_model_lookup["Car Name"] - assert df.data_model_lookup["Name"] == new_df.data_model_lookup["Car Name"] + data_model = df.compute_data_model() + new_data_model = new_df.compute_data_model() - assert df.data_model != new_df.data_model + assert data_model != new_data_model - assert df.data_model["dimension"][0] == "Name" - assert new_df.data_model["dimension"][0] == "Car Name" + assert data_model["dimension"][0] == "Name" + assert new_data_model["dimension"][0] == "Car Name" assert list(df.unique_values.values()) == list(new_df.unique_values.values()) assert list(df.cardinality.values()) == list(new_df.cardinality.values()) @@ -503,9 +521,6 @@ def test_df_to_series(global_var): assert df["Weight"]._metadata == [ "_intent", "data_type_lookup", - "data_type", - "data_model_lookup", - "data_model", "unique_values", "cardinality", "_rec_info", @@ -534,9 +549,6 @@ def test_value_counts(global_var): assert df["Weight"]._metadata == [ "_intent", "data_type_lookup", - "data_type", - "data_model_lookup", - "data_model", "unique_values", "cardinality", "_rec_info", @@ -564,9 +576,6 @@ def test_str_replace(global_var): assert df["Brand"]._metadata == 
[ "_intent", "data_type_lookup", - "data_type", - "data_model_lookup", - "data_model", "unique_values", "cardinality", "_rec_info", diff --git a/tests/test_type.py b/tests/test_type.py index 4c53656a..96363915 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -37,7 +37,9 @@ def test_check_int_id(): "https://github.com/lux-org/lux-datasets/blob/master/data/instacart_sample.csv?raw=true" ) df._repr_html_() - assert len(df.data_type["id"]) == 3 + data_type = df.compute_data_type_from_lookup() + assert len(data_type["id"]) == 3 + # assert len(df.data_type["id"]) == 3 assert ( "order_id, product_id, user_id is not visualized since it resembles an ID field." in df._message.to_html() From 11b1ae8af17c468fe8ef878d0c46158c89aafdd6 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Tue, 22 Dec 2020 21:27:24 -0800 Subject: [PATCH 25/39] black reformat --- lux/core/frame.py | 3 ++- tests/test_nan.py | 6 ++++-- tests/test_type.py | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 1cce2baf..e0720fbd 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -129,7 +129,7 @@ def expire_metadata(self): def compute_data_type_from_lookup(self): return Executor.mapping(Executor, self.data_type_lookup) - + def compute_data_model(self): data_type = self.compute_data_type_from_lookup() data_model = { @@ -873,6 +873,7 @@ def save_as_html(self, filename: str = "export.html") -> None: with open(filename, "w") as fp: fp.write(rendered_template) print(f"Saved HTML to {filename}") + # Overridden Pandas Functions def head(self, n: int = 5): self._prev = self diff --git a/tests/test_nan.py b/tests/test_nan.py index 96918af0..16f950eb 100644 --- a/tests/test_nan.py +++ b/tests/test_nan.py @@ -46,14 +46,16 @@ def test_nan_data_type_detection(): ] test = pd.DataFrame(dataset) test.maintain_metadata() - assert test.data_type["nominal"] == [ + data_type = test.compute_data_type_from_lookup() + assert data_type["nominal"] 
== [ "fully_nan", "some_nan", "some_nan2", ], "Categorical columns containing NaNs should be treated as nominal data type" nona_test = test.dropna(subset=["some_nan"]) nona_test.maintain_metadata() - assert nona_test.data_type["nominal"] == [ + data_type = nona_test.compute_data_type_from_lookup() + assert data_type["nominal"] == [ "fully_nan", "some_nan", "some_nan2", diff --git a/tests/test_type.py b/tests/test_type.py index 380920c8..b104e932 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -178,7 +178,8 @@ def test_float_categorical(): ] df = pd.DataFrame(values) df.maintain_metadata() - assert df.data_type["nominal"] == [ + data_type = df.compute_data_type_from_lookup() + assert data_type["nominal"] == [ "A", "B", "C", From 9bd4c2386a22d7f6f8490669a6d661ac8aa1a4dd Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Thu, 31 Dec 2020 19:50:46 -0800 Subject: [PATCH 26/39] changed to better variable names --- lux/action/filter.py | 4 ++-- lux/action/univariate.py | 4 +--- lux/core/frame.py | 42 ++++++++++++++++++---------------- lux/core/series.py | 2 +- lux/executor/PandasExecutor.py | 34 +++++++++++++-------------- lux/processor/Compiler.py | 6 ++--- lux/utils/date_utils.py | 6 ++--- tests/test_dates.py | 6 ++--- tests/test_nan.py | 8 +++---- tests/test_pandas_coverage.py | 42 +++++++++++++++++----------------- tests/test_performance.py | 2 +- tests/test_type.py | 37 +++++++++++++++--------------- 12 files changed, 96 insertions(+), 97 deletions(-) diff --git a/lux/action/filter.py b/lux/action/filter.py index 70d85e0e..e8833b0f 100644 --- a/lux/action/filter.py +++ b/lux/action/filter.py @@ -45,7 +45,7 @@ def filter(ldf): # get unique values for all categorical values specified and creates corresponding filters fltr = filters[0] - if ldf.data_type_lookup[fltr.attribute] == "nominal": + if ldf.data_type[fltr.attribute] == "nominal": recommendation = { "action": "Filter", "description": f"Changing the

    {fltr.attribute}

    filter to an alternative value.", @@ -60,7 +60,7 @@ def filter(ldf): new_spec.append(new_filter) temp_vis = Vis(new_spec) output.append(temp_vis) - elif ldf.data_type_lookup[fltr.attribute] == "quantitative": + elif ldf.data_type[fltr.attribute] == "quantitative": recommendation = { "action": "Filter", "description": f"Changing the

    {fltr.attribute}

    filter to an alternative inequality operation.", diff --git a/lux/action/univariate.py b/lux/action/univariate.py index 8f8cd1ac..030a6f03 100644 --- a/lux/action/univariate.py +++ b/lux/action/univariate.py @@ -48,9 +48,7 @@ def univariate(ldf, *args): possible_attributes = [ c for c in ldf.columns - if ldf.data_type_lookup[c] == "quantitative" - and ldf.cardinality[c] > 5 - and c != "Number of Records" + if ldf.data_type[c] == "quantitative" and ldf.cardinality[c] > 5 and c != "Number of Records" ] intent = [lux.Clause(possible_attributes)] intent.extend(filter_specs) diff --git a/lux/core/frame.py b/lux/core/frame.py index e0720fbd..bffd077e 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -36,7 +36,7 @@ class LuxDataFrame(pd.DataFrame): _metadata = [ "_intent", "_inferred_intent", - "data_type_lookup", + "data_type", "unique_values", "cardinality", "_rec_info", @@ -75,7 +75,7 @@ def __init__(self, *args, **kw): self._message = Message() self._pandas_only = False # Metadata - self.data_type_lookup = None + self.data_type = None self.unique_values = None self.cardinality = None self._min_max = None @@ -121,20 +121,22 @@ def expire_recs(self): def expire_metadata(self): # Set metadata as null self._metadata_fresh = False - self.data_type_lookup = None + self.data_type = None self.unique_values = None self.cardinality = None self._min_max = None self.pre_aggregated = None - def compute_data_type_from_lookup(self): - return Executor.mapping(Executor, self.data_type_lookup) + def invert_data_type(self): + return Executor.mapping(Executor, self.data_type) def compute_data_model(self): - data_type = self.compute_data_type_from_lookup() + data_type_inverted = self.invert_data_type() data_model = { - "measure": data_type["quantitative"], - "dimension": data_type["nominal"] + data_type["temporal"] + data_type["id"], + "measure": data_type_inverted["quantitative"], + "dimension": data_type_inverted["nominal"] + + data_type_inverted["temporal"] + + 
data_type_inverted["id"], } return data_model @@ -300,7 +302,7 @@ def compute_SQL_dataset_metadata(self): self.get_SQL_attributes() for attr in list(self.columns): self[attr] = None - self.data_type_lookup = {} + self.data_type = {} #####NOTE: since we aren't expecting users to do much data processing with the SQL database, should we just keep this ##### in the initialization and do it just once self.compute_SQL_data_type() @@ -314,7 +316,7 @@ def compute_SQL_stats(self): self.get_SQL_unique_values() # self.get_SQL_cardinality() for attribute in self.columns: - if self.data_type_lookup[attribute] == "quantitative": + if self.data_type[attribute] == "quantitative": self._min_max[attribute] = ( self[attribute].min(), self[attribute].max(), @@ -351,7 +353,7 @@ def get_SQL_unique_values(self): self.unique_values = unique_vals def compute_SQL_data_type(self): - data_type_lookup = {} + data_type = {} sql_dtypes = {} self.get_SQL_cardinality() if "." in self.table_name: @@ -366,7 +368,7 @@ def compute_SQL_data_type(self): for attr in list(self.columns): if str(attr).lower() in ["month", "year"]: - data_type_lookup[attr] = "temporal" + data_type[attr] = "temporal" elif sql_dtypes[attr] in [ "character", "character varying", @@ -374,7 +376,7 @@ def compute_SQL_data_type(self): "uuid", "text", ]: - data_type_lookup[attr] = "nominal" + data_type[attr] = "nominal" elif sql_dtypes[attr] in [ "integer", "real", @@ -383,12 +385,12 @@ def compute_SQL_data_type(self): "serial", ]: if self.cardinality[attr] < 13: - data_type_lookup[attr] = "nominal" + data_type[attr] = "nominal" else: - data_type_lookup[attr] = "quantitative" + data_type[attr] = "quantitative" elif "time" in sql_dtypes[attr] or "date" in sql_dtypes[attr]: - data_type_lookup[attr] = "temporal" - self.data_type_lookup = data_type_lookup + data_type[attr] = "temporal" + self.data_type = data_type def _append_rec(self, rec_infolist, recommendations: Dict): if recommendations["collection"] is not None and 
len(recommendations["collection"]) > 0: @@ -414,9 +416,9 @@ def maintain_recs(self): rec_df._message = Message() # Add warning message if there exist ID fields id_fields_str = "" - data_type = rec_df.compute_data_type_from_lookup() - if len(data_type["id"]) > 0: - for id_field in data_type["id"]: + inverted_data_type = rec_df.invert_data_type() + if len(inverted_data_type["id"]) > 0: + for id_field in inverted_data_type["id"]: id_fields_str += f"{id_field}, " id_fields_str = id_fields_str[:-2] rec_df._message.add(f"{id_fields_str} is not visualized since it resembles an ID field.") diff --git a/lux/core/series.py b/lux/core/series.py index 69a0cf02..aea13d0c 100644 --- a/lux/core/series.py +++ b/lux/core/series.py @@ -21,7 +21,7 @@ class LuxSeries(pd.Series): _metadata = [ "_intent", - "data_type_lookup", + "data_type", "unique_values", "cardinality", "_rec_info", diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index 3cd7d166..07ca1b3c 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -374,7 +374,7 @@ def execute_2D_binning(vis: Vis): ############ Metadata: data type, model ############# ####################################################### def compute_dataset_metadata(self, ldf: LuxDataFrame): - ldf.data_type_lookup = {} + ldf.data_type = {} self.compute_data_type(ldf) def compute_data_type(self, ldf: LuxDataFrame): @@ -383,50 +383,50 @@ def compute_data_type(self, ldf: LuxDataFrame): for attr in list(ldf.columns): temporal_var_list = ["month", "year", "day", "date", "time"] if is_datetime(ldf[attr]): - ldf.data_type_lookup[attr] = "temporal" + ldf.data_type[attr] = "temporal" elif self._is_datetime_string(ldf[attr]): - ldf.data_type_lookup[attr] = "temporal" + ldf.data_type[attr] = "temporal" elif isinstance(attr, pd._libs.tslibs.timestamps.Timestamp): - ldf.data_type_lookup[attr] = "temporal" + ldf.data_type[attr] = "temporal" elif str(attr).lower() in temporal_var_list: - 
ldf.data_type_lookup[attr] = "temporal" + ldf.data_type[attr] = "temporal" elif pd.api.types.is_float_dtype(ldf.dtypes[attr]): # int columns gets coerced into floats if contain NaN convertible2int = pd.api.types.is_integer_dtype(ldf[attr].convert_dtypes()) if convertible2int and ldf.cardinality[attr] != len(ldf) and ldf.cardinality[attr] < 20: - ldf.data_type_lookup[attr] = "nominal" + ldf.data_type[attr] = "nominal" else: - ldf.data_type_lookup[attr] = "quantitative" + ldf.data_type[attr] = "quantitative" elif pd.api.types.is_integer_dtype(ldf.dtypes[attr]): # See if integer value is quantitative or nominal by checking if the ratio of cardinality/data size is less than 0.4 and if there are less than 10 unique values if ldf.pre_aggregated: if ldf.cardinality[attr] == len(ldf): - ldf.data_type_lookup[attr] = "nominal" + ldf.data_type[attr] = "nominal" if ldf.cardinality[attr] / len(ldf) < 0.4 and ldf.cardinality[attr] < 20: - ldf.data_type_lookup[attr] = "nominal" + ldf.data_type[attr] = "nominal" else: - ldf.data_type_lookup[attr] = "quantitative" + ldf.data_type[attr] = "quantitative" if check_if_id_like(ldf, attr): - ldf.data_type_lookup[attr] = "id" + ldf.data_type[attr] = "id" # Eliminate this clause because a single NaN value can cause the dtype to be object elif pd.api.types.is_string_dtype(ldf.dtypes[attr]): if check_if_id_like(ldf, attr): - ldf.data_type_lookup[attr] = "id" + ldf.data_type[attr] = "id" else: - ldf.data_type_lookup[attr] = "nominal" + ldf.data_type[attr] = "nominal" # check if attribute is any type of datetime dtype elif is_datetime_series(ldf.dtypes[attr]): - ldf.data_type_lookup[attr] = "temporal" + ldf.data_type[attr] = "temporal" else: - ldf.data_type_lookup[attr] = "nominal" + ldf.data_type[attr] = "nominal" # for attr in list(df.dtypes[df.dtypes=="int64"].keys()): # if self.cardinality[attr]>50: if ldf.index.dtype != "int64" and ldf.index.name: - ldf.data_type_lookup[ldf.index.name] = "nominal" + ldf.data_type[ldf.index.name] = 
"nominal" non_datetime_attrs = [] for attr in ldf.columns: - if ldf.data_type_lookup[attr] == "temporal" and not is_datetime(ldf[attr]): + if ldf.data_type[attr] == "temporal" and not is_datetime(ldf[attr]): non_datetime_attrs.append(attr) warn_msg = "" if len(non_datetime_attrs) == 1: diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py index f989cbb6..40adfd79 100644 --- a/lux/processor/Compiler.py +++ b/lux/processor/Compiler.py @@ -169,7 +169,7 @@ def populate_data_type_model(ldf, vlist): # and not is_datetime_string(clause.attribute): if clause.attribute != "" and clause.attribute != "Record": if clause.data_type == "": - clause.data_type = ldf.data_type_lookup[clause.attribute] + clause.data_type = ldf.data_type[clause.attribute] if clause.data_type == "id": clause.data_type = "nominal" if clause.data_model == "": @@ -441,7 +441,7 @@ def populate_wildcard_options(_inferred_intent: List[Clause], ldf: LuxDataFrame) import copy from lux.utils.utils import convert_to_list - data_type = ldf.compute_data_type_from_lookup() + inverted_data_type = ldf.invert_data_type() data_model = ldf.compute_data_model() intent = {"attributes": [], "filters": []} @@ -451,7 +451,7 @@ def populate_wildcard_options(_inferred_intent: List[Clause], ldf: LuxDataFrame) if clause.attribute == "?": options = set(list(ldf.columns)) # all attributes if clause.data_type != "": - options = options.intersection(set(data_type[clause.data_type])) + options = options.intersection(set(inverted_data_type[clause.data_type])) if clause.data_model != "": options = options.intersection(set(data_model[clause.data_model])) options = list(options) diff --git a/lux/utils/date_utils.py b/lux/utils/date_utils.py index 0428bec7..c521297c 100644 --- a/lux/utils/date_utils.py +++ b/lux/utils/date_utils.py @@ -38,12 +38,12 @@ def date_formatter(time_stamp, ldf): date_str: str A reformatted version of the time_stamp according to granularity """ - data_type = ldf.compute_data_type_from_lookup() + 
inverted_data_type = ldf.invert_data_type() # TODO: method for data_type_lookup to data_type datetime = pd.to_datetime(time_stamp) - if data_type["temporal"]: + if inverted_data_type["temporal"]: # assumes only one temporal column, may need to change this function to recieve multiple temporal columns in the future - date_column = ldf[data_type["temporal"][0]] + date_column = ldf[inverted_data_type["temporal"][0]] granularity = compute_date_granularity(date_column) date_str = "" diff --git a/tests/test_dates.py b/tests/test_dates.py index 55a9a36c..e92d4b8a 100644 --- a/tests/test_dates.py +++ b/tests/test_dates.py @@ -96,7 +96,7 @@ def test_refresh_inplace(): ) with pytest.warns(UserWarning, match="Lux detects that the attribute 'date' may be temporal."): df._repr_html_() - assert df.data_type_lookup["date"] == "temporal" + assert df.data_type["date"] == "temporal" from lux.vis.Vis import Vis @@ -104,8 +104,8 @@ def test_refresh_inplace(): df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d") df.maintain_metadata() - data_type = df.compute_data_type_from_lookup() - assert data_type["temporal"][0] == "date" + inverted_data_type = df.invert_data_type() + assert inverted_data_type["temporal"][0] == "date" vis.refresh_source(df) assert vis.mark == "line" diff --git a/tests/test_nan.py b/tests/test_nan.py index 16f950eb..7b64ed26 100644 --- a/tests/test_nan.py +++ b/tests/test_nan.py @@ -46,16 +46,16 @@ def test_nan_data_type_detection(): ] test = pd.DataFrame(dataset) test.maintain_metadata() - data_type = test.compute_data_type_from_lookup() - assert data_type["nominal"] == [ + inverted_data_type = test.invert_data_type() + assert inverted_data_type["nominal"] == [ "fully_nan", "some_nan", "some_nan2", ], "Categorical columns containing NaNs should be treated as nominal data type" nona_test = test.dropna(subset=["some_nan"]) nona_test.maintain_metadata() - data_type = nona_test.compute_data_type_from_lookup() - assert data_type["nominal"] == [ + 
inverted_data_type = nona_test.invert_data_type() + assert inverted_data_type["nominal"] == [ "fully_nan", "some_nan", "some_nan2", diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index e8105a13..d912ac9f 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -41,17 +41,17 @@ def test_rename_inplace(global_var): # new_df is the old dataframe (df) with the new column name changed inplace new_df, df = df, new_df - assert df.data_type_lookup != new_df.data_type_lookup + assert df.data_type != new_df.data_type - assert df.data_type_lookup["Name"] == new_df.data_type_lookup["Car Name"] + assert df.data_type["Name"] == new_df.data_type["Car Name"] - data_type = df.compute_data_type_from_lookup() - new_data_type = new_df.compute_data_type_from_lookup() + inverted_data_type = df.invert_data_type() + new_inverted_data_type = new_df.invert_data_type() - assert data_type != new_data_type + assert inverted_data_type != new_inverted_data_type - assert data_type["nominal"][0] == "Name" - assert new_data_type["nominal"][0] == "Car Name" + assert inverted_data_type["nominal"][0] == "Name" + assert new_inverted_data_type["nominal"][0] == "Car Name" data_model_lookup = df.compute_data_model_lookup() new_data_model_lookup = new_df.compute_data_model_lookup() @@ -80,17 +80,17 @@ def test_rename(global_var): df._repr_html_() new_df = df.rename(columns={"Name": "Car Name"}, inplace=False) new_df._repr_html_() - assert df.data_type_lookup != new_df.data_type_lookup + assert df.data_type != new_df.data_type - assert df.data_type_lookup["Name"] == new_df.data_type_lookup["Car Name"] + assert df.data_type["Name"] == new_df.data_type["Car Name"] - data_type = df.compute_data_type_from_lookup() - new_data_type = new_df.compute_data_type_from_lookup() + inverted_data_type = df.invert_data_type() + new_inverted_data_type = new_df.invert_data_type() - assert data_type != new_data_type + assert inverted_data_type != new_inverted_data_type - 
assert data_type["nominal"][0] == "Name" - assert new_data_type["nominal"][0] == "Car Name" + assert inverted_data_type["nominal"][0] == "Name" + assert new_inverted_data_type["nominal"][0] == "Car Name" data_model_lookup = df.compute_data_model_lookup() new_data_model_lookup = new_df.compute_data_model_lookup() @@ -323,7 +323,7 @@ def test_change_dtype(global_var): "Occurrence", "Temporal", ] - assert len(df.data_type_lookup) == 10 + assert len(df.data_type) == 10 def test_get_dummies(global_var): @@ -337,7 +337,7 @@ def test_get_dummies(global_var): "Occurrence", "Temporal", ] - assert len(new_df.data_type_lookup) == 339 + assert len(new_df.data_type) == 339 def test_drop(global_var): @@ -520,7 +520,7 @@ def test_df_to_series(global_var): df["Weight"]._metadata assert df["Weight"]._metadata == [ "_intent", - "data_type_lookup", + "data_type", "unique_values", "cardinality", "_rec_info", @@ -548,7 +548,7 @@ def test_value_counts(global_var): assert isinstance(series, lux.core.series.LuxSeries), "Derived series is type LuxSeries." assert df["Weight"]._metadata == [ "_intent", - "data_type_lookup", + "data_type", "unique_values", "cardinality", "_rec_info", @@ -575,7 +575,7 @@ def test_str_replace(global_var): assert isinstance(series, lux.core.series.LuxSeries), "Derived series is type LuxSeries." 
assert df["Brand"]._metadata == [ "_intent", - "data_type_lookup", + "data_type", "unique_values", "cardinality", "_rec_info", @@ -609,7 +609,7 @@ def test_read_json(global_var): "Occurrence", "Temporal", ] - assert len(df.data_type_lookup) == 10 + assert len(df.data_type) == 10 def test_read_sas(global_var): @@ -617,4 +617,4 @@ def test_read_sas(global_var): df = pd.read_sas(url, format="sas7bdat") df._repr_html_() assert list(df.recommendation.keys()) == ["Correlation", "Distribution", "Temporal"] - assert len(df.data_type_lookup) == 6 + assert len(df.data_type) == 6 diff --git a/tests/test_performance.py b/tests/test_performance.py index 4e557075..256d45fb 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -38,7 +38,7 @@ def test_q1_performance_census(global_var): delta2 < 0.15 < delta ), "Subsequent display of recommendations on Census dataset took a total of {delta2:0.4f} seconds, longer than expected." - assert df.data_type_lookup == { + assert df.data_type == { "age": "quantitative", "workclass": "nominal", "fnlwgt": "quantitative", diff --git a/tests/test_type.py b/tests/test_type.py index b104e932..acd651ca 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -21,15 +21,15 @@ def test_check_cars(): df = pd.read_csv("lux/data/car.csv") df.maintain_metadata() - assert df.data_type_lookup["Name"] == "nominal" - assert df.data_type_lookup["MilesPerGal"] == "quantitative" - assert df.data_type_lookup["Cylinders"] == "nominal" - assert df.data_type_lookup["Displacement"] == "quantitative" - assert df.data_type_lookup["Horsepower"] == "quantitative" - assert df.data_type_lookup["Weight"] == "quantitative" - assert df.data_type_lookup["Acceleration"] == "quantitative" - assert df.data_type_lookup["Year"] == "temporal" - assert df.data_type_lookup["Origin"] == "nominal" + assert df.data_type["Name"] == "nominal" + assert df.data_type["MilesPerGal"] == "quantitative" + assert df.data_type["Cylinders"] == "nominal" + assert 
df.data_type["Displacement"] == "quantitative" + assert df.data_type["Horsepower"] == "quantitative" + assert df.data_type["Weight"] == "quantitative" + assert df.data_type["Acceleration"] == "quantitative" + assert df.data_type["Year"] == "temporal" + assert df.data_type["Origin"] == "nominal" def test_check_int_id(): @@ -37,9 +37,8 @@ def test_check_int_id(): "https://github.com/lux-org/lux-datasets/blob/master/data/instacart_sample.csv?raw=true" ) df._repr_html_() - data_type = df.compute_data_type_from_lookup() - assert len(data_type["id"]) == 3 - # assert len(df.data_type["id"]) == 3 + inverted_data_type = df.invert_data_type() + assert len(inverted_data_type["id"]) == 3 assert ( "order_id, product_id, user_id is not visualized since it resembles an ID field." in df._message.to_html() @@ -59,7 +58,7 @@ def test_check_hpi(): df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/hpi.csv?raw=true") df.maintain_metadata() - assert df.data_type_lookup == { + assert df.data_type == { "HPIRank": "quantitative", "Country": "nominal", "SubRegion": "nominal", @@ -79,7 +78,7 @@ def test_check_hpi(): def test_check_airbnb(): df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/airbnb_nyc.csv?raw=true") df.maintain_metadata() - assert df.data_type_lookup == { + assert df.data_type == { "id": "id", "name": "nominal", "host_id": "id", @@ -113,7 +112,7 @@ def test_check_datetime(): } ) df.maintain_metadata() - assert df.data_type_lookup == { + assert df.data_type == { "a": "temporal", "b": "temporal", "c": "temporal", @@ -128,7 +127,7 @@ def test_check_datetime(): def test_check_stock(): df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/stocks.csv?raw=true") df.maintain_metadata() - assert df.data_type_lookup == { + assert df.data_type == { "symbol": "nominal", "monthdate": "temporal", "price": "quantitative", @@ -138,7 +137,7 @@ def test_check_stock(): def test_check_college(): df = 
pd.read_csv("lux/data/college.csv") df.maintain_metadata() - assert df.data_type_lookup == { + assert df.data_type == { "Name": "nominal", "PredominantDegree": "nominal", "HighestDegree": "nominal", @@ -178,8 +177,8 @@ def test_float_categorical(): ] df = pd.DataFrame(values) df.maintain_metadata() - data_type = df.compute_data_type_from_lookup() - assert data_type["nominal"] == [ + inverted_data_type = df.invert_data_type() + assert inverted_data_type["nominal"] == [ "A", "B", "C", From 0a9c2453adf3d669d0fdc9ccabc8918e21eacc58 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Thu, 31 Dec 2020 21:01:27 -0800 Subject: [PATCH 27/39] lux not defined error --- lux/core/frame.py | 20 ++------------------ lux/executor/Executor.py | 18 ++++++++++++++++-- lux/processor/Compiler.py | 6 +++--- lux/utils/date_utils.py | 4 ++-- tests/test_dates.py | 2 +- tests/test_nan.py | 4 ++-- tests/test_pandas_coverage.py | 24 ++++++++++++------------ tests/test_type.py | 4 ++-- 8 files changed, 40 insertions(+), 42 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index bffd077e..462c8e05 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -21,7 +21,7 @@ from lux.utils.message import Message from lux.utils.utils import check_import_lux_widget from typing import Dict, Union, List, Callable -from lux.executor.Executor import * +# from lux.executor.Executor import * import warnings import traceback import lux @@ -127,22 +127,6 @@ def expire_metadata(self): self._min_max = None self.pre_aggregated = None - def invert_data_type(self): - return Executor.mapping(Executor, self.data_type) - - def compute_data_model(self): - data_type_inverted = self.invert_data_type() - data_model = { - "measure": data_type_inverted["quantitative"], - "dimension": data_type_inverted["nominal"] - + data_type_inverted["temporal"] - + data_type_inverted["id"], - } - return data_model - - def compute_data_model_lookup(self): - return Executor.reverseMapping(Executor, 
self.compute_data_model()) - ##################### ## Override Pandas ## ##################### @@ -416,7 +400,7 @@ def maintain_recs(self): rec_df._message = Message() # Add warning message if there exist ID fields id_fields_str = "" - inverted_data_type = rec_df.invert_data_type() + inverted_data_type = lux.config.executor.invert_data_type(rec_df.data_type) if len(inverted_data_type["id"]) > 0: for id_field in inverted_data_type["id"]: id_fields_str += f"{id_field}, " diff --git a/lux/executor/Executor.py b/lux/executor/Executor.py index 0564a61f..c93ea9b4 100644 --- a/lux/executor/Executor.py +++ b/lux/executor/Executor.py @@ -55,17 +55,31 @@ def compute_data_type(self): # def compute_data_model(self): # return NotImplemented - @staticmethod def mapping(self, rmap): group_map = {} for val in ["quantitative", "id", "nominal", "temporal"]: group_map[val] = list(filter(lambda x: rmap[x] == val, rmap)) return group_map - @staticmethod def reverseMapping(self, map): reverse_map = {} for valKey in map: for val in map[valKey]: reverse_map[val] = valKey return reverse_map + + def invert_data_type(self, data_type): + return self.mapping(data_type) + + def compute_data_model(self, data_type): + data_type_inverted = self.invert_data_type(data_type) + data_model = { + "measure": data_type_inverted["quantitative"], + "dimension": data_type_inverted["nominal"] + + data_type_inverted["temporal"] + + data_type_inverted["id"], + } + return data_model + + def compute_data_model_lookup(self, data_type): + return self.reverseMapping(self.compute_data_model(data_type)) diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py index 40adfd79..3131cce3 100644 --- a/lux/processor/Compiler.py +++ b/lux/processor/Compiler.py @@ -159,7 +159,7 @@ def populate_data_type_model(ldf, vlist): # TODO: copy might not be neccesary from lux.utils.date_utils import is_datetime_string - data_model_lookup = ldf.compute_data_model_lookup() + data_model_lookup = 
lux.config.executor.compute_data_model_lookup(ldf.data_type) for vis in vlist: for clause in vis._inferred_intent: @@ -441,8 +441,8 @@ def populate_wildcard_options(_inferred_intent: List[Clause], ldf: LuxDataFrame) import copy from lux.utils.utils import convert_to_list - inverted_data_type = ldf.invert_data_type() - data_model = ldf.compute_data_model() + inverted_data_type = lux.config.executor.invert_data_type(ldf.data_type) + data_model = lux.config.executor.compute_data_model(ldf.data_type) intent = {"attributes": [], "filters": []} for clause in _inferred_intent: diff --git a/lux/utils/date_utils.py b/lux/utils/date_utils.py index c521297c..f8868cea 100644 --- a/lux/utils/date_utils.py +++ b/lux/utils/date_utils.py @@ -14,7 +14,6 @@ import pandas as pd - def date_formatter(time_stamp, ldf): """ Given a numpy timestamp and ldf, inspects which date granularity is appropriate and reformats timestamp accordingly @@ -38,7 +37,8 @@ def date_formatter(time_stamp, ldf): date_str: str A reformatted version of the time_stamp according to granularity """ - inverted_data_type = ldf.invert_data_type() + + inverted_data_type = lux.config.executor.invert_data_type(ldf.data_type) # TODO: method for data_type_lookup to data_type datetime = pd.to_datetime(time_stamp) if inverted_data_type["temporal"]: diff --git a/tests/test_dates.py b/tests/test_dates.py index e92d4b8a..ce859f5d 100644 --- a/tests/test_dates.py +++ b/tests/test_dates.py @@ -104,7 +104,7 @@ def test_refresh_inplace(): df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d") df.maintain_metadata() - inverted_data_type = df.invert_data_type() + inverted_data_type = lux.config.executor.invert_data_type(df.data_type) assert inverted_data_type["temporal"][0] == "date" vis.refresh_source(df) diff --git a/tests/test_nan.py b/tests/test_nan.py index 7b64ed26..b2d28fed 100644 --- a/tests/test_nan.py +++ b/tests/test_nan.py @@ -46,7 +46,7 @@ def test_nan_data_type_detection(): ] test = pd.DataFrame(dataset) 
test.maintain_metadata() - inverted_data_type = test.invert_data_type() + inverted_data_type = lux.config.executor.invert_data_type(test.data_type) assert inverted_data_type["nominal"] == [ "fully_nan", "some_nan", @@ -54,7 +54,7 @@ def test_nan_data_type_detection(): ], "Categorical columns containing NaNs should be treated as nominal data type" nona_test = test.dropna(subset=["some_nan"]) nona_test.maintain_metadata() - inverted_data_type = nona_test.invert_data_type() + inverted_data_type = lux.config.executor.invert_data_type(nona_test.data_type) assert inverted_data_type["nominal"] == [ "fully_nan", "some_nan", diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index d912ac9f..21b257a2 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -45,23 +45,23 @@ def test_rename_inplace(global_var): assert df.data_type["Name"] == new_df.data_type["Car Name"] - inverted_data_type = df.invert_data_type() - new_inverted_data_type = new_df.invert_data_type() + inverted_data_type = lux.config.executor.invert_data_type(df.data_type) + new_inverted_data_type = lux.config.executor.invert_data_type(new_df.data_type) assert inverted_data_type != new_inverted_data_type assert inverted_data_type["nominal"][0] == "Name" assert new_inverted_data_type["nominal"][0] == "Car Name" - data_model_lookup = df.compute_data_model_lookup() - new_data_model_lookup = new_df.compute_data_model_lookup() + data_model_lookup = lux.config.executor.compute_data_model_lookup(df.data_type) + new_data_model_lookup = lux.config.executor.compute_data_model_lookup(new_df.data_type) assert data_model_lookup != new_data_model_lookup assert data_model_lookup["Name"] == new_data_model_lookup["Car Name"] - data_model = df.compute_data_model() - new_data_model = new_df.compute_data_model() + data_model = lux.config.executor.compute_data_model(df.data_type) + new_data_model = lux.config.executor.compute_data_model(new_df.data_type) assert data_model != 
new_data_model @@ -84,23 +84,23 @@ def test_rename(global_var): assert df.data_type["Name"] == new_df.data_type["Car Name"] - inverted_data_type = df.invert_data_type() - new_inverted_data_type = new_df.invert_data_type() + inverted_data_type = lux.config.executor.invert_data_type(df.data_type) + new_inverted_data_type = lux.config.executor.invert_data_type(new_df.data_type) assert inverted_data_type != new_inverted_data_type assert inverted_data_type["nominal"][0] == "Name" assert new_inverted_data_type["nominal"][0] == "Car Name" - data_model_lookup = df.compute_data_model_lookup() - new_data_model_lookup = new_df.compute_data_model_lookup() + data_model_lookup = lux.config.executor.compute_data_model_lookup(df.data_type) + new_data_model_lookup = lux.config.executor.compute_data_model_lookup(new_df.data_type) assert data_model_lookup != new_data_model_lookup assert data_model_lookup["Name"] == new_data_model_lookup["Car Name"] - data_model = df.compute_data_model() - new_data_model = new_df.compute_data_model() + data_model = lux.config.executor.compute_data_model(df.data_type) + new_data_model = lux.config.executor.compute_data_model(new_df.data_type) assert data_model != new_data_model diff --git a/tests/test_type.py b/tests/test_type.py index acd651ca..ac6472fc 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -37,7 +37,7 @@ def test_check_int_id(): "https://github.com/lux-org/lux-datasets/blob/master/data/instacart_sample.csv?raw=true" ) df._repr_html_() - inverted_data_type = df.invert_data_type() + inverted_data_type = lux.config.executor.invert_data_type(df.data_type) assert len(inverted_data_type["id"]) == 3 assert ( "order_id, product_id, user_id is not visualized since it resembles an ID field." 
@@ -177,7 +177,7 @@ def test_float_categorical(): ] df = pd.DataFrame(values) df.maintain_metadata() - inverted_data_type = df.invert_data_type() + inverted_data_type = lux.config.executor.invert_data_type(df.data_type) assert inverted_data_type["nominal"] == [ "A", "B", From 4c052e703701c50b11f4b8997d32e5fedb30fc84 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Thu, 31 Dec 2020 21:06:34 -0800 Subject: [PATCH 28/39] fixed --- lux/utils/date_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lux/utils/date_utils.py b/lux/utils/date_utils.py index f8868cea..0960d7ef 100644 --- a/lux/utils/date_utils.py +++ b/lux/utils/date_utils.py @@ -13,6 +13,7 @@ # limitations under the License. import pandas as pd +import lux def date_formatter(time_stamp, ldf): """ From 40107841f812974a465efaaef6a5679257288aa6 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Fri, 1 Jan 2021 21:11:44 -0800 Subject: [PATCH 29/39] black format --- lux/core/frame.py | 1 + lux/utils/date_utils.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 462c8e05..e4ed9e3e 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -21,6 +21,7 @@ from lux.utils.message import Message from lux.utils.utils import check_import_lux_widget from typing import Dict, Union, List, Callable + # from lux.executor.Executor import * import warnings import traceback diff --git a/lux/utils/date_utils.py b/lux/utils/date_utils.py index 0960d7ef..66086f1c 100644 --- a/lux/utils/date_utils.py +++ b/lux/utils/date_utils.py @@ -15,6 +15,7 @@ import pandas as pd import lux + def date_formatter(time_stamp, ldf): """ Given a numpy timestamp and ldf, inspects which date granularity is appropriate and reformats timestamp accordingly @@ -38,7 +39,7 @@ def date_formatter(time_stamp, ldf): date_str: str A reformatted version of the time_stamp according to granularity """ - + inverted_data_type = lux.config.executor.invert_data_type(ldf.data_type) # 
TODO: method for data_type_lookup to data_type datetime = pd.to_datetime(time_stamp) From 02e1f55e7663e69307db73cc6f1a2dae347e2759 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 4 Jan 2021 22:02:09 -0800 Subject: [PATCH 30/39] config doc updated --- doc/source/reference/config.rst | 38 +++++++++++++++++++ .../gen/lux.core.frame.LuxDataFrame.rst | 6 +-- .../gen/lux.executor.Executor.Executor.rst | 4 +- ...executor.PandasExecutor.PandasExecutor.rst | 4 +- .../lux.executor.SQLExecutor.SQLExecutor.rst | 4 +- doc/source/reference/gen/lux.vis.Vis.Vis.rst | 4 +- 6 files changed, 52 insertions(+), 8 deletions(-) diff --git a/doc/source/reference/config.rst b/doc/source/reference/config.rst index a1474dc5..a6b8ed9e 100644 --- a/doc/source/reference/config.rst +++ b/doc/source/reference/config.rst @@ -72,3 +72,41 @@ We can disable this feature and revert back to using a scatter plot by running t .. code-block:: python lux.config.heatmap = False + + +Default Renderer +~~~~~~~~~~~~~~~~~ + +For now, all graphs will be rendered using `altair`. We are working on creating support for `matplotlib` and other plotting libraries. + +To set the default renderer, run the following code block: + +.. code-block:: python + + lux.config.renderer = "altair" + +Plot Configurations +~~~~~~~~~~~~~~~~~~~ + +Altair supports plot configurations to be applied on top of the generated graphs. To set a default plot configuration, first write a function that takes in a `chart` and returns a `chart`. For example: + +.. code-block:: python + def change_color_add_title(chart): + chart = chart.configure_mark(color="green") # change mark color to green + chart.title = "Custom Title" # add title to chart + return chart + +Then, set the `plot_config` to this function so that this function is applied to every plot generated. + +.. code-block:: python + + lux.config.plot_config = change_color_add_title + +The above results in the following changes: + +.. 
image:: https://github.com/lux-org/lux-resources/blob/master/doc_img/plotconfig-1.png?raw=true + :width: 700 + :align: center + :alt: Retrieves a single attribute from Lux's Action Manager using its defined id. + + diff --git a/doc/source/reference/gen/lux.core.frame.LuxDataFrame.rst b/doc/source/reference/gen/lux.core.frame.LuxDataFrame.rst index 71259efb..05c1bea1 100644 --- a/doc/source/reference/gen/lux.core.frame.LuxDataFrame.rst +++ b/doc/source/reference/gen/lux.core.frame.LuxDataFrame.rst @@ -1,4 +1,4 @@ -lux.core.frame.LuxDataFrame +lux.core.frame.LuxDataFrame =========================== .. currentmodule:: lux.core.frame @@ -171,11 +171,11 @@ lux.core.frame.LuxDataFrame ~LuxDataFrame.rsub ~LuxDataFrame.rtruediv ~LuxDataFrame.sample + ~LuxDataFrame.save_as_html ~LuxDataFrame.select_dtypes ~LuxDataFrame.sem - ~LuxDataFrame.set_SQL_connection + ~LuxDataFrame.set_SQL_table ~LuxDataFrame.set_axis - ~LuxDataFrame.set_executor_type ~LuxDataFrame.set_index ~LuxDataFrame.set_intent ~LuxDataFrame.set_intent_as_vis diff --git a/doc/source/reference/gen/lux.executor.Executor.Executor.rst b/doc/source/reference/gen/lux.executor.Executor.Executor.rst index 71e7c0d2..ea67caa3 100644 --- a/doc/source/reference/gen/lux.executor.Executor.Executor.rst +++ b/doc/source/reference/gen/lux.executor.Executor.Executor.rst @@ -1,4 +1,4 @@ -lux.executor.Executor.Executor +lux.executor.Executor.Executor ============================== .. 
currentmodule:: lux.executor.Executor @@ -15,12 +15,14 @@ lux.executor.Executor.Executor ~Executor.__init__ ~Executor.compute_data_model + ~Executor.compute_data_model_lookup ~Executor.compute_data_type ~Executor.compute_stats ~Executor.execute ~Executor.execute_aggregate ~Executor.execute_binning ~Executor.execute_filter + ~Executor.invert_data_type ~Executor.mapping ~Executor.reverseMapping diff --git a/doc/source/reference/gen/lux.executor.PandasExecutor.PandasExecutor.rst b/doc/source/reference/gen/lux.executor.PandasExecutor.PandasExecutor.rst index 83997a90..e99f2254 100644 --- a/doc/source/reference/gen/lux.executor.PandasExecutor.PandasExecutor.rst +++ b/doc/source/reference/gen/lux.executor.PandasExecutor.PandasExecutor.rst @@ -1,4 +1,4 @@ -lux.executor.PandasExecutor.PandasExecutor +lux.executor.PandasExecutor.PandasExecutor ========================================== .. currentmodule:: lux.executor.PandasExecutor @@ -16,6 +16,7 @@ lux.executor.PandasExecutor.PandasExecutor ~PandasExecutor.__init__ ~PandasExecutor.apply_filter ~PandasExecutor.compute_data_model + ~PandasExecutor.compute_data_model_lookup ~PandasExecutor.compute_data_type ~PandasExecutor.compute_dataset_metadata ~PandasExecutor.compute_stats @@ -25,6 +26,7 @@ lux.executor.PandasExecutor.PandasExecutor ~PandasExecutor.execute_binning ~PandasExecutor.execute_filter ~PandasExecutor.execute_sampling + ~PandasExecutor.invert_data_type ~PandasExecutor.mapping ~PandasExecutor.reverseMapping diff --git a/doc/source/reference/gen/lux.executor.SQLExecutor.SQLExecutor.rst b/doc/source/reference/gen/lux.executor.SQLExecutor.SQLExecutor.rst index c6c3f631..a809a280 100644 --- a/doc/source/reference/gen/lux.executor.SQLExecutor.SQLExecutor.rst +++ b/doc/source/reference/gen/lux.executor.SQLExecutor.SQLExecutor.rst @@ -1,4 +1,4 @@ -lux.executor.SQLExecutor.SQLExecutor +lux.executor.SQLExecutor.SQLExecutor ==================================== .. 
currentmodule:: lux.executor.SQLExecutor @@ -15,12 +15,14 @@ lux.executor.SQLExecutor.SQLExecutor ~SQLExecutor.__init__ ~SQLExecutor.compute_data_model + ~SQLExecutor.compute_data_model_lookup ~SQLExecutor.compute_data_type ~SQLExecutor.compute_stats ~SQLExecutor.execute ~SQLExecutor.execute_aggregate ~SQLExecutor.execute_binning ~SQLExecutor.execute_filter + ~SQLExecutor.invert_data_type ~SQLExecutor.mapping ~SQLExecutor.reverseMapping diff --git a/doc/source/reference/gen/lux.vis.Vis.Vis.rst b/doc/source/reference/gen/lux.vis.Vis.Vis.rst index 39e2983b..3785b548 100644 --- a/doc/source/reference/gen/lux.vis.Vis.Vis.rst +++ b/doc/source/reference/gen/lux.vis.Vis.Vis.rst @@ -1,4 +1,4 @@ -lux.vis.Vis.Vis +lux.vis.Vis.Vis =============== .. currentmodule:: lux.vis.Vis @@ -22,10 +22,10 @@ lux.vis.Vis.Vis ~Vis.refresh_source ~Vis.remove_column_from_spec ~Vis.remove_filter_from_spec - ~Vis.to_code ~Vis.set_intent ~Vis.to_Altair ~Vis.to_VegaLite + ~Vis.to_code From 97a5da78bff132dcb8ef3d0daf7b2bf373f69217 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Tue, 5 Jan 2021 09:19:30 -0800 Subject: [PATCH 31/39] fix link for executor --- doc/source/advanced/architecture.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/advanced/architecture.rst b/doc/source/advanced/architecture.rst index f257cb34..e8514a5d 100644 --- a/doc/source/advanced/architecture.rst +++ b/doc/source/advanced/architecture.rst @@ -80,4 +80,4 @@ Number of Dimensions Number of Measures Mark Type Executor ---------- The data executor populates each Vis with a subset of the dataframe based on the specified intent. -You can learn more about executors in Lux `here `_. \ No newline at end of file +You can learn more about executors in Lux `here `_. 
\ No newline at end of file From f7e8f3b95c61bd39a08474adea468934765903d3 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Tue, 5 Jan 2021 09:41:33 -0800 Subject: [PATCH 32/39] more links --- doc/source/advanced/architecture.rst | 2 +- doc/source/getting_started/overview.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/advanced/architecture.rst b/doc/source/advanced/architecture.rst index e8514a5d..0b2c51f4 100644 --- a/doc/source/advanced/architecture.rst +++ b/doc/source/advanced/architecture.rst @@ -80,4 +80,4 @@ Number of Dimensions Number of Measures Mark Type Executor ---------- The data executor populates each Vis with a subset of the dataframe based on the specified intent. -You can learn more about executors in Lux `here `_. \ No newline at end of file +You can learn more about executors in Lux `here `_. \ No newline at end of file diff --git a/doc/source/getting_started/overview.rst b/doc/source/getting_started/overview.rst index c873c1f8..2a6edc34 100644 --- a/doc/source/getting_started/overview.rst +++ b/doc/source/getting_started/overview.rst @@ -2,7 +2,7 @@ Overview ******** -.. note:: You can follow along this tutorial in a Jupyter notebook. [`Github `_] [`Binder `_] +.. note:: You can follow along this tutorial in a Jupyter notebook. [`Github `_] [`Binder `_] This tutorial provides an overview of how you can use Lux in your data exploration workflow. 
From 216770704abc2fbee7b117015b4d6fdfb9692978 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Tue, 5 Jan 2021 10:13:15 -0800 Subject: [PATCH 33/39] fixed overview --- doc/source/getting_started/overview.rst | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/doc/source/getting_started/overview.rst b/doc/source/getting_started/overview.rst index 2a6edc34..29833f59 100644 --- a/doc/source/getting_started/overview.rst +++ b/doc/source/getting_started/overview.rst @@ -25,8 +25,7 @@ Lux preserves the Pandas dataframe semantics -- which means that you can apply a df = pd.read_csv("lux/data/college.csv") -Lux is built on the philosophy that generating useful visualizations should be as simple as printing out a dataframe. -When you print out the dataframe in the notebook, you should see the default Pandas table display with an additional Toggle button. +To see the suggested visualizations, simply print the DataFrame. You should see the default Pandas table display with an additional toggle button. .. code-block:: python @@ -37,7 +36,7 @@ When you print out the dataframe in the notebook, you should see the default Pan :align: center :alt: click on toggle, scroll on Correlation -By clicking on the Toggle button, you can now explore the data visually through Lux. You should see three tabs of visualizations recommended to you. +By clicking on the Toggle button, you can now explore the data visually through Lux. You should see several categories of recommended visualizations, which you can browse by clicking on the tabs. .. image:: ../../../../lux-resources/doc_img/overview-2.gif :width: 700 @@ -75,7 +74,7 @@ As shown in the example above, by default, we display three types of actions sho :alt: Example of even and uneven category distributions -Refer to :doc:`this page <../advanced/action>` for details on different types of action in Lux. +Refer to :doc:`this page <../reference/lux.action>` for details on different types of action in Lux. 
Expressing Analysis Interest and Goals with User `Intent` ---------------------------------------------------------- @@ -111,7 +110,7 @@ You can specify a variety of things that you might be interested in, for example df.intent = ["MedianEarnings", "FundingModel=Public"] df -For more advance use of intent, refer to :doc:`this page <../getting_started/intent>` on how to specify the intent. +For more advanced use of intent, refer to :doc:`this page <../guide/intent>` on how to specify the intent. Steering Recommendations via User Intent ---------------------------------------- @@ -129,7 +128,7 @@ Given the updated intent, additional actions (Enhance and Filter) are generated. - {MedianEarnings, **AverageCost**} - {MedianEarnings, **AverageFacultySalary**}. -.. image:: https://github.com/lux-org/lux-resources/blob/master/doc_img/overview-4.png +.. image:: https://github.com/lux-org/lux-resources/blob/master/doc_img/overview-4.png?raw=true :width: 700 :align: center :alt: screenshot of Enhance @@ -140,10 +139,7 @@ Given the updated intent, additional actions (Enhance and Filter) are generated. - {MedianEarnings, **Region=Southeast**} - {MedianEarnings, **Region=Great Lakes**}. -.. image:: https://github.com/lux-org/lux-resources/blob/master/doc_img/overview-5.png +.. image:: https://github.com/lux-org/lux-resources/blob/master/doc_img/overview-5.png?raw=true :width: 700 :align: center :alt: screenshot of Filter - - -.. Lux is built on the principle that users should always be able to visualize and explore anything they specify, without having to think about how the visualization should look like. 
From ef8d746a25d7cf4eb2bf425e11f1c106227b6ee5 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Tue, 5 Jan 2021 11:40:30 -0800 Subject: [PATCH 34/39] more links fixed --- doc/source/advanced/date.rst | 2 +- doc/source/guide/style.rst | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/advanced/date.rst b/doc/source/advanced/date.rst index 6adf4028..901ecf84 100644 --- a/doc/source/advanced/date.rst +++ b/doc/source/advanced/date.rst @@ -98,7 +98,7 @@ Below we look at an example stocks dataset that also has `date` field with each .. code-block:: python - df = pd.read_csv("../../lux/data/stocks.csv") + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/stocks.csv?raw=true") df.dtypes diff --git a/doc/source/guide/style.rst b/doc/source/guide/style.rst index 516f1e5a..58a0fa71 100644 --- a/doc/source/guide/style.rst +++ b/doc/source/guide/style.rst @@ -115,4 +115,6 @@ We want to decrease the opacity of scatterplots, but keep the opacity for the ot :width: 700 :align: center +.. note:: For now, if the visualization has already been rendered before, you will need to run `df.expire_recs()` to see the updated visualization. + We can modify the scatterplot setting, without changing the settings for the other chart types. 
From c5fd1a532f1766236e84bb7d560bb717fa06ec07 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Tue, 5 Jan 2021 13:10:24 -0800 Subject: [PATCH 35/39] pandas methods no longer included --- doc/conf.py | 2 +- doc/source/reference/API.rst | 13 +- .../gen/lux.core.series.LuxSeries.rst | 234 ++++++++++++++++++ doc/source/reference/lux.action.rst | 8 - doc/source/reference/lux.core.rst | 9 +- doc/source/reference/lux.utils.rst | 8 - 6 files changed, 253 insertions(+), 21 deletions(-) create mode 100644 doc/source/reference/gen/lux.core.series.LuxSeries.rst diff --git a/doc/conf.py b/doc/conf.py index 03862439..88d574a9 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -63,7 +63,7 @@ "sphinx_automodapi.automodsumm", ] -autodoc_default_flags = ["members", "inherited-members"] +autodoc_default_flags = ["members", "undoc-members"] autodoc_member_order = "groupwise" autosummary_generate = True numpydoc_show_class_members = False diff --git a/doc/source/reference/API.rst b/doc/source/reference/API.rst index 68b31def..7c564ee6 100644 --- a/doc/source/reference/API.rst +++ b/doc/source/reference/API.rst @@ -4,17 +4,26 @@ API **** +Core Lux Objects +----------------- + +.. autosummary:: + :toctree: gen + :nosignatures: + + lux.core.frame.LuxDataFrame + lux.core.series.LuxSeries + Basic API Interface ------------------- .. autosummary:: :toctree: gen :nosignatures: - + lux.vis.Vis.Vis lux.vis.VisList.VisList lux.vis.Vis.Clause - lux.core.frame.LuxDataFrame Advanced Internals (Dev) ------------------------- diff --git a/doc/source/reference/gen/lux.core.series.LuxSeries.rst b/doc/source/reference/gen/lux.core.series.LuxSeries.rst new file mode 100644 index 00000000..f44a2548 --- /dev/null +++ b/doc/source/reference/gen/lux.core.series.LuxSeries.rst @@ -0,0 +1,234 @@ +lux.core.series.LuxSeries +========================= + +.. currentmodule:: lux.core.series + +.. autoclass:: LuxSeries + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~LuxSeries.__init__ + ~LuxSeries.abs + ~LuxSeries.add + ~LuxSeries.add_prefix + ~LuxSeries.add_suffix + ~LuxSeries.agg + ~LuxSeries.aggregate + ~LuxSeries.align + ~LuxSeries.all + ~LuxSeries.any + ~LuxSeries.append + ~LuxSeries.apply + ~LuxSeries.argmax + ~LuxSeries.argmin + ~LuxSeries.argsort + ~LuxSeries.asfreq + ~LuxSeries.asof + ~LuxSeries.astype + ~LuxSeries.at_time + ~LuxSeries.autocorr + ~LuxSeries.backfill + ~LuxSeries.between + ~LuxSeries.between_time + ~LuxSeries.bfill + ~LuxSeries.bool + ~LuxSeries.clip + ~LuxSeries.combine + ~LuxSeries.combine_first + ~LuxSeries.compare + ~LuxSeries.convert_dtypes + ~LuxSeries.copy + ~LuxSeries.corr + ~LuxSeries.count + ~LuxSeries.cov + ~LuxSeries.cummax + ~LuxSeries.cummin + ~LuxSeries.cumprod + ~LuxSeries.cumsum + ~LuxSeries.describe + ~LuxSeries.diff + ~LuxSeries.display_pandas + ~LuxSeries.div + ~LuxSeries.divide + ~LuxSeries.divmod + ~LuxSeries.dot + ~LuxSeries.drop + ~LuxSeries.drop_duplicates + ~LuxSeries.droplevel + ~LuxSeries.dropna + ~LuxSeries.duplicated + ~LuxSeries.eq + ~LuxSeries.equals + ~LuxSeries.ewm + ~LuxSeries.expanding + ~LuxSeries.explode + ~LuxSeries.factorize + ~LuxSeries.ffill + ~LuxSeries.fillna + ~LuxSeries.filter + ~LuxSeries.first + ~LuxSeries.first_valid_index + ~LuxSeries.floordiv + ~LuxSeries.ge + ~LuxSeries.get + ~LuxSeries.groupby + ~LuxSeries.gt + ~LuxSeries.head + ~LuxSeries.hist + ~LuxSeries.idxmax + ~LuxSeries.idxmin + ~LuxSeries.infer_objects + ~LuxSeries.interpolate + ~LuxSeries.isin + ~LuxSeries.isna + ~LuxSeries.isnull + ~LuxSeries.item + ~LuxSeries.items + ~LuxSeries.iteritems + ~LuxSeries.keys + ~LuxSeries.kurt + ~LuxSeries.kurtosis + ~LuxSeries.last + ~LuxSeries.last_valid_index + ~LuxSeries.le + ~LuxSeries.lt + ~LuxSeries.mad + ~LuxSeries.map + ~LuxSeries.mask + ~LuxSeries.max + ~LuxSeries.mean + ~LuxSeries.median + ~LuxSeries.memory_usage + ~LuxSeries.min + ~LuxSeries.mod + ~LuxSeries.mode + ~LuxSeries.mul + ~LuxSeries.multiply + ~LuxSeries.ne + 
~LuxSeries.nlargest + ~LuxSeries.notna + ~LuxSeries.notnull + ~LuxSeries.nsmallest + ~LuxSeries.nunique + ~LuxSeries.pad + ~LuxSeries.pct_change + ~LuxSeries.pipe + ~LuxSeries.pop + ~LuxSeries.pow + ~LuxSeries.prod + ~LuxSeries.product + ~LuxSeries.quantile + ~LuxSeries.radd + ~LuxSeries.rank + ~LuxSeries.ravel + ~LuxSeries.rdiv + ~LuxSeries.rdivmod + ~LuxSeries.reindex + ~LuxSeries.reindex_like + ~LuxSeries.rename + ~LuxSeries.rename_axis + ~LuxSeries.reorder_levels + ~LuxSeries.repeat + ~LuxSeries.replace + ~LuxSeries.resample + ~LuxSeries.reset_index + ~LuxSeries.rfloordiv + ~LuxSeries.rmod + ~LuxSeries.rmul + ~LuxSeries.rolling + ~LuxSeries.round + ~LuxSeries.rpow + ~LuxSeries.rsub + ~LuxSeries.rtruediv + ~LuxSeries.sample + ~LuxSeries.searchsorted + ~LuxSeries.sem + ~LuxSeries.set_axis + ~LuxSeries.shift + ~LuxSeries.skew + ~LuxSeries.slice_shift + ~LuxSeries.sort_index + ~LuxSeries.sort_values + ~LuxSeries.squeeze + ~LuxSeries.std + ~LuxSeries.sub + ~LuxSeries.subtract + ~LuxSeries.sum + ~LuxSeries.swapaxes + ~LuxSeries.swaplevel + ~LuxSeries.tail + ~LuxSeries.take + ~LuxSeries.to_clipboard + ~LuxSeries.to_csv + ~LuxSeries.to_dict + ~LuxSeries.to_excel + ~LuxSeries.to_frame + ~LuxSeries.to_hdf + ~LuxSeries.to_json + ~LuxSeries.to_latex + ~LuxSeries.to_list + ~LuxSeries.to_markdown + ~LuxSeries.to_numpy + ~LuxSeries.to_pandas + ~LuxSeries.to_period + ~LuxSeries.to_pickle + ~LuxSeries.to_sql + ~LuxSeries.to_string + ~LuxSeries.to_timestamp + ~LuxSeries.to_xarray + ~LuxSeries.tolist + ~LuxSeries.transform + ~LuxSeries.transpose + ~LuxSeries.truediv + ~LuxSeries.truncate + ~LuxSeries.tshift + ~LuxSeries.tz_convert + ~LuxSeries.tz_localize + ~LuxSeries.unique + ~LuxSeries.unstack + ~LuxSeries.update + ~LuxSeries.value_counts + ~LuxSeries.var + ~LuxSeries.view + ~LuxSeries.where + ~LuxSeries.xs + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~LuxSeries.T + ~LuxSeries.array + ~LuxSeries.at + ~LuxSeries.attrs + ~LuxSeries.axes + ~LuxSeries.dtype + ~LuxSeries.dtypes + ~LuxSeries.empty + ~LuxSeries.hasnans + ~LuxSeries.iat + ~LuxSeries.iloc + ~LuxSeries.index + ~LuxSeries.is_monotonic + ~LuxSeries.is_monotonic_decreasing + ~LuxSeries.is_monotonic_increasing + ~LuxSeries.is_unique + ~LuxSeries.loc + ~LuxSeries.name + ~LuxSeries.nbytes + ~LuxSeries.ndim + ~LuxSeries.shape + ~LuxSeries.size + ~LuxSeries.values + + \ No newline at end of file diff --git a/doc/source/reference/lux.action.rst b/doc/source/reference/lux.action.rst index 52ff79fe..6e704caf 100644 --- a/doc/source/reference/lux.action.rst +++ b/doc/source/reference/lux.action.rst @@ -60,14 +60,6 @@ lux.action.row\_group module :undoc-members: :show-inheritance: -lux.action.similarity module ----------------------------- - -.. automodule:: lux.action.similarity - :members: - :undoc-members: - :show-inheritance: - lux.action.univariate module ---------------------------- diff --git a/doc/source/reference/lux.core.rst b/doc/source/reference/lux.core.rst index 93a8e7bf..40e77fa9 100644 --- a/doc/source/reference/lux.core.rst +++ b/doc/source/reference/lux.core.rst @@ -10,13 +10,18 @@ lux.core.frame module .. automodule:: lux.core.frame :members: :undoc-members: - :show-inheritance: +lux.core.series module +----------------------- + +.. automodule:: lux.core.series + :members: + :undoc-members: + Module contents --------------- .. automodule:: lux.core :members: :undoc-members: - :show-inheritance: diff --git a/doc/source/reference/lux.utils.rst b/doc/source/reference/lux.utils.rst index 965d9dd1..5a3177fb 100644 --- a/doc/source/reference/lux.utils.rst +++ b/doc/source/reference/lux.utils.rst @@ -12,14 +12,6 @@ lux.utils.date\_utils module :undoc-members: :show-inheritance: -lux.utils.renderjson module ---------------------------- - -.. 
automodule:: lux.utils.renderjson - :members: - :undoc-members: - :show-inheritance: - lux.utils.utils module ---------------------- From c85c2a9a75c56f26382f0c6f0cb2eaed01f61c71 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Wed, 6 Jan 2021 21:27:54 -0800 Subject: [PATCH 36/39] updates to some docstrings --- doc/conf.py | 2 +- doc/source/advanced/interestingness.rst | 31 ++++--- doc/source/guide/FAQ.rst | 14 +-- doc/source/guide/intent.rst | 5 +- doc/source/reference/API.rst | 11 ++- .../gen/lux._config.config.Config.rst | 34 +++++++ .../reference/gen/lux.history.event.Event.rst | 22 +++++ .../gen/lux.history.history.Event.rst | 22 +++++ .../gen/lux.history.history.History.rst | 23 +++++ doc/source/reference/lux._config.config.rst | 10 ++ doc/source/reference/lux.action.rst | 11 +-- doc/source/reference/lux.core.rst | 10 +- doc/source/reference/lux.rst | 1 - lux/_config/config.py | 45 ++++++++- lux/core/frame.py | 92 +++++++++++-------- lux/core/series.py | 5 + 16 files changed, 256 insertions(+), 82 deletions(-) create mode 100644 doc/source/reference/gen/lux._config.config.Config.rst create mode 100644 doc/source/reference/gen/lux.history.event.Event.rst create mode 100644 doc/source/reference/gen/lux.history.history.Event.rst create mode 100644 doc/source/reference/gen/lux.history.history.History.rst create mode 100644 doc/source/reference/lux._config.config.rst diff --git a/doc/conf.py b/doc/conf.py index 88d574a9..cafb3786 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -63,7 +63,7 @@ "sphinx_automodapi.automodsumm", ] -autodoc_default_flags = ["members", "undoc-members"] +autodoc_default_flags = ["members", "no-undoc-members"] autodoc_member_order = "groupwise" autosummary_generate = True numpydoc_show_class_members = False diff --git a/doc/source/advanced/interestingness.rst b/doc/source/advanced/interestingness.rst index 0827ab65..482b1771 100644 --- a/doc/source/advanced/interestingness.rst +++ b/doc/source/advanced/interestingness.rst @@ -1,24 
+1,24 @@ -********************** +******************************* Interestingness Scoring -********************** +******************************* In Lux, recommended visualizations are scored and ranked based on their statistical properties. Lux uses various standard metrics for determining how interesting a visualization is. The choice of an interestingness metric is dependent on the chart type, as shown in the following table. -+----------------+---------+------------------------------------------------------------------+ -| Chart Type | Filter? | Function | -+================+=========+==================================================================+ -| Bar/Line Chart | ✔ | :func:`lux.interestingness.interestingness.unevenness` | -| +---------+------------------------------------------------------------------+ ++----------------+---------+--------------------------------------------------------------------+ +| Chart Type | Filter? | Function | ++================+=========+====================================================================+ +| Bar/Line Chart | ✔ | :func:`lux.interestingness.interestingness.unevenness` | +| +---------+--------------------------------------------------------------------+ | | X | :func:`lux.interestingness.interestingness.deviation_from_overall` | -+----------------+---------+------------------------------------------------------------------+ -| Histogram | ✔ | :func:`lux.interestingness.interestingness.skewness` | -| +---------+------------------------------------------------------------------+ ++----------------+---------+--------------------------------------------------------------------+ +| Histogram | ✔ | :func:`lux.interestingness.interestingness.skewness` | +| +---------+--------------------------------------------------------------------+ | | X | :func:`lux.interestingness.interestingness.deviation_from_overall` | -+----------------+---------+------------------------------------------------------------------+ -| 
Scatterplot | ✔/X | :func:`lux.interestingness.interestingness.monotonicity` | -+----------------+---------+------------------------------------------------------------------+ ++----------------+---------+--------------------------------------------------------------------+ +| Scatterplot | ✔/X | :func:`lux.interestingness.interestingness.monotonicity` | ++----------------+---------+--------------------------------------------------------------------+ Bar Chart Interestingness ========================= @@ -42,6 +42,7 @@ The difference is captured via the Euclidean distance (L2 norm). .. Example: "Occurrence" recommendation .. _barWithFilter: + Bar charts with filters: Deviation from Overall ----------------------------------------------- @@ -77,6 +78,7 @@ The skewness is computed based on `scipy.stats.skew `_ for SQL (currently only tested for Postgres). We are actively working on extending Lux to databases. If you are interested in using this feature, please `contact us `_ for more information. + Lux has `some limited support `__ for SQL (currently only tested for Postgres). We are actively working on extending Lux to databases. If you are interested in using this feature, please `contact us `_ for more information. What do I do with date-related attributes in my dataset? """""""""""""""""""""""""""""""""""""""""""""""""""""""" - Lux supports a variety of temporal data types in Pandas. For more information on how to handle temporal data in Lux, refer to `the datetime guide `_. + Lux supports a variety of temporal data types in Pandas. For more information on how to handle temporal data in Lux, refer to `the datetime guide `__. How do I access all of the current recommendations shown in my widget? """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" @@ -38,12 +38,12 @@ How do I set the Lux widgets to show up on default? I want to change the opacity of my chart, add title, change chart font size, etc. How do I modify chart settings? 
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" - To add custom plot settings to the recommendations, you can set the global :code:`plot_config` property. See `this tutorial `_ on how to configure chart properties. Lux currently only support chart modifications in Altair. + To add custom plot settings to the recommendations, you can set the global :code:`plot_config` property. See `this tutorial `__ on how to configure chart properties. Lux currently only support chart modifications in Altair. How do I change aggregation functions, binning, or axis channels to non-default values? """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" To change the aggregation function to be something that is not average or set an attribute to display on the x-axis instead of y-axis, you can override the default values in the :code:`lux.Clause` specification. - To override automatically inferred properties, you can specify additional arguements inside `lux.Clause` to set the value of the Clause properties. See `this page `_ for more details. + To override automatically inferred properties, you can specify additional arguements inside `lux.Clause` to set the value of the Clause properties. See `this page `__ for more details. I want to look at the default recommendations that were recommended to me, how can I get the dataframe to display those? """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" @@ -79,12 +79,12 @@ How do I disable sampling and have Lux visualize the full dataset? lux.config.sampling = False df = pd.read_csv("...") - If you want to fine-tune the sampling parameters, you can edit :code:`lux.config.sampling_start` and :code:`lux.config.sampling_cap`. See `this page `_ for more details. 
+ If you want to fine-tune the sampling parameters, you can edit :code:`lux.config.sampling_start` and :code:`lux.config.sampling_cap`. See `this page `__ for more details. Troubleshooting Tips -------------------- -To troubleshoot your Lux installation, we recommend cloning `this repo `_ and using one of the `demo notebooks `_ to test out Lux. +To troubleshoot your Lux installation, we recommend cloning `this repo `__ and using one of the `demo notebooks `__ to test out Lux. The Lux Jupyter widget does not show up when I print a dataframe. """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" @@ -140,7 +140,7 @@ I'm not able to export my visualizations via the :code:`exported` property. I have an issue that is not addressed by any of the FAQs. """""""""""""""""""""""""""""""""""""""""""""""""""""""""" -Please submit a `Github Issue `_ or ask a question on `Slack `_. +Please submit a `Github Issue `__ or ask a question on `Slack `__. .. Not Currently Supported .. - What do I do if I want to change the data type of an attribute? diff --git a/doc/source/guide/intent.rst b/doc/source/guide/intent.rst index eba6c287..0a1a8e59 100644 --- a/doc/source/guide/intent.rst +++ b/doc/source/guide/intent.rst @@ -107,8 +107,9 @@ Note that since there are three different visualizations that is generated based :alt: add screenshot You can specify to Lux that you are interested in learning more about colleges in New England. - In the resulting Filter action, we see that Lux suggests visualizations in other `Region`s as recommendations. - + + In the resulting Filter action, we see that Lux suggests visualizations in other `Region` as recommendations. + .. code-block:: python df.intent = ["Region=New England"] diff --git a/doc/source/reference/API.rst b/doc/source/reference/API.rst index 7c564ee6..42659464 100644 --- a/doc/source/reference/API.rst +++ b/doc/source/reference/API.rst @@ -10,10 +10,19 @@ Core Lux Objects .. 
autosummary:: :toctree: gen :nosignatures: - + lux.core.frame.LuxDataFrame lux.core.series.LuxSeries +Configuration Options +---------------------- + +.. autosummary:: + :toctree: gen + :nosignatures: + + lux._config.config.Config + Basic API Interface ------------------- diff --git a/doc/source/reference/gen/lux._config.config.Config.rst b/doc/source/reference/gen/lux._config.config.Config.rst new file mode 100644 index 00000000..3b7637e2 --- /dev/null +++ b/doc/source/reference/gen/lux._config.config.Config.rst @@ -0,0 +1,34 @@ +lux.\_config.config.Config +========================== + +.. currentmodule:: lux._config.config + +.. autoclass:: Config + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~Config.__init__ + ~Config.set_SQL_connection + ~Config.set_executor_type + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~Config.default_display + ~Config.heatmap + ~Config.sampling + ~Config.sampling_cap + ~Config.sampling_start + + \ No newline at end of file diff --git a/doc/source/reference/gen/lux.history.event.Event.rst b/doc/source/reference/gen/lux.history.event.Event.rst new file mode 100644 index 00000000..6674286a --- /dev/null +++ b/doc/source/reference/gen/lux.history.event.Event.rst @@ -0,0 +1,22 @@ +lux.history.event.Event +======================= + +.. currentmodule:: lux.history.event + +.. autoclass:: Event + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~Event.__init__ + + + + + + \ No newline at end of file diff --git a/doc/source/reference/gen/lux.history.history.Event.rst b/doc/source/reference/gen/lux.history.history.Event.rst new file mode 100644 index 00000000..2b03f4e3 --- /dev/null +++ b/doc/source/reference/gen/lux.history.history.Event.rst @@ -0,0 +1,22 @@ +lux.history.history.Event +========================= + +.. currentmodule:: lux.history.history + +.. autoclass:: Event + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~Event.__init__ + + + + + + \ No newline at end of file diff --git a/doc/source/reference/gen/lux.history.history.History.rst b/doc/source/reference/gen/lux.history.history.History.rst new file mode 100644 index 00000000..e10bd1af --- /dev/null +++ b/doc/source/reference/gen/lux.history.history.History.rst @@ -0,0 +1,23 @@ +lux.history.history.History +=========================== + +.. currentmodule:: lux.history.history + +.. autoclass:: History + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~History.__init__ + ~History.append_event + + + + + + \ No newline at end of file diff --git a/doc/source/reference/lux._config.config.rst b/doc/source/reference/lux._config.config.rst new file mode 100644 index 00000000..dc3875bc --- /dev/null +++ b/doc/source/reference/lux._config.config.rst @@ -0,0 +1,10 @@ +lux._config.config package +========================== + +lux._config.config.Config module +--------------------------------- + +.. automodule:: lux._config.config.config + :members: + :no-undoc-members: + diff --git a/doc/source/reference/lux.action.rst b/doc/source/reference/lux.action.rst index 6e704caf..b29055ab 100644 --- a/doc/source/reference/lux.action.rst +++ b/doc/source/reference/lux.action.rst @@ -10,7 +10,7 @@ lux.action.column\_group module .. automodule:: lux.action.column_group :members: :undoc-members: - :show-inheritance: + lux.action.correlation module ----------------------------- @@ -18,7 +18,7 @@ lux.action.correlation module .. automodule:: lux.action.correlation :members: :undoc-members: - :show-inheritance: + lux.action.custom module ------------------------ @@ -26,7 +26,6 @@ lux.action.custom module .. automodule:: lux.action.custom :members: :undoc-members: - :show-inheritance: lux.action.enhance module ------------------------- @@ -34,7 +33,6 @@ lux.action.enhance module .. 
automodule:: lux.action.enhance :members: :undoc-members: - :show-inheritance: lux.action.filter module ------------------------ @@ -42,7 +40,6 @@ lux.action.filter module .. automodule:: lux.action.filter :members: :undoc-members: - :show-inheritance: lux.action.generalize module ---------------------------- @@ -50,7 +47,6 @@ lux.action.generalize module .. automodule:: lux.action.generalize :members: :undoc-members: - :show-inheritance: lux.action.row\_group module ---------------------------- @@ -58,7 +54,6 @@ lux.action.row\_group module .. automodule:: lux.action.row_group :members: :undoc-members: - :show-inheritance: lux.action.univariate module ---------------------------- @@ -66,7 +61,6 @@ lux.action.univariate module .. automodule:: lux.action.univariate :members: :undoc-members: - :show-inheritance: Module contents @@ -75,4 +69,3 @@ Module contents .. automodule:: lux.action :members: :undoc-members: - :show-inheritance: diff --git a/doc/source/reference/lux.core.rst b/doc/source/reference/lux.core.rst index 40e77fa9..f1d40000 100644 --- a/doc/source/reference/lux.core.rst +++ b/doc/source/reference/lux.core.rst @@ -9,7 +9,8 @@ lux.core.frame module .. automodule:: lux.core.frame :members: - :undoc-members: + :exclude-members: head, describe, info, tail + lux.core.series module @@ -17,11 +18,4 @@ lux.core.series module .. automodule:: lux.core.series :members: - :undoc-members: - -Module contents ---------------- -.. automodule:: lux.core - :members: - :undoc-members: diff --git a/doc/source/reference/lux.rst b/doc/source/reference/lux.rst index 472a6abd..52732221 100644 --- a/doc/source/reference/lux.rst +++ b/doc/source/reference/lux.rst @@ -22,4 +22,3 @@ Module contents .. 
automodule:: lux :members: :undoc-members: - :show-inheritance: diff --git a/lux/_config/config.py b/lux/_config/config.py index 21eace4f..59f50e6b 100644 --- a/lux/_config/config.py +++ b/lux/_config/config.py @@ -149,6 +149,9 @@ def is_callable(obj) -> bool: class Config: + """ + An object for global configurations applying to the entire notebook. + """ def __init__(self): self._default_display = "pandas" self.renderer = "altair" @@ -162,6 +165,12 @@ def __init__(self): @property def sampling_cap(self): + """ + Parameters + ---------- + sample_number : int + Cap on the number of rows to sample. Must be larger than _sampling_start + """ return self._sampling_cap @sampling_cap.setter @@ -183,6 +192,13 @@ def sampling_cap(self, sample_number: int) -> None: @property def sampling_start(self): + """ + Parameters + ---------- + sample_number : int + Number of rows required to begin sampling. Must be smaller or equal to _sampling_cap + + """ return self._sampling_start @sampling_start.setter @@ -205,6 +221,12 @@ def sampling_start(self, sample_number: int) -> None: @property def sampling(self): + """ + Parameters + ---------- + sample_flag : bool + Whether or not sampling will occur. + """ return self._sampling_flag @sampling.setter @@ -225,6 +247,12 @@ def sampling(self, sample_flag: bool) -> None: @property def heatmap(self): + """ + Parameters + ---------- + heatmap_flag : bool + Whether or not a heatmap will be used instead of a scatter plot. 
+ """ return self._heatmap_flag @heatmap.setter @@ -245,6 +273,13 @@ def heatmap(self, heatmap_flag: bool) -> None: @property def default_display(self): + """ + Set the widget display to show Pandas by default or Lux by default + Parameters + ---------- + type : str + Default display type, can take either the string `lux` or `pandas` (regardless of capitalization) + """ return self._default_display @default_display.setter @@ -267,6 +302,13 @@ def default_display(self, type: str) -> None: ) def set_SQL_connection(self, connection): + """ + Sets SQL connection to a database + + Parameters: + connection : SQLAlchemy connectable, str, or sqlite3 connection + For more information, `see here `__ + """ self.SQLconnection = connection def set_executor_type(self, exe): @@ -287,9 +329,6 @@ def set_executor_type(self, exe): self.executor = PandasExecutor() - def set_SQL_connection(self, connection): - self.SQLconnection = connection - config = Config() diff --git a/lux/core/frame.py b/lux/core/frame.py index e4ed9e3e..e983879f 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -98,7 +98,7 @@ def f(*args, **kwargs): return f @property - def history(self): + def history(self): return self._history def maintain_metadata(self): @@ -112,6 +112,10 @@ def maintain_metadata(self): self._metadata_fresh = True def expire_recs(self): + """ + Expires recommendations + + """ self._recs_fresh = False self._recommendation = {} self.current_vis = None @@ -120,6 +124,9 @@ def expire_recs(self): self._sampled = None def expire_metadata(self): + """ + Expire all saved metadata to trigger a recomputation the next time the data is required. + """ # Set metadata as null self._metadata_fresh = False self.data_type = None @@ -166,6 +173,19 @@ def _infer_structure(self): @property def intent(self): + """ + Main function to set the intent of the dataframe. + The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. 
+ + Parameters + ---------- + intent : List[str,Clause] + intent list, can be a mix of string shorthand or a lux.Clause object + + Notes + ----- + :doc:`../guide/intent` + """ return self._intent @intent.setter @@ -187,19 +207,6 @@ def clear_intent(self): self.expire_recs() def set_intent(self, intent: List[Union[str, Clause]]): - """ - Main function to set the intent of the dataframe. - The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. - - Parameters - ---------- - intent : List[str,Clause] - intent list, can be a mix of string shorthand or a lux.Clause object - - Notes - ----- - :doc:`../guide/clause` - """ self.expire_recs() self._intent = intent self._parse_validate_compile_intent() @@ -225,13 +232,13 @@ def copy_intent(self): return output def set_intent_as_vis(self, vis: Vis): - """ - Set intent of the dataframe as the Vis - Parameters - ---------- - vis : Vis - """ + # Set intent of the dataframe as the Vis + + # Parameters + # ---------- + # vis : Vis + self.expire_recs() self._intent = vis._inferred_intent self._parse_validate_compile_intent() @@ -488,7 +495,9 @@ def exported(self) -> Union[Dict[str, VisList], VisList]: ----- Convert the _selectedVisIdxs dictionary into a programmable VisList Example _selectedVisIdxs : - {'Correlation': [0, 2], 'Occurrence': [1]} + + {'Correlation': [0, 2], 'Occurrence': [1]} + indicating the 0th and 2nd vis from the `Correlation` tab is selected, and the 1st vis from the `Occurrence` tab is selected. Returns @@ -680,27 +689,38 @@ def render_widget(self, renderer: str = "altair", input_current_vis=""): Generate a LuxWidget based on the LuxDataFrame Structure of widgetJSON: - { - 'current_vis': {}, - 'recommendation': [ - { - 'action': 'Correlation', - 'description': "some description", - 'vspec': [ - {Vega-Lite spec for vis 1}, - {Vega-Lite spec for vis 2}, - ... - ] - }, - ... 
repeat for other actions - ] + + { + + 'current_vis': {}, + 'recommendation': [ + + { + + 'action': 'Correlation', + 'description': "some description", + 'vspec': [ + + {Vega-Lite spec for vis 1}, + {Vega-Lite spec for vis 2}, + ... + + ] + + }, + ... repeat for other actions + + ] + } + Parameters ---------- renderer : str, optional Choice of visualization rendering library, by default "altair" input_current_vis : lux.LuxDataFrame, optional User-specified current vis to override default Current Vis, by default + """ check_import_lux_widget() import luxwidget @@ -862,7 +882,7 @@ def save_as_html(self, filename: str = "export.html") -> None: print(f"Saved HTML to {filename}") # Overridden Pandas Functions - def head(self, n: int = 5): + def head(self, n: int = 5): self._prev = self self._history.append_event("head", n=5) return super(LuxDataFrame, self).head(n) diff --git a/lux/core/series.py b/lux/core/series.py index aea13d0c..96056926 100644 --- a/lux/core/series.py +++ b/lux/core/series.py @@ -19,6 +19,9 @@ class LuxSeries(pd.Series): + """ + A subclass of pd.Series that supports all series operations while housing other variables and functions for generating visual recommendations. 
+ """ _metadata = [ "_intent", "data_type", @@ -55,11 +58,13 @@ def f(*args, **kwargs): return f def to_pandas(self): + """ Convert Lux Series to Pandas Series """ import lux.core return lux.core.originalSeries(self, copy=False) def display_pandas(self): + """ Display Lux Series as Pandas Series""" return self.to_pandas() def __repr__(self): From 74eeb2d8821b36c204aea8a9ff723f9bd7432083 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Wed, 6 Jan 2021 22:05:22 -0800 Subject: [PATCH 37/39] black reformat --- doc/source/reference/lux._config.config.rst | 5 ++--- lux/_config/config.py | 5 +++-- lux/core/frame.py | 12 ++++++------ lux/core/series.py | 1 + 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/doc/source/reference/lux._config.config.rst b/doc/source/reference/lux._config.config.rst index dc3875bc..73f977f0 100644 --- a/doc/source/reference/lux._config.config.rst +++ b/doc/source/reference/lux._config.config.rst @@ -1,10 +1,9 @@ -lux._config.config package -========================== +lux.config.config package +=================================== lux._config.config.Config module --------------------------------- .. automodule:: lux._config.config.config :members: - :no-undoc-members: diff --git a/lux/_config/config.py b/lux/_config/config.py index 59f50e6b..f6431f1d 100644 --- a/lux/_config/config.py +++ b/lux/_config/config.py @@ -151,7 +151,8 @@ def is_callable(obj) -> bool: class Config: """ An object for global configurations applying to the entire notebook. 
- """ + """ + def __init__(self): self._default_display = "pandas" self.renderer = "altair" @@ -308,7 +309,7 @@ def set_SQL_connection(self, connection): Parameters: connection : SQLAlchemy connectable, str, or sqlite3 connection For more information, `see here `__ - """ + """ self.SQLconnection = connection def set_executor_type(self, exe): diff --git a/lux/core/frame.py b/lux/core/frame.py index e983879f..a87bf89e 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -98,7 +98,7 @@ def f(*args, **kwargs): return f @property - def history(self): + def history(self): return self._history def maintain_metadata(self): @@ -115,7 +115,7 @@ def expire_recs(self): """ Expires recommendations - """ + """ self._recs_fresh = False self._recommendation = {} self.current_vis = None @@ -126,7 +126,7 @@ def expire_recs(self): def expire_metadata(self): """ Expire all saved metadata to trigger a recomputation the next time the data is required. - """ + """ # Set metadata as null self._metadata_fresh = False self.data_type = None @@ -690,7 +690,7 @@ def render_widget(self, renderer: str = "altair", input_current_vis=""): Structure of widgetJSON: - { + { 'current_vis': {}, 'recommendation': [ @@ -720,7 +720,7 @@ def render_widget(self, renderer: str = "altair", input_current_vis=""): Choice of visualization rendering library, by default "altair" input_current_vis : lux.LuxDataFrame, optional User-specified current vis to override default Current Vis, by default - + """ check_import_lux_widget() import luxwidget @@ -882,7 +882,7 @@ def save_as_html(self, filename: str = "export.html") -> None: print(f"Saved HTML to {filename}") # Overridden Pandas Functions - def head(self, n: int = 5): + def head(self, n: int = 5): self._prev = self self._history.append_event("head", n=5) return super(LuxDataFrame, self).head(n) diff --git a/lux/core/series.py b/lux/core/series.py index 96056926..ff364e44 100644 --- a/lux/core/series.py +++ b/lux/core/series.py @@ -22,6 +22,7 @@ class 
LuxSeries(pd.Series): """ A subclass of pd.Series that supports all series operations while housing other variables and functions for generating visual recommendations. """ + _metadata = [ "_intent", "data_type", From edbb595c1ae2e17a96aa3c069df586f5c8cf587d Mon Sep 17 00:00:00 2001 From: Doris Lee Date: Thu, 7 Jan 2021 15:23:26 +0800 Subject: [PATCH 38/39] minor fixes --- doc/source/advanced/interestingness.rst | 2 +- doc/source/getting_started/overview.rst | 4 +-- doc/source/guide/FAQ.rst | 19 +++++------ doc/source/reference/config.rst | 34 +++++++++---------- .../gen/lux._config.config.Config.rst | 2 +- .../gen/lux.core.frame.LuxDataFrame.rst | 2 +- .../gen/lux.core.series.LuxSeries.rst | 2 +- .../gen/lux.executor.Executor.Executor.rst | 2 +- ...executor.PandasExecutor.PandasExecutor.rst | 2 +- .../lux.executor.SQLExecutor.SQLExecutor.rst | 2 +- doc/source/reference/gen/lux.vis.Vis.Vis.rst | 2 +- lux/_config/config.py | 22 ++++++------ lux/core/frame.py | 17 +++++----- lux/core/series.py | 18 +++++----- 14 files changed, 64 insertions(+), 66 deletions(-) diff --git a/doc/source/advanced/interestingness.rst b/doc/source/advanced/interestingness.rst index 482b1771..8581a8ea 100644 --- a/doc/source/advanced/interestingness.rst +++ b/doc/source/advanced/interestingness.rst @@ -30,7 +30,7 @@ Bar charts without filters: Unevenness A chart is scored higher if it is more uneven, indicating high variation in the individual bar values in the chart. The score is computed based -on the difference between the value of the bar chart .. math::`V` and the flat uniform distribution .. math::`V_{flat}`. +on the difference between the value of the bar chart :math:`V` and the flat uniform distribution :math:`V_{flat}`. The difference is captured via the Euclidean distance (L2 norm). 
diff --git a/doc/source/getting_started/overview.rst b/doc/source/getting_started/overview.rst index 29833f59..aa66ccf0 100644 --- a/doc/source/getting_started/overview.rst +++ b/doc/source/getting_started/overview.rst @@ -25,7 +25,7 @@ Lux preserves the Pandas dataframe semantics -- which means that you can apply a df = pd.read_csv("lux/data/college.csv") -To see the suggested visualizations, simply print the DataFrame. You should see the default Pandas table display with an additional toggle button. +To visualize your dataframe in Lux, simply print out the dataframe. You should see the default Pandas table display with an additional toggle button. .. code-block:: python @@ -36,7 +36,7 @@ To see the suggested visualizations, simply print the DataFrame. You should see :align: center :alt: click on toggle, scroll on Correlation -By clicking on the Toggle button, you can now explore the data visually through Lux. You should see several categories of visualizations recommended to you by pressing on the tabs. +By clicking on the Toggle button, you can now explore the data visually through Lux. You should see several categories of visualizations recommended to you by browsing through the different tabs. .. image:: ../../../../lux-resources/doc_img/overview-2.gif :width: 700 diff --git a/doc/source/guide/FAQ.rst b/doc/source/guide/FAQ.rst index 1d13a081..c99d29ed 100644 --- a/doc/source/guide/FAQ.rst +++ b/doc/source/guide/FAQ.rst @@ -20,30 +20,30 @@ What do I do with date-related attributes in my dataset? How do I access all of the current recommendations shown in my widget? """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" - The recommendations for Lux can be accessed via the :code:`recommendation` property of the dataframe (e.g., df.recommendation). + The recommendations for Lux can be accessed via the :code:`recommendation` property of the dataframe (e.g., :code:`df.recommendation`). How do I set the Lux widgets to show up on default? 
"""""""""""""""""""""""""""""""""""""""""""""""""""""""" - By default, we show the Pandas display and users can use the toggle button to switch to the Lux display. The `default_display` property allows users to change the setting so that the Lux widget is set as the default view for future operations on the specified dataframe: + By default, we show the Pandas display and users can use the toggle button to switch to the Lux display. The :code:`default_display` property allows users to change the setting so that the Lux widget is set as the default view for future operations: .. code-block:: python - df.config.default_display = "lux" + lux.config.default_display = "lux" To switch back to Pandas as the default display: .. code-block:: python - df.config.default_display = "pandas" + lux.config.default_display = "pandas" I want to change the opacity of my chart, add title, change chart font size, etc. How do I modify chart settings? """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" - To add custom plot settings to the recommendations, you can set the global :code:`plot_config` property. See `this tutorial `__ on how to configure chart properties. Lux currently only support chart modifications in Altair. + To add custom plot settings to the recommendations, you can set the :code:`lux.config.plot_config` property. See `this tutorial `__ on how to configure chart properties. Lux currently only support chart modifications in Altair. How do I change aggregation functions, binning, or axis channels to non-default values? """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" To change the aggregation function to be something that is not average or set an attribute to display on the x-axis instead of y-axis, you can override the default values in the :code:`lux.Clause` specification. 
- To override automatically inferred properties, you can specify additional arguements inside `lux.Clause` to set the value of the Clause properties. See `this page `__ for more details. + To override automatically inferred properties, you can specify additional arguements inside :py:class:`lux.vis.Clause` to set the value of the Clause properties. See `this page `__ for more details. I want to look at the default recommendations that were recommended to me, how can I get the dataframe to display those? """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" @@ -66,7 +66,7 @@ How do I turn off Lux? How do I disable sampling and have Lux visualize the full dataset? """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" - Lux displays a warning saying "Large dataframe detected: Lux is only visualizing a random sample". If you would like to disable sampling, you can run: + When visualizing large datasets, Lux may display a warning stating "`Large dataframe detected: Lux is only visualizing a random sample`". If you would like to disable sampling, you can run: .. code-block:: python @@ -96,9 +96,10 @@ The Lux Jupyter widget does not show up when I print a dataframe. - Validating: OK - If you are able to import lux successfully and you do not see the "Toggle button" when you print the dataframe, it may be possible that Lux is not compatible with your browser. Lux is compatible with Google Chrome, but have not been extensively tested on Safari or Firefox. - - If you recieve the error message :code:`A Jupyter widget could not be displayed because the widget state could not be found.` This could happen if the kernel storing the widget is no longer available, or if the widget state was not saved in the notebook. You may be able to create the widget by running the appropriate cells.`, you may want to restart the notebook and rerun the cell. 
+ - If you receive the error message :code:`A Jupyter widget could not be displayed because the widget state could not be found.` This could happen if the kernel storing the widget is no longer available, or if the widget state was not saved in the notebook. You may be able to create the widget by running the particular cell again. If this doesn't work, then you may want to try restarting the notebook and rerunning the cell. - If you receive the error message :code:`ModuleNotFoundError: No module named 'luxwidget'`, it is possible that your luxwidget and lux-api versions are not in sync. The latest version of lux-api requires luxwidget v0.1 or above. Try running the following code: - If you receive the error message :code:`PermissionError: [Errno 13] Permission denied.` during the execution of the command :code:`jupyter nbextension install --py luxwidget`, then you can add the flag :code:`--user` (:code:`jupyter nbextension enable --py --user luxwidget`). + - Alternatively, if none of the above works, you can try creating a fresh virtual environment and following the `quick install instructions `_. .. code-block:: bash @@ -112,8 +113,6 @@ The Lux Jupyter widget does not show up when I print a dataframe. jupyter nbextension install --py luxwidget jupyter nbextension enable --py luxwidget - - Alternatively, you can also try creating a fresh virtual environment and follow the `quick install instructions `_. I'm not able to export my visualizations via the :code:`exported` property. diff --git a/doc/source/reference/config.rst b/doc/source/reference/config.rst index a6b8ed9e..7b85b687 100644 --- a/doc/source/reference/config.rst +++ b/doc/source/reference/config.rst @@ -2,23 +2,22 @@ Configuration Settings *********************** -In Lux, users can customize various global settings to configure the behavior of Lux through :py:mod:`lux.config.Config`. This page documents some of the configurations that you can apply in Lux. 
+In Lux, users can customize various global settings to configure the behavior of Lux through :py:class:`lux.config.Config`. This page documents some of the configurations that you can apply in Lux. Change the default display of Lux ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We can set the `default_display` of the global class 'Config' to change the default form of output. In the following block, we set it to 'lux,' therefore the VisList will display first. +We can set the :code:`default_display` to change whether the Pandas table or Lux widget is displayed by default. In the following block, we set the default display to 'lux', therefore the Lux widget will display first. .. code-block:: python - lux.config.default_display = "lux" # Set Lux as default display + lux.config.default_display = "lux" df .. image:: https://github.com/lux-org/lux-resources/blob/master/doc_img/display-1.png?raw=true :width: 700 :align: center - :alt: Retrieves a single attribute from Lux's Action Manager using its defined id. We can set the default_display back to 'pandas,' which would allow for the dataframe object to display first. You can still toggle to Lux/Pandas respectively using the 'Toggle' button. @@ -30,7 +29,6 @@ We can set the default_display back to 'pandas,' which would allow for the dataf .. image:: https://github.com/lux-org/lux-resources/blob/master/doc_img/display-2.png?raw=true :width: 700 :align: center - :alt: Retrieves a single attribute from Lux's Action Manager using its defined id. If you try to set the default_display to anything other than 'lux' or 'pandas,' a warning will be shown, and the display will default to the previous setting. @@ -42,7 +40,6 @@ If you try to set the default_display to anything other than 'lux' or 'pandas,' .. image:: https://github.com/lux-org/lux-resources/blob/master/doc_img/display-3.png?raw=true :width: 700 :align: center - :alt: Retrieves a single attribute from Lux's Action Manager using its defined id. 
Change the sampling parameters of Lux ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -77,36 +74,37 @@ We can disable this feature and revert back to using a scatter plot by running t Default Renderer ~~~~~~~~~~~~~~~~~ -For now, all graphs will be rendered using `altair`. We are working on creating support for `matplotlib` and other plotting libraries. +Charts in Lux are rendered using `Altair `__. We are working on supporting plotting via `matplotlib `__ and other plotting libraries. -To set the default renderer, run the following code block: +To change the default renderer, run the following code block: -.. code-block::python +.. code-block:: python - lux.config.renderer = "altair" + lux.config.renderer = "matplotlib" Plot Configurations ~~~~~~~~~~~~~~~~~~~ Altair supports plot configurations to be applied on top of the generated graphs. To set a default plot configuration, first write a function that can take in a `chart` and returns a `chart`. For example: -.. code-block::python +.. code-block:: python + def change_color_add_title(chart): - chart = chart.configure_mark(color="green") # change mark color to green - chart.title = "Custom Title" # add title to chart - return chart + chart = chart.configure_mark(color="green") # change mark color to green + chart.title = "Custom Title" # add title to chart + return chart Then, set the `plot_config` to this function so that this function is applied to every plot generated. -.. code-block::python +.. code-block:: python lux.config.plot_config = change_color_add_title The above results in the following changes: -.. image:: https://github.com/lux-org/lux-resources/blob/master/doc_img/plotconfig-1.png?raw=true - :width: 700 +.. image:: https://github.com/lux-org/lux-resources/blob/master/doc_img/style-2.png?raw=true + :width: 600 :align: center - :alt: Retrieves a single attribute from Lux's Action Manager using its defined id. +See `this page `__ for more details. 
diff --git a/doc/source/reference/gen/lux._config.config.Config.rst b/doc/source/reference/gen/lux._config.config.Config.rst index 3b7637e2..0000b36f 100644 --- a/doc/source/reference/gen/lux._config.config.Config.rst +++ b/doc/source/reference/gen/lux._config.config.Config.rst @@ -1,4 +1,4 @@ -lux.\_config.config.Config +lux.\_config.config.Config ========================== .. currentmodule:: lux._config.config diff --git a/doc/source/reference/gen/lux.core.frame.LuxDataFrame.rst b/doc/source/reference/gen/lux.core.frame.LuxDataFrame.rst index 05c1bea1..600daf83 100644 --- a/doc/source/reference/gen/lux.core.frame.LuxDataFrame.rst +++ b/doc/source/reference/gen/lux.core.frame.LuxDataFrame.rst @@ -1,4 +1,4 @@ -lux.core.frame.LuxDataFrame +lux.core.frame.LuxDataFrame =========================== .. currentmodule:: lux.core.frame diff --git a/doc/source/reference/gen/lux.core.series.LuxSeries.rst b/doc/source/reference/gen/lux.core.series.LuxSeries.rst index f44a2548..0f50d3e4 100644 --- a/doc/source/reference/gen/lux.core.series.LuxSeries.rst +++ b/doc/source/reference/gen/lux.core.series.LuxSeries.rst @@ -1,4 +1,4 @@ -lux.core.series.LuxSeries +lux.core.series.LuxSeries ========================= .. currentmodule:: lux.core.series diff --git a/doc/source/reference/gen/lux.executor.Executor.Executor.rst b/doc/source/reference/gen/lux.executor.Executor.Executor.rst index ea67caa3..c45473c6 100644 --- a/doc/source/reference/gen/lux.executor.Executor.Executor.rst +++ b/doc/source/reference/gen/lux.executor.Executor.Executor.rst @@ -1,4 +1,4 @@ -lux.executor.Executor.Executor +lux.executor.Executor.Executor ============================== .. 
currentmodule:: lux.executor.Executor diff --git a/doc/source/reference/gen/lux.executor.PandasExecutor.PandasExecutor.rst b/doc/source/reference/gen/lux.executor.PandasExecutor.PandasExecutor.rst index e99f2254..a65b633e 100644 --- a/doc/source/reference/gen/lux.executor.PandasExecutor.PandasExecutor.rst +++ b/doc/source/reference/gen/lux.executor.PandasExecutor.PandasExecutor.rst @@ -1,4 +1,4 @@ -lux.executor.PandasExecutor.PandasExecutor +lux.executor.PandasExecutor.PandasExecutor ========================================== .. currentmodule:: lux.executor.PandasExecutor diff --git a/doc/source/reference/gen/lux.executor.SQLExecutor.SQLExecutor.rst b/doc/source/reference/gen/lux.executor.SQLExecutor.SQLExecutor.rst index a809a280..f5ddf2ec 100644 --- a/doc/source/reference/gen/lux.executor.SQLExecutor.SQLExecutor.rst +++ b/doc/source/reference/gen/lux.executor.SQLExecutor.SQLExecutor.rst @@ -1,4 +1,4 @@ -lux.executor.SQLExecutor.SQLExecutor +lux.executor.SQLExecutor.SQLExecutor ==================================== .. currentmodule:: lux.executor.SQLExecutor diff --git a/doc/source/reference/gen/lux.vis.Vis.Vis.rst b/doc/source/reference/gen/lux.vis.Vis.Vis.rst index 3785b548..dc79967d 100644 --- a/doc/source/reference/gen/lux.vis.Vis.Vis.rst +++ b/doc/source/reference/gen/lux.vis.Vis.Vis.rst @@ -1,4 +1,4 @@ -lux.vis.Vis.Vis +lux.vis.Vis.Vis =============== .. currentmodule:: lux.vis.Vis diff --git a/lux/_config/config.py b/lux/_config/config.py index f6431f1d..7ee84d58 100644 --- a/lux/_config/config.py +++ b/lux/_config/config.py @@ -150,7 +150,7 @@ def is_callable(obj) -> bool: class Config: """ - An object for global configurations applying to the entire notebook. + Class for Lux configurations applied globally across entire session """ def __init__(self): @@ -170,7 +170,7 @@ def sampling_cap(self): Parameters ---------- sample_number : int - Cap on the number of rows to sample. Must be larger than _sampling_start + Cap on the number of rows to sample. 
Must be larger than _sampling_start """ return self._sampling_cap @@ -180,7 +180,7 @@ def sampling_cap(self, sample_number: int) -> None: Parameters ---------- sample_number : int - Cap on the number of rows to sample. Must be larger than _sampling_start + Cap on the number of rows to sample. Must be larger than _sampling_start """ if type(sample_number) == int: assert sample_number >= self._sampling_start @@ -197,7 +197,7 @@ def sampling_start(self): Parameters ---------- sample_number : int - Number of rows required to begin sampling. Must be smaller or equal to _sampling_cap + Number of rows required to begin sampling. Must be smaller or equal to _sampling_cap """ return self._sampling_start @@ -208,7 +208,7 @@ def sampling_start(self, sample_number: int) -> None: Parameters ---------- sample_number : int - Number of rows required to begin sampling. Must be smaller or equal to _sampling_cap + Number of rows required to begin sampling. Must be smaller or equal to _sampling_cap """ if type(sample_number) == int: @@ -226,7 +226,7 @@ def sampling(self): Parameters ---------- sample_flag : bool - Whether or not sampling will occur. + Whether or not sampling will occur. """ return self._sampling_flag @@ -236,7 +236,7 @@ def sampling(self, sample_flag: bool) -> None: Parameters ---------- sample_flag : bool - Whether or not sampling will occur. + Whether or not sampling will occur. """ if type(sample_flag) == bool: self._sampling_flag = sample_flag @@ -252,7 +252,7 @@ def heatmap(self): Parameters ---------- heatmap_flag : bool - Whether or not a heatmap will be used instead of a scatter plot. + Whether or not a heatmap will be used instead of a scatter plot. """ return self._heatmap_flag @@ -262,7 +262,7 @@ def heatmap(self, heatmap_flag: bool) -> None: Parameters ---------- heatmap_flag : bool - Whether or not a heatmap will be used instead of a scatter plot. + Whether or not a heatmap will be used instead of a scatter plot. 
""" if type(heatmap_flag) == bool: self._heatmap_flag = heatmap_flag @@ -279,7 +279,7 @@ def default_display(self): Parameters ---------- type : str - Default display type, can take either the string `lux` or `pandas` (regardless of capitalization) + Default display type, can take either the string `lux` or `pandas` (regardless of capitalization) """ return self._default_display @@ -290,7 +290,7 @@ def default_display(self, type: str) -> None: Parameters ---------- type : str - Default display type, can take either the string `lux` or `pandas` (regardless of capitalization) + Default display type, can take either the string `lux` or `pandas` (regardless of capitalization) """ if type.lower() == "lux": self._default_display = "lux" diff --git a/lux/core/frame.py b/lux/core/frame.py index a87bf89e..376fa6a7 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -113,8 +113,7 @@ def maintain_metadata(self): def expire_recs(self): """ - Expires recommendations - + Expires and resets all recommendations """ self._recs_fresh = False self._recommendation = {} @@ -127,7 +126,6 @@ def expire_metadata(self): """ Expire all saved metadata to trigger a recomputation the next time the data is required. 
""" - # Set metadata as null self._metadata_fresh = False self.data_type = None self.unique_values = None @@ -232,13 +230,14 @@ def copy_intent(self): return output def set_intent_as_vis(self, vis: Vis): + """ + Set intent of the dataframe based on the intent of a Vis - # Set intent of the dataframe as the Vis - - # Parameters - # ---------- - # vis : Vis - + Parameters + ---------- + vis : Vis + Input Vis object + """ self.expire_recs() self._intent = vis._inferred_intent self._parse_validate_compile_intent() diff --git a/lux/core/series.py b/lux/core/series.py index ff364e44..dd4c4b4e 100644 --- a/lux/core/series.py +++ b/lux/core/series.py @@ -20,7 +20,7 @@ class LuxSeries(pd.Series): """ - A subclass of pd.Series that supports all series operations while housing other variables and functions for generating visual recommendations. + A subclass of pd.Series that supports all 1-D Series operations """ _metadata = [ @@ -58,16 +58,18 @@ def f(*args, **kwargs): f._get_axis_number = super(LuxSeries, self)._get_axis_number return f - def to_pandas(self): - """ Convert Lux Series to Pandas Series """ + def to_pandas(self) -> pd.Series: + """ + Convert Lux Series to Pandas Series + + Returns + ------- + pd.Series + """ import lux.core return lux.core.originalSeries(self, copy=False) - def display_pandas(self): - """ Display Lux Series as Pandas Series""" - return self.to_pandas() - def __repr__(self): from IPython.display import display from IPython.display import clear_output @@ -160,5 +162,5 @@ def on_button_clicked(b): stacklevel=2, ) warnings.warn(traceback.format_exc()) - display(self.display_pandas()) + display(self.to_pandas()) return "" From 8e2bd3164873b843111991baf7ee90c8d3ed409e Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Wed, 6 Jan 2021 23:33:00 -0800 Subject: [PATCH 39/39] minor fix --- doc/source/getting_started/overview.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/getting_started/overview.rst 
b/doc/source/getting_started/overview.rst index aa66ccf0..cb072921 100644 --- a/doc/source/getting_started/overview.rst +++ b/doc/source/getting_started/overview.rst @@ -25,7 +25,7 @@ Lux preserves the Pandas dataframe semantics -- which means that you can apply a df = pd.read_csv("lux/data/college.csv") -To visualized your dataframe in Lux, simply print out the dataframe. You should see the default Pandas table display with an additional toggle button. +To visualize your dataframe in Lux, simply print out the dataframe. You should see the default Pandas table display with an additional toggle button. .. code-block:: python