lux-org · dorisjlee · Dec 26, 2020 · Dec 22, 2020 · Dec 23, 2020 · Dec 25, 2020
diff --git a/lux/action/filter.py b/lux/action/filter.py
@@ -18,6 +18,7 @@
 from lux.vis.VisList import VisList
 from lux.processor.Compiler import Compiler
 from lux.utils import utils
+from lux.utils.utils import get_filter_specs
 
 
 def filter(ldf):
@@ -112,9 +113,28 @@ def get_complementary_ops(fltr_op):
                 new_spec.append(new_filter)
                 temp_vis = Vis(new_spec)
                 output.append(temp_vis)
+    if (
+        ldf.current_vis is not None
+        and len(ldf.current_vis) == 1
+        and ldf.current_vis[0].mark == "line"
+        and len(get_filter_specs(ldf.intent)) > 0
+    ):
+        recommendation = {
+            "action": "Similarity",
+            "description": "Show other charts that are visually similar to the Current vis.",
+        }
+        last = get_filter_specs(ldf.intent)[-1]
+        output = ldf.intent.copy()[0:-1]
+        # array of possible values for attribute
+        arr = ldf[last.attribute].unique().tolist()
+        output.append(lux.Clause(last.attribute, last.attribute, arr))
     vlist = lux.vis.VisList.VisList(output, ldf)
-    for vis in vlist:
-        vis.score = interestingness(vis, ldf)
+    vlist_copy = lux.vis.VisList.VisList(output, ldf)
+    for i in range(len(vlist_copy)):
+        vlist[i].score = interestingness(vlist_copy[i], ldf)
     vlist = vlist.topK(15)
-    recommendation["collection"] = vlist
+    if recommendation["action"] == "Similarity":
+        recommendation["collection"] = vlist[1:]
+    else:
+        recommendation["collection"] = vlist
     return recommendation
diff --git a/lux/interestingness/interestingness.py b/lux/interestingness/interestingness.py
@@ -22,6 +22,9 @@
 from pandas.api.types import is_datetime64_any_dtype as is_datetime
 from scipy.spatial.distance import euclidean
 import lux
+from lux.utils.utils import get_filter_specs
+from lux.interestingness.similarity import preprocess, euclidean_dist
+from lux.vis.VisList import VisList
 
 
 def interestingness(vis: Vis, ldf: LuxDataFrame) -> int:
@@ -68,6 +71,22 @@ def interestingness(vis: Vis, ldf: LuxDataFrame) -> int:
     dimension_lst = vis.get_attr_by_data_model("dimension")
     measure_lst = vis.get_attr_by_data_model("measure")
     v_size = len(vis.data)
+
+    if (
+        n_dim == 1
+        and (n_msr == 0 or n_msr == 1)
+        and ldf.current_vis is not None
+        and vis.get_attr_by_channel("y")[0].data_type == "quantitative"
+        and len(ldf.current_vis) == 1
+        and ldf.current_vis[0].mark == "line"
+        and len(get_filter_specs(ldf.intent)) > 0
+    ):
+        query_vc = VisList(ldf.current_vis, ldf)
+        query_vis = query_vc[0]
+        preprocess(query_vis)
+        preprocess(vis)
+        return 1 - euclidean_dist(query_vis, vis)
+
     # Line/Bar Chart
     # print("r:", n_record, "m:", n_msr, "d:",n_dim)
     if n_dim == 1 and (n_msr == 0 or n_msr == 1):

diff --git a/lux/action/similarity.py → lux/interestingness/similarity.py b/lux/action/similarity.py → lux/interestingness/similarity.py
@@ -17,70 +17,7 @@
 import math
 import numpy as np
 from lux.vis.VisList import VisList
-
-
-def similar_pattern(ldf, intent, topK=-1):
-    """
-    Generates visualizations with similar patterns to a query visualization.
-
-    Parameters
-    ----------
-    ldf : lux.core.frame
-        LuxDataFrame with underspecified intent.
-
-    intent: list[lux.Clause]
-        intent for specifying the visual query for the similarity search.
-
-    topK: int
-        number of visual recommendations to return.
-
-    Returns
-    -------
-    recommendations : Dict[str,obj]
-        object with a collection of visualizations that result from the Similarity action
-    """
-    row_specs = list(filter(lambda x: x.value != "", intent))
-    if len(row_specs) == 1:
-        search_space_vc = VisList(ldf.current_vis.collection.copy(), ldf)
-
-        query_vc = VisList(intent, ldf)
-        query_vis = query_vc[0]
-        preprocess(query_vis)
-        # for loop to create assign euclidean distance
-        recommendation = {
-            "action": "Similarity",
-            "description": "Show other charts that are visually similar to the Current vis.",
-        }
-        for vis in search_space_vc:
-            preprocess(vis)
-            vis.score = euclidean_dist(query_vis, vis)
-        search_space_vc.normalize_score(invert_order=True)
-        if topK != -1:
-            search_space_vc = search_space_vc.topK(topK)
-        recommendation["collection"] = search_space_vc
-        return recommendation
-    else:
-        print("Query needs to have 1 row value")
-
-
-def aggregate(vis):
-    """
-    Aggregates data values on the y axis so that the vis is a time series
-
-    Parameters
-    ----------
-    vis : lux.vis.Vis
-        vis that represents the candidate visualization
-    Returns
-    -------
-    None
-    """
-    if vis.get_attr_by_channel("x") and vis.get_attr_by_channel("y"):
-
-        xAxis = vis.get_attr_by_channel("x")[0].attribute
-        yAxis = vis.get_attr_by_channel("y")[0].attribute
-
-        vis.data = vis.data[[xAxis, yAxis]].groupby(xAxis, as_index=False).agg({yAxis: "mean"}).copy()
+from lux.utils.utils import get_filter_specs
 
 
 def interpolate(vis, length):
@@ -204,6 +141,4 @@ def preprocess(vis):
     -------
     None
     """
-    aggregate(vis)
-    interpolate(vis, 100)
     normalize(vis)
diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py
@@ -18,6 +18,7 @@
 from lux.core.frame import LuxDataFrame
 from lux.vis.VisList import VisList
 from lux.utils import date_utils
+from lux.utils import utils
 import pandas as pd
 import numpy as np
 import warnings
@@ -179,12 +180,25 @@ def populate_data_type_model(ldf, vlist):
                         else:
                             chart_title = clause.value
                         vis.title = f"{clause.attribute} {clause.filter_op} {chart_title}"
+            vis._ndim = 0
+            vis._nmsr = 0
+
+            for clause in vis._inferred_intent:
+                if clause.value == "":
+                    if clause.data_model == "dimension":
+                        vis._ndim += 1
+                    elif clause.data_model == "measure" and clause.attribute != "Record":
+                        vis._nmsr += 1
 
     @staticmethod
     def remove_all_invalid(vis_collection: VisList) -> VisList:
         """
         Given an expanded vis list, remove all visualizations that are invalid.
-        Currently, the invalid visualizations are ones that contain two of the same attribute, no more than two temporal attributes, or overlapping attributes (same filter attribute and visualized attribute).
+        Currently, the invalid visualizations are ones that contain any of:
+        - two of the same attribute,
+        - two or more temporal attributes,
+        - overlapping attributes (same filter attribute and visualized attribute),
+        - exactly one temporal attribute together with two measures
         Parameters
         ----------
         vis_collection : list[lux.vis.Vis]
@@ -203,7 +217,11 @@ def remove_all_invalid(vis_collection: VisList) -> VisList:
                 if clause.data_type == "temporal":
                     num_temporal_specs += 1
             all_distinct_specs = 0 == len(vis._inferred_intent) - len(attribute_set)
-            if num_temporal_specs < 2 and all_distinct_specs:
+            if (
+                num_temporal_specs < 2
+                and all_distinct_specs
+                and not (vis._nmsr == 2 and num_temporal_specs == 1)
+            ):
                 new_vc.append(vis)
             # else:
             # 	warnings.warn("\nThere is more than one duplicate attribute specified in the intent.\nPlease check your intent specification again.")
@@ -235,17 +253,11 @@ def determine_encoding(ldf: LuxDataFrame, vis: Vis):
         https://doi.org/10.1109/TVCG.2007.70594
         """
         # Count number of measures and dimensions
-        ndim = 0
-        nmsr = 0
-        filters = []
-        for clause in vis._inferred_intent:
-            if clause.value == "":
-                if clause.data_model == "dimension":
-                    ndim += 1
-                elif clause.data_model == "measure" and clause.attribute != "Record":
-                    nmsr += 1
-            else:  # preserve to add back to _inferred_intent later
-                filters.append(clause)
+        ndim = vis._ndim
+        nmsr = vis._nmsr
+        # preserve to add back to _inferred_intent later
+        filters = utils.get_filter_specs(vis._inferred_intent)
+
         # Helper function (TODO: Move this into utils)
         def line_or_bar(ldf, dimension: Clause, measure: Clause):
             dim_type = dimension.data_type

diff --git a/tests/test_action.py b/tests/test_action.py
@@ -197,3 +197,48 @@ def test_year_filter_value(global_var):
         "T00:00:00.000000000" not in vis.to_Altair()
     ), "Year filter title contains extraneous string, not displayed as summarized string"
     df.clear_intent()
+
+
+def test_similarity(global_var):
+    df = pytest.car_df
+    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
+    df.set_intent(
+        [
+            lux.Clause("Year", channel="x"),
+            lux.Clause("Displacement", channel="y"),
+            lux.Clause("Origin=USA"),
+        ]
+    )
+    df._repr_html_()
+    assert len(df.recommendation["Similarity"]) == 2
+    ranked_list = df.recommendation["Similarity"]
+
+    japan_vis = list(
+        filter(lambda vis: vis.get_attr_by_attr_name("Origin")[0].value == "Japan", ranked_list)
+    )[0]
+    europe_vis = list(
+        filter(lambda vis: vis.get_attr_by_attr_name("Origin")[0].value == "Europe", ranked_list)
+    )[0]
+    assert japan_vis.score > europe_vis.score
+    df.clear_intent()
+
+
+def test_similarity2():
+    df = pd.read_csv(
+        "https://raw.githubusercontent.com/lux-org/lux-datasets/master/data/real_estate_tutorial.csv"
+    )
+
+    df["Month"] = pd.to_datetime(df["Month"], format="%m")
+    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
+
+    df.intent = [lux.Clause("Year"), lux.Clause("PctForeclosured"), lux.Clause("City=Crofton")]
+
+    ranked_list = df.recommendation["Similarity"]
+
+    morrisville_vis = list(
+        filter(lambda vis: vis.get_attr_by_attr_name("City")[0].value == "Morrisville", ranked_list)
+    )[0]
+    watertown_vis = list(
+        filter(lambda vis: vis.get_attr_by_attr_name("City")[0].value == "Watertown", ranked_list)
+    )[0]
+    assert morrisville_vis.score > watertown_vis.score
diff --git a/tests/test_interestingness.py b/tests/test_interestingness.py
@@ -226,7 +226,6 @@ def test_interestingness_0_2_0(global_var):
     assert interestingness(df.recommendation["Enhance"][0], df) != None
     rank1 = -1
     rank2 = -1
-    rank3 = -1
     for f in range(0, len(df.recommendation["Enhance"])):
         if (
             str(df.recommendation["Enhance"][f]._inferred_intent[2].attribute) == "Origin"
@@ -238,12 +237,7 @@ def test_interestingness_0_2_0(global_var):
             and str(df.recommendation["Enhance"][f].mark) == "scatter"
         ):
             rank2 = f
-        if (
-            str(df.recommendation["Enhance"][f]._inferred_intent[2].attribute) == "Year"
-            and str(df.recommendation["Enhance"][f].mark) == "scatter"
-        ):
-            rank3 = f
-    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3
+    assert rank1 < rank2
 
     # check that top recommended filter graph score is not none and that ordering makes intuitive sense
     assert interestingness(df.recommendation["Filter"][0], df) != None