Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing druid's regex filters #501

Merged
merged 3 commits into from
May 23, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 29 additions & 26 deletions caravel/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,29 +642,18 @@ class QueryForm(OmgWtForm):
collapsed_fieldsets = HiddenField()
viz_type = self.field_dict.get('viz_type')

filter_cols = viz.datasource.filterable_column_names or ['']
for i in range(10):
setattr(QueryForm, 'flt_col_' + str(i), SelectField(
'Filter 1',
default=filter_cols[0],
choices=self.choicify(filter_cols)))
setattr(QueryForm, 'flt_op_' + str(i), SelectField(
'Filter 1',
default='in',
choices=self.choicify(['in', 'not in'])))
setattr(
QueryForm, 'flt_eq_' + str(i),
TextField("Super", default=''))

for field in viz.flat_form_fields():
setattr(QueryForm, field, self.field_dict[field])

def add_to_form(attrs):
for attr in attrs:
setattr(QueryForm, attr, self.field_dict[attr])

filter_choices = self.choicify(['in', 'not in'])
# datasource type specific form elements
if viz.datasource.__class__.__name__ == 'SqlaTable':
datasource_classname = viz.datasource.__class__.__name__
time_fields = None
if datasource_classname == 'SqlaTable':
QueryForm.fieldsets += ({
'label': 'SQL',
'fields': ['where', 'having'],
Expand All @@ -675,8 +664,6 @@ def add_to_form(attrs):
add_to_form(('where', 'having'))
grains = viz.datasource.database.grains()

if not viz.datasource.any_dttm_col:
return QueryForm
if grains:
time_fields = ('granularity_sqla', 'time_grain_sqla')
self.field_dict['time_grain_sqla'] = SelectField(
Expand All @@ -695,19 +682,35 @@ def add_to_form(attrs):
else:
time_fields = 'granularity_sqla'
add_to_form((time_fields, ))
else:
elif datasource_classname == 'DruidDatasource':
time_fields = ('granularity', 'druid_time_origin')
add_to_form(('granularity', 'druid_time_origin'))
field_css_classes['granularity'] = ['form-control', 'select2_freeform']
field_css_classes['druid_time_origin'] = ['form-control', 'select2_freeform']
filter_choices = self.choicify(['in', 'not in', 'regex'])
add_to_form(('since', 'until'))

QueryForm.fieldsets = ({
'label': 'Time',
'fields': (
time_fields,
('since', 'until'),
),
'description': "Time related form attributes",
},) + tuple(QueryForm.fieldsets)
filter_cols = viz.datasource.filterable_column_names or ['']
for i in range(10):
setattr(QueryForm, 'flt_col_' + str(i), SelectField(
'Filter 1',
default=filter_cols[0],
choices=self.choicify(filter_cols)))
setattr(QueryForm, 'flt_op_' + str(i), SelectField(
'Filter 1',
default='in',
choices=filter_choices))
setattr(
QueryForm, 'flt_eq_' + str(i),
TextField("Super", default=''))

if time_fields:
QueryForm.fieldsets = ({
'label': 'Time',
'fields': (
time_fields,
('since', 'until'),
),
'description': "Time related form attributes",
},) + tuple(QueryForm.fieldsets)
return QueryForm
8 changes: 6 additions & 2 deletions caravel/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,8 @@ def recursive_get_fields(_conf):
cond = Dimension(col) == eq
if op == 'not in':
cond = ~cond
elif op == 'regex':
cond = Filter(type="regex", pattern=eq, dimension=col)
if filters:
filters = Filter(type="and", fields=[
Filter.build_filter(cond),
Expand Down Expand Up @@ -1201,7 +1203,8 @@ def recursive_get_fields(_conf):
}
client.groupby(**pre_qry)
query_str += "// Two phase query\n// Phase 1\n"
query_str += json.dumps(client.query_dict, indent=2) + "\n"
query_str += json.dumps(
client.query_builder.last_query.query_dict, indent=2) + "\n"
query_str += "//\nPhase 2 (built based on phase one's results)\n"
df = client.export_pandas()
if df is not None and not df.empty:
Expand Down Expand Up @@ -1237,7 +1240,8 @@ def recursive_get_fields(_conf):
}],
}
client.groupby(**qry)
query_str += json.dumps(client.query_dict, indent=2)
query_str += json.dumps(
client.query_builder.last_query.query_dict, indent=2)
df = client.export_pandas()
if df is None or df.size == 0:
raise Exception(_("No data was returned."))
Expand Down
17 changes: 0 additions & 17 deletions caravel/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from flask.ext.babelpkg import gettext as _
from flask_appbuilder.models.sqla.filters import BaseFilter

from pydruid.client import doublesum
from sqlalchemy import create_engine, select, text
from sqlalchemy.sql.expression import TextAsFrom
from werkzeug.routing import BaseConverter
Expand Down Expand Up @@ -904,22 +903,6 @@ def refresh_datasources(self):
session.commit()
return redirect("/druiddatasourcemodelview/list/")

@expose("/autocomplete/<datasource>/<column>/")
def autocomplete(self, datasource, column):
"""used for filter autocomplete"""
client = utils.get_pydruid_client()
top = client.topn(
datasource=datasource,
granularity='all',
intervals='2013-10-04/2020-10-10',
aggregations={"count": doublesum("count")},
dimension=column,
metric='count',
threshold=1000,
)
values = sorted([d[column] for d in top[0]['result']])
return json.dumps(values)

@app.errorhandler(500)
def show_traceback(self):
if config.get("SHOW_STACKTRACE"):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
'markdown>=2.6.2, <3.0.0',
'pandas==0.18.0',
'parsedatetime==2.0.0',
'pydruid>=0.2.2, <0.3',
'pydruid>=0.2.3, <0.4',
'python-dateutil>=2.4.2, <3.0.0',
'requests>=2.7.0, <3.0.0',
'sqlalchemy>=1.0.12, <2.0.0',
Expand Down
1 change: 1 addition & 0 deletions tests/core_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ def test_client(self, PyDruid):
df = pd.DataFrame(nres)
instance.export_pandas.return_value = df
instance.query_dict = {}
instance.query_builder.last_query.query_dict = {}
resp = self.client.get('/caravel/explore/druid/1/?viz_type=table&granularity=one+day&druid_time_origin=&since=7+days+ago&until=now&row_limit=5000&include_search=false&metrics=count&groupby=name&flt_col_0=dim1&flt_op_0=in&flt_eq_0=&slice_id=&slice_name=&collapsed_fieldsets=&action=&datasource_name=test_datasource&datasource_id=1&datasource_type=druid&previous_viz_type=table&json=true&force=true')
print('-'*300)
print(resp.data.decode('utf-8'))
Expand Down