From b43b95d3371cc2e9beaa3c7a2d756806bf483469 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Fri, 13 Sep 2013 13:58:52 -0700 Subject: [PATCH 01/10] Making this into a python project --- CHANGES.txt | 1 + MANIFEST.in | 2 ++ README.md | 26 ++++++++++++++++++++++---- setup.py | 18 ++++++++++++++++++ test/__init__.py | 0 5 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 CHANGES.txt create mode 100644 MANIFEST.in create mode 100644 setup.py create mode 100644 test/__init__.py diff --git a/CHANGES.txt b/CHANGES.txt new file mode 100644 index 00000000..ca13fd38 --- /dev/null +++ b/CHANGES.txt @@ -0,0 +1 @@ +v0.01, 9/11/2013 -- Initial Release diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..2985d188 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include *.txt +recursive-include docs *.txt diff --git a/README.md b/README.md index a26d9e76..7294b4b8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,24 @@ -pydruid -======= +=========== +pyDruid +=========== -A Python connector for Druid +pyDruid provides a python interface to the Druid analytic store. Typical usage +often looks like this:: -This is a side project that needs some love! + #!/usr/bin/env python + + from pyDruid import * + + # Druid Config + endpoint = 'druid/v2/?pretty' + demo_bard_url = 'http://localhost:8083' + dataSource = 'wikipedia' + intervals = ["2013-01-01/p1y"] + + query = pyDruid(demo_bard_url, endpoint) + + counts = query.timeseries(dataSource = dataSource, + granularity = "minute", + intervals = intervals, + aggregations = {"count" : doubleSum("edits")} + ) diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..e6bbff1b --- /dev/null +++ b/setup.py @@ -0,0 +1,18 @@ +from distutils.core import setup + +setup( + name='pyDruid', + version='0.01', + author='Deep Ganguli', + author_email='deep@metamarkets.com', + packages=['pydruid'], + url='http://pypi.python.org/pypi/pyDruid/', + license='LICENSE', + description='Druid analytical data-store Python library', + long_description=open('README.md').read(), + install_requires=[ + "pandas >= 0.12", + "simplejson" >= "3.3.0", + "matplotlib" >= "1.3.0" + ], +) \ No newline at end of file diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 00000000..e69de29b From 0e6ee149483561e4a2fc86c594356436e417bc45 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 30 Sep 2013 16:38:30 -0700 Subject: [PATCH 02/10] Made the module into a package, some re-org --- MANIFEST | 10 + README | 24 +++ README.md | 11 +- build/lib/pydruid/__init__.py | 0 build/lib/pydruid/client.py | 222 ++++++++++++++++++++++ build/lib/pydruid/utils/__init__.py | 0 build/lib/pydruid/utils/aggregators.py | 37 ++++ build/lib/pydruid/utils/filters.py | 58 ++++++ build/lib/pydruid/utils/postaggregator.py | 56 ++++++ build/lib/pydruid/utils/query_utils.py | 43 +++++ dist/pyDruid-0.01.tar.gz | Bin 0 -> 2980 bytes dist/pyDruid-0.1.0.tar.gz | Bin 0 -> 4474 bytes pyDruid/__init__.py | 0 pyDruid/client.py | 222 ++++++++++++++++++++++ pyDruid/pyDruidUtils/__init__.py | 0 pyDruid/pyDruidUtils/aggregators.py | 37 ++++ pyDruid/pyDruidUtils/filters.py | 58 ++++++ pyDruid/pyDruidUtils/postaggregator.py | 56 ++++++ pyDruid/pyDruidUtils/query_utils.py | 43 +++++ pyDruid/test/__init__.py | 0 pyDruid/utils/__init__.py | 0 pyDruid/utils/aggregators.py | 37 ++++ pyDruid/utils/filters.py | 58 ++++++ pyDruid/utils/postaggregator.py | 56 ++++++ pyDruid/utils/query_utils.py | 43 +++++ setup.py | 8 +- 26 files changed, 1071 insertions(+), 8 deletions(-) create mode 100644 MANIFEST create mode 100644 README create mode 100644 build/lib/pydruid/__init__.py create mode 100644 build/lib/pydruid/client.py create mode 100644 build/lib/pydruid/utils/__init__.py create mode 100644 build/lib/pydruid/utils/aggregators.py create mode 100644 build/lib/pydruid/utils/filters.py create mode 100644 build/lib/pydruid/utils/postaggregator.py create mode 100644 build/lib/pydruid/utils/query_utils.py create mode 100644 dist/pyDruid-0.01.tar.gz create mode 100644 dist/pyDruid-0.1.0.tar.gz create mode 100644 pyDruid/__init__.py create mode 100755 pyDruid/client.py create mode 100644 pyDruid/pyDruidUtils/__init__.py create mode 100644 pyDruid/pyDruidUtils/aggregators.py create mode 100644 pyDruid/pyDruidUtils/filters.py create mode 100644 pyDruid/pyDruidUtils/postaggregator.py create mode 100644 pyDruid/pyDruidUtils/query_utils.py create mode 100644 pyDruid/test/__init__.py create mode 100644 pyDruid/utils/__init__.py create mode 100644 pyDruid/utils/aggregators.py create mode 100644 pyDruid/utils/filters.py create mode 100644 pyDruid/utils/postaggregator.py create mode 100644 pyDruid/utils/query_utils.py diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 00000000..f547254f --- /dev/null +++ b/MANIFEST @@ -0,0 +1,10 @@ +# file GENERATED by distutils, do NOT edit +CHANGES.txt +setup.py +pydruid/__init__.py +pydruid/client.py +pydruid/utils/__init__.py +pydruid/utils/aggregators.py +pydruid/utils/filters.py +pydruid/utils/postaggregator.py +pydruid/utils/query_utils.py diff --git a/README b/README new file mode 100644 index 00000000..7294b4b8 --- /dev/null +++ b/README @@ -0,0 +1,24 @@ +=========== +pyDruid +=========== + +pyDruid provides a python interface to the Druid analytic store. Typical usage +often looks like this:: + + #!/usr/bin/env python + + from pyDruid import * + + # Druid Config + endpoint = 'druid/v2/?pretty' + demo_bard_url = 'http://localhost:8083' + dataSource = 'wikipedia' + intervals = ["2013-01-01/p1y"] + + query = pyDruid(demo_bard_url, endpoint) + + counts = query.timeseries(dataSource = dataSource, + granularity = "minute", + intervals = intervals, + aggregations = {"count" : doubleSum("edits")} + ) diff --git a/README.md b/README.md index 7294b4b8..d67fe340 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ often looks like this:: #!/usr/bin/env python - from pyDruid import * + from pydruid.client import * # Druid Config endpoint = 'druid/v2/?pretty' @@ -19,6 +19,9 @@ often looks like this:: counts = query.timeseries(dataSource = dataSource, granularity = "minute", - intervals = intervals, - aggregations = {"count" : doubleSum("edits")} - ) + intervals = intervals, + aggregations = {"count" : doubleSum("edits")} + ) + + >>> print counts + [{'timestamp': '2013-09-30T23:31:00.000Z', 'result': {'count': 0.0}}, {'timestamp': '2013-09-30T23:32:00.000Z', 'result': {'count': 0.0}}, {'timestamp': '2013-09-30T23:33:00.000Z', 'result': {'count': 0.0}}, {'timestamp': '2013-09-30T23:34:00.000Z', 'result': {'count': 0.0}}] diff --git a/build/lib/pydruid/__init__.py b/build/lib/pydruid/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/build/lib/pydruid/client.py b/build/lib/pydruid/client.py new file mode 100644 index 00000000..3a2bc840 --- /dev/null +++ b/build/lib/pydruid/client.py @@ -0,0 +1,222 @@ +# +# Copyright 2013 Metamarkets Group Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import division +import urllib2 +import simplejson as json +import csv +import re +import os +import sys +import pandas +import dateutil.parser +from matplotlib import * +from matplotlib.pyplot import * +from utils.aggregators import * +from utils.postaggregator import * +from utils.filters import * +from utils.query_utils import * + + +class pyDruid: + + def __init__(self,url,endpoint): + self.url = url + self.endpoint = endpoint + self.result = None + self.result_json = None + self.query_type = None + + # serializes a dict representation of the query into a json + # object, and sends it to bard via post, and gets back the + # json representation of the query result + def post(self,query): + querystr = json.dumps(query) + url = self.url + '/' + self.endpoint + headers = {'Content-Type' : 'application/json'} + req = urllib2.Request(url, querystr, headers) + res = urllib2.urlopen(req) + data = res.read() + self.result_json = data; + self.querystr = querystr + res.close() + + # de-serializes the data returned from druid into a + # list of dicts + def parse(self): + if self.result_json: + res = json.loads(self.result_json) + return res + else: + print("Empty query") + return None + + def export_tsv(self,dest_path): + + f = open(dest_path,'wb') + tsv_file = csv.writer(f, delimiter = '\t') + + if(self.query_type == "timeseries"): + header = self.result[0]['result'].keys() + header.append('timestamp') + elif(self.query_type == "groupby"): + header = self.result[0]['event'].keys() + header.append('timestamp') + header.append('version') + + tsv_file.writerow(header) + + # use unicodewriter to encode results in unicode + w = query_utils.UnicodeWriter(f) + + if self.result: + if self.query_type == "timeseries": + for item in self.result: + timestamp = item['timestamp'] + result = item['result'] + + if type(result) is list: + for line in result: + w.writerow(line.values() + [timestamp]) + else: #timeseries + w.writerow(result.values() + [timestamp]) + elif self.query_type == "groupby": + for item in self.result: + timestamp = item['timestamp'] + version = item['version'] + w.writerow(item['event'].values() + [timestamp] + [version]) + + f.close() + + # Exports a JSON query object into a Pandas data-frame + def export_pandas(self): + if self.result: + if self.query_type == "timeseries": + nres = [v['result'].items() + [('timestamp',v['timestamp'])] for v in self.result] + nres = [dict(v) for v in nres] + else: + print('not implemented yet') + return None + + df = pandas.DataFrame(nres) + df['timestamp'] = df['timestamp'].map(lambda x: dateutil.parser.parse(x)) + df['t'] = dates.date2num(df['timestamp']) + return df + + # implements a timeseries query + def timeseries(self, **args): + + query_dict = {"queryType" : "timeseries"} + valid_parts = ['dataSource', 'granularity', 'filter', 'aggregations', 'postAggregations', 'intervals'] + + for key, val in args.iteritems(): + if key not in valid_parts: + raise ValueError('{0} is not a valid query component. The list of valid components is: \n {1}'.format(key, valid_parts)) + elif key == "aggregations" : + query_dict[key] = build_aggregators(val) + elif key == "postAggregations": + query_dict[key] = build_post_aggregators(val) + elif key == "filter": + query_dict[key] = val.filter['filter'] + else: + query_dict[key] = val + + self.query_dict = query_dict + + try: + self.post(query_dict) + except urllib2.HTTPError, e: + raise IOError('Malformed query: \n {0}'.format(json.dumps(self.query_dict ,indent = 4))) + else: + self.result = self.parse() + self.query_type = "timeseries" + return self.result + + # Implements a groupBy query + def groupBy(self, **args): + + query_dict = {"queryType" : "groupBy"} + valid_parts = ['dataSource', 'granularity', 'filter', 'aggregations', 'postAggregations', + 'intervals', 'dimensions'] + + for key, val in args.iteritems(): + if key not in valid_parts: + raise ValueError('{0} is not a valid query component. The list of valid components is: \n {1}'.format(key, valid_parts)) + elif key == "aggregations" : + query_dict[key] = build_aggregators(val) + elif key == "postAggregations": + query_dict[key] = build_post_aggregators(val) + elif key == "filter": + query_dict[key] = val.filter['filter'] + else: + query_dict[key] = val + + self.query_dict = query_dict + + try: + self.post(query_dict) + except urllib2.HTTPError, e: + raise IOError('Malformed query: \n {0}'.format(json.dumps(self.query_dict ,indent = 4))) + else: + self.result = self.parse() + self.query_type = "groupby" + return self.parse() + + # Implements a segmentMetadata query. This query type is for pulling the tsv-equivalent + # size and cardinality broken out by dimension in a specified data source. + def segmentMetadata(self, **args): + + query_dict = {"queryType" : "segmentMetadata"} + valid_parts = ['dataSource', 'intervals'] + + for key, val in args.iteritems(): + if key not in valid_parts: + raise ValueError('{0} is not a valid query component. The list of valid components is: \n {1}'.format(key, valid_parts)) + else: + query_dict[key] = val + + self.query_dict = query_dict + + try: + self.post(query_dict) + except urllib2.HTTPError, e: + raise IOError('Malformed query: \n {0}'.format(json.dumps(self.query_dict ,indent = 4))) + else: + self.result = self.parse() + return self.parse() + + # implements a time boundary query + def timeBoundary(self, **args): + + query_dict = {"queryType" : "timeBoundary"} + valid_parts = ['dataSource'] + + for key, val in args.iteritems(): + if key not in valid_parts: + raise ValueError('{0} is not a valid query component. The list of valid components is: \n {1}'.format(key, valid_parts)) + else: + query_dict[key] = val + + try: + self.post(query_dict) + except urllib2.HTTPError, e: + raise IOError('Malformed query: \n {0}'.format(json.dumps(self.query_dict ,indent = 4))) + else: + self.result = self.parse() + return self.parse() + + # prints internal variables of the object + def describe(self): + print("url: " + self.url) diff --git a/build/lib/pydruid/utils/__init__.py b/build/lib/pydruid/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/build/lib/pydruid/utils/aggregators.py b/build/lib/pydruid/utils/aggregators.py new file mode 100644 index 00000000..efd54f42 --- /dev/null +++ b/build/lib/pydruid/utils/aggregators.py @@ -0,0 +1,37 @@ +# +# Copyright 2013 Metamarkets Group Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +#create a json list of user-defined aggregations + +# Define the support aggregation types + +def longSum(raw_metric): + return {"type" : "longSum", "fieldName" : raw_metric} + +def doubleSum(raw_metric): + return {"type" : "doubleSum", "fieldName" : raw_metric} + +def min(raw_metric): + return {"type" : "min", "fieldName" : raw_metric} + +def max(raw_metric): + return {"type" : "max", "fieldName" : raw_metric} + +def count(raw_metric): + return {"type" : "count", "fieldName" : raw_metric} + +def build_aggregators(agg_input): + return [dict([('name',k)] + v.items()) for (k,v) in agg_input.iteritems()] \ No newline at end of file diff --git a/build/lib/pydruid/utils/filters.py b/build/lib/pydruid/utils/filters.py new file mode 100644 index 00000000..562cf78d --- /dev/null +++ b/build/lib/pydruid/utils/filters.py @@ -0,0 +1,58 @@ +# +# Copyright 2013 Metamarkets Group Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +class Filter: + + def __init__(self, **args): + + # constrct a selector + if 'type' not in args.keys(): + self.filter = {"filter": {"type": "selector", + "dimension" : args["dimension"], + "value" : args["value"]}} + # construct an and filter + elif args["type"] == "and": + self.filter = {"filter": {"type" : "and", + "fields" : args["fields"]}} + # construct an or filter + elif args["type"] == "or": + self.filter = {"filter": {"type" : "or", + "fields" : args["fields"]}} + # construct a not filter + elif args["type"] == "not": + self.filter = {"filter" : {"type" : "not", + "field" : args["field"]}} + else: + print("you've slain me. nevermind the teeth and the fingernails. the show must go on.") + + def show(self): + print(json.dumps(self.filter, indent = 4)) + + def __and__(self, x): + return Filter(type = "and", fields = [self.filter['filter'], x.filter['filter']]) + + def __or__(self,x): + return Filter(type = "or", fields = [self.filter['filter'], x.filter['filter']]) + + def __invert__(self): + return Filter(type = "not", field = self.filter['filter']) + +class Dimension: + + def __init__(self, dim): + self.dimension = dim + + def __eq__(self,other): + return Filter(dimension = self.dimension, value = other) \ No newline at end of file diff --git a/build/lib/pydruid/utils/postaggregator.py b/build/lib/pydruid/utils/postaggregator.py new file mode 100644 index 00000000..0d647850 --- /dev/null +++ b/build/lib/pydruid/utils/postaggregator.py @@ -0,0 +1,56 @@ +# +# Copyright 2013 Metamarkets Group Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import pprint as pp +import re + +# determine whether arguments are strings or numeric constants +def is_numeric(field): + for obj in [int, float, long]: + try: + obj(field) + return True + except: + pass + + return False + +# specify field type as source field name (string) or constant and build json +def field_spec(field): + if is_numeric(field): + return {"type" : "constant", + "name" : "constant_%d" % int(field), + "value" : field} + else: + return {"type" : "fieldAccess", + "fieldName" : field} + +# use regex to extract fields and operand from user-submitted postAgg string +def parse_calc(postAggOpString): + fields = [] + rgx = "(.+)[ ]?([*+/-])[ ]?(.+)" + postAggOp = re.findall(rgx, postAggOpString).pop() + + if postAggOp: + operation = postAggOp[0] + fn = postAggOp[1] + fields.append(field_spec(postAggOp[0])) + fields.append(field_spec(postAggOp[2])) + base = {"type" : "arithmetic", + "fn" : fn, + "fields" : fields} + return base + else: + raise Exception("Not a valid postAggregation operation") \ No newline at end of file diff --git a/build/lib/pydruid/utils/query_utils.py b/build/lib/pydruid/utils/query_utils.py new file mode 100644 index 00000000..4524e432 --- /dev/null +++ b/build/lib/pydruid/utils/query_utils.py @@ -0,0 +1,43 @@ +# +# Copyright 2013 Metamarkets Group Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import csv, codecs, cStringIO + +# A special CSV writer which will write rows to TSV file "f", which is encoded in utf-8. +# this is necessary because the values in druid are not all ASCII. +class UnicodeWriter: + + def __init__(self, f, dialect="excel-tab", encoding="utf-8", **kwds): #delimiter="\t" + # Redirect output to a queue + self.queue = cStringIO.StringIO() + self.writer = csv.writer(self.queue, dialect=dialect, **kwds) + self.stream = f + self.encoder = codecs.getincrementalencoder(encoding)() + + def writerow(self, row): + self.writer.writerow([s.encode("utf-8") if isinstance(s, unicode) else s for s in row]) + # Fetch UTF-8 output from the queue ... + data = self.queue.getvalue() + data = data.decode("utf-8") + # ... and reencode it into the target encoding + data = self.encoder.encode(data) + # write to the target stream + self.stream.write(data) + # empty queue + self.queue.truncate(0) + + def writerows(self, rows): + for row in rows: + self.writerow(row) \ No newline at end of file diff --git a/dist/pyDruid-0.01.tar.gz b/dist/pyDruid-0.01.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..09fcbe27766443938353a1992c7ae756a35c8b32 GIT binary patch literal 2980 zcmV;V3tRLbiwFpmelt=6|72-%bT4puL~?a$WGyf*FflH4VR8WN8*6jhHqu|sUxAZ9 zkjhEa!p&A=uhTKVyXLpq-Z8%hHd^g=YiDg3z;zIJ@Zv@h!{-3?vAG|m`@zn?QrS-qNvva%tVNC14 zwcYG=wn6`$oxR-+(k%7=)nSH;uaSQ>TCGMK9FNrNq#tv|X-JM)$f#t_MhV|7toZ+% zS1;=Q!Skc95dZJ+e=xh9wfMiYx3^RJ|6{;wrf5JFt^dM=_j zLL+fbvM8cr-X(gF(3pmE#m5jYG##lD!N{Arx=3OY@{s^s=VcPHdP=82^g^k$+i0Zo zl>4d1@)I%D_YFH?!`aVZK)@7?rQ`*Tr&-AD!#ay754$9Qp`Rk)9+z~or)RB85mOK^yVe2w}2O*rewBo)bw2TT%5%n}HX6%!LWW<(`K zU9dIb5G@s-oWd$mQkeumxnl23kF{cH=*(U`oy170iG|$#6tP zFw8^<=)|?k4ii}PMIu%AhvpC4YcGM(oFth5(ZK{){F0|E;B*y9i+4st2{>mJ%v1Gd z3;s3I*1U3l^OAngn3w~;6@a(GU2UdkMDlPp#^ohIWe9x=%TXVP0?czIJ>eSdR# zITbX{LMpgIb}A8%GsP+e{7rz>H7>5NVbSSSuqj1@M#PU5&1i*yLnPTKWZ<1%1q7&M zrTVG(uDK|z_Dx9DBoR@UydAt69K9QqdH(O7|MU3!m+${N+wF4y_t)v){z86!1pT)< z&CagYf2-5(?CpRETYKBBa{u=zu;pw)(og4tPcIbFJHXe=qKdo_NtVKfaO^vXY3r+i zWO2ZRE_C)&I))!RQX{rlByGP*JVdD2p-S~fpfFD|645z{69rX}DRsz%hm5cfW0opN zeUN*jG~_gf412}Zg`UO|0XE35mP9gA6tF4ar*JnZ;t{Ge!w9wrxw^XYsb*FeoS{LK z4O=SKfnoj1+c;!W62X4YxPYaO=7gre&zM4bB_X{ciGVaL7>q%&2OL~MYUlB^MkG{n zS5z>d5^$*mA3>%s3M=Oh)?7pYanYDm_D@Lvq(YwVpY%^^K;vEi^ySgpQ}S;A_;`PC z+CMxYN5|yZ(cqwe+CLh=-E*=(_?5is4-TN90a3v;>;sf$z&|j_QH(4=nVm4U!T=I@1b_39v7F_XAXR&n$i<8E#bpW{(#0K=Wh_%X z^iwLJY%_dCRHb2}VCH2pyfy$453kn7Z8&@j1f`!BV2O}-p`}pmE>Z8o1mlNd?@nU6 z1|8yp3TTcq4k;+wVs;&8Ghh>tK99K?4n4`jNexTgMWx$yHaGF9Z_0D{n?ESj`1xf% z1jdwwI50?Jw)$|WRn{Y$#UaD1nx`y($JyK>SUb~@|4;9k0zO84qMfqX*i-Jlrf7%h z6Ts7!052z__iU_cXoNs04nPPBT@Q)cHU&lj z2UJG`uX?j05$(qnMVf`@mu4D&943+h4U})d>IL1RSoK`6JH&Jvm_2A0R>~*dZ=An-wh#YdyK0nbn>(&(B7M(SY%aA{EiPm_ zU@-7pO$+jQs@Vf3Zk}?Ad44p%Z$35y!+rR4>!UMM8<@rCqOw9tt~`US2e))_ki~qQ z1k50y$FmsktxtHAC;L4y?qBYtoC&AKk75jkbv?6DUPE>pBfs zO}=p4y#v-+K@{f(Wf4iuc&%bxF(+0W|63 z-*%iu-Wttfgbya0TG5NupstsaHySq@Eu8o13_zYdp<*hHTJx%4s0$f9gwwZ}&+#Cx zs7%b{V2nHf;}H5h@yuPG8M-y%7ETB8)>MOdRXpj#dn``&*PboT2~8M5Z9-_S5qN}3 zMV8TOgteU90s%*FzyPJdoYA_V93t;8=s<@;B*Jq)Ha}rB!WXE)x4wb%`WDSGk)WqTf%{=;~Hd@Qy9+&vLb{dDxBbIIESkwXx-m&YfbeDQG-hwJyhX zo;Q2xyc&FYdiq8y6LyKFjM7T(A6X5*rXlKuS;ZP{HJ92d6u#@6)i~?}^~PbRTCEn# zt<~Lw;Z%EYbuqfKS17to!D$O*wDb0h&a2bS(|OTz?W50dTl?nM^;t)s#GCW@g8Kq) z0Lzc{Zo72m(wVGqbLb(Rk4_(hQ^#PxX!y zb`o_wtpj-@_xij6wxjw$U`$28W3Wxw^NmDu$zc1RDLm9$_?vdF{3Hy-rl*(M*?kjJ z)_D7j*s@090Ws#gOYNnf-0BF|3Q{skGAN_Pe7z=pYM-x}p!1NDG=KL#{Flg@fhfhna9!Ejt9h@9+P|hx-Sw z55G$P_Z|IztyZg(fB*07?R2pJud`cz|9uoF_Pgc1aK&fN7w&nxt=G-Dy|>M|y_e0o zrFZTAp0z*Jt9ECP+C9B#&L8SUbN)>4*+Y8H?(Q}FoF216Z&?{mzwP)hp`J*;RM*eL z|NqC{c6-tP1$X-Ye?UqoGwB%wbDyQYG2tM)T{&1MKNo-iERmLgcvq6@cTK9`Vx z_ej%kwJWvd$g(S@g2R<s5teN$2BD=9^f*YWTE?~#N+_X(5=tncgc3?9p@b4j aD4~QBN+_X(5=!{H!v6u#M={|5Pyhf^8rXOM literal 0 HcmV?d00001 diff --git a/dist/pyDruid-0.1.0.tar.gz b/dist/pyDruid-0.1.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..5f176dfe4be946898246e4b681737f904f9bb01d GIT binary patch literal 4474 zcmV-=5rys_iwFp<(MeJQ|72-%bT4puL~?a$WGyf*F)lDJbYXG;?L2>T<2J7QZ~Q6v zv~we=brjo)lU{Cecj<1n&FeMU*<{E{D1w;SKFs&ug*Wb{eRE=2eliw?EiSMx8KeGKcajq3Lfx+Z+|DU zJWS$4271st<4bYKly3B@DpHc(MY&w^Y<mu~QU=X6bNHW1*uZ!y>W>GlH z0JisavJ~4XUjWwYqDT*ScG7hkdMQD9Nw%Q-9o6Gb@3@4D47v~^Wnb}lQAVM9xGk1E zj1E}X7rU)Spfndr znE~sdgWK>XOhpj#CJ?cA#iJZxt}I-rwg*G_+ewFOYqEJrKb0a|1H58@)1a3!0rf*SAH&+{m14%IQyfKjQtK3}@d!o_vLCWX2m!~He?EDRTaensl^z!ul4DMdBy_ zylS|vE>ob8S9jo*i?RqKFXb6{?UJq~FVZL}pyxUkYE^*6!>d+3V#+=3cqduDu|9?H zriR%F0(uT^{pRKrW=!e2uJ85yh=VYdl|JnCwgNF{dUKm}p!)>iNZQainf)mIqK}FOh~fa4P~h~!c#W+>h_ivcvU-7rX~z8<#D;B& z#P{_jha!20cqHGbju=K)Fdbnuh|3GgWtuy(9;ziPE3SUd>>V5aH@FI5*MbKqT?oo- z2%{mSFSa4x7dFIZwlX0|+Ya_&zk?QFW2B65Hh6Eq;s_v)qEk&$pDDC1!RH1%{3fZ0 z9l(N>7>PnHz<}s@;JBLwiD3U|2vQDQU&?MgKT2`|IFP@A*fwN~Y^A<3QQ(pp$TUMB zlMEzPY!rcgLn)vlEJG^xS)tV|TrT11=eXnGoIBt%K2CE-EECd4E!etRK zEwR@&4j7FdI3{QS>dIinM1oY7RMutEu}aj0JYz=otoH zvk)P?6_3DJI4;^zTE1dc@wRM<0k~N?|Ghtw5^Upjp;y z{oG7C-YERxPGa8fek-IxDw^#EMC8%P^P9UfQwrc_Og{-hxG6 zi$YHBmf?g|1`VTRg7*?w^a?rTAZAxqH|WImruoEM^3;j=au#rQci48+!ia0##G2F79_`?&Df zB`o}URhJ-DC79{_ko^#|pNH>k50lEga5R;=Z?`rpNE}WHbNs+oBCclp6@a1iXJr@# zQ^Wsu0PB5ZZRT0;$A{pLBbBmx5EZmhUj3C8YiovWY}0Fc(n`nb8(KTFHDMq)@>=V9 zTIXfFti8dnFE3w{Fd-6@ZWIam^jt~!TOOfYgc7Wj)}WS_;rO;G>xU5QQQ)xWx~}PN zHKPYfDa~NFs%^vy=Cm;;t$~In@2NR?X}S4wZO&Zv=vPcz)$;e9vpss*B44=}(-&X^ z7=BCJXR>Z}Zz|_vftPyEfqJ#dKp8`|GLwId%8N52ErA>Hf+9|c zB(^al2mD6rDc=C$C?y1Zo&{kHx(PAgEK6<#g#TrMiMwjP(d5$EE?^sVpVMUbq)lnj z_A6{l3&Rs^%%5&)@8-#a6XBji%4SIkZgjS8`=np0=PecNJ;f!}Z68y;Vf$gZ=%3?*0GAmcJ2C{U#N7Z2$LYcef?~ z;oe>+|8Dn+odp{yVu zN4gB-N6`Vy6A=Yw#4(vM}(;EUV z{>$BpugGp3_ImP6SbM*skg{7(u=gcl>Bb~}ZQle7sPdaQI zx}q$WG+UG))h~SXGDlf@iQVcqT0UH?>Lmg$huHO%@o4g7G_+r(Mj>w}@7~>GLWx|6 z>CJ|uzk;BbS+Y3|XCmLb#ewz9*nq1&wt6z{|q5h?jX}vyIB_f~mDvFvTOZl1t&SSgdeLgI>`a zH6g&Za&-qT_yQBjxT#KBhU?^(En#&8ivR}7v)p<hk~sQ>Bk3%Y0>n2OMi=!PTR?R0*`(2FlIcDl?-DibxAz#vzw_vP{E(u-zUi1j7&8Fff-7vxR z4nLR)oYYGgZ3i%2C1|7YG#;y^?lLRuk90`=Y{_kjw6X&` zZ9*@FtQVysu4NSgXQ@*P!uo?b|SD7~sYKlzc7 zY;lJkn&W-$Ua`r)oU6}1-`SqXYj|c6P=$()V}b7$2Rw=#XxnG)ZoD)}9jSa;niWXR zKpIyeovo_+YEXapWZtY98Z|TmeLTldBNLD7{u20+2|wexpjfAlEWsRJFGUghn^bI0 zVe~xi_udn!$*Xtm@3JU8!Y(&y8ST|Mp=k=Na;!6B3sP%8Y0Xi~?fkz7{6F)c)$jQJ zcXu@0Z@>Q??{?pR=={IV|NAukKR;jf8J^4ZbNH1j-RU_l;K#B@9xy_NcmjFY$Va>rykYK<%c|tUxqoKuo`wJW(=@D)_73fpouG*RKxzsQr z6B2ZmR31}CSCclVLYK!EFHTQAl}>4#Cw(}jLRl4#iR2>-^)}ir_{{1{%3!cZ77=D) zuKVUTzzyWjtJA?p)(?ecPLipnMy&`-gafMwo%F4<4$NyOd)*814bXm>Mi&!=if^TKtER9JPEQ}Pk6 z+NmiCQ6Kue%2kJs;;Ab=-jEzrUpSy*>VU9I4gr&g2T0n0VB#9AekBS}p|_W>w!hHa zCRb5?DgfB)d0x-_GM&zDQ7w*?RLNkt0>U2;xM31%bPQ<8K?SQJk2i0|^4lHYi%2)$ z$8Wdluj`;P^m_E;(8R^`z>%#~#+DJBI&6PrlLo1WoW9h0WSgIFZB$L^Bd$mGrjNKP zKycM=z#++xu51;S&}mC~=@GfMdiz>tfa)pVl~t&f?{{P?-%mXb>TksY=}-Q+twx%} zi>Zl%J|AMgIXQm$?TNPxY*&4kMS|1%6-86!XTmTm+i1!2_3WQV_{H6k5k*_Zr;?y{ z*W2|5X0`E&Bd8uyHPr&6I^)`vu5_g= 0.12", - "simplejson" >= "3.3.0", - "matplotlib" >= "1.3.0" + "simplejson >= 3.3.0", + "matplotlib >= 1.3.0" ], ) \ No newline at end of file From 978eeaad75eeed7e79070349730cba2623f6b828 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 30 Sep 2013 16:40:09 -0700 Subject: [PATCH 03/10] Massive cleanup --- README | 24 --- build/lib/pydruid/__init__.py | 0 build/lib/pydruid/client.py | 222 ---------------------- build/lib/pydruid/utils/__init__.py | 0 build/lib/pydruid/utils/aggregators.py | 37 ---- build/lib/pydruid/utils/filters.py | 58 ------ build/lib/pydruid/utils/postaggregator.py | 56 ------ build/lib/pydruid/utils/query_utils.py | 43 ----- dist/pyDruid-0.01.tar.gz | Bin 2980 -> 0 bytes dist/pyDruid-0.1.0.tar.gz | Bin 4474 -> 0 bytes pyDruid.py | 222 ---------------------- pyDruid/__init__.py | 0 pyDruid/client.py | 222 ---------------------- pyDruid/pyDruidUtils/__init__.py | 0 pyDruid/pyDruidUtils/aggregators.py | 37 ---- pyDruid/pyDruidUtils/filters.py | 58 ------ pyDruid/pyDruidUtils/postaggregator.py | 56 ------ pyDruid/pyDruidUtils/query_utils.py | 43 ----- pyDruid/test/__init__.py | 0 pyDruid/utils/__init__.py | 0 pyDruid/utils/aggregators.py | 37 ---- pyDruid/utils/filters.py | 58 ------ pyDruid/utils/postaggregator.py | 56 ------ pyDruid/utils/query_utils.py | 43 ----- pyDruidUtils/__init__.py | 0 pyDruidUtils/aggregators.py | 37 ---- pyDruidUtils/filters.py | 58 ------ pyDruidUtils/postaggregator.py | 56 ------ pyDruidUtils/query_utils.py | 43 ----- test/__init__.py | 0 30 files changed, 1466 deletions(-) delete mode 100644 README delete mode 100644 build/lib/pydruid/__init__.py delete mode 100644 build/lib/pydruid/client.py delete mode 100644 build/lib/pydruid/utils/__init__.py delete mode 100644 build/lib/pydruid/utils/aggregators.py delete mode 100644 build/lib/pydruid/utils/filters.py delete mode 100644 build/lib/pydruid/utils/postaggregator.py delete mode 100644 build/lib/pydruid/utils/query_utils.py delete mode 100644 dist/pyDruid-0.01.tar.gz delete mode 100644 dist/pyDruid-0.1.0.tar.gz delete mode 100755 pyDruid.py delete mode 100644 pyDruid/__init__.py delete mode 100755 pyDruid/client.py delete mode 100644 pyDruid/pyDruidUtils/__init__.py delete mode 100644 pyDruid/pyDruidUtils/aggregators.py delete mode 100644 pyDruid/pyDruidUtils/filters.py delete mode 100644 pyDruid/pyDruidUtils/postaggregator.py delete mode 100644 pyDruid/pyDruidUtils/query_utils.py delete mode 100644 pyDruid/test/__init__.py delete mode 100644 pyDruid/utils/__init__.py delete mode 100644 pyDruid/utils/aggregators.py delete mode 100644 pyDruid/utils/filters.py delete mode 100644 pyDruid/utils/postaggregator.py delete mode 100644 pyDruid/utils/query_utils.py delete mode 100644 pyDruidUtils/__init__.py delete mode 100644 pyDruidUtils/aggregators.py delete mode 100644 pyDruidUtils/filters.py delete mode 100644 pyDruidUtils/postaggregator.py delete mode 100644 pyDruidUtils/query_utils.py delete mode 100644 test/__init__.py diff --git a/README b/README deleted file mode 100644 index 7294b4b8..00000000 --- a/README +++ /dev/null @@ -1,24 +0,0 @@ -=========== -pyDruid -=========== - -pyDruid provides a python interface to the Druid analytic store. Typical usage -often looks like this:: - - #!/usr/bin/env python - - from pyDruid import * - - # Druid Config - endpoint = 'druid/v2/?pretty' - demo_bard_url = 'http://localhost:8083' - dataSource = 'wikipedia' - intervals = ["2013-01-01/p1y"] - - query = pyDruid(demo_bard_url, endpoint) - - counts = query.timeseries(dataSource = dataSource, - granularity = "minute", - intervals = intervals, - aggregations = {"count" : doubleSum("edits")} - ) diff --git a/build/lib/pydruid/__init__.py b/build/lib/pydruid/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/build/lib/pydruid/client.py b/build/lib/pydruid/client.py deleted file mode 100644 index 3a2bc840..00000000 --- a/build/lib/pydruid/client.py +++ /dev/null @@ -1,222 +0,0 @@ -# -# Copyright 2013 Metamarkets Group Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import division -import urllib2 -import simplejson as json -import csv -import re -import os -import sys -import pandas -import dateutil.parser -from matplotlib import * -from matplotlib.pyplot import * -from utils.aggregators import * -from utils.postaggregator import * -from utils.filters import * -from utils.query_utils import * - - -class pyDruid: - - def __init__(self,url,endpoint): - self.url = url - self.endpoint = endpoint - self.result = None - self.result_json = None - self.query_type = None - - # serializes a dict representation of the query into a json - # object, and sends it to bard via post, and gets back the - # json representation of the query result - def post(self,query): - querystr = json.dumps(query) - url = self.url + '/' + self.endpoint - headers = {'Content-Type' : 'application/json'} - req = urllib2.Request(url, querystr, headers) - res = urllib2.urlopen(req) - data = res.read() - self.result_json = data; - self.querystr = querystr - res.close() - - # de-serializes the data returned from druid into a - # list of dicts - def parse(self): - if self.result_json: - res = json.loads(self.result_json) - return res - else: - print("Empty query") - return None - - def export_tsv(self,dest_path): - - f = open(dest_path,'wb') - tsv_file = csv.writer(f, delimiter = '\t') - - if(self.query_type == "timeseries"): - header = self.result[0]['result'].keys() - header.append('timestamp') - elif(self.query_type == "groupby"): - header = self.result[0]['event'].keys() - header.append('timestamp') - header.append('version') - - tsv_file.writerow(header) - - # use unicodewriter to encode results in unicode - w = query_utils.UnicodeWriter(f) - - if self.result: - if self.query_type == "timeseries": - for item in self.result: - timestamp = item['timestamp'] - result = item['result'] - - if type(result) is list: - for line in result: - w.writerow(line.values() + [timestamp]) - else: #timeseries - w.writerow(result.values() + [timestamp]) - elif self.query_type == "groupby": - for item in self.result: - timestamp = item['timestamp'] - version = item['version'] - w.writerow(item['event'].values() + [timestamp] + [version]) - - f.close() - - # Exports a JSON query object into a Pandas data-frame - def export_pandas(self): - if self.result: - if self.query_type == "timeseries": - nres = [v['result'].items() + [('timestamp',v['timestamp'])] for v in self.result] - nres = [dict(v) for v in nres] - else: - print('not implemented yet') - return None - - df = pandas.DataFrame(nres) - df['timestamp'] = df['timestamp'].map(lambda x: dateutil.parser.parse(x)) - df['t'] = dates.date2num(df['timestamp']) - return df - - # implements a timeseries query - def timeseries(self, **args): - - query_dict = {"queryType" : "timeseries"} - valid_parts = ['dataSource', 'granularity', 'filter', 'aggregations', 'postAggregations', 'intervals'] - - for key, val in args.iteritems(): - if key not in valid_parts: - raise ValueError('{0} is not a valid query component. The list of valid components is: \n {1}'.format(key, valid_parts)) - elif key == "aggregations" : - query_dict[key] = build_aggregators(val) - elif key == "postAggregations": - query_dict[key] = build_post_aggregators(val) - elif key == "filter": - query_dict[key] = val.filter['filter'] - else: - query_dict[key] = val - - self.query_dict = query_dict - - try: - self.post(query_dict) - except urllib2.HTTPError, e: - raise IOError('Malformed query: \n {0}'.format(json.dumps(self.query_dict ,indent = 4))) - else: - self.result = self.parse() - self.query_type = "timeseries" - return self.result - - # Implements a groupBy query - def groupBy(self, **args): - - query_dict = {"queryType" : "groupBy"} - valid_parts = ['dataSource', 'granularity', 'filter', 'aggregations', 'postAggregations', - 'intervals', 'dimensions'] - - for key, val in args.iteritems(): - if key not in valid_parts: - raise ValueError('{0} is not a valid query component. The list of valid components is: \n {1}'.format(key, valid_parts)) - elif key == "aggregations" : - query_dict[key] = build_aggregators(val) - elif key == "postAggregations": - query_dict[key] = build_post_aggregators(val) - elif key == "filter": - query_dict[key] = val.filter['filter'] - else: - query_dict[key] = val - - self.query_dict = query_dict - - try: - self.post(query_dict) - except urllib2.HTTPError, e: - raise IOError('Malformed query: \n {0}'.format(json.dumps(self.query_dict ,indent = 4))) - else: - self.result = self.parse() - self.query_type = "groupby" - return self.parse() - - # Implements a segmentMetadata query. This query type is for pulling the tsv-equivalent - # size and cardinality broken out by dimension in a specified data source. - def segmentMetadata(self, **args): - - query_dict = {"queryType" : "segmentMetadata"} - valid_parts = ['dataSource', 'intervals'] - - for key, val in args.iteritems(): - if key not in valid_parts: - raise ValueError('{0} is not a valid query component. The list of valid components is: \n {1}'.format(key, valid_parts)) - else: - query_dict[key] = val - - self.query_dict = query_dict - - try: - self.post(query_dict) - except urllib2.HTTPError, e: - raise IOError('Malformed query: \n {0}'.format(json.dumps(self.query_dict ,indent = 4))) - else: - self.result = self.parse() - return self.parse() - - # implements a time boundary query - def timeBoundary(self, **args): - - query_dict = {"queryType" : "timeBoundary"} - valid_parts = ['dataSource'] - - for key, val in args.iteritems(): - if key not in valid_parts: - raise ValueError('{0} is not a valid query component. The list of valid components is: \n {1}'.format(key, valid_parts)) - else: - query_dict[key] = val - - try: - self.post(query_dict) - except urllib2.HTTPError, e: - raise IOError('Malformed query: \n {0}'.format(json.dumps(self.query_dict ,indent = 4))) - else: - self.result = self.parse() - return self.parse() - - # prints internal variables of the object - def describe(self): - print("url: " + self.url) diff --git a/build/lib/pydruid/utils/__init__.py b/build/lib/pydruid/utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/build/lib/pydruid/utils/aggregators.py b/build/lib/pydruid/utils/aggregators.py deleted file mode 100644 index efd54f42..00000000 --- a/build/lib/pydruid/utils/aggregators.py +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright 2013 Metamarkets Group Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -#create a json list of user-defined aggregations - -# Define the support aggregation types - -def longSum(raw_metric): - return {"type" : "longSum", "fieldName" : raw_metric} - -def doubleSum(raw_metric): - return {"type" : "doubleSum", "fieldName" : raw_metric} - -def min(raw_metric): - return {"type" : "min", "fieldName" : raw_metric} - -def max(raw_metric): - return {"type" : "max", "fieldName" : raw_metric} - -def count(raw_metric): - return {"type" : "count", "fieldName" : raw_metric} - -def build_aggregators(agg_input): - return [dict([('name',k)] + v.items()) for (k,v) in agg_input.iteritems()] \ No newline at end of file diff --git a/build/lib/pydruid/utils/filters.py b/build/lib/pydruid/utils/filters.py deleted file mode 100644 index 562cf78d..00000000 --- a/build/lib/pydruid/utils/filters.py +++ /dev/null @@ -1,58 +0,0 @@ -# -# Copyright 2013 Metamarkets Group Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -class Filter: - - def __init__(self, **args): - - # constrct a selector - if 'type' not in args.keys(): - self.filter = {"filter": {"type": "selector", - "dimension" : args["dimension"], - "value" : args["value"]}} - # construct an and filter - elif args["type"] == "and": - self.filter = {"filter": {"type" : "and", - "fields" : args["fields"]}} - # construct an or filter - elif args["type"] == "or": - self.filter = {"filter": {"type" : "or", - "fields" : args["fields"]}} - # construct a not filter - elif args["type"] == "not": - self.filter = {"filter" : {"type" : "not", - "field" : args["field"]}} - else: - print("you've slain me. nevermind the teeth and the fingernails. the show must go on.") - - def show(self): - print(json.dumps(self.filter, indent = 4)) - - def __and__(self, x): - return Filter(type = "and", fields = [self.filter['filter'], x.filter['filter']]) - - def __or__(self,x): - return Filter(type = "or", fields = [self.filter['filter'], x.filter['filter']]) - - def __invert__(self): - return Filter(type = "not", field = self.filter['filter']) - -class Dimension: - - def __init__(self, dim): - self.dimension = dim - - def __eq__(self,other): - return Filter(dimension = self.dimension, value = other) \ No newline at end of file diff --git a/build/lib/pydruid/utils/postaggregator.py b/build/lib/pydruid/utils/postaggregator.py deleted file mode 100644 index 0d647850..00000000 --- a/build/lib/pydruid/utils/postaggregator.py +++ /dev/null @@ -1,56 +0,0 @@ -# -# Copyright 2013 Metamarkets Group Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import pprint as pp -import re - -# determine whether arguments are strings or numeric constants -def is_numeric(field): - for obj in [int, float, long]: - try: - obj(field) - return True - except: - pass - - return False - -# specify field type as source field name (string) or constant and build json -def field_spec(field): - if is_numeric(field): - return {"type" : "constant", - "name" : "constant_%d" % int(field), - "value" : field} - else: - return {"type" : "fieldAccess", - "fieldName" : field} - -# use regex to extract fields and operand from user-submitted postAgg string -def parse_calc(postAggOpString): - fields = [] - rgx = "(.+)[ ]?([*+/-])[ ]?(.+)" - postAggOp = re.findall(rgx, postAggOpString).pop() - - if postAggOp: - operation = postAggOp[0] - fn = postAggOp[1] - fields.append(field_spec(postAggOp[0])) - fields.append(field_spec(postAggOp[2])) - base = {"type" : "arithmetic", - "fn" : fn, - "fields" : fields} - return base - else: - raise Exception("Not a valid postAggregation operation") \ No newline at end of file diff --git a/build/lib/pydruid/utils/query_utils.py b/build/lib/pydruid/utils/query_utils.py deleted file mode 100644 index 4524e432..00000000 --- a/build/lib/pydruid/utils/query_utils.py +++ /dev/null @@ -1,43 +0,0 @@ -# -# Copyright 2013 Metamarkets Group Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import csv, codecs, cStringIO - -# A special CSV writer which will write rows to TSV file "f", which is encoded in utf-8. -# this is necessary because the values in druid are not all ASCII. -class UnicodeWriter: - - def __init__(self, f, dialect="excel-tab", encoding="utf-8", **kwds): #delimiter="\t" - # Redirect output to a queue - self.queue = cStringIO.StringIO() - self.writer = csv.writer(self.queue, dialect=dialect, **kwds) - self.stream = f - self.encoder = codecs.getincrementalencoder(encoding)() - - def writerow(self, row): - self.writer.writerow([s.encode("utf-8") if isinstance(s, unicode) else s for s in row]) - # Fetch UTF-8 output from the queue ... - data = self.queue.getvalue() - data = data.decode("utf-8") - # ... and reencode it into the target encoding - data = self.encoder.encode(data) - # write to the target stream - self.stream.write(data) - # empty queue - self.queue.truncate(0) - - def writerows(self, rows): - for row in rows: - self.writerow(row) \ No newline at end of file diff --git a/dist/pyDruid-0.01.tar.gz b/dist/pyDruid-0.01.tar.gz deleted file mode 100644 index 09fcbe27766443938353a1992c7ae756a35c8b32..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2980 zcmV;V3tRLbiwFpmelt=6|72-%bT4puL~?a$WGyf*FflH4VR8WN8*6jhHqu|sUxAZ9 zkjhEa!p&A=uhTKVyXLpq-Z8%hHd^g=YiDg3z;zIJ@Zv@h!{-3?vAG|m`@zn?QrS-qNvva%tVNC14 zwcYG=wn6`$oxR-+(k%7=)nSH;uaSQ>TCGMK9FNrNq#tv|X-JM)$f#t_MhV|7toZ+% zS1;=Q!Skc95dZJ+e=xh9wfMiYx3^RJ|6{;wrf5JFt^dM=_j zLL+fbvM8cr-X(gF(3pmE#m5jYG##lD!N{Arx=3OY@{s^s=VcPHdP=82^g^k$+i0Zo zl>4d1@)I%D_YFH?!`aVZK)@7?rQ`*Tr&-AD!#ay754$9Qp`Rk)9+z~or)RB85mOK^yVe2w}2O*rewBo)bw2TT%5%n}HX6%!LWW<(`K zU9dIb5G@s-oWd$mQkeumxnl23kF{cH=*(U`oy170iG|$#6tP zFw8^<=)|?k4ii}PMIu%AhvpC4YcGM(oFth5(ZK{){F0|E;B*y9i+4st2{>mJ%v1Gd z3;s3I*1U3l^OAngn3w~;6@a(GU2UdkMDlPp#^ohIWe9x=%TXVP0?czIJ>eSdR# zITbX{LMpgIb}A8%GsP+e{7rz>H7>5NVbSSSuqj1@M#PU5&1i*yLnPTKWZ<1%1q7&M zrTVG(uDK|z_Dx9DBoR@UydAt69K9QqdH(O7|MU3!m+${N+wF4y_t)v){z86!1pT)< z&CagYf2-5(?CpRETYKBBa{u=zu;pw)(og4tPcIbFJHXe=qKdo_NtVKfaO^vXY3r+i zWO2ZRE_C)&I))!RQX{rlByGP*JVdD2p-S~fpfFD|645z{69rX}DRsz%hm5cfW0opN zeUN*jG~_gf412}Zg`UO|0XE35mP9gA6tF4ar*JnZ;t{Ge!w9wrxw^XYsb*FeoS{LK z4O=SKfnoj1+c;!W62X4YxPYaO=7gre&zM4bB_X{ciGVaL7>q%&2OL~MYUlB^MkG{n zS5z>d5^$*mA3>%s3M=Oh)?7pYanYDm_D@Lvq(YwVpY%^^K;vEi^ySgpQ}S;A_;`PC z+CMxYN5|yZ(cqwe+CLh=-E*=(_?5is4-TN90a3v;>;sf$z&|j_QH(4=nVm4U!T=I@1b_39v7F_XAXR&n$i<8E#bpW{(#0K=Wh_%X z^iwLJY%_dCRHb2}VCH2pyfy$453kn7Z8&@j1f`!BV2O}-p`}pmE>Z8o1mlNd?@nU6 z1|8yp3TTcq4k;+wVs;&8Ghh>tK99K?4n4`jNexTgMWx$yHaGF9Z_0D{n?ESj`1xf% z1jdwwI50?Jw)$|WRn{Y$#UaD1nx`y($JyK>SUb~@|4;9k0zO84qMfqX*i-Jlrf7%h z6Ts7!052z__iU_cXoNs04nPPBT@Q)cHU&lj z2UJG`uX?j05$(qnMVf`@mu4D&943+h4U})d>IL1RSoK`6JH&Jvm_2A0R>~*dZ=An-wh#YdyK0nbn>(&(B7M(SY%aA{EiPm_ zU@-7pO$+jQs@Vf3Zk}?Ad44p%Z$35y!+rR4>!UMM8<@rCqOw9tt~`US2e))_ki~qQ z1k50y$FmsktxtHAC;L4y?qBYtoC&AKk75jkbv?6DUPE>pBfs zO}=p4y#v-+K@{f(Wf4iuc&%bxF(+0W|63 z-*%iu-Wttfgbya0TG5NupstsaHySq@Eu8o13_zYdp<*hHTJx%4s0$f9gwwZ}&+#Cx zs7%b{V2nHf;}H5h@yuPG8M-y%7ETB8)>MOdRXpj#dn``&*PboT2~8M5Z9-_S5qN}3 zMV8TOgteU90s%*FzyPJdoYA_V93t;8=s<@;B*Jq)Ha}rB!WXE)x4wb%`WDSGk)WqTf%{=;~Hd@Qy9+&vLb{dDxBbIIESkwXx-m&YfbeDQG-hwJyhX zo;Q2xyc&FYdiq8y6LyKFjM7T(A6X5*rXlKuS;ZP{HJ92d6u#@6)i~?}^~PbRTCEn# zt<~Lw;Z%EYbuqfKS17to!D$O*wDb0h&a2bS(|OTz?W50dTl?nM^;t)s#GCW@g8Kq) z0Lzc{Zo72m(wVGqbLb(Rk4_(hQ^#PxX!y zb`o_wtpj-@_xij6wxjw$U`$28W3Wxw^NmDu$zc1RDLm9$_?vdF{3Hy-rl*(M*?kjJ z)_D7j*s@090Ws#gOYNnf-0BF|3Q{skGAN_Pe7z=pYM-x}p!1NDG=KL#{Flg@fhfhna9!Ejt9h@9+P|hx-Sw z55G$P_Z|IztyZg(fB*07?R2pJud`cz|9uoF_Pgc1aK&fN7w&nxt=G-Dy|>M|y_e0o zrFZTAp0z*Jt9ECP+C9B#&L8SUbN)>4*+Y8H?(Q}FoF216Z&?{mzwP)hp`J*;RM*eL z|NqC{c6-tP1$X-Ye?UqoGwB%wbDyQYG2tM)T{&1MKNo-iERmLgcvq6@cTK9`Vx z_ej%kwJWvd$g(S@g2R<s5teN$2BD=9^f*YWTE?~#N+_X(5=tncgc3?9p@b4j aD4~QBN+_X(5=!{H!v6u#M={|5Pyhf^8rXOM diff --git a/dist/pyDruid-0.1.0.tar.gz b/dist/pyDruid-0.1.0.tar.gz deleted file mode 100644 index 5f176dfe4be946898246e4b681737f904f9bb01d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4474 zcmV-=5rys_iwFp<(MeJQ|72-%bT4puL~?a$WGyf*F)lDJbYXG;?L2>T<2J7QZ~Q6v zv~we=brjo)lU{Cecj<1n&FeMU*<{E{D1w;SKFs&ug*Wb{eRE=2eliw?EiSMx8KeGKcajq3Lfx+Z+|DU zJWS$4271st<4bYKly3B@DpHc(MY&w^Y<mu~QU=X6bNHW1*uZ!y>W>GlH z0JisavJ~4XUjWwYqDT*ScG7hkdMQD9Nw%Q-9o6Gb@3@4D47v~^Wnb}lQAVM9xGk1E zj1E}X7rU)Spfndr znE~sdgWK>XOhpj#CJ?cA#iJZxt}I-rwg*G_+ewFOYqEJrKb0a|1H58@)1a3!0rf*SAH&+{m14%IQyfKjQtK3}@d!o_vLCWX2m!~He?EDRTaensl^z!ul4DMdBy_ zylS|vE>ob8S9jo*i?RqKFXb6{?UJq~FVZL}pyxUkYE^*6!>d+3V#+=3cqduDu|9?H zriR%F0(uT^{pRKrW=!e2uJ85yh=VYdl|JnCwgNF{dUKm}p!)>iNZQainf)mIqK}FOh~fa4P~h~!c#W+>h_ivcvU-7rX~z8<#D;B& z#P{_jha!20cqHGbju=K)Fdbnuh|3GgWtuy(9;ziPE3SUd>>V5aH@FI5*MbKqT?oo- z2%{mSFSa4x7dFIZwlX0|+Ya_&zk?QFW2B65Hh6Eq;s_v)qEk&$pDDC1!RH1%{3fZ0 z9l(N>7>PnHz<}s@;JBLwiD3U|2vQDQU&?MgKT2`|IFP@A*fwN~Y^A<3QQ(pp$TUMB zlMEzPY!rcgLn)vlEJG^xS)tV|TrT11=eXnGoIBt%K2CE-EECd4E!etRK zEwR@&4j7FdI3{QS>dIinM1oY7RMutEu}aj0JYz=otoH zvk)P?6_3DJI4;^zTE1dc@wRM<0k~N?|Ghtw5^Upjp;y z{oG7C-YERxPGa8fek-IxDw^#EMC8%P^P9UfQwrc_Og{-hxG6 zi$YHBmf?g|1`VTRg7*?w^a?rTAZAxqH|WImruoEM^3;j=au#rQci48+!ia0##G2F79_`?&Df zB`o}URhJ-DC79{_ko^#|pNH>k50lEga5R;=Z?`rpNE}WHbNs+oBCclp6@a1iXJr@# zQ^Wsu0PB5ZZRT0;$A{pLBbBmx5EZmhUj3C8YiovWY}0Fc(n`nb8(KTFHDMq)@>=V9 zTIXfFti8dnFE3w{Fd-6@ZWIam^jt~!TOOfYgc7Wj)}WS_;rO;G>xU5QQQ)xWx~}PN zHKPYfDa~NFs%^vy=Cm;;t$~In@2NR?X}S4wZO&Zv=vPcz)$;e9vpss*B44=}(-&X^ z7=BCJXR>Z}Zz|_vftPyEfqJ#dKp8`|GLwId%8N52ErA>Hf+9|c zB(^al2mD6rDc=C$C?y1Zo&{kHx(PAgEK6<#g#TrMiMwjP(d5$EE?^sVpVMUbq)lnj z_A6{l3&Rs^%%5&)@8-#a6XBji%4SIkZgjS8`=np0=PecNJ;f!}Z68y;Vf$gZ=%3?*0GAmcJ2C{U#N7Z2$LYcef?~ z;oe>+|8Dn+odp{yVu zN4gB-N6`Vy6A=Yw#4(vM}(;EUV z{>$BpugGp3_ImP6SbM*skg{7(u=gcl>Bb~}ZQle7sPdaQI zx}q$WG+UG))h~SXGDlf@iQVcqT0UH?>Lmg$huHO%@o4g7G_+r(Mj>w}@7~>GLWx|6 z>CJ|uzk;BbS+Y3|XCmLb#ewz9*nq1&wt6z{|q5h?jX}vyIB_f~mDvFvTOZl1t&SSgdeLgI>`a zH6g&Za&-qT_yQBjxT#KBhU?^(En#&8ivR}7v)p<hk~sQ>Bk3%Y0>n2OMi=!PTR?R0*`(2FlIcDl?-DibxAz#vzw_vP{E(u-zUi1j7&8Fff-7vxR z4nLR)oYYGgZ3i%2C1|7YG#;y^?lLRuk90`=Y{_kjw6X&` zZ9*@FtQVysu4NSgXQ@*P!uo?b|SD7~sYKlzc7 zY;lJkn&W-$Ua`r)oU6}1-`SqXYj|c6P=$()V}b7$2Rw=#XxnG)ZoD)}9jSa;niWXR zKpIyeovo_+YEXapWZtY98Z|TmeLTldBNLD7{u20+2|wexpjfAlEWsRJFGUghn^bI0 zVe~xi_udn!$*Xtm@3JU8!Y(&y8ST|Mp=k=Na;!6B3sP%8Y0Xi~?fkz7{6F)c)$jQJ zcXu@0Z@>Q??{?pR=={IV|NAukKR;jf8J^4ZbNH1j-RU_l;K#B@9xy_NcmjFY$Va>rykYK<%c|tUxqoKuo`wJW(=@D)_73fpouG*RKxzsQr z6B2ZmR31}CSCclVLYK!EFHTQAl}>4#Cw(}jLRl4#iR2>-^)}ir_{{1{%3!cZ77=D) zuKVUTzzyWjtJA?p)(?ecPLipnMy&`-gafMwo%F4<4$NyOd)*814bXm>Mi&!=if^TKtER9JPEQ}Pk6 z+NmiCQ6Kue%2kJs;;Ab=-jEzrUpSy*>VU9I4gr&g2T0n0VB#9AekBS}p|_W>w!hHa zCRb5?DgfB)d0x-_GM&zDQ7w*?RLNkt0>U2;xM31%bPQ<8K?SQJk2i0|^4lHYi%2)$ z$8Wdluj`;P^m_E;(8R^`z>%#~#+DJBI&6PrlLo1WoW9h0WSgIFZB$L^Bd$mGrjNKP zKycM=z#++xu51;S&}mC~=@GfMdiz>tfa)pVl~t&f?{{P?-%mXb>TksY=}-Q+twx%} zi>Zl%J|AMgIXQm$?TNPxY*&4kMS|1%6-86!XTmTm+i1!2_3WQV_{H6k5k*_Zr;?y{ z*W2|5X0`E&Bd8uyHPr&6I^)`vu5_g Date: Mon, 30 Sep 2013 18:11:07 -0700 Subject: [PATCH 04/10] Crap, whats going on? --- MANIFEST.in | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 2985d188..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include *.txt -recursive-include docs *.txt From 1cf29718ee53d8a290839d54ddf9a752bd6b3554 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 30 Sep 2013 18:19:23 -0700 Subject: [PATCH 05/10] Trying to make pip install work. --- CHANGES.txt | 2 ++ MANIFEST | 9 +-------- README.md => README | 2 +- setup.py | 4 ++-- 4 files changed, 6 insertions(+), 11 deletions(-) rename README.md => README (97%) diff --git a/CHANGES.txt b/CHANGES.txt index ca13fd38..3fc161c6 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1 +1,3 @@ v0.01, 9/11/2013 -- Initial Release +v0.01.1, 9/30/2013 -- Fixing problems with build :( +v0.01.2, 9/30/2013 -- Fixing problems with build :( \ No newline at end of file diff --git a/MANIFEST b/MANIFEST index f547254f..1dd015cc 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,10 +1,3 @@ # file GENERATED by distutils, do NOT edit -CHANGES.txt +README setup.py -pydruid/__init__.py -pydruid/client.py -pydruid/utils/__init__.py -pydruid/utils/aggregators.py -pydruid/utils/filters.py -pydruid/utils/postaggregator.py -pydruid/utils/query_utils.py diff --git a/README.md b/README similarity index 97% rename from README.md rename to README index d67fe340..de43975a 100644 --- a/README.md +++ b/README @@ -23,5 +23,5 @@ often looks like this:: aggregations = {"count" : doubleSum("edits")} ) - >>> print counts + print counts [{'timestamp': '2013-09-30T23:31:00.000Z', 'result': {'count': 0.0}}, {'timestamp': '2013-09-30T23:32:00.000Z', 'result': {'count': 0.0}}, {'timestamp': '2013-09-30T23:33:00.000Z', 'result': {'count': 0.0}}, {'timestamp': '2013-09-30T23:34:00.000Z', 'result': {'count': 0.0}}] diff --git a/setup.py b/setup.py index bdcc2ce3..354e7c53 100644 --- a/setup.py +++ b/setup.py @@ -2,14 +2,14 @@ setup( name='pyDruid', - version='0.1.0', + version='0.01.2', author='Deep Ganguli', author_email='deep@metamarkets.com', packages=['pydruid', 'pydruid.utils'], url='http://pypi.python.org/pypi/pyDruid/', license='LICENSE', description='Druid analytical data-store Python library', - long_description=open('README.md').read(), + long_description=open('README').read(), install_requires=[ "pandas >= 0.12", "simplejson >= 3.3.0", From 441e7aa5e4291316ea9b40bb23c8ea1b0897a677 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 30 Sep 2013 18:27:39 -0700 Subject: [PATCH 06/10] Still trying to make pypi work, but I can't get the build to update on pypi. --- CHANGES.txt | 3 ++- MANIFEST | 1 + MANIFEST.in | 3 +++ setup.py | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 MANIFEST.in diff --git a/CHANGES.txt b/CHANGES.txt index 3fc161c6..b551cf07 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,4 @@ v0.01, 9/11/2013 -- Initial Release v0.01.1, 9/30/2013 -- Fixing problems with build :( -v0.01.2, 9/30/2013 -- Fixing problems with build :( \ No newline at end of file +v0.01.2, 9/30/2013 -- Fixing problems with build :( +v0.01.3, 9/30/2013 -- Fixing problems with build :( \ No newline at end of file diff --git a/MANIFEST b/MANIFEST index 1dd015cc..5ec9fc38 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,3 +1,4 @@ # file GENERATED by distutils, do NOT edit +CHANGES.txt README setup.py diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..f3b93cbf --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include *.txt +include *.md +recursive-include docs *.txt \ No newline at end of file diff --git a/setup.py b/setup.py index 354e7c53..c05bab20 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='pyDruid', - version='0.01.2', + version='0.01.3', author='Deep Ganguli', author_email='deep@metamarkets.com', packages=['pydruid', 'pydruid.utils'], From 3c9e1a0fc98a8d9de9076995c6c3ef8579d9d33f Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 30 Sep 2013 18:36:47 -0700 Subject: [PATCH 07/10] Renaming readme. --- README => README.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename README => README.txt (100%) diff --git a/README b/README.txt similarity index 100% rename from README rename to README.txt From cc99963f15a76073c12492267660238eff8d29ad Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 2 Oct 2013 12:10:26 -0700 Subject: [PATCH 08/10] Trying to re-create source code. Something weird and terrible went on. --- CHANGES.txt | 3 +- MANIFEST | 2 +- pydruid/client.py | 222 ++++++++++++++++++++++++++++++++ pydruid/utils/__init__.py | 0 pydruid/utils/aggregators.py | 37 ++++++ pydruid/utils/filters.py | 58 +++++++++ pydruid/utils/postaggregator.py | 56 ++++++++ pydruid/utils/query_utils.py | 43 +++++++ setup.py | 6 +- 9 files changed, 422 insertions(+), 5 deletions(-) create mode 100755 pydruid/client.py create mode 100644 pydruid/utils/__init__.py create mode 100644 pydruid/utils/aggregators.py create mode 100644 pydruid/utils/filters.py create mode 100644 pydruid/utils/postaggregator.py create mode 100644 pydruid/utils/query_utils.py diff --git a/CHANGES.txt b/CHANGES.txt index b551cf07..9e668b9a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ v0.01, 9/11/2013 -- Initial Release v0.01.1, 9/30/2013 -- Fixing problems with build :( v0.01.2, 9/30/2013 -- Fixing problems with build :( -v0.01.3, 9/30/2013 -- Fixing problems with build :( \ No newline at end of file +v0.01.3, 9/30/2013 -- Fixing problems with build :( +v0.1.4, 9/30/2013 -- Still working on getting around README.md errors diff --git a/MANIFEST b/MANIFEST index 5ec9fc38..7f4312dc 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,4 +1,4 @@ # file GENERATED by distutils, do NOT edit CHANGES.txt -README +README.txt setup.py diff --git a/pydruid/client.py b/pydruid/client.py new file mode 100755 index 00000000..3a2bc840 --- /dev/null +++ b/pydruid/client.py @@ -0,0 +1,222 @@ +# +# Copyright 2013 Metamarkets Group Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import division +import urllib2 +import simplejson as json +import csv +import re +import os +import sys +import pandas +import dateutil.parser +from matplotlib import * +from matplotlib.pyplot import * +from utils.aggregators import * +from utils.postaggregator import * +from utils.filters import * +from utils.query_utils import * + + +class pyDruid: + + def __init__(self,url,endpoint): + self.url = url + self.endpoint = endpoint + self.result = None + self.result_json = None + self.query_type = None + + # serializes a dict representation of the query into a json + # object, and sends it to bard via post, and gets back the + # json representation of the query result + def post(self,query): + querystr = json.dumps(query) + url = self.url + '/' + self.endpoint + headers = {'Content-Type' : 'application/json'} + req = urllib2.Request(url, querystr, headers) + res = urllib2.urlopen(req) + data = res.read() + self.result_json = data; + self.querystr = querystr + res.close() + + # de-serializes the data returned from druid into a + # list of dicts + def parse(self): + if self.result_json: + res = json.loads(self.result_json) + return res + else: + print("Empty query") + return None + + def export_tsv(self,dest_path): + + f = open(dest_path,'wb') + tsv_file = csv.writer(f, delimiter = '\t') + + if(self.query_type == "timeseries"): + header = self.result[0]['result'].keys() + header.append('timestamp') + elif(self.query_type == "groupby"): + header = self.result[0]['event'].keys() + header.append('timestamp') + header.append('version') + + tsv_file.writerow(header) + + # use unicodewriter to encode results in unicode + w = query_utils.UnicodeWriter(f) + + if self.result: + if self.query_type == "timeseries": + for item in self.result: + timestamp = item['timestamp'] + result = item['result'] + + if type(result) is list: + for line in result: + w.writerow(line.values() + [timestamp]) + else: #timeseries + w.writerow(result.values() + [timestamp]) + elif self.query_type == "groupby": + for item in self.result: + timestamp = item['timestamp'] + version = item['version'] + w.writerow(item['event'].values() + [timestamp] + [version]) + + f.close() + + # Exports a JSON query object into a Pandas data-frame + def export_pandas(self): + if self.result: + if self.query_type == "timeseries": + nres = [v['result'].items() + [('timestamp',v['timestamp'])] for v in self.result] + nres = [dict(v) for v in nres] + else: + print('not implemented yet') + return None + + df = pandas.DataFrame(nres) + df['timestamp'] = df['timestamp'].map(lambda x: dateutil.parser.parse(x)) + df['t'] = dates.date2num(df['timestamp']) + return df + + # implements a timeseries query + def timeseries(self, **args): + + query_dict = {"queryType" : "timeseries"} + valid_parts = ['dataSource', 'granularity', 'filter', 'aggregations', 'postAggregations', 'intervals'] + + for key, val in args.iteritems(): + if key not in valid_parts: + raise ValueError('{0} is not a valid query component. The list of valid components is: \n {1}'.format(key, valid_parts)) + elif key == "aggregations" : + query_dict[key] = build_aggregators(val) + elif key == "postAggregations": + query_dict[key] = build_post_aggregators(val) + elif key == "filter": + query_dict[key] = val.filter['filter'] + else: + query_dict[key] = val + + self.query_dict = query_dict + + try: + self.post(query_dict) + except urllib2.HTTPError, e: + raise IOError('Malformed query: \n {0}'.format(json.dumps(self.query_dict ,indent = 4))) + else: + self.result = self.parse() + self.query_type = "timeseries" + return self.result + + # Implements a groupBy query + def groupBy(self, **args): + + query_dict = {"queryType" : "groupBy"} + valid_parts = ['dataSource', 'granularity', 'filter', 'aggregations', 'postAggregations', + 'intervals', 'dimensions'] + + for key, val in args.iteritems(): + if key not in valid_parts: + raise ValueError('{0} is not a valid query component. The list of valid components is: \n {1}'.format(key, valid_parts)) + elif key == "aggregations" : + query_dict[key] = build_aggregators(val) + elif key == "postAggregations": + query_dict[key] = build_post_aggregators(val) + elif key == "filter": + query_dict[key] = val.filter['filter'] + else: + query_dict[key] = val + + self.query_dict = query_dict + + try: + self.post(query_dict) + except urllib2.HTTPError, e: + raise IOError('Malformed query: \n {0}'.format(json.dumps(self.query_dict ,indent = 4))) + else: + self.result = self.parse() + self.query_type = "groupby" + return self.parse() + + # Implements a segmentMetadata query. This query type is for pulling the tsv-equivalent + # size and cardinality broken out by dimension in a specified data source. + def segmentMetadata(self, **args): + + query_dict = {"queryType" : "segmentMetadata"} + valid_parts = ['dataSource', 'intervals'] + + for key, val in args.iteritems(): + if key not in valid_parts: + raise ValueError('{0} is not a valid query component. The list of valid components is: \n {1}'.format(key, valid_parts)) + else: + query_dict[key] = val + + self.query_dict = query_dict + + try: + self.post(query_dict) + except urllib2.HTTPError, e: + raise IOError('Malformed query: \n {0}'.format(json.dumps(self.query_dict ,indent = 4))) + else: + self.result = self.parse() + return self.parse() + + # implements a time boundary query + def timeBoundary(self, **args): + + query_dict = {"queryType" : "timeBoundary"} + valid_parts = ['dataSource'] + + for key, val in args.iteritems(): + if key not in valid_parts: + raise ValueError('{0} is not a valid query component. The list of valid components is: \n {1}'.format(key, valid_parts)) + else: + query_dict[key] = val + + try: + self.post(query_dict) + except urllib2.HTTPError, e: + raise IOError('Malformed query: \n {0}'.format(json.dumps(self.query_dict ,indent = 4))) + else: + self.result = self.parse() + return self.parse() + + # prints internal variables of the object + def describe(self): + print("url: " + self.url) diff --git a/pydruid/utils/__init__.py b/pydruid/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pydruid/utils/aggregators.py b/pydruid/utils/aggregators.py new file mode 100644 index 00000000..efd54f42 --- /dev/null +++ b/pydruid/utils/aggregators.py @@ -0,0 +1,37 @@ +# +# Copyright 2013 Metamarkets Group Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +#create a json list of user-defined aggregations + +# Define the support aggregation types + +def longSum(raw_metric): + return {"type" : "longSum", "fieldName" : raw_metric} + +def doubleSum(raw_metric): + return {"type" : "doubleSum", "fieldName" : raw_metric} + +def min(raw_metric): + return {"type" : "min", "fieldName" : raw_metric} + +def max(raw_metric): + return {"type" : "max", "fieldName" : raw_metric} + +def count(raw_metric): + return {"type" : "count", "fieldName" : raw_metric} + +def build_aggregators(agg_input): + return [dict([('name',k)] + v.items()) for (k,v) in agg_input.iteritems()] \ No newline at end of file diff --git a/pydruid/utils/filters.py b/pydruid/utils/filters.py new file mode 100644 index 00000000..562cf78d --- /dev/null +++ b/pydruid/utils/filters.py @@ -0,0 +1,58 @@ +# +# Copyright 2013 Metamarkets Group Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +class Filter: + + def __init__(self, **args): + + # constrct a selector + if 'type' not in args.keys(): + self.filter = {"filter": {"type": "selector", + "dimension" : args["dimension"], + "value" : args["value"]}} + # construct an and filter + elif args["type"] == "and": + self.filter = {"filter": {"type" : "and", + "fields" : args["fields"]}} + # construct an or filter + elif args["type"] == "or": + self.filter = {"filter": {"type" : "or", + "fields" : args["fields"]}} + # construct a not filter + elif args["type"] == "not": + self.filter = {"filter" : {"type" : "not", + "field" : args["field"]}} + else: + print("you've slain me. nevermind the teeth and the fingernails. the show must go on.") + + def show(self): + print(json.dumps(self.filter, indent = 4)) + + def __and__(self, x): + return Filter(type = "and", fields = [self.filter['filter'], x.filter['filter']]) + + def __or__(self,x): + return Filter(type = "or", fields = [self.filter['filter'], x.filter['filter']]) + + def __invert__(self): + return Filter(type = "not", field = self.filter['filter']) + +class Dimension: + + def __init__(self, dim): + self.dimension = dim + + def __eq__(self,other): + return Filter(dimension = self.dimension, value = other) \ No newline at end of file diff --git a/pydruid/utils/postaggregator.py b/pydruid/utils/postaggregator.py new file mode 100644 index 00000000..0d647850 --- /dev/null +++ b/pydruid/utils/postaggregator.py @@ -0,0 +1,56 @@ +# +# Copyright 2013 Metamarkets Group Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import pprint as pp +import re + +# determine whether arguments are strings or numeric constants +def is_numeric(field): + for obj in [int, float, long]: + try: + obj(field) + return True + except: + pass + + return False + +# specify field type as source field name (string) or constant and build json +def field_spec(field): + if is_numeric(field): + return {"type" : "constant", + "name" : "constant_%d" % int(field), + "value" : field} + else: + return {"type" : "fieldAccess", + "fieldName" : field} + +# use regex to extract fields and operand from user-submitted postAgg string +def parse_calc(postAggOpString): + fields = [] + rgx = "(.+)[ ]?([*+/-])[ ]?(.+)" + postAggOp = re.findall(rgx, postAggOpString).pop() + + if postAggOp: + operation = postAggOp[0] + fn = postAggOp[1] + fields.append(field_spec(postAggOp[0])) + fields.append(field_spec(postAggOp[2])) + base = {"type" : "arithmetic", + "fn" : fn, + "fields" : fields} + return base + else: + raise Exception("Not a valid postAggregation operation") \ No newline at end of file diff --git a/pydruid/utils/query_utils.py b/pydruid/utils/query_utils.py new file mode 100644 index 00000000..4524e432 --- /dev/null +++ b/pydruid/utils/query_utils.py @@ -0,0 +1,43 @@ +# +# Copyright 2013 Metamarkets Group Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import csv, codecs, cStringIO + +# A special CSV writer which will write rows to TSV file "f", which is encoded in utf-8. +# this is necessary because the values in druid are not all ASCII. +class UnicodeWriter: + + def __init__(self, f, dialect="excel-tab", encoding="utf-8", **kwds): #delimiter="\t" + # Redirect output to a queue + self.queue = cStringIO.StringIO() + self.writer = csv.writer(self.queue, dialect=dialect, **kwds) + self.stream = f + self.encoder = codecs.getincrementalencoder(encoding)() + + def writerow(self, row): + self.writer.writerow([s.encode("utf-8") if isinstance(s, unicode) else s for s in row]) + # Fetch UTF-8 output from the queue ... + data = self.queue.getvalue() + data = data.decode("utf-8") + # ... and reencode it into the target encoding + data = self.encoder.encode(data) + # write to the target stream + self.stream.write(data) + # empty queue + self.queue.truncate(0) + + def writerows(self, rows): + for row in rows: + self.writerow(row) \ No newline at end of file diff --git a/setup.py b/setup.py index c05bab20..d1c42ab2 100644 --- a/setup.py +++ b/setup.py @@ -1,15 +1,15 @@ -from distutils.core import setup +from setuptools import setup setup( name='pyDruid', - version='0.01.3', + version='0.1.4', author='Deep Ganguli', author_email='deep@metamarkets.com', packages=['pydruid', 'pydruid.utils'], url='http://pypi.python.org/pypi/pyDruid/', license='LICENSE', description='Druid analytical data-store Python library', - long_description=open('README').read(), + long_description=open('README.txt').read(), install_requires=[ "pandas >= 0.12", "simplejson >= 3.3.0", From efa2786debac9a9d7d8a05a9800c205890bbecb0 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Wed, 2 Oct 2013 12:26:43 -0700 Subject: [PATCH 09/10] Added __init__.py to pydruid and updated to version 0.1.5 for release. --- CHANGES.txt | 1 + pydruid/__init__.py | 0 setup.py | 2 +- 3 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 pydruid/__init__.py diff --git a/CHANGES.txt b/CHANGES.txt index 9e668b9a..b9921fe5 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -3,3 +3,4 @@ v0.01.1, 9/30/2013 -- Fixing problems with build :( v0.01.2, 9/30/2013 -- Fixing problems with build :( v0.01.3, 9/30/2013 -- Fixing problems with build :( v0.1.4, 9/30/2013 -- Still working on getting around README.md errors +v0.1.5, 10/2/2013 -- Still working on build problems diff --git a/pydruid/__init__.py b/pydruid/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/setup.py b/setup.py index d1c42ab2..25137b6d 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='pyDruid', - version='0.1.4', + version='0.1.5', author='Deep Ganguli', author_email='deep@metamarkets.com', packages=['pydruid', 'pydruid.utils'], From 5fb737ee5c6abc867c18e3ab739e20c5bb3321fe Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Thu, 3 Oct 2013 13:11:25 -0700 Subject: [PATCH 10/10] Working 1.7 --- CHANGES.txt | 2 ++ MANIFEST | 8 ++++++++ MANIFEST.in | 3 ++- __init__.py | 0 setup.py | 5 +++-- 5 files changed, 15 insertions(+), 3 deletions(-) delete mode 100644 __init__.py diff --git a/CHANGES.txt b/CHANGES.txt index b9921fe5..eedb82c2 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -4,3 +4,5 @@ v0.01.2, 9/30/2013 -- Fixing problems with build :( v0.01.3, 9/30/2013 -- Fixing problems with build :( v0.1.4, 9/30/2013 -- Still working on getting around README.md errors v0.1.5, 10/2/2013 -- Still working on build problems +v0.1.6, 10/2/2013 -- Still working on build problems +v0.1.7, 10/2/2013 -- still working on build problems diff --git a/MANIFEST b/MANIFEST index 7f4312dc..6eb99b0d 100644 --- a/MANIFEST +++ b/MANIFEST @@ -2,3 +2,11 @@ CHANGES.txt README.txt setup.py +pydruid +pydruid/__init__.py +pydruid/client.py +pydruid/utils/__init__.py +pydruid/utils/aggregators.py +pydruid/utils/filters.py +pydruid/utils/postaggregator.py +pydruid/utils/query_utils.py diff --git a/MANIFEST.in b/MANIFEST.in index f3b93cbf..056aef99 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include *.txt include *.md -recursive-include docs *.txt \ No newline at end of file +recursive-include docs *.txt +global-include pydruid *.py diff --git a/__init__.py b/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/setup.py b/setup.py index 25137b6d..923dda81 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='pyDruid', - version='0.1.5', + version='0.1.7', author='Deep Ganguli', author_email='deep@metamarkets.com', packages=['pydruid', 'pydruid.utils'], @@ -13,6 +13,7 @@ install_requires=[ "pandas >= 0.12", "simplejson >= 3.3.0", - "matplotlib >= 1.3.0" + "matplotlib >= 1.3.0", + "numpy >= 1.7.1" ], ) \ No newline at end of file