-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.py
61 lines (51 loc) · 1.65 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import re
import sqlparse
from main import Query
# Counts of parsed queries whose first keyword token is one of these
# expected (lower-cased) SQL keywords.
ok = {
    'select': 0, 'create': 0, 'use': 0, 'drop': 0, 'desc': 0,
    'show': 0, 'set': 0, 'insert': 0, 'describe': 0, 'from': 0, 'alter': 0,
}
# Lower-cased first token of each query whose keyword is NOT in `ok`,
# mapped to the number of times it was seen.
extra = {}
# `_id` of the search hit for every query with an unexpected keyword
# (appended once per such query, so an id can appear more than once).
weirdness = []

for statement in Query.search(size=10000)['hits']['hits']:
    raw_sql = statement['_source']['statement'].strip()
    if not raw_sql:
        # NOTE(review): this ends the whole scan at the first blank
        # statement rather than skipping it -- confirm that is intended.
        break
    for query in sqlparse.parse(raw_sql):
        query_text = query.to_unicode()
        if not query_text:
            continue
        if query_text.lower()[:7] == 'select(':
            # Re-parse with a space between SELECT and the parenthesis;
            # presumably so the leading token is recognised as a keyword
            # rather than a function call -- TODO confirm.
            query = sqlparse.parse(
                re.sub(r'^select\(', 'SELECT (', query_text,
                       flags=re.IGNORECASE).strip())[0]
        elif query_text.strip() == ';':
            # A bare terminator: stop looking at this hit's queries.
            break
        try:
            keyword = query.token_next_by_type(
                0, sqlparse.tokens.Token.Keyword).to_unicode().lower()
            if keyword in ok:
                ok[keyword] += 1
            else:
                weirdness.append(statement['_id'])
                first_token = query.tokens[0].to_unicode().lower()
                extra[first_token] = extra.get(first_token, 0) + 1
        except Exception:
            # Best effort: report the offending hit and carry on with the
            # next query. Catching Exception (not a bare except) lets
            # KeyboardInterrupt and SystemExit still stop the script.
            print("weird error")
            print(statement)
def process_query(query):
    """Placeholder for extracting the FROM tables of a parsed query.

    Only looks up the first keyword token of ``query`` and discards the
    result; no table extraction is implemented yet.

    Args:
        query: Parsed statement exposing ``token_next_by_type``
            (presumably an item returned by ``sqlparse.parse`` -- confirm
            against callers).

    Returns:
        None, always.
    """
    # Token.Keyword is looked up by type here, matching the keyword lookup
    # in the module-level scan loop, which passes the same argument to
    # token_next_by_type.
    query.token_next_by_type(0, sqlparse.tokens.Token.Keyword)
    return None
# Dump the collected tallies: expected-keyword counts, counts of unexpected
# leading tokens, and the ids of hits that had an unexpected keyword.
# Single-argument parenthesised print produces the same output under
# Python 2 and is also valid Python 3 syntax.
print(ok)
print(extra)
print(weirdness)
# Non-select queries:
# drop, show, create table, use, desc, alter table
# Need to account for: comments
# {
# comments: [comments]
# select: [column names],
# from: [table names],
# where: [where statements],
# group_by: [group by statements]
# }
# select, from, where, group by, order by, sort by,
# distribute by, cluster by, limit, union all,
# (left|right) (inner|outer) join