-
Notifications
You must be signed in to change notification settings - Fork 0
/
sqltxt_test.py
77 lines (65 loc) · 3.76 KB
/
sqltxt_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import unittest
import os
import subprocess
import re
import warnings
def get_awk_version():
awk_version = None
awk_version_str = subprocess.check_output(['awk -Wversion 2>/dev/null || awk --version'], shell=True)
if re.compile('awk version').match(awk_version_str):
awk_version = 'AWK'
elif re.compile('GNU Awk').match(awk_version_str):
awk_version = 'GAWK'
else:
warnings.warn("Unrecognized awk version: {}".format(awk_version_str))
return awk_version
class SqltxtTest(unittest.TestCase):
def test_select(self):
cmd = "sqltxt 'select col_a from tests/data/table_a.txt'"
actual_output = subprocess.check_output(['/bin/bash', '-c', cmd])
expected_output = """echo "col_a"; tail -n+2 tests/data/table_a.txt | awk -F',' 'OFS="," { print $1 }'\n"""
self.assertEqual(expected_output, actual_output)
def test_executed_select(self):
cmd = "sqltxt -e 'select col_a from tests/data/table_a.txt'"
actual_output = subprocess.check_output(['/bin/bash', '-c', cmd])
expected_output = """col_a\n1\n2\n3\n"""
self.assertEqual(expected_output, actual_output)
def test_where(self):
cmd = "sqltxt 'select col_a from tests/data/table_a.txt where col_b > 2'"
actual_output = subprocess.check_output(['/bin/bash', '-c', cmd])
expected_output = """echo "col_a"; tail -n+2 tests/data/table_a.txt | awk -F',' 'OFS="," { if ($2 > 2) { print $1,$2 } }' | awk -F',' 'OFS="," { print $1 }'\n"""
self.assertEqual(expected_output, actual_output)
def test_executed_where(self):
cmd = "sqltxt -e 'select col_a from tests/data/table_a.txt where col_b > 2'"
actual_output = subprocess.check_output(['/bin/bash', '-c', cmd])
expected_output = """col_a\n2\n"""
self.assertEqual(expected_output, actual_output)
def test_executed_join(self):
cmd = "sqltxt -e 'select ta.col_a, col_z from tests/data/table_a.txt ta join tests/data/table_b.txt tb on (ta.col_a = tb.col_a) where col_b > 1'"
actual_output = subprocess.check_output(['/bin/bash', '-c', cmd])
expected_output = """col_a,col_z\n2,x\n2,y\n"""
self.assertEqual(expected_output, actual_output)
def test_rows_are_sampled_for_sample_size_one(self):
cmd = "sqltxt -e --random-seed=100 'select ta.col_a, col_z from tests/data/table_a.txt ta join tests/data/table_b.txt tb on (ta.col_a = tb.col_a) tablesample (1)'"
actual_output = subprocess.check_output(['/bin/bash', '-c', cmd])
expected_output_for_awk = {
'GAWK': """col_a,col_z\n2,x\n""",
'AWK': """col_a,col_z\n2,y\n""",
}
awk_version = get_awk_version() or 'AWK'
expected_output = expected_output_for_awk[awk_version]
self.assertEqual(expected_output, actual_output)
cmd = "sqltxt -e --random-seed=101 'select ta.col_a, col_z from tests/data/table_a.txt ta join tests/data/table_b.txt tb on (ta.col_a = tb.col_a) tablesample (1)'"
actual_output = subprocess.check_output(['/bin/bash', '-c', cmd])
expected_output = """col_a,col_z\n1,w\n"""
self.assertEqual(expected_output, actual_output)
def test_rows_are_sampled_for_sample_size_more_than_one(self):
cmd = "sqltxt -e --random-seed=101 'select ta.col_a, col_z from tests/data/table_a.txt ta join tests/data/table_b.txt tb on (ta.col_a = tb.col_a) tablesample (2)'"
actual_output = subprocess.check_output(['/bin/bash', '-c', cmd])
expected_output_for_awk = {
'GAWK': """col_a,col_z\n1,w\n2,y\n""",
'AWK': """col_a,col_z\n2,x\n1,w\n""",
}
awk_version = get_awk_version() or 'AWK'
expected_output = expected_output_for_awk[awk_version]
self.assertEqual(expected_output, actual_output)