-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_scrape.py
166 lines (116 loc) · 8.46 KB
/
test_scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import argparse
from lat_epig.parse import scrape
import re
############ UNIT TESTS #################################################################
# Template
# def test_inscription_XXX():
# # ./parse.py -e 09000264 % --debug
# args = argparse.Namespace(EDCS='09000264', publication=None, province=None, place=None, operator='and', term2=None, dating_from=None, dating_to=None, inscription_genus=None, and_not_inscription_genus=None, to_file=None, from_file=None, debug=True, term1='%')
#
# test_output = scrape(args, prevent_write=True, show_inscription_transform=True)
# assert "[3]" in test_output[0]['inscription']
# assert "[3]" not in test_output[0]['inscription interpretive cleaning']
def test_EDCS_ID():
    """Check the 'EDCS-ID' field of the first record scraped for EDCS 07600345.

    The field must not contain the literal "[]" and must start with
    "EDCS-" followed by eight digits.
    """
    # CLI equivalent recorded by the author: ./parse.py -e EDCS-07600345 % --debug
    query = {
        "EDCS": "07600345",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    records = scrape(
        argparse.Namespace(**query),
        prevent_write=True,
        show_inscription_transform=True,
    )
    edcs_id = records[0]["EDCS-ID"]

    assert "[]" not in edcs_id
    # re.match only anchors at the start of the value.
    assert re.match(r"EDCS-[0-9]{8,8}", edcs_id) is not None
def test_publication():
    """Check the 'publication' field of the first record scraped for EDCS 78800166.

    The field must contain "AE 1937, 00075" and must not contain the
    literal "[]".
    """
    # CLI equivalent: ./parse.py -e 78800166 % --debug
    query = {
        "EDCS": "78800166",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    records = scrape(
        argparse.Namespace(**query),
        prevent_write=True,
        show_inscription_transform=True,
    )
    publication = records[0]["publication"]

    assert "AE 1937, 00075" in publication
    assert "[]" not in publication
def test_province():
    """Check the 'province' field of the first record scraped for EDCS 78800170.

    "Pontus et Bithynia" must appear in 'province' and must not appear in
    'place'; 'province' must not contain the literal "[]".
    """
    # CLI equivalent: ./parse.py -e 78800170 % --debug
    query = {
        "EDCS": "78800170",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    records = scrape(
        argparse.Namespace(**query),
        prevent_write=True,
        show_inscription_transform=True,
    )
    first = records[0]
    expected_province = "Pontus et Bithynia"

    assert expected_province in first["province"]
    # The province name must not end up in the place column.
    assert expected_province not in first["place"]
    assert "[]" not in first["province"]
def test_place():
    """Check the 'place' field of the first record scraped for EDCS 16201127.

    The field must not contain the literal "[]" and must contain
    "Acireale / Acium".
    """
    # CLI equivalent: ./parse.py -e 16201127 % --debug
    query = {
        "EDCS": "16201127",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    records = scrape(
        argparse.Namespace(**query),
        prevent_write=True,
        show_inscription_transform=True,
    )
    place = records[0]["place"]

    assert "[]" not in place
    assert "Acireale / Acium" in place
def test_status():
    """Check the 'status' field of the first record scraped for EDCS 55701594.

    The field must not contain the literal "[]" and must contain
    "sigilla impressa; tituli fabricationis".
    """
    # CLI equivalent: ./parse.py -e 55701594 % --debug
    query = {
        "EDCS": "55701594",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    records = scrape(
        argparse.Namespace(**query),
        prevent_write=True,
        show_inscription_transform=True,
    )
    status = records[0]["status"]

    assert "[]" not in status
    assert "sigilla impressa; tituli fabricationis" in status
def test_material():
    """Check the 'material' field of the first record scraped for EDCS 32001159.

    The field must not contain the literal "[]" and must contain "lapis".
    """
    # CLI equivalent: ./parse.py -e 32001159 % --debug
    query = {
        "EDCS": "32001159",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    records = scrape(
        argparse.Namespace(**query),
        prevent_write=True,
        show_inscription_transform=True,
    )
    material = records[0]["material"]

    assert "[]" not in material
    assert "lapis" in material
def test_comment():
    """Check the 'comment' field of the first record scraped for EDCS 36400015.

    The DOI comment text must be absent from 'inscription' and present in
    'comment'.
    """
    # CLI equivalent: ./parse.py -e 36400015 % --debug
    query = {
        "EDCS": "36400015",
        "publication": None,
        "province": None,
        "place": None,
        "operator": "and",
        "term2": None,
        "dating_from": None,
        "dating_to": None,
        "inscription_genus": None,
        "and_not_inscription_genus": None,
        "to_file": None,
        "from_file": None,
        "debug": True,
        "term1": "%",
    }
    records = scrape(
        argparse.Namespace(**query),
        prevent_write=True,
        show_inscription_transform=True,
    )
    first = records[0]
    doi_comment = "comment DOI: 10.3406/crai.2005.22934"

    # The comment text must not remain inside the inscription text.
    assert doi_comment not in first["inscription"]
    assert doi_comment in first["comment"]
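# TODO(Petra): continue from here. The commented-out stubs below are copies of a
# language test (record 78800166, asserting on 'inscription' and 'language');
# each still needs a record and assertions for the field named in the stub.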
# def test_latitude():
# # ./parse.py -e 78800166 % --debug
# # csvcut -t -c 10,17 output/2021-07-30-EDCS_78800166+term1_%-1.tsv | csvlook
# # | inscription | language |
# # | ----------- | -------- |
# # | // GR" | PALMYR |
# args = argparse.Namespace(EDCS='78800166', publication=None, province=None, place=None, operator='and', term2=None, dating_from=None, dating_to=None, inscription_genus=None, and_not_inscription_genus=None, to_file=None, from_file=None, debug=True, term1='%')
# test_output = scrape(args, prevent_write=True, show_inscription_transform=True)
# assert 'GR"' not in test_output[0]['inscription']
# assert "PALMYR, GR" in test_output[0]['language']
# def test_longitude():
# # ./parse.py -e 78800166 % --debug
# # csvcut -t -c 10,17 output/2021-07-30-EDCS_78800166+term1_%-1.tsv | csvlook
# # | inscription | language |
# # | ----------- | -------- |
# # | // GR" | PALMYR |
# args = argparse.Namespace(EDCS='78800166', publication=None, province=None, place=None, operator='and', term2=None, dating_from=None, dating_to=None, inscription_genus=None, and_not_inscription_genus=None, to_file=None, from_file=None, debug=True, term1='%')
# test_output = scrape(args, prevent_write=True, show_inscription_transform=True)
# assert 'GR"' not in test_output[0]['inscription']
# assert "PALMYR, GR" in test_output[0]['language']
# def test_photo():
# # ./parse.py -e 78800166 % --debug
# # csvcut -t -c 10,17 output/2021-07-30-EDCS_78800166+term1_%-1.tsv | csvlook
# # | inscription | language |
# # | ----------- | -------- |
# # | // GR" | PALMYR |
# args = argparse.Namespace(EDCS='78800166', publication=None, province=None, place=None, operator='and', term2=None, dating_from=None, dating_to=None, inscription_genus=None, and_not_inscription_genus=None, to_file=None, from_file=None, debug=True, term1='%')
# test_output = scrape(args, prevent_write=True, show_inscription_transform=True)
# assert 'GR"' not in test_output[0]['inscription']
# assert "PALMYR, GR" in test_output[0]['language']
# def test_partner_link():
# # ./parse.py -e 78800166 % --debug
# # csvcut -t -c 10,17 output/2021-07-30-EDCS_78800166+term1_%-1.tsv | csvlook
# # | inscription | language |
# # | ----------- | -------- |
# # | // GR" | PALMYR |
# args = argparse.Namespace(EDCS='78800166', publication=None, province=None, place=None, operator='and', term2=None, dating_from=None, dating_to=None, inscription_genus=None, and_not_inscription_genus=None, to_file=None, from_file=None, debug=True, term1='%')
# test_output = scrape(args, prevent_write=True, show_inscription_transform=True)
# assert 'GR"' not in test_output[0]['inscription']
# assert "PALMYR, GR" in test_output[0]['language']
# def test_extra_text():
# # ./parse.py -e 78800166 % --debug
# # csvcut -t -c 10,17 output/2021-07-30-EDCS_78800166+term1_%-1.tsv | csvlook
# # | inscription | language |
# # | ----------- | -------- |
# # | // GR" | PALMYR |
# args = argparse.Namespace(EDCS='78800166', publication=None, province=None, place=None, operator='and', term2=None, dating_from=None, dating_to=None, inscription_genus=None, and_not_inscription_genus=None, to_file=None, from_file=None, debug=True, term1='%')
# test_output = scrape(args, prevent_write=True, show_inscription_transform=True)
# assert 'GR"' not in test_output[0]['inscription']
# assert "PALMYR, GR" in test_output[0]['language']
# def test_extra_html():
# # ./parse.py -e 78800166 % --debug
# # csvcut -t -c 10,17 output/2021-07-30-EDCS_78800166+term1_%-1.tsv | csvlook
# # | inscription | language |
# # | ----------- | -------- |
# # | // GR" | PALMYR |
# args = argparse.Namespace(EDCS='78800166', publication=None, province=None, place=None, operator='and', term2=None, dating_from=None, dating_to=None, inscription_genus=None, and_not_inscription_genus=None, to_file=None, from_file=None, debug=True, term1='%')
# test_output = scrape(args, prevent_write=True, show_inscription_transform=True)
# assert 'GR"' not in test_output[0]['inscription']
# assert "PALMYR, GR" in test_output[0]['language']