-
Notifications
You must be signed in to change notification settings - Fork 3
/
geocode_busroute.py
executable file
·152 lines (119 loc) · 4.16 KB
/
geocode_busroute.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import dist_latlng
import codecs
import google_geocode
import sys
import os
import time
import json
def warning(msg):
    """Write a single ``Warning: ...`` line for ``msg`` to standard error."""
    text = "Warning: %s\n" % msg
    sys.stderr.write(text)
def dist(g1, g2):
    """Return the distance between two points as computed by dist_latlng.

    Each point is an indexable whose first two items are passed through to
    ``dist_latlng.dist_latlng`` in the order ``g1[0], g1[1], g2[0], g2[1]``.
    """
    first_a, second_a = g1[0], g1[1]
    first_b, second_b = g2[0], g2[1]
    return dist_latlng.dist_latlng(first_a, second_a, first_b, second_b)
# Module-level alias for google_geocode.geocode; process_route calls it as
# geocode(station, api_key).
geocode = google_geocode.geocode
def validStr(s):
    """Tell whether ``s`` contains anything besides whitespace.

    A falsy ``s`` (``None`` or the empty string) is returned unchanged;
    otherwise the result is ``True`` when the stripped string is non-empty
    and ``False`` when it is not.
    """
    if not s:
        return s
    stripped = s.strip()
    return len(stripped) > 0
# Base search radius; process_route multiplies it by its `radius` counter to
# get the maximum accepted distance from the previous station.
base_radius = 1.5 # kilometer
# Path of the JSON file that save_geoDict writes and load_geoDict reads.
g_geoDict_fname = 'geoDict.json'
# Geocoding cache: station name -> list of candidate coordinates, or None
# when the lookup for that station failed.
g_geoDict = {}
def save_geoDict():
    """Dump the in-memory geocoding cache to ``g_geoDict_fname`` as JSON.

    The file is written as UTF-8 with a 4-space indent, and non-ASCII
    station names are stored verbatim rather than as escape sequences.
    """
    cache_file = codecs.open(g_geoDict_fname, mode='w', encoding='UTF-8')
    with cache_file:
        json.dump(g_geoDict, cache_file, ensure_ascii=False, indent=4)
def load_geoDict():
    """Load the geocoding cache from ``g_geoDict_fname`` into ``g_geoDict``.

    Does nothing when the cache file does not exist, leaving ``g_geoDict``
    as it was. JSON has no tuple type, so every cached coordinate comes back
    as a list; each one is converted to a tuple. Entries whose value is
    falsy (``None`` for a failed lookup, or an empty list) are kept as-is.
    """
    global g_geoDict
    if not os.path.isfile(g_geoDict_fname):
        return
    # The cache is written as UTF-8 with raw non-ASCII characters, so read it
    # back with an explicit encoding instead of the platform default.
    # json.load() no longer accepts an ``encoding`` keyword (removed in
    # Python 3.9), so the encoding is given to open() only.
    with open(g_geoDict_fname, 'r', encoding='UTF-8') as f:
        loaded = json.load(f)
    # convert list of lists => list of tuples
    g_geoDict = {
        station: [tuple(coord) for coord in coords] if coords else coords
        for station, coords in loaded.items()
    }
def list_elemAt(ls, i):
    """Return ``ls[i]``, or ``None`` when ``i`` is at or past the end of ``ls``."""
    if len(ls) > i:
        return ls[i]
    return None
# API keys for the geocoding service; main() fills this from apikeys.json and
# process_route picks one per query using g_queryCount modulo the list length.
g_ApiKeys = []
# Number of geocoding queries attempted so far (incremented after every
# attempt, successful or not).
g_queryCount = 0
def first_or_default(ls, predicate):
    """Return the first item of ``ls`` accepted by ``predicate``, else ``None``.

    The predicate is applied to every item of ``ls`` before the first match
    is picked.
    """
    matches = list(filter(predicate, ls))
    if len(matches) > 0:
        return matches[0]
    return None
def process_route(fin, fout, numStats, reverse=False):
    """Geocode one direction of a bus route and write its station lines.

    Reads ``numStats`` lines from ``fin``. A station line is split on tabs
    into non-blank fields: the station name, a second field that is copied
    through unchanged, and an optional third field holding ``lat,lng``.
    Lines with fewer than two fields are written back untouched.

    Stations are walked in file order, or last-to-first when ``reverse`` is
    true, keeping the most recent known position as an anchor:

    * A station that already has coordinates in the file is trusted: its
      line is kept verbatim, it becomes the new anchor and the search radius
      is reset.
    * Any other station is looked up in the ``g_geoDict`` cache and, on a
      miss, through ``geocode`` (each attempt is followed by a 0.5 s pause
      and the cache is saved). The first candidate closer to the anchor than
      ``radius * base_radius`` is written to the line and becomes the new
      anchor. When no candidate is close enough the line is written with an
      empty coordinate field and the radius grows by one step.
    * While there is no anchor yet, the line is left untouched and the first
      candidate (if any) becomes the anchor.

    All ``numStats`` lines are written to ``fout`` in their original order
    and ``fout`` is flushed.

    Raises:
        ValueError: if a coordinate field in the file cannot be parsed as
            comma-separated floats.
    """
    global g_geoDict, g_queryCount, g_ApiKeys

    lines = [fin.readline() for _ in range(numStats)]
    if reverse:
        lines.reverse()

    radius = 1
    prevGeo = None
    for iStat, line in enumerate(lines):
        cols = list(filter(validStr, map(str.strip, line.split('\t'))))
        if len(cols) < 2:
            # Not a station line (blank, short or past end of file).
            continue
        station = cols[0]

        if len(cols) >= 3:
            # Coordinates supplied by the file are a single (lat, lng) pair,
            # not a list of candidates. Previously they went through the
            # candidate path, which set the anchor to a bare float and made
            # the next distance check fail with TypeError. Use them directly
            # as the anchor and leave the line as it is.
            prevGeo = tuple(map(float, cols[2].split(',')))
            radius = 1
            continue

        candidates = None
        # NOTE(review): the first station processed is never looked up;
        # presumably it is expected to carry coordinates in the file - confirm.
        if iStat > 0:
            candidates = g_geoDict.get(station, None)
            if not candidates:
                try:
                    api_key = g_ApiKeys[g_queryCount % len(g_ApiKeys)]
                    candidates = list(geocode(station, api_key))
                except Exception as ex:
                    # Best effort: a failed lookup (including an empty key
                    # list) leaves the station unresolved; it must not abort
                    # the whole run.
                    candidates = None
                    warning("ex=" + str(ex))
                time.sleep(0.5)
                g_queryCount += 1
                # Persist after every query so an interrupted run keeps the
                # results obtained so far.
                g_geoDict[station] = candidates
                save_geoDict()

        if not prevGeo or not candidates:
            # Nothing to compare against, or nothing found: keep the line and
            # widen the search for the next station.
            radius += 1
            if candidates:
                prevGeo = candidates[0]
            continue

        print(u'iStat=%d Station="%s" radius=%d, curGeo=%s' %
              (iStat, station, radius, candidates))
        max_dist = radius * base_radius
        match = first_or_default(
            candidates, lambda g: dist(g, prevGeo) < max_dist)
        if match:
            lines[iStat] = (u"\t" + "%s\t%s\t%f,%f\n" %
                            (station, cols[1], match[0], match[1]))
            radius = 1
            prevGeo = match
        else:
            lines[iStat] = u"\t" + "%s\t%s\t\n" % (station, cols[1])
            radius += 1

    if reverse:
        # Restore file order before writing.
        lines.reverse()
    for out_line in lines:
        fout.write(out_line)
    fout.flush()
def process(fin, fout, reverse):
    """Copy bus headers from ``fin`` to ``fout`` and geocode their routes.

    A header line does not start with a tab and holds three tab-separated
    fields: the bus name and the station counts of the outbound and return
    routes. The header is echoed to ``fout`` and to stdout, then both routes
    are handed to ``process_route`` in that order. Lines starting with a tab
    that show up at this level are skipped. Reading stops at the first empty
    or whitespace-only line, or at end of file.
    """
    while True:
        line = fin.readline()
        if not validStr(line):
            break
        if line.startswith('\t'):
            continue
        # bus name declaration
        _bus_name, go_field, back_field = line.split('\t')
        station_counts = (int(go_field), int(back_field))
        fout.write(line)
        print(line.rstrip())
        for count in station_counts:
            process_route(fin, fout, count, reverse)
def main():
    """Run the geocoder on the input file named on the command line.

    The input path is ``sys.argv[1]``, defaulting to ``all_buses_6.txt``;
    output goes next to it with ``_out.txt`` replacing the extension. API
    keys are read from ``apikeys.json`` into ``g_ApiKeys`` and the geocoding
    cache is loaded before processing. Routes are processed in reverse
    station order.
    """
    global g_ApiKeys

    finName = sys.argv[1] if len(sys.argv) >= 2 else "all_buses_6.txt"
    foutName = os.path.splitext(finName)[0] + '_out' + '.txt'

    # json.load() no longer accepts an ``encoding`` keyword (removed in
    # Python 3.9, where it raises TypeError); set the encoding on open().
    with open("apikeys.json", mode="r", encoding="UTF-8") as f:
        g_ApiKeys = json.load(f)

    reverse = True
    load_geoDict()
    with codecs.open(finName, mode='r', encoding='UTF-8') as fin, \
            codecs.open(foutName, mode='w', encoding='UTF-8') as fout:
        process(fin, fout, reverse)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()