-
Notifications
You must be signed in to change notification settings - Fork 137
/
layer-stats
executable file
·188 lines (170 loc) · 7.27 KB
/
layer-stats
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/usr/bin/env python
"""
Show per zoom statistics for some column (field) in a single layer.
Usage:
layer-stats frequency <tileset> <layer> <column>...
([--zoom=<zoom>]... | [--minzoom=<min>] [--maxzoom=<max>])
[--pghost=<host>] [--pgport=<port>] [--dbname=<db>]
[--user=<user>] [--password=<password>] [--verbose]
layer-stats toplength <tileset> <layer> <column> [--max-count=<c>]
([--zoom=<zoom>]... | [--minzoom=<min>] [--maxzoom=<max>])
[--pghost=<host>] [--pgport=<port>] [--dbname=<db>]
[--user=<user>] [--password=<password>] [--verbose]
layer-stats variance <tileset> <layer> <column>
([--zoom=<zoom>]... | [--minzoom=<min>] [--maxzoom=<max>])
[--pghost=<host>] [--pgport=<port>] [--dbname=<db>]
[--user=<user>] [--password=<password>] [--verbose]
layer-stats --help
layer-stats --version
Methods:
frequency Shows how often each unique value occurs in a layer's column.
If more than one column is given, shows unique combinations.
toplength Shows the longest N values for a given layer's column.
variance Shows a few statistical metrics for a column's numeric value.
Options:
<tileset> Tileset definition yaml file
<column> Columns to analyze, could be more than one.
<layer> Which layer to examine
-z --zoom=<zoom> Limit testing to a specific zoom. If set, ignores min/max.
-m --minzoom=<min> Test tiles in zooms more or equal to this value [default: 0]
-n --maxzoom=<max> Test tiles in zooms less or equal to this value [default: 14]
--max-count=<max> For toplength, how many longest values to show [default: 30]
-v --verbose Print additional debugging information
--help Show this screen.
--version Show version.
PostgreSQL Options:
-h --pghost=<host> Postgres hostname. By default uses PGHOST env or "localhost" if not set.
-P --pgport=<port> Postgres port. By default uses PGPORT env or "5432" if not set.
-d --dbname=<db> Postgres db name. By default uses PGDATABASE env or "openmaptiles" if not set.
-U --user=<user> Postgres user. By default uses PGUSER env or "openmaptiles" if not set.
--password=<password> Postgres password. By default uses PGPASSWORD env or "openmaptiles" if not set.
These legacy environment variables should not be used, but they are still supported:
POSTGRES_HOST, POSTGRES_PORT, POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD
"""
import asyncio
import re
from collections import defaultdict
import asyncpg
# noinspection PyProtectedMember
from docopt import docopt, DocoptExit
from tabulate import tabulate
import openmaptiles
from openmaptiles.pgutils import parse_pg_args, get_postgis_version, PgWarnings, \
print_query_error
from openmaptiles.sqltomvt import MvtGenerator
from openmaptiles.tileset import Tileset
from openmaptiles.utils import parse_zoom_list
async def main(args):
tileset_path = args['<tileset>']
pghost, pgport, dbname, user, password = parse_pg_args(args)
layer = args.layer
columns = args.column
max_count = args['--max-count']
for c in columns:
if not re.match(r'^[a-z_][a-z0-9_]*$', c):
raise DocoptExit(f"Column '{c}' is invalid")
zooms = parse_zoom_list(args['--zoom'], args['--minzoom'], args['--maxzoom'])
verbose = args['--verbose']
tileset = Tileset.parse(tileset_path)
conn = await asyncpg.connect(
database=dbname, host=pghost, port=pgport, user=user, password=password,
)
pg_warnings = PgWarnings(conn, delay_printing=True)
mvt = MvtGenerator(
tileset,
zoom=None, x=None, y=None,
layer_ids=[layer],
postgis_ver=await get_postgis_version(conn),
)
layer_def = tileset.layers_by_id[layer]
if layer_def.has_localized_names:
# Alter stored query to hide localized names
layer_def.definition['layer']['datasource']['query'] = \
layer_def.raw_query.format(name_languages='NULL as _hidden_names_')
raw_query = mvt.layer_to_query(layer_def, to_mvt_geometry=False)
query = mvt.substitute_sql(raw_query, 'CAST($1 as int)',
mvt.bbox(0, 0, 0)).strip()
if len(columns) > 1 and not args.frequency:
raise DocoptExit(f"The 'toplength' and 'variance' methods require "
f'a single column, but {len(columns)} was given')
if args.frequency:
method = 'value occurrence'
fields = ','.join(columns)
query = f"""
SELECT {fields}, count(*) AS _result
FROM {query}
GROUP BY {fields}
ORDER BY {fields};
""".strip()
elif args.toplength:
method = 'longest field values'
query = f"""
SELECT DISTINCT {columns[0]}, length( {columns[0]} ) AS _result
FROM {query}
WHERE length( {columns[0]} ) > 0
ORDER BY length( {columns[0]} ) DESC NULLS LAST
LIMIT {max_count};
""".strip()
elif args.variance:
method = 'field value variance'
query = f"""
SELECT
'z' || $1 as zoom,
COUNT( {columns[0]} ) as count,
MIN( {columns[0]} ) as min,
MAX( {columns[0]} ) as max,
AVG( {columns[0]} ) as avg,
STDDEV( {columns[0]} ) as stddev,
VARIANCE( {columns[0]} ) as variance
FROM {query};
""".strip()
else:
raise DocoptExit('Unknown method used')
if verbose:
print(f'\n======= Querying layer {layer} (#{layer_def.index}) =======\n'
f'{query}')
if args.variance:
results = []
else:
results = defaultdict(dict)
for zoom in zooms:
try:
query_result = await conn.fetch(query, zoom)
except asyncpg.PostgresError as err:
print_query_error(f'ERROR in {layer} layer',
err, pg_warnings, verbose, query)
return
for row in query_result:
if args.variance:
if row['count']:
results.append(dict(row))
else:
key = tuple(row[v] for v in columns)
results[key][zoom] = row['_result']
if pg_warnings.messages:
print(f'======= Layer {layer} has warnings at zoom {zoom} =======')
pg_warnings.print()
print(f"======= Analyzing {method} in layer '{layer}' "
f"for field{'s' if len(columns) > 1 else ''} [{', '.join(columns)}] =======")
if results:
if args.variance:
values = results
headers = 'keys'
else:
existing_zooms = set()
values = []
for key, zoom_dict in results.items():
vals = {v: fmt_val(key[i]) for i, v in enumerate(columns)}
for z, v in zoom_dict.items():
vals[f'z{z}'] = v
existing_zooms.add(z)
values.append(vals)
headers = {v: v for v in
(columns + [f'z{z}' for z in sorted(existing_zooms)])}
print(tabulate(values, headers=headers))
else:
print('No results were found')
def fmt_val(val):
return '<null>' if val is None else '<blank>' if val == '' else val
if __name__ == '__main__':
asyncio.run(main(docopt(__doc__, version=openmaptiles.__version__)))