-
Notifications
You must be signed in to change notification settings - Fork 1
/
subbasins.py
733 lines (589 loc) · 31.4 KB
/
subbasins.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
r"""
Delineation of watershed subbasins, using data from
MERIT-Basins and MERIT-Hydro.
Created by Matthew Heberger, May 2024.
See README for more detailed instructions.
Quick Start:
First, set parameters in the file config.py.
Run this script from the command line with two required arguments:
$ python subbasins.py outlets.csv testrun
or with a full file path as follows on Windows or Linux:
$ python subbasins.py C:\Users\matt\Desktop\outlets.csv test
$ python subbasins.py /home/files/outlets.csv test
or in Python as follows:
>> from subbasins import delineate
>> delineate('outlets.csv', 'testrun')
"""
# Standard Python libraries. See requirements.txt for recommended versions.
import argparse
import sys
from shapely.geometry import Point
import topojson
import warnings
# My stuff
from config import * # This file contains a bunch of variables to be set before running this script
from py.consolidate import consolidate_network, show_area_stats
from py.util import make_folders, get_megabasins, load_gdf, plot_basins, calc_area, \
find_repeated_elements, fix_polygon, calc_length, validate, save_network, \
write_geodata, PROJ_WGS84 # Contains a bunch of functions
from py.graph_tools import make_river_network, calculate_strahler_stream_order, calculate_shreve_stream_order, \
prune_node, upstream_nodes # Functions for working with river network information as a Python NetworkX graph
from py.merit_detailed import split_catchment
from py.plot_network import draw_graph
from py.fast_dissolve import dissolve_geopandas, buffer, close_holes
from os.path import isfile
import geopandas as gpd
import pandas as pd
import networkx as nx
# Shapely throws a bunch of FutureWarnings. Safe to ignore for now, as long as we
# are using a virtual environment, and use the library versions in requirements.txt.
warnings.simplefilter(action='ignore', category=FutureWarning)
PIXEL_AREA = 0.000000695 # Constant for the area of a single pixel in MERIT-Hydro, in decimal degrees
FILL_AREA_MAX = FILL_THRESHOLD * PIXEL_AREA
def get_wshed_rows(df: gpd.GeoDataFrame, outlet):
"""
Extracts rows of the gages GeoDataFrame for an outlet and any upstream points.
"""
start_index = df[df['id'] == outlet].index[0]
# Slice the DataFrame from the start_index to the end
subset_df = df.iloc[start_index:]
# Find the index of the first row where is_outlet is True
indices = list(subset_df[subset_df['is_outlet'] == True].index)
if len(indices) > 1:
end_index = indices[1] - 1
# Select all rows from start_index to end_index (inclusive)
result_df = subset_df.loc[0:end_index]
else:
result_df = subset_df
return result_df
def delineate(input_csv: str, output_prefix: str):
"""
Finds the watershed for a set of outlets.
Make sure to set the variables in `config.py` before running.
Version #2, where all the points are in a single watershed,
and we are interested in returning a set of subbasins.
Reads a list of outlet points from a .csv file.
The first row should contain the main watershed outlet.
Subsequent rows of the .csv should contain uptstream points
within the watershed that we will use to create *subbasin* outlets.
Outputs geodata (.shp, .gpkg, etc.) and optionally a CSV file with a summary of results
(including the watershed id, names, and areas).
"""
# Make sure the user folders from `config.py` exist (for output & plots). If not create them.
make_folders()
# Read the outlet points CSV file and put the data into a Pandas DataFrame
# (I call the outlet points gages, because I usually in delineated watersheds at streamflow gages)
gages_gdf = make_gages_gdf(input_csv)
# Create a filtered version with only the *outlets*
outlets_gdf = gages_gdf[gages_gdf['is_outlet'] == True]
# Get the megabasin(s) in which the points are located
# This returns a dictionary. Key: megabasin, Value: list of outlets that are in the megabasin
# This way, we can process the gages one megabasin at a time, so we only have to read geodata files once.
gage_basins_dict = get_megabasins(outlets_gdf)
G = None
subbasins_gdf = None
myrivers_gdf = None
# Iterate over the megabasins
for megabasin in gage_basins_dict.keys():
# Iterate over the outlets:
outlets = gage_basins_dict[megabasin]
if VERBOSE: print('Reading geodata for unit catchments in megabasin %s' % megabasin)
catchments_gdf = load_gdf("catchments", megabasin, True)
# The _network_ data is in the RIVERS file rather than the CATCHMENTS file
# (this is just how the MERIT-Basins authors did it)
if VERBOSE: print('Reading geodata for rivers in megabasin %s' % megabasin)
rivers_gdf = load_gdf("rivers", megabasin, True)
rivers_gdf.set_index('COMID', inplace=True)
# We wish to report the outlet point for each subbasin.
# We can get this information from end point of the river polylines.
rivers_gdf['end_point'] = rivers_gdf['geometry'].apply(lambda x: x.coords[0])
rivers_gdf['lng'] = rivers_gdf['end_point'].apply(lambda x: x[0])
rivers_gdf['lat'] = rivers_gdf['end_point'].apply(lambda x: x[1])
for outlet in outlets:
wshed_gages_gdf = get_wshed_rows(gages_gdf, outlet)
wshed_G, wshed_subbasins_gdf, wshed_rivers_gdf = get_watershed(wshed_gages_gdf, megabasin,
catchments_gdf, rivers_gdf)
# Merge the results with the master
if G is not None:
G = nx.compose(G, wshed_G)
subbasins_gdf = pd.concat([subbasins_gdf, wshed_subbasins_gdf])
myrivers_gdf = pd.concat([myrivers_gdf, wshed_rivers_gdf])
else:
G = wshed_G
subbasins_gdf = wshed_subbasins_gdf
myrivers_gdf = wshed_rivers_gdf
# Finally, write the results to disk
gages_list = gages_gdf['id'].tolist()
write_outputs(G, myrivers_gdf, subbasins_gdf, gages_list, output_prefix)
if NETWORK_DIAGRAMS:
draw_graph(G, f'plots/{output_prefix}_network_final')
if VERBOSE:
print("Ran succesfully!")
def get_watershed(gages_gdf: gpd.GeoDataFrame, megabasin: int, catchments_gdf, rivers_gdf):
"""
Finds the watershed and subbasins upstream of an outlet, including any intermediate locations
(such as gages) where the subbasins should be split.
Inputs:
gages_gdf: GeoDataFrame with the gages to delineate subbasins for
:return:
"""
def addnode(B: list, node_id):
""""
Recursive function to assemble the list of upstream unit catchments
B is a Python List of the unit catchments that make up our watershed.
B is for BASIN...
List items are `comid`s, unique identifiers of each unit catchment.
"""
# first, append the node to the list that is in the Basin, upstream_comids
B.append(node_id)
# next, check whether the fields up1, up2, up3, and up4 contain a node ID
up1 = rivers_gdf['up1'].loc[node_id]
if up1 != 0:
addnode(B, up1)
up2 = rivers_gdf['up2'].loc[node_id]
if up2 != 0:
addnode(B, up2)
up3 = rivers_gdf['up3'].loc[node_id]
if up3 != 0:
addnode(B, up3)
up4 = rivers_gdf['up4'].loc[node_id]
if up4 != 0:
addnode(B, up4)
# Perform an overlay analysis on gages (points) and the unit catchments (polygons)
# to find the corresponding unit catchment in which each gages is located.
# Adds the fields COMID and unitarea to `gages_gdf`
gages_list = gages_gdf['id'].tolist() # Get the list before doing the join.
num_gages = len(gages_list)
if VERBOSE:
print(f"Performing overlay analysis on {num_gages} outlet points in basin #{megabasin}")
catchments_gdf.reset_index(inplace=True)
try:
catchments_gdf.drop(columns=['index'], inplace=True)
except:
pass
gages_gdf = gpd.overlay(gages_gdf, catchments_gdf, how="intersection", make_valid=True)
gages_gdf.set_index('id', inplace=True)
gages_gdf.set_crs(crs=PROJ_WGS84)
# For any gages for which we could not find a unit catchment, add issue a warning
# Basically checking which rows do not appear after doing the overlay
gages_matched_list = gages_gdf.index.tolist()
for id in gages_list:
if id not in gages_matched_list:
raise Warning(f"Could not assign to a unit catchment to gage with id {id}")
# First, let us find the set of unit catchments upstream of the outlet.
terminal_node_id = gages_gdf.index[0]
# The terminal comid is the unit catchment that contains (overlaps) the outlet point
terminal_comid = gages_gdf['COMID'].iloc[0]
# Let upstream_comids be the list of unit catchments (and river reaches) that are in the basin
upstream_comids = []
# Add the first node, and the rest will be added recursively
addnode(upstream_comids, terminal_comid)
# Next, check that all the other points provided by the user are
# fall in unit catchments that are in this set. Otherwise, it means they are not upstream
# of the outlet, and therefore we cannot process them to get expected results.
gage_list = gages_gdf.index.tolist()
for id in gage_list:
comid = gages_gdf.at[id, 'COMID']
if comid not in upstream_comids:
gages_gdf.drop(id, inplace=True)
raise Warning(f"The point with id = {id} is not contained in the watershed of the first point.")
# subbasins_gdf is the set of unit catchments in our watershed. This will ultimately become our output
catchments_gdf.set_index('COMID', inplace=True)
subbasins_gdf = catchments_gdf.loc[upstream_comids]
# Add lat, lng, and NextDownID to subbasins_gdf.
subbasins_gdf = subbasins_gdf.join(rivers_gdf[['lat', 'lng', 'NextDownID']])
# Re-name the NextDownID field, and make sure it is an integer
subbasins_gdf.rename(columns={'NextDownID': 'nextdown'}, inplace=True)
subbasins_gdf['nextdown'] = subbasins_gdf['nextdown'].astype(int)
subbasins_gdf['custom'] = False # Adds a column that shows whether a subbasin is connected to a custom pour point
if PLOTS: plot_basins(subbasins_gdf, gages_gdf, 'before')
# For debugging mostly
# G = make_river_network(subbasins_gdf, terminal_comid)
# if NETWORK_DIAGRAMS: draw_graph(G, f'plots/{OUTPUT_PREFIX}_network_before')
# Create two copies of river network data!
# `allrivers_gdf` will contain all the available polylines in the watershed, a nice
# visual representation of the river network.
# `myrivers_gdf` will contain the topologically correct network where there is exactly
# one polyline per unit catchment.
# TODO: Will need to update this feature if we want to keep it
# if OUTPUT_ALL_RIVERS:
# allrivers_gdf = rivers_gdf.loc[upstream_comids]
# fname = f"{OUTPUT_DIR}/{output_prefix}_allrivers.{OUTPUT_EXT}"
# write_geodata(allrivers_gdf, fname)
# del allrivers_gdf
# With this version, we will try to create a topologically correct visual representation
# of the river network, where there is a 1:1 mapping of river reaches to subbasins.
myrivers_gdf = rivers_gdf.loc[upstream_comids]
# For now, put the split polygon geometry into a field in `gages_gdf`
gages_gdf['polygon'] = None
gages_gdf['polygon_area'] = 0
# Iterate over the gages, and run `split_catchment()` for every gage
for gage_id in gages_gdf.index:
comid = gages_gdf.at[gage_id, 'COMID']
lat = gages_gdf.at[gage_id, 'lat']
lng = gages_gdf.at[gage_id, 'lng']
catchment_poly = subbasins_gdf.loc[comid, 'geometry']
is_leaf = rivers_gdf.at[comid, 'up1'] == 0 # A leaf is an unit catchment with no upstream neighbor.
# SPLIT
node_poly, lat_snap, lng_snap = split_catchment(gage_id, megabasin, lat, lng, catchment_poly, is_leaf)
gages_gdf.at[gage_id, 'polygon'] = node_poly
gages_gdf.at[gage_id, 'lat_snap'] = lat_snap
gages_gdf.at[gage_id, 'lng_snap'] = lng_snap
# Find the area of the clipped unit catchment and insert value into GeoDataFrame
area = calc_area(node_poly)
gages_gdf.at[gage_id, 'polygon_area'] = round(area, 1)
subbasins_gdf = update_split_catchment_geo(gage_id, gages_gdf, myrivers_gdf, rivers_gdf, subbasins_gdf)
# Now, we no longer need the downstream portion of the terminal unit catchment
# so remove its row from the subbasins GeoDataFrame
subbasins_gdf = subbasins_gdf.drop(terminal_comid)
subbasins_gdf.at[terminal_node_id, 'nextdown'] = 0
# Create a NETWORK GRAPH of the river basin.
if VERBOSE: print("Creating Network GRAPH")
G = make_river_network(subbasins_gdf, terminal_comid)
if VERBOSE: show_area_stats(G)
G = calculate_shreve_stream_order(G)
G = calculate_strahler_stream_order(G)
# Add an attribute to the Graph, identifying the 'custom' nodes added by the script
for gage in gage_list:
G.nodes[gage]['custom'] = True
# Draw the network before consolidating? Mostly useful for debugging.
# if NETWORK_DIAGRAMS: draw_graph(G, f'plots/{OUTPUT_PREFIX}_premerge')
# CHECK FOR Null Geometries.
# If the user has placed one of their points very close to an existing basin outlet,
# we can get a null geometry. This then creates problems. So remove it.
null_nodes = subbasins_gdf.index[subbasins_gdf['geometry'].is_empty].to_list()
subbasins_gdf.drop(null_nodes, inplace=True)
# Remove the null nodes from the graph
for node in null_nodes:
prune_node(G, node)
if len(null_nodes) > 0:
if VERBOSE: print(f"Pruned {len(null_nodes)} empty unit catchments from the river network.")
print(null_nodes)
# if NETWORK_DIAGRAMS: draw_graph(G, f'plots/{OUTPUT_PREFIX}_network_pruned')
null_rivers = myrivers_gdf.index[myrivers_gdf['geometry'].is_empty].tolist()
myrivers_gdf.drop(null_rivers, inplace=True)
# SIMPLIFY the geodata?
# Can speed up subsequent geo operations, but sometimes also causes problems!
if SIMPLIFY:
topo = topojson.Topology(subbasins_gdf, prequantize=False)
subbasins_gdf = topo.toposimplify(SIMPLIFY_TOLERANCE).to_gdf()
# The simplification often creates topology errors (ironic!), so try my trick to fix.
subbasins_gdf.geometry = subbasins_gdf.geometry.apply(lambda p: buffer(p))
topo = topojson.Topology(myrivers_gdf, prequantize=False)
myrivers_gdf = topo.toposimplify(SIMPLIFY_TOLERANCE).to_gdf()
# Add attributes for subbasin area and river reach length to the Graph
# We will keep track of these quantities as we delete and merge nodes
for node in list(G.nodes):
area = subbasins_gdf.at[node, 'unitarea']
G.nodes[node]['area'] = round(area, 1)
try:
length = myrivers_gdf.at[node, 'lengthkm']
except Exception:
length = 0
G.nodes[node]['length'] = round(length, 1)
# If the user wants larger subbasins, we can merge adjacent unit catchments.
# I call this "consolidating the river network."
if CONSOLIDATE:
G, MERGES, rivers2merge, rivers2delete = consolidate_network(G, threshold_area=MAX_AREA)
# if NETWORK_DIAGRAMS: draw_graph(G, f'plots/{OUTPUT_PREFIX}_network_final')
subbasins_gdf['target'] = subbasins_gdf.index
# Dissolve unit catchments based on information in MERGES.
for node, target in MERGES.items():
subbasins_gdf.at[node, 'target'] = target
agg = {
'unitarea': 'sum',
'lat': 'last',
'lng': 'last',
}
if VERBOSE: print("Dissolving geometries. Can be slow. Please wait...")
subbasins_gdf = subbasins_gdf.dissolve(by="target", aggfunc=agg)
subbasins_gdf.geometry = subbasins_gdf.geometry.apply(lambda p: buffer(p))
subbasins_gdf.geometry = subbasins_gdf.geometry.apply(lambda p: close_holes(p, 0))
subbasins_gdf.reset_index(inplace=True)
subbasins_gdf.rename(columns={'target': 'comid'}, inplace=True)
subbasins_gdf.set_index('comid', inplace=True)
# After the dissolve operation, no way to preserve correct information in column 'nextdown'
# But it is present in the Graph, so update the GeoDataFrame subbasins_gdf with that information
# Add column `nextdownid` and the stream orders based on data in the graph
subbasins_gdf['nextdown'] = 0
for idx in subbasins_gdf.index:
try:
nextdown = list(G.successors(idx))[0]
except Exception:
nextdown = 0
subbasins_gdf.at[idx, 'nextdown'] = nextdown
# Round all the areas to four decimals (just for appearances)
subbasins_gdf['unitarea'] = subbasins_gdf['unitarea'].round(1)
subbasins_gdf['lat'] = subbasins_gdf['lat'].round(4)
subbasins_gdf['lng'] = subbasins_gdf['lng'].round(4)
if CONSOLIDATE:
# Now handle the river reaches, deleting some rows, dissolving others.
myrivers_gdf.drop(rivers2delete, inplace=True)
if len(rivers2merge) > 0:
myrivers_gdf['target'] = myrivers_gdf.index
for target, node_list in rivers2merge.items():
for node in node_list:
myrivers_gdf.at[node, 'target'] = target
agg = {'lengthkm': 'sum'}
myrivers_gdf = myrivers_gdf.dissolve(by="target", aggfunc=agg)
myrivers_gdf.reset_index(inplace=True)
myrivers_gdf.rename(columns={'target': 'comid'}, inplace=True)
myrivers_gdf.set_index('comid', inplace=True)
# We can now delete the river reach segment that belonged to the downstream portion of the
# terminal unit catchment, as we no longer need it.
try:
myrivers_gdf.drop(terminal_comid, inplace=True)
except Exception:
pass
# Add the fields `nextdown` and the stream orders to the rivers.
for idx in myrivers_gdf.index:
try:
nextdown = list(G.successors(idx))[0]
except Exception:
nextdown = 0
myrivers_gdf.at[idx, 'nextdown'] = nextdown
myrivers_gdf.at[idx, 'strahler_order'] = G.nodes[idx]['strahler_order']
myrivers_gdf.at[idx, 'shreve_order'] = G.nodes[idx]['shreve_order']
# Update the lat/lng coordinates of the subbasin outlets
subbasins_gdf['custom'] = False
subbasins_gdf['strahler_order'] = 0
subbasins_gdf['shreve_order'] = 0
for idx in subbasins_gdf.index:
subbasins_gdf.at[idx, 'strahler_order'] = G.nodes[idx]['strahler_order']
subbasins_gdf.at[idx, 'shreve_order'] = G.nodes[idx]['shreve_order']
if idx in rivers_gdf.index:
subbasins_gdf.at[idx, 'lat'] = rivers_gdf.at[idx, 'lat']
subbasins_gdf.at[idx, 'lng'] = rivers_gdf.at[idx, 'lng']
for gage in gage_list:
subbasins_gdf.at[gage, 'custom'] = True
# Before exporting geodata, make 'comid' a regular column
try:
subbasins_gdf.drop(columns=['COMID'], inplace=True)
except Exception:
pass
subbasins_gdf.reset_index(inplace=True)
subbasins_gdf.rename(columns={'index': 'comid'}, inplace=True)
myrivers_gdf.reset_index(inplace=True)
myrivers_gdf.rename(columns={'index': 'comid'}, inplace=True)
# The rivers data will no longer be accurate, so drop these columns
cols = ["lengthdir", "sinuosity", "slope", "uparea", "order", "strmDrop_t", "slope_taud", "NextDownID",
"maxup", "up1", "up2", "up3", "up4", "end_point", "lat", "lng"]
try:
myrivers_gdf.drop(columns=cols, inplace=True)
except Exception:
pass
return G, subbasins_gdf, myrivers_gdf
def update_split_catchment_geo(gage_id, gages_gdf, myrivers_gdf, rivers_gdf, subbasins_gdf):
# Handle the case where we had more than one gage point in a single unit catchment.
# (1) figure out their order from upstream to downstream based on split catchment area
# (2) clip the polygons as appropriate
# (3) insert this data into subbasins_gdf, including polygon geometry and `nextdown` gage id
# Get a list of unit catchments that contains outlet points (may contain duplicate items)
comids = gages_gdf['COMID'].tolist()
# Get a list of unit catchments which contains more than one outlet point
repeats = find_repeated_elements(comids)
# Get a list of unit catchments that contain a single outlet point
singles = [item for item in comids if item not in repeats]
# Drop columns that we no longer need.
gages_gdf.drop(columns=['lat', 'lng', 'geometry', 'unitarea'], inplace=True)
# Rename certain columns to make it identical to `subbasins_gdf` so we can concatenate the rows.
rnmap = {'lat_snap': 'lat',
'lng_snap': 'lng',
'polygon_area': 'unitarea',
'polygon': 'geometry'
}
gages_gdf.rename(columns=rnmap, inplace=True)
gages_gdf.set_crs(crs=PROJ_WGS84)
gages_gdf.set_geometry(col="geometry")
# First, handle the gages where there is only one gage in a unit catchment (standard treatment)
# The new unit catchments (or nodes in the network) will always be upstream of the unit catchment
# that we are inserting it into
# insert these rows into `subbasins_gdf`
if len(singles) > 0:
selected_rows = gages_gdf[gages_gdf['COMID'].isin(singles)]
selected_rows.set_crs(crs=PROJ_WGS84) # Just needed to eliminate an annoying warning
# This creates the dictionary `new_nodes` that maps gage id : unit catchment comid
new_nodes = selected_rows['COMID'].to_dict()
selected_rows.drop(columns=['COMID'], inplace=True)
# Add the column `nextdown` to our temporary GeoDataFrame selected_rows to prevent type problems below
selected_rows['nextdown'] = -999
# Here is where we add the newly-created split catchments corresponding to our outlet points.
subbasins_gdf = pd.concat([subbasins_gdf, selected_rows])
# Find and fix the 'nextdown' attribute for any nodes that were u/s of terminal catchments
# into which we just inserted our nodes.
# Add the column `nextdown`; we will populate it below
for node, comid in new_nodes.items():
rows = subbasins_gdf['nextdown'] == comid
subbasins_gdf.loc[rows, 'nextdown'] = node
subbasins_gdf.at[node, 'nextdown'] = comid
# Subtract the polygon geometry of the split catchment from its parent unit catchment
comid_poly = subbasins_gdf.loc[comid, 'geometry']
node_poly = subbasins_gdf.loc[node, 'geometry']
updated_poly = comid_poly.difference(node_poly)
updated_poly = fix_polygon(updated_poly)
subbasins_gdf.loc[comid, 'geometry'] = updated_poly
# Split the river polyline at the new unit catchment boundary
# First, get the piece in what is left of the old unit catchment
comid_line = rivers_gdf.at[comid, 'geometry']
updated_line = comid_line.intersection(updated_poly)
length = calc_length(updated_line)
myrivers_gdf.at[comid, 'geometry'] = updated_line
myrivers_gdf.at[comid, 'lengthkm'] = length
# Now get the river reach polyline for the new node
node_line = comid_line.intersection(node_poly)
length = calc_length(node_line)
myrivers_gdf.at[node, 'geometry'] = node_line
myrivers_gdf.at[node, 'lengthkm'] = length
# Next, handle the case where there are multiple gages in a single unit catchment (special treatment)
# We need to handle these one unit catchment at a time.
# I call these nested outlets for want of a better name.
# (Note: It is not really possible to understand the following code without a picture of what it is doing!!!)
for comid in repeats:
# Find all the gages that fall in this unit catchment.
gages_set = gages_gdf[gages_gdf['COMID'] == comid]
gages_set.set_crs(crs=PROJ_WGS84)
# We want to handle these in order from downstream to upstream.
# This is the same as largest area to smallest area
gages_set.sort_values(by='unitarea', inplace=True)
# This one-liner fills in the correct network connection information for nested outlets
gages_set['nextdown'] = gages_set.index.to_series().shift(-1).fillna(comid)
gages_set.sort_values(by='unitarea', ascending=False, inplace=True)
subbasins_gdf = pd.concat([subbasins_gdf, gages_set])
# Get the (whole) river polyline in the original, unsplit unit catchment, so we can split into pieces
comid_poly = subbasins_gdf.at[comid, 'geometry']
comid_line = rivers_gdf.at[comid, 'geometry']
# Get a list of the gages
gages = gages_set.index
# Update the catchment polygon for the base unit catchment by subtracting the *first* clipped catchment
first_node = gages[0]
poly1 = gages_set.at[first_node, 'geometry']
updated_poly = comid_poly.difference(poly1)
subbasins_gdf.at[comid, 'geometry'] = updated_poly
area = calc_area(updated_poly)
subbasins_gdf.at[comid, 'unitarea'] = area
# Update the river polyline for the base unit catchment
node_line = comid_line.intersection(updated_poly)
myrivers_gdf.at[comid, 'geometry'] = node_line
length = calc_length(node_line)
myrivers_gdf.at[comid, 'lengthkm'] = length
# Update the last (most upstream, smallest) unit catchment's polygon and line
n = len(gages)
last_node = gages[n - 1]
last_node_poly = gages_set.at[last_node, 'geometry']
subbasins_gdf.at[last_node, 'geometry'] = last_node_poly
area = calc_area(last_node_poly)
subbasins_gdf.at[last_node, 'unitarea'] = area
node_line = comid_line.intersection(last_node_poly)
myrivers_gdf.at[last_node, 'geometry'] = node_line
length = calc_length(node_line)
myrivers_gdf.at[last_node, 'lengthkm'] = length
for i in range(0, n - 1):
# Handle the unit catchments from largest to smallest
node_poly = gages_set.at[gages[i], 'geometry']
nextup_poly = gages_set.at[gages[i + 1], 'geometry']
updated_poly = node_poly.difference(nextup_poly)
updated_poly = fix_polygon(updated_poly)
subbasins_gdf.at[gages[i], 'geometry'] = updated_poly
area = calc_area(updated_poly)
subbasins_gdf.at[gage_id, 'unitarea'] = area
# Insert the new, clipped river reach polyline into our rivers GeoDataFrame
node_line = comid_line.intersection(updated_poly)
myrivers_gdf.at[gages[i], 'geometry'] = node_line
length = calc_length(node_line)
myrivers_gdf.at[gages[i], 'lengthkm'] = length
# After the last nested gage, we need to fix the connection info for
# any rows that previously had the unit catchment with comid as its 'nextdown'
rows = subbasins_gdf['nextdown'] == comid
indices = list(rows.index[rows])
indices.remove(first_node)
subbasins_gdf.loc[indices, 'nextdown'] = last_node
return subbasins_gdf
def make_gages_gdf(input_csv: str) -> gpd.GeoDataFrame:
"""
Reads user data from a CSV file containing information about the desired watershed outlet points.
and returns a GeoPandas GeoDataFrame where the geometry field contains XY points in unprojected
lat, lng (CRS 4326).
"""
# Check that the CSV file is there
if not isfile(input_csv):
raise Exception(f"Could not find your outlets file at: {input_csv}")
if VERBOSE: print(f"Reading your outlets data in: {input_csv}")
gages_df = pd.read_csv(input_csv, header=0, skipinitialspace=True,
dtype={'id': 'str', 'lat': 'float', 'lng': 'float'})
# Check that the CSV file includes at a minimum: id, lat, lng and that all values are appropriate
validate(gages_df)
# Convert gages_df to a GeoPandas GeoDataFrame (adds geography, lets us do geo. operations)
coordinates = [Point(xy) for xy in zip(gages_df['lng'], gages_df['lat'])]
gages_gdf = gpd.GeoDataFrame(gages_df, crs=PROJ_WGS84, geometry=coordinates)
return gages_gdf
def write_outputs(G, myrivers_gdf, subbasins_gdf, gages_list, output_prefix):
# (0) Save the river network GRAPH
if SAVE_NETWORK:
save_network(G, output_prefix, NETWORK_FILE_EXT)
# Save the GEODATA for (1) subbasins, (2) outlets, and (3) rivers
# (1) SUBBASINS
fname = f"{OUTPUT_DIR}/{output_prefix}_subbasins.{OUTPUT_EXT}"
write_geodata(subbasins_gdf, fname)
# (2) Get the OUTLETS data and write it to disk
outlets_gdf = subbasins_gdf.copy()
outlets_gdf['geometry'] = outlets_gdf.apply(lambda row: Point(row['lng'], row['lat']), axis=1)
fname = f"{OUTPUT_DIR}/{output_prefix}_outlets.{OUTPUT_EXT}"
write_geodata(outlets_gdf, fname)
# (3) Write the RIVERS data to disk.
fname = f"{OUTPUT_DIR}/{output_prefix}_rivers.{OUTPUT_EXT}"
write_geodata(myrivers_gdf, fname)
# Finally, return the larger watersheds for each outlet point, if desired
if WATERSHEDS:
create_watersheds(G, gages_list, subbasins_gdf)
def create_watersheds(G, gages_list, subbasins_gdf):
"""
Creates the watersheds (single polygon) upstream of each user outlet point
and writes geodata to disk as watershed_##.gpkg
"""
subbasins_gdf.set_index('comid', inplace=True)
if VERBOSE: print(f"Creating MERGED watersheds geodata for {len(gages_list)} outlets.")
# Iterate over each of the user's nodes
for node in gages_list:
if VERBOSE: print(f"Creating watershed for outlet: {node}")
upnodes = upstream_nodes(G, node)
upnodes.append(node)
mysubs_gdf = subbasins_gdf.loc[upnodes]
if len(upnodes) == 1:
watershed_gdf = mysubs_gdf
else:
mysubs_gdf.geometry = mysubs_gdf.geometry.apply(lambda p: buffer(p))
watershed_gs = dissolve_geopandas(mysubs_gdf)
watershed_gdf = gpd.GeoDataFrame(watershed_gs, columns=['geometry'])
watershed_gdf['id'] = node
if FILL:
watershed_gdf.geometry = watershed_gdf.geometry.apply(lambda p: close_holes(p, FILL_AREA_MAX))
fname = f"{OUTPUT_DIR}/wshed_{node}.{OUTPUT_EXT}"
write_geodata(watershed_gdf, fname)
def _run_from_terminal():
"""
Routine which is run when you call subbasins.py from the command line.
should be run like:
python subbasins.py outlets.csv run_name
"""
# Create the parser for command line inputs.
description = "Delineate subbasins using data in and input CSV file. Writes " \
"a set of output files beginning with the output prefix string."
parser = argparse.ArgumentParser(description=description)
# Add the arguments
parser.add_argument('input_csv', help="Input CSV filename, for example 'gages.csv'")
parser.add_argument('output_prefix', help="Output prefix, a string. The output files will start with this string")
# Parse the arguments
args = parser.parse_args()
# Call the main function, passing the command line arguments
delineate(args.input_csv, args.output_prefix)
def main():
# Run directly, for convenience or during development and debugging
input_csv = 'outlets.csv'
out_prefix = 'iceland'
delineate(input_csv, out_prefix)
if __name__ == "__main__":
if len(sys.argv) > 1:
# Run with command-line arguments
_run_from_terminal()
else:
main()