-
Notifications
You must be signed in to change notification settings - Fork 5
/
hxcd.py
201 lines (152 loc) · 5.87 KB
/
hxcd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 25 15:09:51 2021
@author: heatherkay
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame
from scipy.optimize import curve_fit
import geopandas as gpd
import glob
from os import path
from scipy.stats import gaussian_kde
def grid_hxcd(folderin, fileout, naming=4, eco_loc=2):
    """
    Calculate per-polygon canopy height and canopy density statistics and
    write one row per polygon (intersection of 1 degree grid and wwf
    ecoregions) to a csv file.

    NOTE(review): the original description said "mean of top 10% of canopy
    height and canopy density", but the code takes the 0.10 quantile of each
    column (a single scalar, so the subsequent .mean() was a no-op). The
    quantile behaviour is preserved here — confirm which was intended.

    Parameters
    ----------
    folderin: string
        Filepath for folder with shapefiles ready for analysis
    fileout: string
        Filepath for results file ending '.csv'
    naming: int
        Section of filename to obtain ID (here grid number). Obtained
        by splitting filename by '_' and indexing
        Default = 4
    eco_loc: int
        Section of filename to obtain ecoregion (if applicable).
        Obtained as with naming
        Default = 2
    """
    fileList = glob.glob(folderin + '*.shp')
    # Collect result rows in a plain list: DataFrame.append was removed in
    # pandas 2.0 and repeated appends were quadratic anyway.
    rows = []
    for file in fileList:
        df = gpd.read_file(file)
        if df.empty:
            continue
        # Grid ID and ecoregion are encoded in the filename, '_'-separated.
        hd, tl = path.split(file)
        shp_lyr_name = path.splitext(tl)[0]
        name_comp = shp_lyr_name.split('_')
        name = name_comp[naming]
        eco = name_comp[eco_loc]
        print(name)
        print(eco)
        # Keep only strictly positive heights: the original kept >= 0, so
        # log(0) produced -inf and corrupted the quantile thresholds below.
        test2 = df[df['i_h100'] > 0]
        if test2.empty:
            continue
        # Log-transform H_100 to normalise the skewed distribution.
        test2a = test2.assign(log_i_h100=np.log(test2['i_h100']))
        # Trim to the central 90%: drop data outside the 5%/95% quantiles
        # of the logged heights.
        a = np.quantile(test2a['log_i_h100'], 0.95)
        b = np.quantile(test2a['log_i_h100'], 0.05)
        final = test2a[(test2a.log_i_h100 > b) & (test2a.log_i_h100 < a)]
        if final.empty:
            continue
        footprints = len(final['i_h100'])
        # Skip polygons with too few footprints for a reliable statistic.
        if footprints < 100:
            print(name + '_' + eco)
            continue
        # 10th-percentile height and canopy density (scalars; see NOTE above).
        mean_h = np.quantile(final['i_h100'], 0.10)
        mean_cd = np.quantile(final['i_cd'], 0.10)
        rows.append({'eco': eco, 'ID': name, 'height': mean_h,
                     'cd': mean_cd, 'result': mean_h * mean_cd,
                     'deg_free': footprints})
    results = pd.DataFrame(rows, columns=['eco', 'ID', 'height', 'cd',
                                          'result', 'deg_free'])
    results.to_csv(fileout)
def grid_mean_hxcd(folderin, fileout, naming=4, eco_loc=2):
    """
    Calculate mean canopy height and canopy density for each polygon
    (intersection of 1 degree grid and wwf ecoregions) and write one row
    per polygon to a csv file.

    Parameters
    ----------
    folderin: string
        Filepath for folder with shapefiles ready for analysis
    fileout: string
        Filepath for results file ending '.csv'
    naming: int
        Section of filename to obtain ID (here grid number). Obtained
        by splitting filename by '_' and indexing
        Default = 4
    eco_loc: int
        Section of filename to obtain ecoregion (if applicable).
        Obtained as with naming
        Default = 2
    """
    fileList = glob.glob(folderin + '*.shp')
    # Collect result rows in a plain list: DataFrame.append was removed in
    # pandas 2.0 and repeated appends were quadratic anyway.
    rows = []
    for file in fileList:
        df = gpd.read_file(file)
        if df.empty:
            continue
        # Grid ID and ecoregion are encoded in the filename, '_'-separated.
        hd, tl = path.split(file)
        shp_lyr_name = path.splitext(tl)[0]
        name_comp = shp_lyr_name.split('_')
        name = name_comp[naming]
        eco = name_comp[eco_loc]
        print(name)
        print(eco)
        # Keep only strictly positive heights: the original kept >= 0, so
        # log(0) produced -inf and corrupted the quantile thresholds below.
        test2 = df[df['i_h100'] > 0]
        if test2.empty:
            continue
        # Log-transform H_100 to normalise the skewed distribution.
        test2a = test2.assign(log_i_h100=np.log(test2['i_h100']))
        # Trim to the central 90%: drop data outside the 5%/95% quantiles
        # of the logged heights.
        a = np.quantile(test2a['log_i_h100'], 0.95)
        b = np.quantile(test2a['log_i_h100'], 0.05)
        final = test2a[(test2a.log_i_h100 > b) & (test2a.log_i_h100 < a)]
        if final.empty:
            continue
        footprints = len(final['i_h100'])
        # Skip polygons with too few footprints for a reliable statistic.
        if footprints < 100:
            print(name + '_' + eco)
            continue
        # Plain means over the trimmed data (contrast grid_hxcd's quantiles).
        mean_h = final['i_h100'].mean()
        mean_cd = final['i_cd'].mean()
        rows.append({'eco': eco, 'ID': name, 'height': mean_h,
                     'cd': mean_cd, 'result': mean_h * mean_cd,
                     'deg_free': footprints})
    results = pd.DataFrame(rows, columns=['eco', 'ID', 'height', 'cd',
                                          'result', 'deg_free'])
    results.to_csv(fileout)