forked from geocompx/geocompr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
_12-ex.Rmd
240 lines (210 loc) · 9.34 KB
/
_12-ex.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
```{asis 12-ex-asis1, message=FALSE}
The solutions assume the following packages are attached (other packages will be attached when needed):
```
```{r 12-ex-e0, message=FALSE, warning=FALSE}
library(dplyr)
# library(kernlab)
library(mlr3)
library(mlr3learners)
library(mlr3extralearners)
library(mlr3spatiotempcv)
library(mlr3tuning)
library(qgisprocess)
library(terra)
# library(rlang)
library(sf)
library(tmap)
```
E1. Compute the following terrain attributes from the `elev` dataset loaded with `terra::rast(system.file("raster/ta.tif", package = "spDataLarge"))$elev` with the help of R-GIS bridges (see this [Chapter](https://r.geocompx.org/gis.html#gis)):
- Slope
- Plan curvature
- Profile curvature
- Catchment area
```{r 12-ex-e1-1, eval=FALSE}
# attach data
dem = terra::rast(system.file("raster/ta.tif", package = "spDataLarge"))$elev
algs = qgisprocess::qgis_algorithms()
dplyr::filter(algs, grepl("curvature", algorithm))
alg = "saga:slopeaspectcurvature"
qgisprocess::qgis_show_help(alg)
qgisprocess::qgis_arguments(alg)
# terrain attributes (ta)
out_nms = paste0(tempdir(), "/", c("slope", "cplan", "cprof"),
".sdat")
args = rlang::set_names(out_nms, c("SLOPE", "C_PLAN", "C_PROF"))
out = qgis_run_algorithm(alg, ELEVATION = dem, METHOD = 6,
UNIT_SLOPE = "[1] degree",
!!!args,
.quiet = TRUE
)
ta = out[names(args)] |> unlist() |> terra::rast()
names(ta) = c("slope", "cplan", "cprof")
# catchment area
dplyr::filter(algs, grepl("[Cc]atchment", algorithm))
alg = "saga:catchmentarea"
qgis_show_help(alg)
qgis_arguments(alg)
carea = qgis_run_algorithm(alg,
ELEVATION = dem,
METHOD = 4,
FLOW = file.path(tempdir(), "carea.sdat"))
# transform carea
carea = terra::rast(carea$FLOW[1])
log10_carea = log10(carea)
names(log10_carea) = "log10_carea"
# add log_carea and dem to the terrain attributes
ta = c(ta, dem, log10_carea)
```
E2. Extract the values from the corresponding output rasters to the `lsl` data frame (`data("lsl", package = "spDataLarge"`) by adding new variables called `slope`, `cplan`, `cprof`, `elev` and `log_carea` (see this [section](https://r.geocompx.org/spatial-cv.html#case-landslide) for details).
```{r 12-ex-e2, eval=FALSE}
# attach terrain attribute raster stack (in case you have skipped the previous
# exercise)
data("lsl", package = "spDataLarge")
ta = terra::rast(system.file("raster/ta.tif", package = "spDataLarge"))
lsl = dplyr::select(lsl, x, y, lslpts)
# extract values to points, i.e., create predictors
lsl[, names(ta)] = terra::extract(ta, lsl[, c("x", "y")]) |>
dplyr::select(-ID)
```
E3. Use the derived terrain attribute rasters in combination with a GLM to make a spatial prediction map similar to that shown in this [Figure](https://r.geocompx.org/spatial-cv.html#fig:lsl-susc).
Running `data("study_mask", package = "spDataLarge")` attaches a mask of the study area.
```{r 12-ex-e3, eval=FALSE}
# attach data (in case you have skipped exercises 1) and 2)
# landslide points with terrain attributes and terrain attribute raster stack
data("lsl", "study_mask", package = "spDataLarge")
ta = terra::rast(system.file("raster/ta.tif", package = "spDataLarge"))
# fit the model
fit = glm(lslpts ~ slope + cplan + cprof + elev + log10_carea,
data = lsl, family = binomial())
# make the prediction
pred = terra::predict(object = ta, model = fit, type = "response")
# make the map
lsl_sf = sf::st_as_sf(lsl, coords = c("x", "y"), crs = 32717)
lsl_sf = sf::st_as_sf(lsl, coords = c("x", "y"), crs = 32717)
hs = terra::shade(ta$slope * pi / 180,
terra::terrain(ta$elev, v = "aspect", unit = "radians"))
rect = tmaptools::bb_poly(raster::raster(hs))
bbx = tmaptools::bb(raster::raster(hs), xlim = c(-0.00001, 1),
ylim = c(-0.00001, 1), relative = TRUE)
# white raster to only plot the axis ticks, otherwise gridlines would be visible
tm_shape(hs, bbox = bbx) +
tm_grid(col = "black", n.x = 1, n.y = 1, labels.inside.frame = FALSE,
labels.rot = c(0, 90)) +
tm_raster(palette = "white", legend.show = FALSE) +
# hillshade
tm_shape(terra::mask(hs, study_mask), bbox = bbx) +
tm_raster(palette = gray(0:100 / 100), n = 100,
legend.show = FALSE) +
# prediction raster
tm_shape(terra::mask(pred, study_mask)) +
tm_raster(alpha = 0.5, palette = "Reds", n = 6, legend.show = TRUE,
title = "Susceptibility") +
# rectangle and outer margins
qtm(rect, fill = NULL) +
tm_layout(outer.margins = c(0.04, 0.04, 0.02, 0.02), frame = FALSE,
legend.position = c("left", "bottom"),
legend.title.size = 0.9)
```
E4. Compute a 100-repeated 5-fold non-spatial cross-validation and spatial CV based on the GLM learner and compare the AUROC values from both resampling strategies with the help of boxplots (see this [Figure](https://r.geocompx.org/spatial-cv.html#fig:boxplot-cv)).
Hint: You need to specify a non-spatial resampling strategy.
Another hint: You might want to solve Excercises 4 to 6 in one go with the help of `mlr3::benchmark()` and `mlr3::benchmark_grid()` (for more information, please refer to https://mlr3book.mlr-org.com/performance.html#benchmarking).
When doing so, keep in mind that the computation can take very long, probably several days.
This, of course, depends on your system.
Computation time will be shorter the more RAM and cores you have at your disposal.
```{r 12-ex-e4, eval=FALSE}
# attach data (in case you have skipped exercises 1) and 2)
data("lsl", package = "spDataLarge") # landslide points with terrain attributes
# create task
task = TaskClassifST$new(
id = "lsl_ecuador",
backend = mlr3::as_data_backend(lsl), target = "lslpts", positive = "TRUE",
coordinate_names = c("x", "y"),
coords_as_features = FALSE,
crs = 32717
)
# construct learners (for all subsequent exercises)
# GLM
lrn_glm = lrn("classif.log_reg", predict_type = "prob")
lrn_glm$fallback = lrn("classif.featureless", predict_type = "prob")
# SVM
# construct SVM learner (using ksvm function from the kernlab package)
lrn_ksvm = lrn("classif.ksvm", predict_type = "prob", kernel = "rbfdot",
type = "C-svc")
lrn_ksvm$fallback = lrn("classif.featureless", predict_type = "prob")
# specify nested resampling and adjust learner accordingly
# five spatially disjoint partitions
tune_level = rsmp("spcv_coords", folds = 5)
# use 50 randomly selected hyperparameters
terminator = trm("evals", n_evals = 50)
tuner = tnr("random_search")
# define the outer limits of the randomly selected hyperparameters
ps = ps(
C = p_dbl(lower = -12, upper = 15, trafo = function(x) 2^x),
sigma = p_dbl(lower = -15, upper = 6, trafo = function(x) 2^x)
)
at_ksvm = AutoTuner$new(
learner = lrn_ksvm,
resampling = tune_level,
measure = msr("classif.auc"),
search_space = ps,
terminator = terminator,
tuner = tuner
)
# QDA
lrn_qda = lrn("classif.qda", predict_type = "prob")
lrn_qda$fallback = lrn("classif.featureless", predict_type = "prob")
# SVM without tuning hyperparameters
vals = lrn_ksvm$param_set$values
lrn_ksvm_notune = lrn_ksvm$clone()
lrn_ksvm_notune$param_set$values = c(vals, C = 1, sigma = 1)
# define resampling strategies
# specify the reampling method, i.e. spatial CV with 100 repetitions and 5 folds
# -> in each repetition dataset will be splitted into five folds
# method: repeated_spcv_coords -> spatial partioning
rsmp_sp = rsmp("repeated_spcv_coords", folds = 5, repeats = 100)
# method: repeated_cv -> non-spatial partitioning
rsmp_nsp = rsmp("repeated_cv", folds = 5, repeats = 100)
# (spatial) cross-validataion
#****************************
# create your design
grid = benchmark_grid(tasks = task,
learners = list(lrn_glm, at_ksvm, lrn_qda,
lrn_ksvm_notune),
resamplings = list(rsmp_sp, rsmp_nsp))
# execute the cross-validation
library(future)
# execute the outer loop sequentially and parallelize the inner loop
future::plan(list("sequential", "multisession"),
workers = floor(availableCores() / 2))
set.seed(021522)
bmr = benchmark(grid,
store_backends = FALSE,
store_models = FALSE,
encapsulate = "evaluate")
# stop parallelization
future:::ClusterRegistry("stop")
# save your result, e.g. to
# saveRDS(bmr, file = "extdata/12-bmr.rds")
# plot your result
autoplot(bmr, measure = msr("classif.auc"))
```
E5. Model landslide susceptibility using a quadratic discriminant analysis (QDA).
Assess the predictive performance of the QDA.
What is the a difference between the spatially cross-validated mean AUROC value of the QDA and the GLM?
```{r 12-ex-e5, eval=FALSE}
# attach data (in case you have skipped exercise 4)
bmr = readRDS("extdata/12-bmr.rds")
# plot your result
autoplot(bmr, measure = msr("classif.auc"))
# QDA has higher AUROC values on average than GLM which indicates moderately
# non-linear boundaries
```
E6. Run the SVM without tuning the hyperparameters.
Use the `rbfdot` kernel with $\sigma$ = 1 and *C* = 1.
Leaving the hyperparameters unspecified in **kernlab**'s `ksvm()` would otherwise initialize an automatic non-spatial hyperparameter tuning.
```{r 12-ex-e6, eval=FALSE}
# attach data (in case you have skipped exercise 4)
bmr = readRDS("extdata/12-bmr.rds")
# plot your result
autoplot(bmr, measure = msr("classif.auc"))
```