forked from JuliaAI/MLJClusteringInterface.jl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
runtests.jl
108 lines (88 loc) · 2.81 KB
/
runtests.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import Clustering
import Distances
import LinearAlgebra: norm
using MLJBase
using MLJTestIntegration
using MLJClusteringInterface
using Random: seed!
using Test
# Seed the global RNG so k-means/k-medoids initializations are reproducible.
seed!(132442)
# Crabs morphology dataset; the target `y` is unused by the (unsupervised) clusterers.
X, y = @load_crabs
# # K_MEANS
@testset "KMeans" begin
    model = KMeans()
    fitresult, cache, report = fit(model, 1, X)
    Xmat = matrix(X)
    # `transform` returns, for each point, the squared distance to every center.
    dists = matrix(transform(model, fitresult, X))
    centers = fitresult[1]  # centers stored column-wise
    # distance from first point to second center
    @test dists[1, 2] ≈ norm(view(Xmat, 1, :) .- view(centers, :, 2))^2
    @test dists[10, 3] ≈ norm(view(Xmat, 10, :) .- view(centers, :, 3))^2
    # `predict` assigns each point to its nearest center.
    yhat = predict(model, fitresult, X)
    @test argmin(dists[1, :]) == yhat[1]
    @test argmin(dists[10, :]) == yhat[10]
end
# # K_MEDOIDS
@testset "KMedoids" begin
    model = KMedoids()
    fitresult, cache, report = fit(model, 1, X)
    Xmat = matrix(X)
    # `transform` reports the distance, in the model's metric, from each
    # point to each medoid (medoids stored column-wise in fitresult[1]).
    dists = matrix(transform(model, fitresult, X))
    medoids = fitresult[1]
    @test dists[1, 2] ≈ Distances.evaluate(
        model.metric, view(Xmat, 1, :), view(medoids, :, 2)
    )
    @test dists[10, 3] ≈ Distances.evaluate(
        model.metric, view(Xmat, 10, :), view(medoids, :, 3)
    )
    # predictions on the training data reproduce the training assignments
    yhat = predict(model, fitresult, X)
    @test all(report.assignments .== yhat)
end
# # DBSCAN
@testset "DBSCAN" begin
    # five-spot pattern: the four unit-square corners plus the center point
    X = MLJBase.table(
        [
            0.0 0.0
            1.0 0.0
            1.0 1.0
            0.0 1.0
            0.5 0.5
        ]
    )

    # radius < √2 ==> 5 clusters
    model = DBSCAN(radius=0.1)
    yhat1, report1 = predict(model, nothing, X)
    @test report1.nclusters == 5
    @test report1.point_types == fill('B', 5)
    @test Set(yhat1) == Set(unique(yhat1))
    @test Set(report1.cluster_labels) == Set(unique(yhat1))
    # DbscanCluster fields:
    @test propertynames(report1.clusters[1]) ==
        (:size, :core_indices, :boundary_indices)

    # radius > √2 ==> 1 cluster
    model = DBSCAN(radius=√2 + eps())
    yhat, report = predict(model, nothing, X)
    @test report.nclusters == 1
    @test report.point_types == fill('C', 5)
    @test length(unique(yhat)) == 1

    # radius < √2 && min_cluster_size = 2 ==> all points are noise
    model = DBSCAN(radius=0.1, min_cluster_size=2)
    yhat, report = predict(model, nothing, X)
    @test report.nclusters == 0
    @test report.point_types == fill('N', 5)
    @test length(unique(yhat)) == 1

    # MLJ integration: DBSCAN is static, so the machine binds no training data
    mach = machine(DBSCAN(radius=0.1))
    yhat = predict(mach, X)
    @test yhat == yhat1
    @test MLJBase.report(mach).point_types == report1.point_types
    @test MLJBase.report(mach).nclusters == report1.nclusters
end
@testset "MLJ interface" begin
    # Run the generic MLJ API-compliance suite against every exported model.
    failures, summary = MLJTestIntegration.test(
        [KMeans, KMedoids, DBSCAN],
        X;
        mod=@__MODULE__,
        verbosity=0,
        throw=false, # set to true to debug
    )
    @test isempty(failures)
end