forked from barko/dawg
-
Notifications
You must be signed in to change notification settings - Fork 0
/
d_feat_map.ml
205 lines (174 loc) · 5.66 KB
/
d_feat_map.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
(* a feature map backed by the read-append Dog_io.RW; features are tracked
   in two maps keyed by feature id, one for active and one for inactive
   features *)
type t = {
(* observation count, read from [Dog_io.RW.num_observations] by [create] *)
num_observations : int;
(* backing store: [add] looks features up in it and writes vectors to it *)
dog_ra : Dog_io.RW.t;
(* the features visited by [fold_active], keyed by feature id *)
active_id_to_feature : Dog_io.RW.qfeature Utils.IntMap.t;
(* features that remain findable by id or name but are not visited by
   [fold_active], keyed by feature id *)
inactive_id_to_feature : Dog_io.RW.qfeature Utils.IntMap.t;
}
(* [create dog_ra] builds a feature map over the store [dog_ra] that holds
   no active and no inactive features yet.  The observation count is read
   from [dog_ra] once, here. *)
let create dog_ra =
  let num_observations = Dog_io.RW.num_observations dog_ra in
  let no_features = Utils.IntMap.empty in
  {
    num_observations;
    dog_ra;
    active_id_to_feature = no_features;
    inactive_id_to_feature = no_features;
  }
(* raised by the functions of this module when the given feature id cannot
   be resolved; the payload is the offending id *)
exception FeatureIdNotFound of Dog_t.feature_id
(* [add t feature_id vector status] registers the feature [feature_id],
   as described by the backing store, under the given [status] (`Active or
   `Inactive), after writing [vector] for it to the store.

   If the id is already present in either map, [t] is returned unchanged
   and nothing is written.

   Raises [FeatureIdNotFound feature_id] when the store signals
   [Dog_io.RW.FeatureIdNotFound] during the lookup or the write. *)
let add t feature_id vector status =
  let already_known =
    Utils.IntMap.mem feature_id t.active_id_to_feature
    || Utils.IntMap.mem feature_id t.inactive_id_to_feature
  in
  if already_known then
    t (* silently drop *)
  else
    try
      let feature = Dog_io.RW.find t.dog_ra feature_id in
      Dog_io.RW.write t.dog_ra feature_id vector;
      begin match status with
        | `Active ->
          { t with
            active_id_to_feature =
              Utils.IntMap.add feature_id feature t.active_id_to_feature }
        | `Inactive ->
          { t with
            inactive_id_to_feature =
              Utils.IntMap.add feature_id feature t.inactive_id_to_feature }
      end
    with Dog_io.RW.FeatureIdNotFound _ ->
      (* re-raise under this module's own exception *)
      raise (FeatureIdNotFound feature_id)
(* [activate t feature_id] moves the feature [feature_id] from the inactive
   map to the active map and returns the updated feature map.

   The feature is removed from the inactive map as part of the move, so a
   feature is never recorded as both active and inactive (which would make
   [num_inactive] over-count and name lookups return it twice).

   If the feature is already active, [t] is returned unchanged.
   Raises [FeatureIdNotFound feature_id] if the id is in neither map. *)
let activate t feature_id =
  try
    let feature = Utils.IntMap.find feature_id t.inactive_id_to_feature in
    let active_id_to_feature =
      Utils.IntMap.add feature_id feature t.active_id_to_feature in
    let inactive_id_to_feature =
      Utils.IntMap.remove feature_id t.inactive_id_to_feature in
    { t with active_id_to_feature; inactive_id_to_feature }
  with Not_found ->
    if Utils.IntMap.mem feature_id t.active_id_to_feature then
      t (* already active: nothing to do *)
    else
      raise (FeatureIdNotFound feature_id)
(* [deactivate t feature_id] moves the feature [feature_id] from the active
   map to the inactive map and returns the updated feature map.

   The feature is removed from the active map as part of the move;
   otherwise it would still be visited by [fold_active] and deactivation
   would have no effect on anything that iterates over active features.

   If the feature is already inactive, [t] is returned unchanged.
   Raises [FeatureIdNotFound feature_id] if the id is in neither map. *)
let deactivate t feature_id =
  try
    let feature = Utils.IntMap.find feature_id t.active_id_to_feature in
    let inactive_id_to_feature =
      Utils.IntMap.add feature_id feature t.inactive_id_to_feature in
    let active_id_to_feature =
      Utils.IntMap.remove feature_id t.active_id_to_feature in
    { t with active_id_to_feature; inactive_id_to_feature }
  with Not_found ->
    if Utils.IntMap.mem feature_id t.inactive_id_to_feature then
      t (* already inactive: nothing to do *)
    else
      raise (FeatureIdNotFound feature_id)
(* [deactivate_if t f] moves every active feature for which [f feature]
   holds into the inactive map; the remaining active features stay active.

   The inactive accumulator is seeded with the features that were already
   inactive, so they are retained.  Starting from an empty map would drop
   them from the feature map altogether, making them unreachable for
   [q_find] and [activate]. *)
let deactivate_if t f =
  let active_id_to_feature, inactive_id_to_feature =
    Utils.IntMap.fold (
      fun feature_id feature (active, inactive) ->
        if f feature then
          active, Utils.IntMap.add feature_id feature inactive
        else
          Utils.IntMap.add feature_id feature active, inactive
    ) t.active_id_to_feature (Utils.IntMap.empty, t.inactive_id_to_feature)
  in
  { t with active_id_to_feature; inactive_id_to_feature }
(* [q_find t feature_id] returns the stored feature bound to [feature_id],
   looking in the active map first and in the inactive map second.
   Raises [FeatureIdNotFound feature_id] if neither map binds the id. *)
let q_find t feature_id =
  let lookup id_to_feature =
    try Some (Utils.IntMap.find feature_id id_to_feature)
    with Not_found -> None
  in
  match lookup t.active_id_to_feature with
  | Some feature -> feature
  | None ->
    begin match lookup t.inactive_id_to_feature with
      | Some feature -> feature
      | None -> raise (FeatureIdNotFound feature_id)
    end
(* [map_vector t v] turns a stored vector reference into a [Vec] record
   over the store's array, keeping the `Dense / `RLE tag.  The resulting
   record has length [t.num_observations], uses the array obtained from
   [Dog_io.RW.array t.dog_ra], and takes the vector id as its offset. *)
let map_vector t vector =
  (* both encodings are described by the same three fields *)
  let to_vec { Dog_io.RW.vector_id } =
    {
      Vec.length = t.num_observations;
      array = Dog_io.RW.array t.dog_ra;
      offset = vector_id;
    }
  in
  match vector with
  | `Dense stored -> `Dense (to_vec stored)
  | `RLE stored -> `RLE (to_vec stored)
(* [q_to_a t qfeature] rebuilds a stored feature (`Cat or `Ord) with its
   vector field passed through [map_vector t]; every other field is copied
   over unchanged. *)
let q_to_a t qfeature =
  match qfeature with
  | `Cat cat ->
    `Cat {
      Dog_t.c_feature_id = cat.Dog_t.c_feature_id;
      c_feature_name_opt = cat.Dog_t.c_feature_name_opt;
      c_anonymous_category = cat.Dog_t.c_anonymous_category;
      c_categories = cat.Dog_t.c_categories;
      c_cardinality = cat.Dog_t.c_cardinality;
      c_vector = map_vector t cat.Dog_t.c_vector;
    }
  | `Ord ord ->
    `Ord {
      Dog_t.o_feature_id = ord.Dog_t.o_feature_id;
      o_feature_name_opt = ord.Dog_t.o_feature_name_opt;
      o_cardinality = ord.Dog_t.o_cardinality;
      o_breakpoints = ord.Dog_t.o_breakpoints;
      o_vector = map_vector t ord.Dog_t.o_vector;
    }
(* [a_find_by_id t feature_id] looks the feature up with [q_find] (active
   or inactive) and converts it with [q_to_a].
   Raises [FeatureIdNotFound feature_id] if the id is unknown. *)
let a_find_by_id t feature_id =
  q_to_a t (q_find t feature_id)
(* [fold_active t f x0] folds [f] over the active features only, starting
   from [x0].  Each feature is converted with [q_to_a] before being handed
   to [f]; the feature id key itself is not passed on. *)
let fold_active t f x0 =
  let step _feature_id qfeature acc = f (q_to_a t qfeature) acc in
  Utils.IntMap.fold step t.active_id_to_feature x0
(* [best_split_of_features t splitter] asks [splitter#best_split] for a
   split of every active feature and returns [Some (feature, loss, split)]
   for the one with the lowest loss, or [None] if no feature yields a split.

   The current champion is kept only while its loss is strictly smaller
   than the challenger's; on equal losses the later feature wins. *)
let best_split_of_features t splitter =
  let keep_better feature champion =
    match splitter#best_split feature with
    | None ->
      (* this feature offers no split: the champion is unaffected *)
      champion
    | Some (loss, split) ->
      let challenger = Some (feature, loss, split) in
      begin match champion with
        | None ->
          (* first guy's always champ *)
          challenger
        | Some (_, best_loss, _) ->
          if best_loss < loss then champion else challenger
      end
  in
  fold_active t keep_better None
(* [q_find_all_by_name feature_name map] collects every feature of [map]
   whose name, as given by [Feat_utils.name_of_feature], equals
   [feature_name].  Feature names are not unique, so the result may hold
   several features; features without a name never match. *)
let q_find_all_by_name feature_name map =
  let collect _feature_id feature matches =
    match Feat_utils.name_of_feature feature with
    | Some name when name = feature_name -> feature :: matches
    | Some _ | None -> matches
  in
  Utils.IntMap.fold collect map []
(* [q_find_all_by_name t feature_name] returns every feature of [t] named
   [feature_name]: the matches among the active features, followed by the
   matches among the inactive ones.  This shadows the map-level function
   of the same name, which it calls. *)
let q_find_all_by_name t feature_name =
  let active_matches =
    q_find_all_by_name feature_name t.active_id_to_feature in
  let inactive_matches =
    q_find_all_by_name feature_name t.inactive_id_to_feature in
  active_matches @ inactive_matches
(* [a_find_all_by_name t feature_name] is [q_find_all_by_name] with every
   match converted by [q_to_a]. *)
let a_find_all_by_name t feature_name =
  let qfeatures = q_find_all_by_name t feature_name in
  List.map (fun qfeature -> q_to_a t qfeature) qfeatures
(* [a_find_all t query] resolves a feature query to a list of features:
   [`Id id] yields exactly one feature (or raises [FeatureIdNotFound] via
   [a_find_by_id]); [`Name name] yields every feature with that name,
   possibly none. *)
let a_find_all t query =
  match query with
  | `Name feature_name -> a_find_all_by_name t feature_name
  | `Id feature_id -> [ a_find_by_id t feature_id ]
(* [num_observations t] is the observation count stored in [t]. *)
let num_observations t =
  t.num_observations
(* [num_active t] is the number of entries in the active feature map. *)
let num_active t =
  Utils.IntMap.cardinal t.active_id_to_feature
(* [num_inactive t] is the number of entries in the inactive feature map. *)
let num_inactive t =
  Utils.IntMap.cardinal t.inactive_id_to_feature