-
-
Notifications
You must be signed in to change notification settings - Fork 7.3k
/
trie_multiple_search.cpp
469 lines (418 loc) · 16.9 KB
/
trie_multiple_search.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
/**
* @file
* @brief [Trie
* datastructure](https://iq.opengenus.org/autocomplete-using-trie-data-structure/)
* with search variants
* @details
* This provides multiple variants of search functions
* on a trie structure utilizing STL. The trie is valid
* for only English alphabets.
* @author [Ghanashyam](https://github.com/g-s-k-zoro)
*/
#include <algorithm> /// for std::count
#include <cassert> /// for assert
#include <cctype> /// for tolower
#include <cstdint> /// for std::uint32_t
#include <cstring> /// for string operations
#include <iostream> /// for IO Operations
#include <queue> /// for std::priority_queue
/**
* @namespace operations_on_datastructures
* @brief Operations on data structures
*/
namespace operations_on_datastructures {
/**
* @namespace trie_operations
* @brief Functions for [Trie
* datastructure](https://iq.opengenus.org/autocomplete-using-trie-data-structure/)
* implementation
*/
namespace trie_operations {
/**
* @brief Class defining the structure of trie node and containing the methods
* to perform operations on them.
*/
class Tnode {
private:
static constexpr uint8_t ENGLISH_ALPHABET_SIZE = 26;
// pointers to alphabets
std::vector<Tnode *> english;
// To mark the end of word
bool endOfWord;
// To store the frequency of searches for the word
uint32_t frequency;
public:
Tnode() {
english.resize(ENGLISH_ALPHABET_SIZE, nullptr);
endOfWord = false;
frequency = 0;
}
// Copy Constructor
Tnode(const Tnode &node) {
english = node.english;
endOfWord = node.endOfWord;
frequency = node.frequency;
}
Tnode &operator=(const Tnode &node) = default;
Tnode(Tnode &&) = default;
Tnode &operator=(Tnode &&) = default;
/**
* @brief Function to count the number of children a node in the trie has
* @param node a trie node whose children need to be counted
* @return count of the number of children of the given node (max 26)
*/
inline uint8_t numberOfChildren(Tnode *node) {
return ENGLISH_ALPHABET_SIZE -
std::count(node->english.begin(), node->english.end(), nullptr);
}
// Functions to perform operations on trie
void Insert(const std::string &entry);
void Delete(std::string entry);
void DeleteFrom(Tnode *delete_from, std::string delete_string,
int remove_index);
bool SearchPresence(const std::string &key);
void SuggestAutocomplete(Tnode *new_root, const std::string &prefix);
void SearchSuggestions(const std::string &key);
void SuggestFreqAutocomplete(
Tnode *new_root, const std::string &prefix,
std::priority_queue<std::pair<int, std::string> > *suggestions);
void SearchFreqSuggestions(const std::string &key);
void SelectionTop_3(
std::priority_queue<std::pair<int, std::string> > *suggestions);
// To free up the dynamically allocated objects
~Tnode() {
int i = 0;
for (i = 0; i < ENGLISH_ALPHABET_SIZE; i++) {
if (english[i]) {
delete english[i];
}
}
}
};
/**
* @brief Function to insert a word in the trie
* @param entry string entry to be inserted in the trie
*/
void Tnode::Insert(const std::string &entry) {
Tnode *cur_pos = this;
int letter_index = 0;
for (auto &i : entry) {
// To ignore case
letter_index = tolower(i) - 97;
// Allocate a node for each character of entry if not present in the
// trie
if (cur_pos->english[letter_index] == nullptr) {
cur_pos->english[letter_index] = new Tnode();
}
cur_pos = cur_pos->english[letter_index];
}
// cur_pos points to the last char, mark it as end of word
cur_pos->endOfWord = true;
}
/**
* @brief Function recursively deletes the substring character by
* character iterating through the string to be deleted. It traverses till the
* end of word in a recursive fashion, from there it deletes characters one by
* one till it reaches back to the initial call.
* @param delete_from the acting root to the required suffix to be deleted
* @param delete_string the string to be deleted from the trie
* @param remove_index index denoting the beginning of the substring to be
* deleted
*/
void Tnode::DeleteFrom(Tnode *delete_from, std::string delete_string,
int remove_index) {
if (delete_string.size() == remove_index) {
int letter_index = tolower(delete_string[remove_index]) - 97;
DeleteFrom(delete_from->english[letter_index], delete_string,
remove_index + 1);
delete delete_from;
}
}
/**
* @brief Function to verify presence and hence delete an entry from the trie
* @param entry string entry to be deleted from the trie
*/
void Tnode::Delete(std::string entry) {
Tnode *cur_pos = this,
*delete_from = this; // Current pointer pointing to root
int letter_index = 0, delete_from_index = 0, i = 0, n = entry.size();
for (i = 0; i < n; i++) {
// To ignore case
letter_index = tolower(entry[i]) - 97;
// Display error message when given entry is not present in the tree
if (cur_pos->english[letter_index] == nullptr) {
std::cout << "Entry not Found" << std::endl;
return;
}
// If the current node is end of word for the current prefix or if it
// has 2 or more branches It cannot be deleted while deleting the
// required entry.
if (numberOfChildren(cur_pos) > 1 || cur_pos->endOfWord) {
delete_from = cur_pos; // denotes the beginning of the shortest
// suffix that is allowed to be deleted
delete_from_index = i - 1; // Beginning index of the suffix
// corresponding to the 'entry'
}
// Traversing through the entry
cur_pos = cur_pos->english[letter_index];
}
// cur_pos now points to the last char of entry. Display message if that
// entry does not exist
if (!cur_pos->endOfWord) {
std::cout << "Entry not Found" << std::endl;
return;
}
// If cur_pos is not a leaf node, unmark end of word and assign 0 to it's
// frequency for deletion
if (numberOfChildren(cur_pos)) {
cur_pos->endOfWord = false;
cur_pos->frequency = 0;
return;
}
// The first character of the suffix to be deleted
letter_index = tolower(entry[delete_from_index + 1]) - 97;
// Point cur_pos to the next node
cur_pos = delete_from->english[letter_index];
// Sever the connection from the main trie
delete_from->english[letter_index] = nullptr;
// If number of characters in the suffix are more than 1, recursively delete
// each character starting from cur_pos using the helper function
if (n > delete_from_index + 2) {
DeleteFrom(cur_pos, entry, delete_from_index + 2);
}
// If the suffix is only 1 char in length
else {
delete cur_pos;
}
}
/**
* @brief Function to check a word's presence in the trie (Basic)
* @param key the string key to be searched in the trie
* @return true if the key is found
* @return false if the key is not found
*/
bool Tnode::SearchPresence(const std::string &key) {
Tnode *cur_pos = this;
int letter_index = 0;
for (auto &i : key) {
letter_index = tolower(i) - 97;
// If any character in the order of the key is absent, word not found!
if (cur_pos->english[letter_index] == nullptr) {
return false;
}
cur_pos = cur_pos->english[letter_index];
}
// Word is only present in the trie if the key is a valid complete entry and
// not just a prefix.
if (cur_pos->endOfWord) {
(cur_pos->frequency)++;
return true;
} else {
return false;
}
}
/**
* @brief Recursive function to suggest all the entries of trie
* which have a given common prefix
* @param new_root pointer pointing to the node corresponding to the last char
* of prefix
* @param prefix the common prefix that all the suggestions must have
*/
void Tnode::SuggestAutocomplete(Tnode *new_root, const std::string &prefix) {
// Iterate through all 26 nodes as we have to print all strings with the
// given prefix
int i = 0;
for (i = 0; i < ENGLISH_ALPHABET_SIZE; i++) {
if (new_root->english[i] != nullptr) {
// Print the sugestion only if it's a valid complete entry and not
// just a prefix
if (new_root->english[i]->endOfWord) {
std::cout << prefix + char(i + 97) << std::endl;
}
SuggestAutocomplete(new_root->english[i], prefix + char(i + 97));
}
}
}
/**
* @brief Lists out all the words in trie with the longest prefix
* of the search key that is present in the trie. For example - if trie contains
* "abc", "abcde", "abcdefg", "abcddef" and if the search key is "abcdezz", then
* the longest common prefix is "abcde" and hence search results will be
* "abcde", "abcdefg".
* @param key the string key to be searched for suggestions
*/
void Tnode::SearchSuggestions(const std::string &key) {
Tnode *cur_pos = nullptr, *prev_pos = nullptr;
cur_pos = prev_pos = this; // maintaining 2 pointers, initialized to root
int letter_index = 0;
std::string prefix =
""; // variable storing the updated value of longest common prefix
for (auto &i : key) {
letter_index = tolower(i) - 97;
prev_pos = cur_pos; // Previous pointer updated to point to the last
// char of the longest common prefix
// When the node for the character does not exist, longest prefix has
// been determined and SuggestAutocomplete is called
if (cur_pos->english[letter_index] == nullptr) {
SuggestAutocomplete(prev_pos, prefix);
std::cout << "- - - - - - - - - - - - - - - - - - - - - - - - - - "
<< std::endl;
return;
}
// Updating the longest common prefix
prefix += char(tolower(i));
cur_pos = cur_pos->english[letter_index];
}
// If the key is a valid entry of trie, display it @ top of the suggestions
if (cur_pos->endOfWord) {
std::cout << key << std::endl;
(cur_pos->frequency)++;
}
(void)prev_pos; // Idiom to ignore previous pointer
// Call for suggestions when the search key is present as an entry/a prefix
// in the trie
SuggestAutocomplete(cur_pos, prefix);
std::cout << "- - - - - - - - - - - - - - - - - - - - - - - - - - "
<< std::endl;
return;
}
/**
* @brief Function to display the 3 suggestions with highest frequency
* of search hits
* @param suggestions a max heap that contains pairs of (frequency, word)
* heapified based on frequency
*/
void Tnode::SelectionTop_3(
std::priority_queue<std::pair<int, std::string> > *suggestions) {
// Display Either top 3 or total number of suggestions, whichever is smaller
int n = suggestions->size(), Top = 0;
Top = n < 3 ? n : 3;
while (Top--) {
std::cout << suggestions->top().second << std::endl;
suggestions->pop();
}
}
/**
* @brief Recursive function to suggest most frequently
* searched entries of trie which have a given common prefix
* @param new_root pointer pointing to the node corresponding to the last char
* of prefix
* @param prefix the common prefix that all the suggestions must have
* @param suggestions a max heap that contains pairs of (frequency, word)
* heapified based on frequency
*/
void Tnode::SuggestFreqAutocomplete(
Tnode *new_root, const std::string &prefix,
std::priority_queue<std::pair<int, std::string> > *suggestions) {
int i = 0;
for (i = 0; i < ENGLISH_ALPHABET_SIZE; i++) {
if (new_root->english[i] != nullptr) {
// Add to sugestions only if it's a valid complete entry and not
// just a prefix
if (new_root->english[i]->endOfWord) {
suggestions->push(std::make_pair(
new_root->english[i]->frequency, prefix + char(i + 97)));
}
SuggestFreqAutocomplete(new_root->english[i], prefix + char(i + 97),
suggestions);
}
}
}
/**
* @brief Lists out the most frequent words in trie with the
* longest prefix of the search key that is present in the trie. For example -
* if trie contains "abc", "abcde", "abcdefg", "abcddef" and they have been
* previously searched for 3, 1, 2, 4 times respectively, if the search key is
* "ab", then the longest common prefix is "ab" and only the top 3 frequencies
* among the matches would be displayed viz. "abcddef", "abc", "abcdefg".
* @param key the string key to be searched for suggestions
*/
void Tnode::SearchFreqSuggestions(const std::string &key) {
Tnode *cur_pos = nullptr, *prev_pos = nullptr;
cur_pos = prev_pos = this; // maintaining 2 pointers, initialized to root
int letter_index = 0;
std::string prefix =
""; // variable storing the updated value of longest common prefix
std::priority_queue<std::pair<int, std::string> >
suggestions; // max heap to store (frequency, word) in descending order
// of freq
std::priority_queue<std::pair<int, std::string> > *Suggestions =
&suggestions;
for (auto &i : key) {
letter_index = tolower(i) - 97;
prev_pos = cur_pos; // Previous pointer updated to point to the last
// char of the longest common prefix
// When the node for the character does not exist, longest prefix has
// been determined and SuggestFreqAutocomplete is called
if (cur_pos->english[letter_index] == nullptr) {
SuggestFreqAutocomplete(prev_pos, prefix, Suggestions);
// To display the top 3 results
SelectionTop_3(Suggestions);
std::cout << "- - - - - - - - - - - - - - - - - - - - - - - - - - "
<< std::endl;
return;
}
// Updating the longest common prefix
prefix += char(tolower(i));
cur_pos = cur_pos->english[letter_index];
}
// If the key is a valid entry of trie, display it @ top of the suggestions
if (cur_pos->endOfWord) {
(cur_pos->frequency)++;
std::cout << key << std::endl;
}
(void)prev_pos; // Idiom to ignore previous pointer
// Call for Suggestions when the search key is present as an entry/a prefix
// in the trie
SuggestFreqAutocomplete(cur_pos, prefix, Suggestions);
// Display the top 3 results
SelectionTop_3(Suggestions);
std::cout << "- - - - - - - - - - - - - - - - - - - - - - - - - - "
<< std::endl;
return;
}
} // namespace trie_operations
} // namespace operations_on_datastructures
/**
* @brief Function to test a simple search before and after deleting
* an entry. And to test out the multiple variants of search.
*/
static void test() {
auto root = new operations_on_datastructures::trie_operations::Tnode();
std::vector<std::string> inputs = {
"abcde", "sss", "ssss", "ssst", "sssu", "sssv",
"sst", "ssts", "sstt", "sstu", "tutu", "tutuv",
"tutuu", "tutuvs", "tutus", "tvst", "tvsu", "vvvv"};
for (auto &i : inputs) {
root->Insert(i);
}
// Search an existing entry
assert(root->SearchPresence("vvvv"));
std::cout << root->SearchPresence("vvvv") << std::endl;
// Delete it
root->Delete("vvvv");
// Search for the entry again
assert(!root->SearchPresence("vvvv"));
std::cout << root->SearchPresence("vvvv") << std::endl;
std::cout << root->SearchPresence("tutu") << std::endl;
root->SearchSuggestions("tutu");
std::cout << root->SearchPresence("tutu") << std::endl;
root->SearchSuggestions("tutuv");
std::cout << root->SearchPresence("tutuv") << std::endl;
root->SearchSuggestions("tutuvs");
root->SearchFreqSuggestions(
"tu"); // The top 3 frequent entries with prefix tu are tutu, tutuv &
// tutuvs respectively
root->SearchSuggestions(
""); // Empty search to list all the entries in the trie
}
/**
* @brief Main function
* @param argc commandline argument count (ignored)
* @param argv commandline array of arguments (ignored)
* @returns 0 on exit
*/
int main(int argc, char const *argv[]) {
test(); // run self-test implementations
return 0;
}