-
Notifications
You must be signed in to change notification settings - Fork 0
/
search.c
156 lines (121 loc) · 4.05 KB
/
search.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <friso/friso_API.h>
#include <friso/friso.h>
#include "data_struct/hash.h"
#include "data_struct/zset.h"
#include "search.h"
/* Names of the sorted sets / hash used by the search front end. */
#define SEARCH_ZSET "search_zset"   /* keywords extracted from the command line */
#define INTER_ZSET "inter_zset"     /* destination of each keyword intersection */
#define URL_HASH "url_hash"         /* hash consulted to turn a hit into a URL  */

/* Database indices handed to select_zsetdb(). */
#define CONTENT_DB 2
#define TITLE_DB 1
#define DEFAULT_DB 0

/*
 * Buffer sizes in bytes. MAX_URL_LEN is parenthesized so that it expands
 * as a single value inside larger expressions (e.g. `n / MAX_URL_LEN`).
 */
#define MAX_URL_LEN (64 * 1024)
#define MAX_TITLE_LEN 1024
#define MAX_KEY_LEN 16

/* Location of the friso configuration file, relative to the working directory. */
#define FRISO_PATH "friso/friso.ini"

/*
 * Bit helpers on unsigned bitmaps.
 * GET_BIT yields a non-zero value iff bit n of x is set.
 * SET_BIT turns bit n of x on (x must be an lvalue).
 * n must be smaller than the width of unsigned int.
 */
#define GET_BIT(x, n) ((x) & (1U << (n)))
#define SET_BIT(x, n) ((x) = (x) | (1U << (n)))

/* Runs one search per combination of `num` keywords out of namev[0..namec-1]. */
int combo_search(char (*namev)[16], size_t namec, size_t num);
/* Returns the next larger integer with the same number of set bits as x. */
unsigned int next_k_bit(unsigned int x);
int main(int argc, char* argv[])
{
if(argc <= 1)
return 0;
char title[MAX_TITLE_LEN], query[ZSET_MAX_INTER][MAX_KEY_LEN];
size_t nquery;
unsigned long index;
friso_t friso;
friso_config_t config;
friso_task_t task;
init_hash();
init_zset();
//init friso
config = friso_new_config();
friso = friso_new();
task = friso_new_task();
if(friso_init_from_ifile(friso, config, FRISO_PATH) != 1) {
printf("fail to initialize friso and config.");
goto clear;
}
config->clr_stw = 0;
//process search string
select_zsetdb(DEFAULT_DB);
for(int i = 1; i < argc; ++i) {
friso_set_text(task, argv[i]);
while((config->next_token(friso, config, task)) != NULL) {
add_to_zset(task->token->word, SEARCH_ZSET);
}
}
printf("\nwill search the db using the combination of the following key words;\n");
nquery = 0;
while(sizeof_zset(SEARCH_ZSET) > 0) {
if(nquery + 1 > ZSET_MAX_INTER) {
printf("\ntoo many key words, your query string was truncated\n");
break;
}
get_zset(query[nquery], SEARCH_ZSET);
printf("%s ", query[nquery]);
nquery++;
}
putchar('\n');
//search in title db
select_zsetdb(TITLE_DB);
printf("\nhere are the pages whose title contains all or part of your key words:\n");
for(int i = nquery; i >= 1; --i) {
combo_search(query, nquery, i);
}
//search in content db
select_zsetdb(CONTENT_DB);
printf("\nhere are the pages whose contents contains all or part of your key words:\n");
for(int i = nquery; i >= 1; --i) {
combo_search(query, nquery, i);
}
clear:
close_hash();
close_zset();
friso_free_config(config);
friso_free(friso);
return 0;
}
/*
 * Query the currently selected zset database with every combination of
 * `num` keywords chosen from namev[0..namec-1] and print the URL of each hit.
 *
 * Combinations are enumerated as bitmaps with exactly `num` bits set among
 * the low `namec` bits: the first bitmap has the lowest `num` bits set, the
 * last one (end_bitmap) the highest `num` bits, and next_k_bit() steps from
 * one to the next. For each bitmap the selected keywords are intersected
 * into INTER_ZSET, which is then drained; every drained value is looked up
 * in URL_HASH and printed.
 *
 * namev  table of NUL-terminated keywords, 16 bytes per entry
 * namec  number of keywords in namev
 * num    size of each combination, 1 <= num <= namec
 *
 * Returns 0 on success, or -1 without searching when the arguments cannot
 * be represented: num is 0 or larger than namec, or namec exceeds either
 * ZSET_MAX_INTER or the number of bits in the bitmap.
 */
int combo_search(char (*namev)[16], size_t namec, size_t num)
{
    unsigned int bitmap = 0, end_bitmap = 0;
    char *query[ZSET_MAX_INTER], url[MAX_URL_LEN], value[16];

    /* Reject inputs that would shift out of range or overrun query[]. */
    if (num == 0 || num > namec || namec > ZSET_MAX_INTER ||
        namec > sizeof(bitmap) * CHAR_BIT) {
        return -1;
    }

    for (size_t i = 0; i < num; ++i) {
        SET_BIT(bitmap, i);
        SET_BIT(end_bitmap, (namec - 1) - i);
    }

    for (;;) {
        /* Gather the keywords selected by the current bitmap. */
        size_t nquery = 0;
        for (size_t i = 0; i < namec; ++i) {
            if (GET_BIT(bitmap, i)) {
                query[nquery] = namev[i];
                nquery++;
            }
        }

        inter_zset(query, nquery, INTER_ZSET);
        /* The loop relies on get_zset() shrinking INTER_ZSET on every call. */
        while (sizeof_zset(INTER_ZSET) > 0) {
            get_zset(value, INTER_ZSET);
            /* Print an empty URL rather than stale stack data if the lookup
             * leaves the buffer untouched. */
            url[0] = '\0';
            lookup_hash(value, url, URL_HASH);
            printf("\nurl: %s\n", url);
        }

        if (bitmap == end_bitmap) {
            break;
        }
        bitmap = next_k_bit(bitmap);
    }

    return 0;
}
//get the next integer with k bits on, i.e. the smallest value greater than
//x that has the same number of set bits as x
//see http://realtimecollisiondetection.net/blog/?p=78 for derivation
//returns 0 when x is 0 (there is no lowest set bit to advance, and the
//formula below would divide by zero)
unsigned int next_k_bit(unsigned int x)
{
    if (x == 0) {
        return 0;
    }

    unsigned int lowest = x & -x;        // isolate the lowest set bit
    unsigned int ripple = x + lowest;    // carry through the lowest run of ones
    unsigned int changed = ripple ^ x;   // bits flipped by that carry
    // move the ones that dropped out of the run back down to the bottom
    unsigned int tail = (changed >> 2) / lowest;

    return ripple | tail;
}