-
Notifications
You must be signed in to change notification settings - Fork 0
/
spell.rb
executable file
·193 lines (176 loc) · 4.55 KB
/
spell.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/ruby
#
# simple spell checker library, as a small challenge
#
# (C) 2012, Carlos Puchol <cpg at rocketmail dot com>
#
# features:
# - if the word is in the dictionary, returns that one first
# - type the word crank to get it to start cranking words and checking them
# it will generate 100 words, check them, then print a dot
# it will print a ! if it cannot match a generated word
# - start with ./spell.rb -g to generate words from the dictionary
# - start with ./spell.rb -c to consume lines with words from stdin and print the resulting match
#
# this program entry point has three modes of operation:
# - generate words (with -g)
# - consume and check words from stdin (with -c)
# - interactive. type the word crank to start generating and checking forever
# Spell checker class on which the program is based.
#
# Every dictionary word is compiled into a forgiving regular expression:
# matching is case-insensitive, every letter may be repeated any number of
# times, and any run of vowels may stand in for any vowel. Words are grouped
# into buckets keyed by their lowercased first character so that a lookup
# only scans the relevant bucket(s).
class Spell
  # Lowercase vowels; these are treated as interchangeable when matching.
  VOWELS = %w[a e i o u].freeze

  # Hash mapping a lowercased first character to an array of
  # [regexp, word] pairs for the dictionary words in that bucket.
  attr_reader :dictionary

  # Load the word list and build the bucketed dictionary.
  #
  # wordfile - path to a file containing one word per line.
  def initialize(wordfile = "/usr/share/dict/words")
    @dictionary = {}
    IO.readlines(wordfile).each do |line|
      word = line.chomp
      fc = word[0, 1].downcase
      # save the regexp for the word, along with the word, in the dict
      (@dictionary[fc] ||= []) << [make_regexp(word), word]
    end
  end

  # Check the word and find the closest spelling in the dictionary.
  #
  # Returns, in order of preference: the word itself when it is an exact
  # match, a match that differs only in letter case, or the first candidate.
  # Returns the string "NO SUGGESTION" when nothing matches.
  def check(word)
    result = find_word(word)
    return "NO SUGGESTION" if result.empty?
    # if the word is in the results, just return it
    return word if result.include?(word)

    # if the word is there with swapped caps, return that; compared as plain
    # strings so that regexp metacharacters in the input cannot misfire
    folded = word.downcase
    same_letters = result.find { |match| match.downcase == folded }
    # else return the first one
    # FIXME: simple selection of a "good" match. must improve it!
    same_letters || result.first
  end

  # Generate num words picked at random from the dictionary and mangled.
  #
  # Returns an array of strings (empty when num is less than 1).
  def generate(num = 100)
    (1..num).map { mangle_word(pick_a_word) }
  end

  # NOTE: the helpers below were left public in the original (the `private`
  # marker was commented out), so they stay public here.

  # Pick a random bucket, then a random word from that bucket.
  def pick_a_word
    bucket = @dictionary[@dictionary.keys[rand(@dictionary.size)]]
    bucket[rand(bucket.size)].last
  end

  # Randomly mangle each letter of the word: keep it, swap its case, repeat
  # it 1 to 15 times, or (for lowercase vowels) replace it with a random
  # vowel of either case.
  def mangle_word(word)
    # FIXME: not all possible combinations here, e.g. twist the vowel and repeat
    word.each_char.map do |letter|
      case rand(4)
      when 0
        # leave as is
        letter
      when 1
        # mangle case
        letter.swapcase
      when 2
        # repeat the letter some random amount of times
        letter * (rand(15) + 1)
      else
        # twist lowercase vowels only; everything else is left alone
        letter =~ /[aeiou]/ ? "aeiouAEIOU"[rand(10), 1] : letter
      end
    end.join
  end

  # Do the hard work of trying to find the given word.
  #
  # Returns an array of dictionary words whose pattern matches the word.
  def find_word(word)
    fc = word[0, 1].downcase
    if VOWELS.include?(fc)
      # the first vowel may itself have been twisted, so check every
      # vowel bucket
      VOWELS.map { |letter| find_matches(@dictionary[letter], word) }.flatten
    else
      # else we do the search directly
      find_matches(@dictionary[fc], word)
    end
  end

  # Find the matches of a word in a dictionary bucket.
  #
  # bucket - array of [regexp, word] pairs, or nil when the dictionary has
  #          no words for that first character (treated as no matches).
  def find_matches(bucket, word)
    return [] if bucket.nil?

    bucket.map { |exp, match| word =~ exp ? match : nil }.compact
  end

  # Make a regular expression for the string we are trying to match.
  #
  # Vowels become "one or more of any vowel"; every other character becomes
  # "one or more of that character". Characters are escaped so that regexp
  # metacharacters in dictionary words are matched literally.
  def make_regexp(word)
    pattern = word.each_char.map do |c|
      c =~ /[aeiou]/i ? "[aeiou]+" : "#{Regexp.escape(c)}+"
    end.join
    # ignore case and match the whole string; \Z still tolerates a single
    # trailing newline, as the previous `$` anchor did
    Regexp.new("\\A#{pattern}\\Z", Regexp::IGNORECASE)
  end
end
# Generate mode (-g): load the dictionary once, then endlessly print batches
# of num mangled words, one word per line, flushing stdout after each batch.
# Never returns normally.
def generate_forever(num = 1000)
  speller = Spell.new
  loop do
    speller.generate(num).each { |mangled| puts mangled }
    $stdout.flush
  end
end
# Consume mode (-c): load the dictionary, then read words one per line and
# print the best match for each as "BEST MATCH: <match>\t\tINPUT: <word>".
#
# Returns when the input is exhausted. `gets` yields nil at end of input,
# whereas `readline` raises EOFError, which previously ended every piped run
# with an uncaught exception.
def check_forever
  spell = Spell.new
  while (line = gets)
    input = line.chomp
    match = spell.check(input)
    puts "BEST MATCH: #{match}\t\tINPUT: #{input}"
  end
end
# Crank mode: endlessly generate batches of num mangled words with the given
# checker and run every word back through it. Prints '.' for a batch in which
# every word got a suggestion, and '!' as soon as one word gets
# "NO SUGGESTION" (the rest of that batch is skipped) or checking raises an
# error. Never returns normally.
def generate_forever_interactive(checker, num = 100)
  loop do
    batch = checker.generate(num)
    begin
      # all? stops at the first word the checker cannot resolve
      resolved = batch.all? { |candidate| checker.check(candidate) != "NO SUGGESTION" }
      raise "problem!" unless resolved
      print '.'
    rescue
      print "!"
    end
    $stdout.flush
  end
end
# Interactive input loop.
#
# Loads the dictionary, then prompts with "> " and prints the best match for
# each word typed. Typing the word crank switches to the endless
# generate-and-check mode. The loop ends quietly on end of input (ctrl-d) or
# on ctrl-c; any other error is reported on stderr instead of being silently
# swallowed.
def interactive_loop
  print "Reading system dictionary ..."
  $stdout.flush
  spell = Spell.new
  puts " done."
  begin
    loop do
      print "> "
      line = gets
      break if line.nil? # end of input

      word = line.chomp
      generate_forever_interactive(spell) if word == 'crank'
      puts spell.check(word)
    end
  rescue Interrupt
    # ctrl-c: Interrupt is not a StandardError, so a bare rescue would let it
    # escape with a backtrace; leave quietly instead
  rescue StandardError => e
    # still end the session without crashing, but say why
    warn "spell: #{e.class}: #{e.message}"
  end
end
# Main program entry point: pick the mode of operation from the first
# command-line argument (-g generates, -c consumes, anything else is
# interactive) and run it.
def main
  mode = ARGV[0]
  # drop up to two leading arguments from ARGV, as before
  2.times { ARGV.shift }
  case mode
  when '-g'
    generate_forever
  when '-c'
    check_forever
  else
    # interactive mode
    interactive_loop
  end
end

main