This repository has been archived by the owner on Jul 1, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
/
helper.py
136 lines (110 loc) · 4.61 KB
/
helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import io
import itertools
import json
import logging as log
import re
import string
import credentials
import formatter
from cardDB import CardDB
class SpellChecker():
    """Correct small typos against a fixed set of known names.

    Follows the approach of Peter Norvig's spelling corrector
    (https://norvig.com/spell-correct.html). Only candidates that are a
    single edit away from the input are considered by correct().
    """

    def __init__(self, names):
        # all lookups happen on lowercased names
        self.model = {entry.lower() for entry in names}

    def __known(self, words):
        """return the first candidate that is part of the model, else None"""
        return next(
            (candidate for candidate in words if candidate in self.model),
            None)

    def __edits(self, word):
        """lazily produce every string exactly one edit away from word

        Candidates come in a fixed order: deletions, transpositions,
        replacements, insertions. Only ascii lowercase letters are used
        for replacements and insertions.
        """
        letters = string.ascii_lowercase
        pieces = [(word[:cut], word[cut:]) for cut in range(len(word) + 1)]

        removed = (head + tail[1:]
                   for head, tail in pieces if tail)
        swapped = (head + tail[1] + tail[0] + tail[2:]
                   for head, tail in pieces if len(tail) > 1)
        changed = (head + letter + tail[1:]
                   for head, tail in pieces if tail
                   for letter in letters)
        added = (head + letter + tail
                 for head, tail in pieces
                 for letter in letters)

        return itertools.chain(removed, swapped, changed, added)

    def __edits2(self, word):
        """return the set of strings reachable from word with two edits

        Not called by correct(). To also fix distance 2 errors, add
        `or self.__known(self.__edits2(lword))` to its lookup chain.
        """
        twice = set()
        for once in self.__edits(word):
            twice.update(self.__edits(once))
        return twice

    def correct(self, word):
        """return the known lowercased name for word, or word unchanged"""
        lowered = word.lower()
        # exact hit first, then anything one edit away
        match = self.__known([lowered])
        if not match:
            match = self.__known(self.__edits(lowered))
        return match or word
class HSHelper:
    """Convenience methods around a cardDB.

    Finds ``[[cardname]]`` requests in text, normalizes the requested
    names (cleaning, spell checking, alternative names) and builds the
    answer text for the cards that exist in the cardDB.
    """

    # Matches [[name]] with plain brackets as well as the backslash-escaped
    # form (new reddit and some apps). The captured name is 1-32 characters
    # long and contains neither ']' nor a backslash.
    _CARD_REQUEST = re.compile(r'\\?\[\\?\[([^\]\\]{1,32})\\?\]\\?\]')
    # The shortest text _CARD_REQUEST can match is "[[x]]".
    _MIN_REQUEST_LEN = 5

    def __init__(self, cardDB, constants):
        """Set up the spell checker and the info template.

        cardDB    -- read here for cardNames(), tokens and DUELS_CMD
        constants -- read here for specialNames and alternativeNames
        """
        self.cardDB = cardDB
        self.constants = constants

        # the spell checker knows every name a user may legitimately type
        allNames = itertools.chain(cardDB.cardNames(),
                                   self.constants.specialNames,
                                   self.constants.alternativeNames)
        self.spellChecker = SpellChecker(allNames)

        self.infoTempl = formatter.loadInfoTempl(self.constants.specialNames,
                                                 self.constants.alternativeNames,
                                                 cardDB.tokens,
                                                 cardDB.DUELS_CMD)

    def getInfoText(self, author):
        """fill info request answer template with the requesting user"""
        return self.infoTempl.format(user=author)

    def parseText(self, text):
        """returns found cards and answer text

        Quote blocks are ignored. Returns a tuple (cards, answer); when no
        card request is found this is ([], '').
        """
        text = HSHelper.removeQuotes(text)
        cards = self.__getCards(text)
        answer = ''

        if cards:
            log.debug("found cards: %s", cards)
            cards = self.constants.replaceSpecial(cards)  # expand
            # drop names the cardDB does not contain, then apply the limit
            cards = [card for card in cards if card in self.cardDB]
            cards = cards[:self.constants.CARD_LIMIT]
            answer = formatter.createAnswer(self.cardDB, cards)

        return cards, answer

    @staticmethod
    def removeQuotes(text):
        """removes quote blocks

        Every line is stripped; empty lines and lines starting with '>'
        are dropped and the remaining lines are joined with single spaces.
        Declared static so that it works on the class and on instances.
        """
        lines = []
        for line in io.StringIO(text):
            line = line.strip()
            if line and not line.startswith('>'):
                lines.append(line)
        return ' '.join(lines)

    def __getCards(self, text):
        """look for [[cardname]]s in text and collect them

        Returns a list of unique, normalized card names in the order of
        their first appearance. Collection stops once CARD_LIMIT * 2 names
        were gathered.
        """
        cards = []
        # too short to hold even the smallest request "[[x]]"
        if len(text) < self._MIN_REQUEST_LEN:
            return cards

        for match in self._CARD_REQUEST.finditer(text):
            card = match.group(1)
            log.debug("adding a card: %s", card)

            # strip command prefixes so that only the name gets cleaned
            duelsRequested = card.startswith(self.cardDB.DUELS_CMD)
            if duelsRequested:
                card = card[len(self.cardDB.DUELS_CMD):]
            vanillaRequested = card.startswith(self.cardDB.VANILLA_CMD)
            if vanillaRequested:
                card = card[len(self.cardDB.VANILLA_CMD):]

            cleanCard = CardDB.cleanName(card)

            if cleanCard:
                log.debug("cleaned card name: %s", cleanCard)
                # slight spelling error?
                checkedCard = self.spellChecker.correct(cleanCard)
                if cleanCard != checkedCard:
                    log.info("spelling fixed: %s -> %s",
                             cleanCard, checkedCard)

                # is alternative name?
                checkedCard = self.constants.translateAlt(checkedCard)

                # put the command prefixes back
                # NOTE(review): with both prefixes present the result is
                # VANILLA_CMD + DUELS_CMD + name, the reverse of the order
                # in which they were parsed - confirm cardDB expects this.
                if duelsRequested:
                    checkedCard = self.cardDB.DUELS_CMD + checkedCard
                if vanillaRequested:
                    checkedCard = self.cardDB.VANILLA_CMD + checkedCard

                # add cardname
                if checkedCard not in cards:
                    cards.append(checkedCard)
                else:
                    log.info("duplicate card: %s (%s)", card, checkedCard)

            # sometimes cards are removed, get more to fill limit
            if len(cards) >= self.constants.CARD_LIMIT * 2:
                break

        return cards