-
Notifications
You must be signed in to change notification settings - Fork 0
/
change_tense.py
156 lines (134 loc) · 5.41 KB
/
change_tense.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# -*- coding: utf-8 -*-
import string
import spacy
from spacy.symbols import NOUN
from pattern.en import conjugate, PAST, PRESENT, SINGULAR, PLURAL
# Sample sentence and target tense. Neither name is read by any function
# visible in this file -- presumably demo values for interactive use; TODO confirm.
input_string = '''Phil, Bob and Tom are not going to the school and they have fun doing this.'''
tense = 'past'
# Dependency labels that get_subjects_of_verb accepts as marking a subject.
SUBJ_DEPS = {'agent', 'csubj', 'csubjpass', 'expl', 'nsubj', 'nsubjpass'}
# spaCy pipeline, loaded when the module is imported; it is the default
# value of the ``nlp`` parameter of change_tense.
nlp = spacy.load('en_core_web_sm')
# Remove spaces before/after punctuation
def fix_punctuation(text_out: str):
    """Strip the stray spaces around punctuation in a space-joined token string.

    The rules are applied one after another, in this order:

    1. For every character of ``string.punctuation``: the opening marks
       ``(``, ``<``, ``[`` and ``'`` lose the space that follows them; every
       other mark loses the space that precedes it.
    2. ``-`` and the curly opening quotes lose the space that follows them.
    3. The ellipsis, the curly closing double quote and the clitics ``'s``
       and ``n't`` lose the space that precedes them.

    Args:
        text_out: Text whose tokens are separated by single spaces.

    Returns:
        The text with those spaces removed.
    """
    space_after = frozenset("(<['")

    # Build the ordered (search, replacement) table first, then apply it in
    # one pass; the order matters because later rules see earlier results.
    rules = [
        (mark + ' ', mark) if mark in space_after else (' ' + mark, mark)
        for mark in string.punctuation
    ]
    rules.extend((mark + ' ', mark) for mark in ("-", "“", "‘"))
    rules.extend((' ' + mark, mark) for mark in ("…", "”", "'s", "n't"))

    for search, replacement in rules:
        text_out = text_out.replace(search, replacement)
    return text_out
# Returns subjects that are in conjunction with another subject
def _get_conjuncts(tok):
return [right for right in tok.rights if right.dep_ == 'conj']
# Returns True if noun is in plural
def is_plural_noun(token):
    """Tell whether ``token`` is a noun whose surface form differs from its lemma.

    Only tokens whose ``pos`` equals ``NOUN`` can qualify; anything else
    (pronouns and proper nouns included) yields ``False``.

    Args:
        token: A token of a document that carries ``TAG`` annotation.

    Returns:
        ``True`` when ``token.pos == NOUN`` and ``token.lemma`` differs from
        ``token.lower``; ``False`` otherwise.

    Raises:
        ValueError: If the token's document reports no ``TAG`` annotation.
    """
    tagged = token.doc.has_annotation("TAG")
    if tagged is False:
        raise ValueError('token is not POS-tagged')
    if token.pos == NOUN:
        return bool(token.lemma != token.lower)
    return False
# Returns all subjects (plus the conjuncted ones)
def get_subjects_of_verb(verb):
    """Collect the subject tokens of ``verb``, including coordinated subjects.

    Lookup rules:

    * If ``verb`` is labelled ``aux`` and has ancestors, the search is done
      on its first ancestor instead.
    * Otherwise the tokens in ``verb.lefts`` whose ``dep_`` is in
      ``SUBJ_DEPS`` are taken, followed by their ``conj`` children, the
      ``conj`` children of those, and so on ("Phil, Bob and Tom").
    * If that yields nothing and ``verb`` has ancestors, the search is
      repeated on its first ancestor.

    Args:
        verb: A token exposing ``dep_``, ``lefts`` and ``ancestors``.

    Returns:
        A list of subject tokens; empty when none is found up the tree.
    """
    if verb.dep_ == "aux":
        head = next(iter(verb.ancestors), None)
        if head is not None:
            return get_subjects_of_verb(head)

    subjs = [tok for tok in verb.lefts if tok.dep_ in SUBJ_DEPS]

    # Expand conjuncts level by level. The previous code extended the list
    # from a generator that was iterating that same list, which only picked
    # up nested conjuncts because of how CPython grows lists during extend().
    frontier = list(subjs)
    while frontier:
        frontier = [conj for subj in frontier for conj in _get_conjuncts(subj)]
        subjs.extend(frontier)

    if not subjs:
        head = next(iter(verb.ancestors), None)
        if head is not None:
            return get_subjects_of_verb(head)
    return subjs
# Checks if verb needs to be pluralized because there is more than one subject or the subject is in plural
def is_plural_verb(token):
    """Decide whether the verb ``token`` needs plural agreement.

    The verb is plural when it has more than one subject (coordinated
    subjects such as "Phil, Bob and Tom"), or when its single subject is a
    plural noun according to ``is_plural_noun``.

    The previous implementation compared ``max(plural_ratio, len(subjects))``
    against 1.5; because the ratio never exceeds 1, a single plural subject
    ("The dogs") was always reported as singular.

    Args:
        token: A verb token of a document that carries ``TAG`` annotation.

    Returns:
        ``True`` for plural agreement, ``False`` otherwise (including when
        no subject is found).

    Raises:
        ValueError: If the token's document reports no ``TAG`` annotation.
    """
    if token.doc.has_annotation("TAG") is False:
        raise ValueError('token is not POS-tagged')
    subjects = get_subjects_of_verb(token)
    if not subjects:
        return False
    # Several coordinated subjects agree in the plural even if each one is
    # singular on its own.
    if len(subjects) > 1:
        return True
    return is_plural_noun(subjects[0])
def preserve_caps(word, newWord):
    """Carry the leading capital of ``word`` over to ``newWord``.

    Args:
        word: The original word; may be empty.
        newWord: The replacement word.

    Returns:
        ``newWord.capitalize()`` when ``word`` starts with an uppercase
        letter (any script, not just ASCII A-Z), otherwise ``newWord``
        unchanged. Note that ``str.capitalize`` also lowercases the
        remaining characters of ``newWord``.
    """
    # Slicing instead of indexing keeps an empty ``word`` from raising
    # IndexError; ''.isupper() is simply False.
    if word[:1].isupper():
        newWord = newWord.capitalize()
    return newWord
def identify_person(words):
    """Return the grammatical person (1, 2 or 3) for the verb ``words[-1]``.

    The decision is based on the exact text of the verb's subjects as
    returned by ``get_subjects_of_verb``: ``I``/``we``/``We`` give first
    person, ``you``/``You`` give second person, anything else third person.

    Args:
        words: Tokens seen so far; the last one is the verb.

    Returns:
        ``1``, ``2`` or ``3``.
    """
    subject_texts = {subject.text for subject in get_subjects_of_verb(words[-1])}
    if subject_texts & {'I', 'we', 'We'}:
        return 1
    if subject_texts & {'you', 'You'}:
        return 2
    return 3
def identify_number(words):
    """Return ``PLURAL`` or ``SINGULAR`` for the verb ``words[-1]``.

    Args:
        words: Tokens seen so far; the last one is the verb.

    Returns:
        ``PLURAL`` when ``is_plural_verb`` accepts the verb, else ``SINGULAR``.
    """
    return PLURAL if is_plural_verb(words[-1]) else SINGULAR
def negation_handler(words, out, tense):
    """Rewrite the auxiliary in front of a negation to match the target tense.

    Acts only when the texts of the last two tokens concatenate to one of
    ``didnot``, ``donot``, ``willnot``, ``didn't``, ``don't``, ``won't``.
    In that case the second-to-last output word becomes ``'did'`` for
    ``PAST``, ``'do'`` for ``PRESENT``, and is removed for any other tense.

    Args:
        words: Tokens seen so far (at least two).
        out: Output words built so far; modified in place.
        tense: Target tense value as produced by ``lookup_tense``.

    Returns:
        The same ``out`` list.
    """
    negated_pair = words[-2].text + words[-1].text
    if negated_pair not in ('didnot', 'donot', 'willnot', "didn't", "don't", "won't"):
        return out

    if tense == PAST:
        out[-2] = 'did'
    elif tense == PRESENT:
        out[-2] = 'do'
    else:
        out.pop(-2)
    return out
def future_perfect_and_progressives_handler(words, out):
    """Drop the just-emitted word when the last token is an auxiliary have/has.

    The last output word is removed when the last token's text is ``have``
    or ``has``, it has at least one ancestor, and its ``dep_`` is ``aux``.

    Args:
        words: Tokens seen so far.
        out: Output words built so far; modified in place.

    Returns:
        The same ``out`` list.
    """
    last = words[-1]
    is_auxiliary_have = (
        last.text in ('have', 'has')
        and len(list(last.ancestors))
        and last.dep_ == 'aux'
    )
    if is_auxiliary_have:
        out.pop(-1)
    return out
def transform_if_future(words, out, to_tense):
    """Insert the ``will`` auxiliary in front of a verb when targeting the future.

    Does nothing unless ``to_tense == 'future'``. Otherwise:

    * ``'will'`` is appended unless the last output word is already
      ``'will'`` or ``'be'``. An empty ``out`` also gets ``'will'``; this
      can happen when ``if_not_future_will_remover`` has just popped the
      only word, and previously raised ``IndexError``.
    * If the previous token is ``will`` tagged ``NN`` (the noun), one more
      ``'will'`` is appended so the noun is not mistaken for the auxiliary.

    Args:
        words: Tokens seen so far (at least two); ``words[-1]`` is the verb.
        out: Output words built so far; modified in place.
        to_tense: Requested tense name, e.g. ``'future'``.

    Returns:
        The same ``out`` list.
    """
    if to_tense != 'future':
        return out
    if not out or out[-1] not in ('will', 'be'):
        out.append('will')
    if words[-2].text == 'will' and words[-2].tag_ == 'NN':
        out.append('will')
    return out
def is_verb_for_modification(words):
    """Tell whether the last token is a verb form that should be re-conjugated.

    A token qualifies when it is tagged ``VBD``, ``VBP``, ``VBZ`` or ``VBN``,
    or when it is a base form (``VB``) that does not follow ``to``, ``not``
    or the contracted ``n't``. A base form after the modal ``will`` is
    covered by the latter rule.

    ``n't`` is excluded alongside ``not`` because ``negation_handler``
    treats both spellings as the same construction; without it
    "don't like" was turned into "didn't liked".

    Args:
        words: Tokens seen so far (at least two), each exposing ``text``
            and ``tag_``.

    Returns:
        ``True`` if ``words[-1]`` should be conjugated, else ``False``.
    """
    previous, current = words[-2], words[-1]
    if current.tag_ in ('VBD', 'VBP', 'VBZ', 'VBN'):
        return True
    if current.tag_ != 'VB':
        return False
    # Base forms after an infinitive marker or a negation stay untouched.
    return previous.text not in ('to', 'not', "n't")
def lookup_tense(to_tense):
    """Translate a tense name into the value passed on to ``conjugate``.

    Args:
        to_tense: ``'future'``, ``'present'`` or ``'past'``.

    Returns:
        ``'inf'`` for ``'future'``, ``PRESENT`` for ``'present'`` and
        ``PAST`` for ``'past'``.

    Raises:
        KeyError: If ``to_tense`` is any other value.
    """
    return {'future': 'inf', 'present': PRESENT, 'past': PAST}[to_tense]
def if_not_future_will_remover(words, out):
    """Remove the last output word when the verb follows modal ``will`` or ``had``.

    The last element of ``out`` is popped when the previous token is
    ``will`` tagged ``MD``, or when its text is ``had``.

    Args:
        words: Tokens seen so far (at least two); ``words[-1]`` is the verb.
        out: Output words built so far; modified in place.

    Returns:
        The same ``out`` list.
    """
    previous = words[-2]
    after_will_modal = previous.text == 'will' and previous.tag_ == 'MD'
    if after_will_modal or previous.text == 'had':
        out.pop(-1)
    return out
def set_this_tense(words, tense):
    """Pick the tense in which the verb ``words[-1]`` should be conjugated.

    A verb that directly follows ``were``, ``am``, ``is``, ``are`` or
    ``was``, or that follows ``will be``, is always conjugated with
    ``PAST`` (the value ``lookup_tense`` maps ``'past'`` to), regardless of
    the requested tense. Every other verb uses ``tense`` as given.

    The previous code read ``tense_lookup['past']``, but ``tense_lookup``
    exists only inside ``lookup_tense``, so this branch raised ``NameError``.

    Args:
        words: Tokens seen so far (at least two); ``words[-1]`` is the verb.
        tense: Target tense value as produced by ``lookup_tense``.

    Returns:
        ``PAST`` for a verb after a form of "be" as described above,
        otherwise ``tense``.
    """
    previous_text = words[-2].text
    after_be_form = previous_text in ('were', 'am', 'is', 'are', 'was')
    after_will_be = (
        previous_text == 'be' and len(words) > 2 and words[-3].text == 'will'
    )
    if after_be_form or after_will_be:
        return PAST
    return tense
def conjugation_params(words, tense):
    """Gather the tense, person and number for conjugating ``words[-1]``.

    Args:
        words: Tokens seen so far; the last one is the verb.
        tense: Target tense value as produced by ``lookup_tense``.

    Returns:
        A ``(tense, person, number)`` tuple built from ``set_this_tense``,
        ``identify_person`` and ``identify_number``, evaluated in that order.
    """
    return (
        set_this_tense(words, tense),
        identify_person(words),
        identify_number(words),
    )
def deal_with_future_tense(words, out, to_tense):
    """Adjust the ``will`` auxiliary around the verb ``words[-1]``.

    First lets ``if_not_future_will_remover`` drop the preceding output
    word where applicable, then lets ``transform_if_future`` add ``will``
    when the requested tense is the future.

    Args:
        words: Tokens seen so far (at least two).
        out: Output words built so far.
        to_tense: Requested tense name.

    Returns:
        The output list returned by ``transform_if_future``.
    """
    without_old_auxiliary = if_not_future_will_remover(words, out)
    return transform_if_future(words, without_old_auxiliary, to_tense)
def change_tense(text, to_tense, nlp=nlp):
    """Rewrite ``text`` so that its verbs are in the requested tense.

    The text is parsed with ``nlp`` and walked token by token. Tokens that
    ``is_verb_for_modification`` accepts are re-conjugated with
    ``pattern.en.conjugate`` using the tense, person and number from
    ``conjugation_params``; every other token is copied. After each token
    the negation and future-perfect handlers post-process the output, and
    the final string is cleaned up by ``fix_punctuation``.

    The first token is always copied verbatim; it is never conjugated.

    Args:
        text: The text to convert.
        to_tense: ``'past'``, ``'present'`` or ``'future'``.
        nlp: Callable that turns ``text`` into a parsed document; defaults
            to the module-level spaCy pipeline.

    Returns:
        The converted text; an empty string when ``text`` yields no tokens.

    Raises:
        KeyError: If ``to_tense`` is not one of the supported names.
    """
    tense = lookup_tense(to_tense)
    doc = nlp(text)
    # An empty document has no doc[0]; return early instead of raising
    # IndexError.
    if len(doc) == 0:
        return ''

    out = [doc[0].text]
    words = []
    for word in doc:
        words.append(word)
        if len(words) == 1:
            # The helpers below look at words[-2], so they need two tokens.
            continue

        if is_verb_for_modification(words):
            out = deal_with_future_tense(words, out, to_tense)
            this_tense, person, number = conjugation_params(words, tense)
            conjugated = conjugate(
                word.text, tense=this_tense, person=person, number=number)
            if conjugated is None:
                # conjugate() found no form; keep the original word rather
                # than putting None into the output.
                out.append(word.text)
            else:
                out.append(preserve_caps(word.text, conjugated))
        else:
            out.append(word.text)

        # Both handlers run for every token after the first, whether or not
        # it was conjugated.
        out = negation_handler(words, out, tense)
        out = future_perfect_and_progressives_handler(words, out)

    return fix_punctuation(' '.join(out))