forked from srvCodes/Gender-Classification-of-Blog-Author
-
Notifications
You must be signed in to change notification settings - Fork 0
/
genderDifferencesFeatures.py
227 lines (178 loc) · 5.77 KB
/
genderDifferencesFeatures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
import nltk
# One lexicon per feature, listed in the exact order in which
# genderDifferencesFeatures returns the corresponding frequencies.
# The words are kept verbatim (including their original spellings) so that the
# set of matching tokens is unchanged. Every entry is a plain lowercase word,
# so splitting on whitespace is sufficient to tokenize the lexicons.
_GENDER_LEXICONS = tuple(
    frozenset(words.split())
    for words in (
        # groom
        "cleaner clean washer wash perfume shave shaved shampoo cleansing soap shower "
        "toothpaste cream facewash moisturizer nail lipstick makeup",
        # sleep
        "tiresome sleeping dazed sleeps insomnia napping nap siesta nightmare dream dreams bed "
        "pillow",
        # i (first-person references)
        "me myself mine my i",
        # eating
        "fat dinner tasting drunken fed breakfast cookie eat tasted skinny cookbook",
        # disgust
        "sickening revolting horror sick offensive obscene nauseous wicked",
        # fear
        "suspense creep dismay fright terrible terror afraid scare alarmed panicked panic",
        # sewing
        "mending stiching knitting knitter knit mend tailor suture embroidery seamstress needle",
        # purpleness
        "purple mauve magenta lilac lavender orchid violet mulberry purply",
        # sweetness
        "syrup honey sugar bakery nectar sweet frost sugary dessert glaze nut",
        # brownness
        "coffee biscuit biscuits walnut rust berry brown brunette cinnamon mahogany caramel chocolate",
        # chastity
        "shame elegant decent virtue virgin delicate faithful faithfulness platonic purity spotless",
        # relig
        "bless satanism angel communion spirit lord immortal theology prayers",
        # metaph
        "suicide meditation cemetary temples drained immortalized mercy mourning",
        # job (returned before tv, matching the original return order)
        "credentials department financials desktop manage employee work career",
        # tv
        "show ad comedies comedy tv actors drama soaps video theatres commercials commercial film films",
        # oponent
        "finalist rival enemy competitor foe opposite defendant player dissident",
        # theology
        "creed scholastic religious secularism theology religion divine faith dogma",
        # uniformity
        "evenness constancy constant persistence accordance steadiness steady firm firmness stable stability",
        # engineering
        "automotive process industrial manufacture measure construction technician",
        # influence
        "power force weak weakness inflexible ineffective charisma charm wimpy",
    )
)


def genderDifferencesFeatures(text):
    """Return the relative frequency of 20 word categories in *text*.

    The text is lower-cased and tokenized with ``nltk.word_tokenize``; every
    token that belongs to a category's word list counts as one hit for that
    category (a token may count towards several categories). Each hit count
    is divided by the number of whitespace-separated words in the original
    text, so punctuation tokens never inflate the denominator.

    Args:
        text: The document to analyse, as a string.

    Returns:
        A 20-tuple of floats in this order: groom, sleep, i, eating, disgust,
        fear, sewing, purpleness, sweetness, brownness, chastity, relig,
        metaph, job, tv, oponent, theology, uniformity, engineering,
        influence. All values are 0.0 when the text contains no words.

    Note:
        The first-person ("i") feature is normalized by the word count like
        every other feature; it was previously returned as a raw count.
    """
    # Denominator: whitespace-delimited words of the text as given.
    totalWords = len(text.split())
    if totalWords == 0:
        # Nothing to count and nothing to divide by.
        return (0.0,) * len(_GENDER_LEXICONS)

    tokens = nltk.word_tokenize(text.lower())
    denominator = float(totalWords)
    return tuple(
        sum(1 for token in tokens if token in lexicon) / denominator
        for lexicon in _GENDER_LEXICONS
    )
# Sample document used as a quick manual check of the feature extractor.
# It deliberately mixes upper- and lower-case words and spans two lines.
text = (
    "This is hopeless countless priceless and I am indecisive. so sorry sorry I am feeling terrible\n"
    "that I am unable to fulfil a WONderful TV mathematical brutal vicious terrific problem."
)
# Show the resulting feature tuple on standard output.
print(genderDifferencesFeatures(text))