forked from textstat/textstat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
421 lines (284 loc) · 11.4 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
"""Test suite for textstat
"""
import textstat
# Short one-sentence English sample (used by test_difficult_words_list,
# test_text_standard and test_changing_lang_clears_cache).
short_test = "Cool dogs wear da sunglasses."
# Multi-paragraph English sample; paragraphs are separated by "\n".
# The exact characters matter: the numeric expectations in the tests that
# take `long_test` are compared with `==` against values for this text.
long_test = (
"Playing ... games has always been thought to be "
"important to the development of well-balanced and "
"creative children; however, what part, if any, "
"they should play in the lives of adults has never "
"been researched that deeply. I believe that "
"playing games is every bit as important for adults "
"as for children. Not only is taking time out to "
"play games with our children and other adults "
"valuable to building interpersonal relationships "
"but is also a wonderful way to release built up "
"tension.\n"
"There's nothing my husband enjoys more after a "
"hard day of work than to come home and play a game "
"of Chess with someone. This enables him to unwind "
"from the day's activities and to discuss the highs "
"and lows of the day in a non-threatening, kick back "
"environment. One of my most memorable wedding "
"gifts, a Backgammon set, was received by a close "
"friend. I asked him why in the world he had given "
"us such a gift. He replied that he felt that an "
"important aspect of marriage was for a couple to "
"never quit playing games together. Over the years, "
"as I have come to purchase and play, with other "
"couples & coworkers, many games like: Monopoly, "
"Chutes & Ladders, Mastermind, Dweebs, Geeks, & "
"Weirdos, etc. I can reflect on the integral part "
"they have played in our weekends and our "
"\"shut-off the T.V. and do something more "
"stimulating\" weeks. They have enriched my life and "
"made it more interesting. Sadly, many adults "
"forget that games even exist and have put them "
"away in the cupboards, forgotten until the "
"grandchildren come over.\n"
"All too often, adults get so caught up in working "
"to pay the bills and keeping up with the "
"\"Joneses'\" that they neglect to harness the fun "
"in life; the fun that can be the reward of "
"enjoying a relaxing game with another person. It "
"has been said that \"man is that he might have "
"joy\" but all too often we skate through life "
"without much of it. Playing games allows us to: "
"relax, learn something new and stimulating, "
"interact with people on a different more "
"comfortable level, and to enjoy non-threatening "
"competition. For these reasons, adults should "
"place a higher priority on playing games in their "
"lives"
)
# Simple English passage made of short sentences; the input for
# test_spache_readability.
easy_text = (
"Anna and her family love doing puzzles. Anna is best at "
"little puzzles. Anna and her brother work on medium size "
"puzzles together. Anna's Brother likes puzzles with cars "
"in them. When the whole family does a puzzle, they do really "
"big puzzles. It can take them a week to finish a really "
"big puzzle. Last year they did a puzzle with 500 pieces! "
"Anna tries to finish one small puzzle a day by her. "
"Her puzzles have about 50 pieces. They all glue their "
"favorite puzzles together and frame them. The puzzles look "
"so nice on the wall."
)
# Spanish sample used by test_fernandez_huerta, test_szigriszt_pazos and
# test_crawford.
# NOTE(review): most fragments end without a trailing space, so implicit
# string concatenation fuses words across fragment boundaries (for example
# "fusilamiento,el" and "barro ycañabrava"). Presumably the expected scores
# in the Spanish tests were computed from this exact text -- confirm and
# recompute them before adding the missing spaces.
long_spanish_text = (
"Muchos años después, frente al pelotón de fusilamiento,"
"el coronel Aureliano Buendía había de recordar aquella "
"tarde remota en que su padre lo llevó a conocer el hielo."
"Macondo era entonces una aldea de veinte casas de barro y"
"cañabrava construidas a la orilla de un río de aguas"
"diáfanas que se precipitaban por un lecho de piedras pulidas,"
"blancas y enormes como huevos prehistóricos. El mundo era tan"
"reciente, que muchas cosas carecían de nombre, y para mencionarlas"
"había que señalarlas con el dedo. Todos los años, por el mes de marzo,"
"una familia de gitanos desarrapados plantaba su carpa cerca"
"de la aldea, y con un grande alboroto de pitos y timbales daban a"
"conocer los nuevos inventos. Primero llevaron el imán."
"Un gitano corpulento, de barba montaraz y manos de gorrión, que se"
"presentó con el nombre de Melquíades, hizo una truculenta demostración"
"pública de lo que él mismo llamaba la octava maravilla de"
"los sabios alquimistas de Macedonia."
)
# Short Spanish sentence; the input for test_gutierrez_polini.
easy_spanish_text = "Hoy es un lindo día"
# Single word that test_is_difficult_word expects to be classified difficult.
difficult_word = "Regardless"
# Single word that test_is_easy_word expects to be classified easy.
easy_word = "Dog"
def test_char_count():
    """Check char_count on long_test, by default and with ignore_spaces=False."""
    textstat.set_lang("en_US")

    default_total = textstat.char_count(long_test)
    spaced_total = textstat.char_count(long_test, ignore_spaces=False)

    assert default_total == 1750
    assert spaced_total == 2123
def test_letter_count():
    """Check letter_count on long_test, by default and with ignore_spaces=False."""
    textstat.set_lang("en_US")

    default_total = textstat.letter_count(long_test)
    spaced_total = textstat.letter_count(long_test, ignore_spaces=False)

    assert default_total == 1688
    assert spaced_total == 2061
def test_lexicon_count():
    """Check lexicon_count on long_test, by default and with removepunct=False."""
    textstat.set_lang("en_US")

    default_total = textstat.lexicon_count(long_test)
    keep_punct_total = textstat.lexicon_count(long_test, removepunct=False)

    assert default_total == 372
    assert keep_punct_total == 376
def test_syllable_count():
    """Check that syllable_count returns 521 for long_test under en_US."""
    textstat.set_lang("en_US")
    assert textstat.syllable_count(long_test) == 521
def test_sentence_count():
    """Check that sentence_count returns 16 for long_test under en_US."""
    textstat.set_lang("en_US")
    assert textstat.sentence_count(long_test) == 16
def test_avg_sentence_length():
    """Check that avg_sentence_length returns 23.3 for long_test under en_US."""
    textstat.set_lang("en_US")
    assert textstat.avg_sentence_length(long_test) == 23.3
def test_avg_syllables_per_word():
    """Check that avg_syllables_per_word returns 1.4 for long_test (en_US)."""
    textstat.set_lang("en_US")
    assert textstat.avg_syllables_per_word(long_test) == 1.4
def test_avg_letter_per_word():
    """Check that avg_letter_per_word returns 4.54 for long_test (en_US)."""
    textstat.set_lang("en_US")
    assert textstat.avg_letter_per_word(long_test) == 4.54
def test_avg_sentence_per_word():
    """Check that avg_sentence_per_word returns 0.04 for long_test (en_US)."""
    textstat.set_lang("en_US")
    assert textstat.avg_sentence_per_word(long_test) == 0.04
def test_flesch_reading_ease():
    """Check flesch_reading_ease on long_test under several language settings.

    The same English text is scored after each set_lang call; the expected
    value differs per language code.
    """
    # (language code, expected score), checked in this order.
    expected_by_lang = (
        ("en_US", 64.75),
        ("de_DE", 63.1),
        ("es_ES", 84.37),
        ("fr_FR", 80.31),
        ("it_IT", 89.27),
        ("nl_NL", 61.97),
        ("ru_RU", 116.45),
    )
    for lang_code, expected in expected_by_lang:
        textstat.set_lang(lang_code)
        assert textstat.flesch_reading_ease(long_test) == expected
def test_flesch_kincaid_grade():
    """Check that flesch_kincaid_grade returns 10.0 for long_test (en_US)."""
    textstat.set_lang("en_US")
    assert textstat.flesch_kincaid_grade(long_test) == 10.0
def test_polysyllabcount():
    """Check that polysyllabcount returns 32 for long_test under en_US."""
    textstat.set_lang("en_US")
    assert textstat.polysyllabcount(long_test) == 32
def test_smog_index():
    """Check that smog_index returns 11.2 for long_test under en_US."""
    textstat.set_lang("en_US")
    assert textstat.smog_index(long_test) == 11.2
def test_coleman_liau_index():
    """Check that coleman_liau_index returns 9.35 for long_test under en_US."""
    textstat.set_lang("en_US")
    assert textstat.coleman_liau_index(long_test) == 9.35
def test_automated_readability_index():
    """Check that automated_readability_index returns 12.3 for long_test."""
    textstat.set_lang("en_US")
    assert textstat.automated_readability_index(long_test) == 12.3
def test_linsear_write_formula():
    """Check that linsear_write_formula returns 14.5 for long_test (en_US)."""
    textstat.set_lang("en_US")
    assert textstat.linsear_write_formula(long_test) == 14.5
def test_difficult_words():
    """Check that difficult_words returns 49 for long_test under en_US."""
    textstat.set_lang("en_US")
    assert textstat.difficult_words(long_test) == 49
def test_difficult_words_list():
    """Check that difficult_words_list on short_test is exactly ["sunglasses"]."""
    textstat.set_lang("en_US")
    assert textstat.difficult_words_list(short_test) == ["sunglasses"]
def test_is_difficult_word():
    """Check that is_difficult_word returns True for the difficult_word fixture."""
    textstat.set_lang("en_US")
    assert textstat.is_difficult_word(difficult_word) is True
def test_is_easy_word():
    """Check that is_easy_word returns True for the easy_word fixture."""
    textstat.set_lang("en_US")
    assert textstat.is_easy_word(easy_word) is True
def test_dale_chall_readability_score():
    """Check that dale_chall_readability_score returns 6.87 for long_test."""
    textstat.set_lang("en_US")
    assert textstat.dale_chall_readability_score(long_test) == 6.87
def test_gunning_fog():
    """Check gunning_fog on long_test under en_US and then under pl_PL."""
    textstat.set_lang("en_US")
    english_score = textstat.gunning_fog(long_test)
    assert english_score == 11.26

    # FOG-PL: same text, scored with the language set to pl_PL.
    textstat.set_lang("pl_PL")
    polish_score = textstat.gunning_fog(long_test)
    assert polish_score == 10.40
def test_lix():
    """Check that lix returns 45.11 for long_test under en_US."""
    textstat.set_lang("en_US")
    assert textstat.lix(long_test) == 45.11
def test_rix():
    """Check that rix returns 5.13 for long_test under en_US."""
    textstat.set_lang("en_US")
    assert textstat.rix(long_test) == 5.13
def test_text_standard():
    """Check the text_standard label for long_test and then for short_test."""
    textstat.set_lang("en_US")

    assert textstat.text_standard(long_test) == "9th and 10th grade"
    assert textstat.text_standard(short_test) == "2nd and 3rd grade"
def test_reading_time():
    """Check that reading_time returns 25.68 for long_test under en_US."""
    textstat.set_lang("en_US")
    assert textstat.reading_time(long_test) == 25.68
def test_lru_caching():
    """Check that sentence_count results are cached across callers.

    Uses the cache statistics exposed by cache_info() on sentence_count
    while driving it indirectly through avg_sentence_length.
    """
    textstat.set_lang("en_US")

    # Start with empty caches on both functions involved.
    for cached_func in (textstat.sentence_count, textstat.avg_sentence_length):
        cached_func.cache_clear()

    # The first avg_sentence_length call is expected to reach sentence_count
    # exactly once, which registers as a single miss.
    textstat.avg_sentence_length(long_test)
    assert textstat.sentence_count.cache_info().misses == 1

    # Clear only avg_sentence_length's own cache, so that repeating the call
    # goes back to sentence_count.
    textstat.avg_sentence_length.cache_clear()
    textstat.avg_sentence_length(long_test)

    # This time sentence_count should answer from its cache: one hit.
    assert textstat.sentence_count.cache_info().hits == 1
def test_changing_lang_clears_cache():
    """Check that set_lang resets the flesch_reading_ease cache.

    After a language change, repeating an identical call must be counted
    as a fresh miss and never as a cache hit.
    """
    textstat.set_lang("en_US")

    # Start from an empty cache, then populate it with a single call.
    textstat.flesch_reading_ease.cache_clear()
    textstat.flesch_reading_ease(short_test)

    # Check the cache has only been missed once
    assert textstat.flesch_reading_ease.cache_info().misses == 1

    # Change the language and repeat the identical call.
    textstat.set_lang("fr")
    textstat.flesch_reading_ease(short_test)

    stats = textstat.flesch_reading_ease.cache_info()
    # `misses == 1` alone holds whether or not the cache was cleared: a stale
    # cache would serve the repeated call as a hit and leave misses at 1.
    # `hits == 0` is the check that shows the repeated call was recomputed
    # instead of being answered from an entry cached under the old language.
    assert stats.misses == 1
    assert stats.hits == 0
def test_unicode_support():
    """Smoke test: text_standard is called on non-ASCII text, no assertions.

    The same text is passed once written with escape sequences and once
    written literally.
    """
    textstat.set_lang("en_US")

    samples = (
        "\u3042\u308a\u304c\u3068\u3046\u3054\u3056\u3044\u307e\u3059",
        "ありがとうございます",
    )
    for sample in samples:
        textstat.text_standard(sample)
def test_spache_readability():
    """Check that spache_readability(easy_text, False) equals 2 under en_US."""
    textstat.set_lang("en_US")
    assert textstat.spache_readability(easy_text, False) == 2
def test_dale_chall_readability_score_v2():
    """Check that dale_chall_readability_score_v2 returns 6.87 for long_test."""
    textstat.set_lang("en_US")
    assert textstat.dale_chall_readability_score_v2(long_test) == 6.87
def test_fernandez_huerta():
    """Check that fernandez_huerta returns 43.1 for long_spanish_text ("es")."""
    textstat.set_lang("es")
    assert textstat.fernandez_huerta(long_spanish_text) == 43.1
def test_szigriszt_pazos():
    """Check that szigriszt_pazos returns 41.45 for long_spanish_text ("es")."""
    textstat.set_lang("es")
    assert textstat.szigriszt_pazos(long_spanish_text) == 41.45
def test_gutierrez_polini():
    """Check that gutierrez_polini returns 64.35 for easy_spanish_text ("es")."""
    textstat.set_lang("es")
    assert textstat.gutierrez_polini(easy_spanish_text) == 64.35
def test_crawford():
    """Check that crawford returns 6.1 for long_spanish_text ("es")."""
    textstat.set_lang("es")
    assert textstat.crawford(long_spanish_text) == 6.1
def test_default_lang_configs():
    """Check that en_GB yields the same flesch_reading_ease as en_US (64.75)."""
    # Config from default en_US should be used
    textstat.set_lang("en_GB")
    assert textstat.flesch_reading_ease(long_test) == 64.75