-
Notifications
You must be signed in to change notification settings - Fork 0
/
texts_details.py
executable file
·95 lines (94 loc) · 3.32 KB
/
texts_details.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Catalogue of the texts that should be processed.
#
# Every record is a dict with four string fields, always in this order:
#   "input_filename"     - name of the source text file (all end in ".txt")
#   "full_name"          - human-readable title, mostly "Author - Title"
#   "output_prefix_name" - prefix string associated with the text
#                          (presumably used to name generated outputs;
#                          verify against the consuming code)
#   "language"           - language code of the text, "sk" or "en"
#
# The records are written as positional tuples to keep one text per row;
# the comprehension expands each row into the dict shape described above.
input_texts_details = [
    {
        "input_filename": input_filename,
        "full_name": full_name,
        "output_prefix_name": output_prefix_name,
        "language": language,
    }
    for input_filename, full_name, output_prefix_name, language in (
        (
            "ludove_rozpravky.txt",
            "Pavol Dobsinsky - Ludove rozpravky",
            "dobsinsky_ludove_rozpravky",
            "sk",
        ),
        (
            "krasa_obycajnosti.txt",
            "Palea Ulla - Krasa Obycajnosti",
            "ulla_krasa_obycajnosti",
            "sk",
        ),
        (
            "essentia_mortem.txt",
            "Peter Stec - Esentia Mortem",
            "stec_essentia_mortem",
            "sk",
        ),
        (
            "uvod_do_klinickej_psychologie.txt",
            "Monika Hricova - Uvod do klinickej psychologie",
            "hricova_uvod_do_klinickej_psychologie",
            "sk",
        ),
        (
            "deti_vyhorlatu.txt",
            "Brandon McYntire - Deti Vyhorlatu",
            "mcyntire_deti_vyhorlatu",
            "sk",
        ),
        (
            "vedecka_komunikacia_manazment_vyskumnych_udajov.txt",
            "Vedecka komunikacia a manazment vyskumnych udajov",
            "vedecka_komunikacia_manazment_vyskumnych_udajov",
            "sk",
        ),
        (
            "never_the_twain_shall_meet.txt",
            "Peter B. Kyne - Never the twain shall meet",
            "never_the_twain_shall_meet",
            "en",
        ),
        (
            "on_the_origin_of_species.txt",
            "Charles Darwin - On the origin of species",
            "on_the_origin_of_species",
            "en",
        ),
        (
            "together.txt",
            "Norman Douglas - Together",
            "together",
            "en",
        ),
        (
            "oliver_october.txt",
            "George Barr McCutcheon - Oliver October",
            "oliver_october",
            "en",
        ),
        (
            "corpus_slk.txt",
            "Korpus textov Ministerstva kultúry Slovenskej republiky",
            "corpus_slk",
            "sk",
        ),
        (
            "collection_historicky_casopis.txt",
            "Kolekcia volnepristupnych clankov z castopisu Historicky casopis",
            "collection_historicky_casopis",
            "sk",
        ),
        (
            "collection_UPJS_free_books.txt",
            "Kolekcia volnepristupnych knih z kniznice UPJS.",
            "collection_UPJS_free_books",
            "sk",
        ),
        # Deliberately disabled: this book is not freely available, so even
        # though the author owns a copy and analysed it, no part of it may
        # be pushed to GitHub.
        # (
        #     "obycajne_zazraky.txt",
        #     "Samuel Kovacik - Obycajne Zazraky.",
        #     "obycajne_zazraky",
        #     "sk",
        # ),
        (
            "dom_v_strani.txt",
            "M. Kukucin - Dom v strani",
            "dom_v_strani",
            "sk",
        ),
    )
]