-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.yml
executable file
·69 lines (67 loc) · 2.62 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
word_embeddings:
  # Pre-trained word embeddings. Two algorithms are supported: word2vec and
  # glove. `default` names which of the entries below are used.
  # Set `default` to an empty string to disable the word embeddings.
  # NOTE(review): `default` is a list here, while the note above speaks of an
  # empty string - confirm the loader accepts both forms.
  default: [glove, word2vec]
  # NOTE: the paths below are absolute and machine-specific; point them at the
  # local copies of the embedding files before running.
  word2vec:
    path: "/home/xxy/Desktop/cnn-EA/data/input/word_embeddings/GoogleNews-vectors-negative300.bin"
    # Vector size; presumably must match the embedding file - TODO confirm.
    dimension: 300
    # The file above is a .bin file, so it is flagged as binary.
    binary: true
  glove:
    path: "/home/xxy/Desktop/cnn-EA/data/input/word_embeddings/glove.6B.300d.txt"
    # Vector size; presumably must match the embedding file - TODO confirm.
    dimension: 300
    # Presumably the number of vectors stored in the GloVe file - TODO confirm.
    length: 400000
datasets:
  # Datasets configured below: mrpolarity, subjectivity, book_review,
  # 20newsgroup and localdata. `default` names the dataset(s) to use.
  # NOTE: the absolute paths below are machine-specific; point them at the
  # local copies of the data files before running.
  default: [20newsgroup, subjectivity]
  mrpolarity:
    positive_data_file:
      path: "/home/xxy/Desktop/cnn-EA/data/rt-polaritydata/rt-polarity.pos"
      info: "Data source for the positive data"
    negative_data_file:
      path: "/home/xxy/Desktop/cnn-EA/data/rt-polaritydata/rt-polarity.neg"
      info: "Data source for the negative data"
  subjectivity:
    subjective:
      path: "/home/xxy/Desktop/cnn-EA/data/rotten_imdb/quote.tok.gt9.5000"
      info: "Data source for the subjective sentences"
    objective:
      path: "/home/xxy/Desktop/cnn-EA/data/rotten_imdb/plot.tok.gt9.5000"
      info: "Data source for the objective sentences"
  book_review:
    # `info` is filled in on both entries so this dataset has the same shape
    # as mrpolarity (it was previously empty / missing here).
    positive_data_file:
      path: "/home/xxy/Desktop/cnn-EA/data/book_review/data_en.pos"
      info: "Data source for the positive data"
    negative_data_file:
      path: "/home/xxy/Desktop/cnn-EA/data/book_review/data_en.neg"
      info: "Data source for the negative data"
  20newsgroup:
    # The dataset includes the following 20 newsgroups:
    # alt.atheism, comp.windows.x, rec.sport.hockey, soc.religion.christian
    # comp.graphics, misc.forsale, sci.crypt, talk.politics.guns
    # comp.os.ms-windows.misc, rec.autos, sci.electronics, talk.politics.mideast
    # comp.sys.ibm.pc.hardware, rec.motorcycles, sci.med, talk.politics.misc
    # comp.sys.mac.hardware, rec.sport.baseball, sci.space, talk.religion.misc
    #
    # Only the newsgroups listed under `categories` are selected.
    categories:
      - alt.atheism
      - comp.graphics
      - sci.med
      - soc.religion.christian
    shuffle: true
    # Fixed seed; presumably makes the shuffle reproducible - TODO confirm.
    random_state: 42
  localdata:
    # Load text files with categories as subfolder names.
    # Individual samples are assumed to be files stored in a two-level
    # folder structure such as the following:
    #   container_folder/
    #     category_1_folder/
    #       file_1.txt file_2.txt ... file_42.txt
    #     category_2_folder/
    #       file_43.txt file_44.txt ...
    #
    # As an example, the SentenceCorpus dataset from
    # https://archive.ics.uci.edu/ml/datasets/Sentence+Classification
    # has been used. The dataset includes the following 3 domains:
    # arxiv, jdm and plos
    container_path: "../../data/input/SentenceCorpus"
    # No category filter is configured (explicit null instead of a bare key).
    # NOTE(review): presumably every subfolder is then loaded - confirm
    # against the loader.
    categories: null
    shuffle: true
    # Fixed seed; presumably makes the shuffle reproducible - TODO confirm.
    random_state: 42