-
Notifications
You must be signed in to change notification settings - Fork 1
/
config.yaml
99 lines (84 loc) · 1.68 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Weights & Biases (WandB) settings.
# The four keys below are nested under `wandb_config` so that the mapping is
# not null and its children do not leak into the top-level namespace.
wandb_config:
  WandB: false  # Whether to run this on the WandB platform.
  project_name: 'Online IPV Classification'
  entity: 'sagun-shakya'
  run_name: 'nepsa_non_concat'
# DATA
# Paths are quoted so they are unambiguously strings.
data_file: './data/raw'
data_path: './data/text_classification'
data_filename: 'ipv'
cache_dir: './cache_dir'
# Loading options.
shuffle: true
verbose: true
kfold: 5  # presumably the number of cross-validation folds — TODO confirm
# EMBEDDINGS
# Location of the embedding file (directory + file name), quoted as strings.
emb_dir: './embeddings'
emb_file: 'model_fasttext_300d__NNC.bin'
# Embedding options.
pretrained: false
embedding_dim: 300
freeze_embedding: true
# OUTPUT_DIR
# Output locations, all relative to the working directory.
output_dir: './saved_model_dir'
results_dir: './results'
log_dir: './logs'
# TRAIN
# Training-loop settings; all three values are plain integers.
batch_size: 8
epochs: 80
# NOTE(review): unit of log_interval (steps vs. batches) is not visible here — confirm.
log_interval: 100
# OPTIM
# Optimizer and learning-rate schedule settings.
#
# learning_rate is written as 0.1 rather than 1e-1: YAML 1.1 loaders such as
# PyYAML only resolve exponent notation as a float when the mantissa contains
# a dot, so a bare 1e-1 would be loaded as the string '1e-1'.
learning_rate: 0.1
weight_decay: 0.00001
momentum: 0.0
# Booleans use the canonical lowercase form, matching the rest of this file.
clip_max_norm_use: false
# YAML null; the Python-style word None would be loaded as the string 'None'.
# NOTE(review): presumably only read when clip_max_norm_use is true — confirm.
clip_max_norm: null
use_lr_decay: true
lr_rate_decay: noam_step
learning_rate_warmup_steps: 100
min_lrate: 0.000005
early_max_patience: 8
# MODELS.
# RNN hyperparameters, nested under `rnn` so that `dropout` and
# `dropout_embed` are scoped to this model.
rnn:
  bidirection: true
  num_layers: 2
  hidden_dim: 256
  dropout_embed: 0.0
  dropout: 0.1
# CNN hyperparameters, nested under `cnn` so that `dropout` and
# `dropout_embed` are scoped to this model.
cnn:
  num_filters: 100
  # Comma-separated string (YAML has always loaded 2,3,4 as a string); quoted
  # to make that explicit. Presumably split by the consumer — TODO confirm.
  filter_sizes: '2,3,4'
  dropout_embed: 0.0
  dropout: 0.5
# BERT settings, nested under `bert`.
bert:
  # Choices: 'google/muril-base-cased', 'bert-base-multilingual-cased'
  model_name: 'google/muril-base-cased'
  unfreeze: false
# Vectorizer.
# Keys are nested under `vectorizer` so the mapping is not null.
vectorizer:
  mode: tfidf
  max_features: 1000
# SVM.
# Keys are nested under `svm` so that `C` is scoped to this model.
svm:
  kernel: rbf
  C: 10.0
# Multinomial-NB.
# `alpha` is nested under `nb` so the mapping is not null.
nb:
  alpha: 1.0
# Random Forest.
# Keys are nested under `random_forest` so that `n_estimators` and
# `random_state` are scoped to this model.
random_forest:
  n_estimators: 100
  criterion: entropy
  min_samples_split: 3
  min_samples_leaf: 5
  random_state: 1000
# Logistic Regression.
# Keys are nested under `logistic_regression` so that `C` is scoped to this
# model.
logistic_regression:
  C: 0.11
  max_iter: 100
# AdaBoost Classifier.
# Keys are nested under `adaboost`; left at the top level, this
# `learning_rate` would be a duplicate key that silently replaces any
# top-level `learning_rate` in most loaders (last value wins).
adaboost:
  n_estimators: 150
  learning_rate: 0.01
  random_state: 100
# EVALUATION
# Metric options, quoted as plain strings.
# NOTE(review): 'ovr' presumably means one-vs-rest — confirm with the consumer.
auc_multiclass: 'ovr'
average: 'weighted'