Skip to content

Commit

Permalink
Merge branch 'perseus' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
jesus-villalba authored Apr 27, 2021
2 parents 9a3bc05 + 9346f04 commit b8a0948
Show file tree
Hide file tree
Showing 51 changed files with 3,010 additions and 25 deletions.
20 changes: 20 additions & 0 deletions egs/sre20-cts/v1/conf/linfbank64_stmn_8k.pyconf
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
--feats-audio-feat
logfb
--feats-sample-frequency
8000
--feats-frame-length
25
--feats-fb-type
linear
--feats-low-freq
20
--feats-high-freq
3700
--feats-num-filters
64
--feats-snip-edges
false
--feats-use-energy
false
--mvn-context
150
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,18 @@ nnet=$nnet_dir/model_ep0060.pth
ft_batch_size_1gpu=4
ft_eff_batch_size=128 # effective batch size
ft_min_chunk=10
ft_max_chunk=60
ft_max_chunk=10
ft_ipe=1
ft_lr=0.05
ft_nnet_num_epochs=21
ft_nnet_num_epochs=30
ft_margin=0.3
ft_margin_warmup=3

ft_opt_opt="--opt-optimizer sgd --opt-lr $ft_lr --opt-momentum 0.9 --opt-weight-decay 1e-5 --use-amp --var-batch-size"
ft_lrs_opt="--lrsch-lrsch-type cos_lr --lrsch-t 2500 --lrsch-t-mul 2 --lrsch-warm-restarts --lrsch-gamma 0.75 --lrsch-min-lr 1e-4 --lrsch-warmup-steps 100 --lrsch-update-lr-on-opt-step"
ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v2
ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name
ft_nnet=$ft_nnet_dir/model_ep0020.pth
ft_nnet=$ft_nnet_dir/model_ep0030.pth


# xvector last-layer finetuning alllangs
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Res2Net50 x-vector with mixed precision training
#
# Experiment configuration written as plain shell variable assignments.
# Model names and checkpoint paths are derived from the hyper-parameters, so
# changing a value that appears in a *_nnet_name also changes the experiment
# directory it points to.

# acoustic features
feat_config=conf/fbank64_mvn_8k.pyconf
feat_type=fbank64_stmn


# x-vector training
nnet_data=alllangs_nocv_nocnceleb
nnet_num_augs=4
aug_opt="--train-aug-cfg conf/reverb_noise_aug.yml --val-aug-cfg conf/reverb_noise_aug.yml"

batch_size_1gpu=8
eff_batch_size=512 # effective batch size
ipe=$nnet_num_augs
min_chunk=4
max_chunk=4
lr=0.01

nnet_type=res2net50
dropout=0
embed_dim=256
width_factor=1.625
scale=4
# Tag embedded in nnet_name. It is a literal, not derived from width_factor or
# scale -- presumably it has to be kept in sync with them by hand (confirm).
ws_tag=w26s4

s=30
margin_warmup=20
margin=0.3

# NOTE(review): --no-maxpool appears twice in this option string; left as is.
nnet_opt="--resnet-type $nnet_type --in-feats 64 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --norm-layer instance-norm-affine --head-norm-layer layer-norm --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale"

# NOTE(review): --use-amp is not part of opt_opt (it only appears in the
# trailing comment) although the header and nnet_name say "amp" -- confirm.
opt_opt="--opt-optimizer adam --opt-lr $lr --opt-beta1 0.9 --opt-beta2 0.95 --opt-weight-decay 1e-5 --opt-amsgrad" # --use-amp"
lrs_opt="--lrsch-lrsch-type exp_lr --lrsch-decay-rate 0.5 --lrsch-decay-steps 10000 --lrsch-hold-steps 40000 --lrsch-min-lr 1e-5 --lrsch-warmup-steps 1000 --lrsch-update-lr-on-opt-step"

nnet_name=${feat_type}_${nnet_type}${ws_tag}_eina_hln_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1.$nnet_data
nnet_num_epochs=50
nnet_dir=exp/xvector_nnets/$nnet_name
nnet=$nnet_dir/model_ep0050.pth


# xvector full net finetuning with out-of-domain
ft_batch_size_1gpu=4
ft_eff_batch_size=128 # effective batch size
ft_min_chunk=10
ft_max_chunk=20
ft_ipe=1
ft_lr=0.05
ft_nnet_num_epochs=45
ft_margin=0.3
ft_margin_warmup=3

ft_opt_opt="--opt-optimizer sgd --opt-lr $ft_lr --opt-momentum 0.9 --opt-weight-decay 1e-5 --use-amp --var-batch-size"
ft_lrs_opt="--lrsch-lrsch-type cos_lr --lrsch-t 2500 --lrsch-t-mul 2 --lrsch-warm-restarts --lrsch-gamma 0.75 --lrsch-min-lr 1e-4 --lrsch-warmup-steps 100 --lrsch-update-lr-on-opt-step"
ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v2
ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name
# The checkpoint file name (ep0014) is not tied to ft_nnet_num_epochs (45).
ft_nnet=$ft_nnet_dir/model_ep0014.pth


# xvector last-layer finetuning realtel
reg_layers_classif=0
reg_layers_enc="0 1 2 3 4"
nnet_adapt_data=realtel
ft2_batch_size_1gpu=16
ft2_eff_batch_size=128 # effective batch size
ft2_ipe=1
ft2_lr=0.01
ft2_nnet_num_epochs=35
ft2_margin_warmup=3
ft2_reg_weight_embed=0.1
ft2_min_chunk=10
ft2_max_chunk=60

ft2_opt_opt="--opt-optimizer sgd --opt-lr $ft2_lr --opt-momentum 0.9 --opt-weight-decay 1e-5 --use-amp --var-batch-size"
ft2_lrs_opt="--lrsch-lrsch-type cos_lr --lrsch-t 2500 --lrsch-t-mul 2 --lrsch-warm-restarts --lrsch-gamma 0.75 --lrsch-min-lr 1e-4 --lrsch-warmup-steps 100 --lrsch-update-lr-on-opt-step"
ft2_nnet_name=${ft_nnet_name}.ft_eaffine_rege_w${ft2_reg_weight_embed}_${ft2_min_chunk}_${ft2_max_chunk}_sgdcos_lr${ft2_lr}_b${ft2_eff_batch_size}_amp.v2.$nnet_adapt_data
ft2_nnet_dir=exp/xvector_nnets/$ft2_nnet_name
ft2_nnet=$ft2_nnet_dir/model_ep0015.pth


# xvector full nnet finetuning
ft3_batch_size_1gpu=2
ft3_eff_batch_size=128 # effective batch size
ft3_ipe=1
ft3_lr=0.01
ft3_nnet_num_epochs=10
ft3_margin_warmup=20
ft3_reg_weight_embed=0.1
ft3_reg_weight_enc=0.1
ft3_min_chunk=10
ft3_max_chunk=60

ft3_opt_opt="--opt-optimizer sgd --opt-lr $ft3_lr --opt-momentum 0.9 --opt-weight-decay 1e-5 --use-amp --var-batch-size"
ft3_lrs_opt="--lrsch-lrsch-type cos_lr --lrsch-t 2500 --lrsch-t-mul 2 --lrsch-warm-restarts --lrsch-gamma 0.75 --lrsch-min-lr 1e-4 --lrsch-warmup-steps 100 --lrsch-update-lr-on-opt-step"
# Name variant that also encodes the regularization weights. Not in effect: the
# plain name assigned below is the one that is used.
# ft3_nnet_name=${ft2_nnet_name}.ft_reg_wenc${ft3_reg_weight_enc}_we${ft3_reg_weight_embed}_${ft3_min_chunk}_${ft3_max_chunk}_sgdcos_lr${ft3_lr}_b${ft3_eff_batch_size}_amp.v2
ft3_nnet_name=${ft2_nnet_name}.ft_${ft3_min_chunk}_${ft3_max_chunk}_sgdcos_lr${ft3_lr}_b${ft3_eff_batch_size}_amp.v2
ft3_nnet_dir=exp/xvector_nnets/$ft3_nnet_name
ft3_nnet=$ft3_nnet_dir/model_ep0010.pth


# back-end
plda_aug_config=conf/noise_aug.yml
plda_num_augs=0
# The remaining back-end settings are disabled (kept commented out):
# if [ $plda_num_augs -eq 0 ]; then
# plda_data=sre_tel
# plda_adapt_data=sre18_cmn2_adapt_lab
# else
# plda_data=sre_tel_augx${plda_num_augs}
# plda_adapt_data=sre18_cmn2_adapt_lab_augx${plda_num_augs}
# fi
# plda_type=splda
# lda_dim=200
# plda_y_dim=150
# plda_z_dim=200

Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# ResNet34 x-vector with mixed precision training
# (pooling configured as --pool-type scaled-dot-prod-att-v1, see nnet_opt)
#
# Experiment configuration written as plain shell variable assignments.
# Model names and checkpoint paths are derived from the hyper-parameters, so
# changing a value that appears in a *_nnet_name also changes the experiment
# directory it points to.

# acoustic features
feat_config=conf/fbank64_mvn_8k.pyconf
feat_type=fbank64_stmn


# x-vector training
nnet_data=alllangs_nocv_nocnceleb
nnet_num_augs=4
aug_opt="--train-aug-cfg conf/reverb_noise_aug.yml --val-aug-cfg conf/reverb_noise_aug.yml"

batch_size_1gpu=32
eff_batch_size=512 # effective batch size
ipe=$nnet_num_augs
min_chunk=4
max_chunk=4
lr=0.01

nnet_type=resnet34
dropout=0
embed_dim=256

s=30
margin_warmup=20
margin=0.3

nnet_opt="--resnet-type $nnet_type --in-feats 64 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --norm-layer instance-norm-affine --head-norm-layer layer-norm --pool-type scaled-dot-prod-att-v1 --pool-num-heads 64 --pool-d-k 128 --pool-d-v 128 --pool-bin-attn"

# NOTE(review): --use-amp is not part of opt_opt (it only appears in the
# trailing comment) although the header and nnet_name say "amp" -- confirm.
opt_opt="--opt-optimizer adam --opt-lr $lr --opt-beta1 0.9 --opt-beta2 0.95 --opt-weight-decay 1e-5 --opt-amsgrad" # --use-amp"
lrs_opt="--lrsch-lrsch-type exp_lr --lrsch-decay-rate 0.5 --lrsch-decay-steps 10000 --lrsch-hold-steps 40000 --lrsch-min-lr 1e-5 --lrsch-warmup-steps 1000 --lrsch-update-lr-on-opt-step"

# The pooling tag (bmhah64d8192) is a literal; it is not derived from the
# --pool-* values in nnet_opt.
nnet_name=${feat_type}_${nnet_type}_eina_hln_bmhah64d8192_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1.$nnet_data
nnet_num_epochs=50
nnet_dir=exp/xvector_nnets/$nnet_name
nnet=$nnet_dir/model_ep0050.pth


# xvector full net finetuning with out-of-domain
ft_batch_size_1gpu=4
ft_eff_batch_size=128 # effective batch size
ft_min_chunk=10
ft_max_chunk=60
ft_ipe=1
ft_lr=0.05
ft_nnet_num_epochs=21
ft_margin=0.3
ft_margin_warmup=3

ft_opt_opt="--opt-optimizer sgd --opt-lr $ft_lr --opt-momentum 0.9 --opt-weight-decay 1e-5 --use-amp --var-batch-size"
ft_lrs_opt="--lrsch-lrsch-type cos_lr --lrsch-t 2500 --lrsch-t-mul 2 --lrsch-warm-restarts --lrsch-gamma 0.75 --lrsch-min-lr 1e-4 --lrsch-warmup-steps 100 --lrsch-update-lr-on-opt-step"
ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v2
ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name
ft_nnet=$ft_nnet_dir/model_ep0021.pth


# xvector last-layer finetuning realtel
reg_layers_classif=0
reg_layers_enc="0 1 2 3 4"
nnet_adapt_data=realtel
ft2_batch_size_1gpu=16
ft2_eff_batch_size=128 # effective batch size
ft2_ipe=1
ft2_lr=0.01
ft2_nnet_num_epochs=35
ft2_margin_warmup=3
ft2_reg_weight_embed=0.1
ft2_min_chunk=10
ft2_max_chunk=60

ft2_opt_opt="--opt-optimizer sgd --opt-lr $ft2_lr --opt-momentum 0.9 --opt-weight-decay 1e-5 --use-amp --var-batch-size"
ft2_lrs_opt="--lrsch-lrsch-type cos_lr --lrsch-t 2500 --lrsch-t-mul 2 --lrsch-warm-restarts --lrsch-gamma 0.75 --lrsch-min-lr 1e-4 --lrsch-warmup-steps 100 --lrsch-update-lr-on-opt-step"
ft2_nnet_name=${ft_nnet_name}.ft_eaffine_rege_w${ft2_reg_weight_embed}_${ft2_min_chunk}_${ft2_max_chunk}_sgdcos_lr${ft2_lr}_b${ft2_eff_batch_size}_amp.v2.$nnet_adapt_data
ft2_nnet_dir=exp/xvector_nnets/$ft2_nnet_name
# The checkpoint file name (ep0015) is not tied to ft2_nnet_num_epochs (35).
ft2_nnet=$ft2_nnet_dir/model_ep0015.pth


# xvector full nnet finetuning
ft3_batch_size_1gpu=2
ft3_eff_batch_size=128 # effective batch size
ft3_ipe=1
ft3_lr=0.01
ft3_nnet_num_epochs=10
ft3_margin_warmup=20
ft3_reg_weight_embed=0.1
ft3_reg_weight_enc=0.1
ft3_min_chunk=10
ft3_max_chunk=60

ft3_opt_opt="--opt-optimizer sgd --opt-lr $ft3_lr --opt-momentum 0.9 --opt-weight-decay 1e-5 --use-amp --var-batch-size"
ft3_lrs_opt="--lrsch-lrsch-type cos_lr --lrsch-t 2500 --lrsch-t-mul 2 --lrsch-warm-restarts --lrsch-gamma 0.75 --lrsch-min-lr 1e-4 --lrsch-warmup-steps 100 --lrsch-update-lr-on-opt-step"
# Name variant that also encodes the regularization weights. Not in effect: the
# plain name assigned below is the one that is used.
# ft3_nnet_name=${ft2_nnet_name}.ft_reg_wenc${ft3_reg_weight_enc}_we${ft3_reg_weight_embed}_${ft3_min_chunk}_${ft3_max_chunk}_sgdcos_lr${ft3_lr}_b${ft3_eff_batch_size}_amp.v2
ft3_nnet_name=${ft2_nnet_name}.ft_${ft3_min_chunk}_${ft3_max_chunk}_sgdcos_lr${ft3_lr}_b${ft3_eff_batch_size}_amp.v2
ft3_nnet_dir=exp/xvector_nnets/$ft3_nnet_name
ft3_nnet=$ft3_nnet_dir/model_ep0010.pth


# back-end
plda_aug_config=conf/noise_aug.yml
plda_num_augs=0
# The remaining back-end settings are disabled (kept commented out):
# if [ $plda_num_augs -eq 0 ]; then
# plda_data=sre_tel
# plda_adapt_data=sre18_cmn2_adapt_lab
# else
# plda_data=sre_tel_augx${plda_num_augs}
# plda_adapt_data=sre18_cmn2_adapt_lab_augx${plda_num_augs}
# fi
# plda_type=splda
# lda_dim=200
# plda_y_dim=150
# plda_z_dim=200

Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# ResNet34 x-vector with mixed precision training
# (pooling configured as --pool-type ch-wise-att-mean-stddev, see nnet_opt)
#
# Experiment configuration written as plain shell variable assignments.
# Model names and checkpoint paths are derived from the hyper-parameters, so
# changing a value that appears in a *_nnet_name also changes the experiment
# directory it points to.

# acoustic features
feat_config=conf/fbank64_mvn_8k.pyconf
feat_type=fbank64_stmn


# x-vector training
nnet_data=alllangs_nocv_nocnceleb
nnet_num_augs=4
aug_opt="--train-aug-cfg conf/reverb_noise_aug.yml --val-aug-cfg conf/reverb_noise_aug.yml"

batch_size_1gpu=32
eff_batch_size=512 # effective batch size
ipe=$nnet_num_augs
min_chunk=4
max_chunk=4
lr=0.01

nnet_type=resnet34
dropout=0
embed_dim=256

s=30
margin_warmup=20
margin=0.3

# Passed as --pool-inner-feats and also embedded in nnet_name.
attstats_inner=128

nnet_opt="--resnet-type $nnet_type --in-feats 64 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --norm-layer instance-norm-affine --head-norm-layer layer-norm --pool-type ch-wise-att-mean-stddev --pool-inner-feats $attstats_inner"

# NOTE(review): --use-amp is not part of opt_opt (it only appears in the
# trailing comment) although the header and nnet_name say "amp" -- confirm.
opt_opt="--opt-optimizer adam --opt-lr $lr --opt-beta1 0.9 --opt-beta2 0.95 --opt-weight-decay 1e-5 --opt-amsgrad" # --use-amp"
lrs_opt="--lrsch-lrsch-type exp_lr --lrsch-decay-rate 0.5 --lrsch-decay-steps 10000 --lrsch-hold-steps 40000 --lrsch-min-lr 1e-5 --lrsch-warmup-steps 1000 --lrsch-update-lr-on-opt-step"

# The pooling tag follows attstats_inner so the name cannot drift away from the
# value actually passed in nnet_opt (same result as the literal 128 today).
nnet_name=${feat_type}_${nnet_type}_eina_hln_chattstatsi${attstats_inner}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp.v1.$nnet_data
nnet_num_epochs=50
nnet_dir=exp/xvector_nnets/$nnet_name
nnet=$nnet_dir/model_ep0050.pth


# xvector full net finetuning with out-of-domain
ft_batch_size_1gpu=4
ft_eff_batch_size=128 # effective batch size
ft_min_chunk=10
ft_max_chunk=60
ft_ipe=1
ft_lr=0.05
ft_nnet_num_epochs=21
ft_margin=0.3
ft_margin_warmup=3

ft_opt_opt="--opt-optimizer sgd --opt-lr $ft_lr --opt-momentum 0.9 --opt-weight-decay 1e-5 --use-amp --var-batch-size"
ft_lrs_opt="--lrsch-lrsch-type cos_lr --lrsch-t 2500 --lrsch-t-mul 2 --lrsch-warm-restarts --lrsch-gamma 0.75 --lrsch-min-lr 1e-4 --lrsch-warmup-steps 100 --lrsch-update-lr-on-opt-step"
ft_nnet_name=${nnet_name}.ft_${ft_min_chunk}_${ft_max_chunk}_arcm${ft_margin}_sgdcos_lr${ft_lr}_b${ft_eff_batch_size}_amp.v2
ft_nnet_dir=exp/xvector_nnets/$ft_nnet_name
ft_nnet=$ft_nnet_dir/model_ep0021.pth


# xvector last-layer finetuning realtel
reg_layers_classif=0
reg_layers_enc="0 1 2 3 4"
nnet_adapt_data=realtel
ft2_batch_size_1gpu=16
ft2_eff_batch_size=128 # effective batch size
ft2_ipe=1
ft2_lr=0.01
ft2_nnet_num_epochs=35
ft2_margin_warmup=3
ft2_reg_weight_embed=0.1
ft2_min_chunk=10
ft2_max_chunk=60

ft2_opt_opt="--opt-optimizer sgd --opt-lr $ft2_lr --opt-momentum 0.9 --opt-weight-decay 1e-5 --use-amp --var-batch-size"
ft2_lrs_opt="--lrsch-lrsch-type cos_lr --lrsch-t 2500 --lrsch-t-mul 2 --lrsch-warm-restarts --lrsch-gamma 0.75 --lrsch-min-lr 1e-4 --lrsch-warmup-steps 100 --lrsch-update-lr-on-opt-step"
ft2_nnet_name=${ft_nnet_name}.ft_eaffine_rege_w${ft2_reg_weight_embed}_${ft2_min_chunk}_${ft2_max_chunk}_sgdcos_lr${ft2_lr}_b${ft2_eff_batch_size}_amp.v2.$nnet_adapt_data
ft2_nnet_dir=exp/xvector_nnets/$ft2_nnet_name
# The checkpoint file name (ep0015) is not tied to ft2_nnet_num_epochs (35).
ft2_nnet=$ft2_nnet_dir/model_ep0015.pth


# xvector full nnet finetuning
ft3_batch_size_1gpu=2
ft3_eff_batch_size=128 # effective batch size
ft3_ipe=1
ft3_lr=0.01
ft3_nnet_num_epochs=10
ft3_margin_warmup=20
ft3_reg_weight_embed=0.1
ft3_reg_weight_enc=0.1
ft3_min_chunk=10
ft3_max_chunk=60

ft3_opt_opt="--opt-optimizer sgd --opt-lr $ft3_lr --opt-momentum 0.9 --opt-weight-decay 1e-5 --use-amp --var-batch-size"
ft3_lrs_opt="--lrsch-lrsch-type cos_lr --lrsch-t 2500 --lrsch-t-mul 2 --lrsch-warm-restarts --lrsch-gamma 0.75 --lrsch-min-lr 1e-4 --lrsch-warmup-steps 100 --lrsch-update-lr-on-opt-step"
# Name variant that also encodes the regularization weights. Not in effect: the
# plain name assigned below is the one that is used.
# ft3_nnet_name=${ft2_nnet_name}.ft_reg_wenc${ft3_reg_weight_enc}_we${ft3_reg_weight_embed}_${ft3_min_chunk}_${ft3_max_chunk}_sgdcos_lr${ft3_lr}_b${ft3_eff_batch_size}_amp.v2
ft3_nnet_name=${ft2_nnet_name}.ft_${ft3_min_chunk}_${ft3_max_chunk}_sgdcos_lr${ft3_lr}_b${ft3_eff_batch_size}_amp.v2
ft3_nnet_dir=exp/xvector_nnets/$ft3_nnet_name
ft3_nnet=$ft3_nnet_dir/model_ep0010.pth


# back-end
plda_aug_config=conf/noise_aug.yml
plda_num_augs=0
# The remaining back-end settings are disabled (kept commented out):
# if [ $plda_num_augs -eq 0 ]; then
# plda_data=sre_tel
# plda_adapt_data=sre18_cmn2_adapt_lab
# else
# plda_data=sre_tel_augx${plda_num_augs}
# plda_adapt_data=sre18_cmn2_adapt_lab_augx${plda_num_augs}
# fi
# plda_type=splda
# lda_dim=200
# plda_y_dim=150
# plda_z_dim=200

Loading

0 comments on commit b8a0948

Please sign in to comment.