Skip to content

Commit

Permalink
Merge pull request #61 from hyperion-ml/lachesis
Browse files Browse the repository at this point in the history
Lachesis
  • Loading branch information
jesus-villalba authored Jun 25, 2021
2 parents fb3af32 + 6a2678f commit c77f0ef
Show file tree
Hide file tree
Showing 9 changed files with 78 additions and 14 deletions.
4 changes: 2 additions & 2 deletions egs/chime5_spkdet/v1/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@

# For every recording directory of the current speaker, build one concatenated
# utterance: write an ffmpeg concat list of the recording's .m4a files, then
# print the decoding pipeline to the WAV handle and the utterance-to-speaker
# mapping to the SPKR handle.
# Uses $spkr_id, $dataset_path, $out_dir, $fs and the WAV/SPKR handles, all of
# which are set up outside this loop.
for my $rec_id (@rec_dirs) {
    # Declare the utterance id once, before the list file name that depends on
    # it. A list named after $rec_id alone would be overwritten whenever two
    # speakers have a recording directory with the same name.
    my $utt_id = "$spkr_id-$rec_id";
    my $file_list = "$out_dir/lists_cat/$utt_id.txt";

    # NOTE(review): the paths are interpolated into a shell command unquoted;
    # this assumes they contain no whitespace or shell metacharacters.
    if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0) {
        die "Error creating $file_list";
    }

    # The entry is a command ending in '|', not a file path: ffmpeg reads the
    # concat list and writes 16-bit PCM wav to stdout.
    my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
    if ($fs == 8) {
        # When $fs is 8, chain sox to resample the stream to 8 kHz.
        $wav = $wav." sox -t wav - -t wav -r 8k - |";
    }

    print WAV "$utt_id", " $wav", "\n";
    print SPKR "$utt_id", " $spkr_id", "\n";
}
Expand Down
4 changes: 2 additions & 2 deletions egs/sre18/v1.8k/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@

# For every recording directory of the current speaker, build one concatenated
# utterance: write an ffmpeg concat list of the recording's .m4a files, then
# print the decoding pipeline to the WAV handle and the utterance-to-speaker
# mapping to the SPKR handle.
# Uses $spkr_id, $dataset_path, $out_dir, $fs and the WAV/SPKR handles, all of
# which are set up outside this loop.
for my $rec_id (@rec_dirs) {
    # Declare the utterance id once, before the list file name that depends on
    # it. A list named after $rec_id alone would be overwritten whenever two
    # speakers have a recording directory with the same name.
    my $utt_id = "$spkr_id-$rec_id";
    my $file_list = "$out_dir/lists_cat/$utt_id.txt";

    # NOTE(review): the paths are interpolated into a shell command unquoted;
    # this assumes they contain no whitespace or shell metacharacters.
    if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0) {
        die "Error creating $file_list";
    }

    # The entry is a command ending in '|', not a file path: ffmpeg reads the
    # concat list and writes 16-bit PCM wav to stdout.
    my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
    if ($fs == 8) {
        # When $fs is 8, chain sox to resample the stream to 8 kHz.
        $wav = $wav." sox -t wav - -t wav -r 8k - |";
    }

    print WAV "$utt_id", " $wav", "\n";
    print SPKR "$utt_id", " $spkr_id", "\n";
}
Expand Down
4 changes: 2 additions & 2 deletions egs/sre19-cmn2/v1/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@

# For every recording directory of the current speaker, build one concatenated
# utterance: write an ffmpeg concat list of the recording's .m4a files, then
# print the decoding pipeline to the WAV handle and the utterance-to-speaker
# mapping to the SPKR handle.
# Uses $spkr_id, $dataset_path, $out_dir, $fs and the WAV/SPKR handles, all of
# which are set up outside this loop.
for my $rec_id (@rec_dirs) {
    # Declare the utterance id once, before the list file name that depends on
    # it. A list named after $rec_id alone would be overwritten whenever two
    # speakers have a recording directory with the same name.
    my $utt_id = "$spkr_id-$rec_id";
    my $file_list = "$out_dir/lists_cat/$utt_id.txt";

    # NOTE(review): the paths are interpolated into a shell command unquoted;
    # this assumes they contain no whitespace or shell metacharacters.
    if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0) {
        die "Error creating $file_list";
    }

    # The entry is a command ending in '|', not a file path: ffmpeg reads the
    # concat list and writes 16-bit PCM wav to stdout.
    my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
    if ($fs == 8) {
        # When $fs is 8, chain sox to resample the stream to 8 kHz.
        $wav = $wav." sox -t wav - -t wav -r 8k - |";
    }

    print WAV "$utt_id", " $wav", "\n";
    print SPKR "$utt_id", " $spkr_id", "\n";
}
Expand Down
4 changes: 2 additions & 2 deletions egs/voices_challenge/v1/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@

# For every recording directory of the current speaker, build one concatenated
# utterance: write an ffmpeg concat list of the recording's .m4a files, then
# print the decoding pipeline to the WAV handle and the utterance-to-speaker
# mapping to the SPKR handle.
# Uses $spkr_id, $dataset_path, $out_dir, $fs and the WAV/SPKR handles, all of
# which are set up outside this loop.
for my $rec_id (@rec_dirs) {
    # Declare the utterance id once, before the list file name that depends on
    # it. A list named after $rec_id alone would be overwritten whenever two
    # speakers have a recording directory with the same name.
    my $utt_id = "$spkr_id-$rec_id";
    my $file_list = "$out_dir/lists_cat/$utt_id.txt";

    # NOTE(review): the paths are interpolated into a shell command unquoted;
    # this assumes they contain no whitespace or shell metacharacters.
    if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0) {
        die "Error creating $file_list";
    }

    # The entry is a command ending in '|', not a file path: ffmpeg reads the
    # concat list and writes 16-bit PCM wav to stdout.
    my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
    if ($fs == 8) {
        # When $fs is 8, chain sox to resample the stream to 8 kHz.
        $wav = $wav." sox -t wav - -t wav -r 8k - |";
    }

    print WAV "$utt_id", " $wav", "\n";
    print SPKR "$utt_id", " $spkr_id", "\n";
}
Expand Down
4 changes: 2 additions & 2 deletions egs/voxceleb/adv.v2/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@

# For every recording directory of the current speaker, build one concatenated
# utterance: write an ffmpeg concat list of the recording's .m4a files, then
# print the decoding pipeline to the WAV handle and the utterance-to-speaker
# mapping to the SPKR handle.
# Uses $spkr_id, $dataset_path, $out_dir, $fs and the WAV/SPKR handles, all of
# which are set up outside this loop.
for my $rec_id (@rec_dirs) {
    # Declare the utterance id once, before the list file name that depends on
    # it. A list named after $rec_id alone would be overwritten whenever two
    # speakers have a recording directory with the same name.
    my $utt_id = "$spkr_id-$rec_id";
    my $file_list = "$out_dir/lists_cat/$utt_id.txt";

    # NOTE(review): the paths are interpolated into a shell command unquoted;
    # this assumes they contain no whitespace or shell metacharacters.
    if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0) {
        die "Error creating $file_list";
    }

    # The entry is a command ending in '|', not a file path: ffmpeg reads the
    # concat list and writes 16-bit PCM wav to stdout.
    my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
    if ($fs == 8) {
        # When $fs is 8, chain sox to resample the stream to 8 kHz.
        $wav = $wav." sox -t wav - -t wav -r 8k - |";
    }

    print WAV "$utt_id", " $wav", "\n";
    print SPKR "$utt_id", " $spkr_id", "\n";
}
Expand Down
10 changes: 9 additions & 1 deletion egs/voxceleb/v1.1/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,13 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr
| | | | Cosine | 0.96 | 0.065 | 0.110 |
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.04 | 0.071 | 0.118 |
| | | | Cosine | 0.93 | 0.067 | 0.108 |
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 0.90 | 0.067 | 0.118 |
| | | | Cosine | 0.85 | 0.060 | 0.094 |
| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.44 | 0.102 | 0.169 |
| | | | Cosine | 1.29 | 0.084 | 0.140 |
| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.071 | 0.116 |
| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.074 | 0.116 |
| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.09 | 0.081 | 0.150 |


### VoxCeleb 1 Entire-Clean trial list
Expand Down Expand Up @@ -153,10 +156,13 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr
| | | | Cosine | 1.05 | 0.069 | 0.121 |
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 1.18 | 0.075 | 0.131 |
| | | | Cosine | 0.98 | 0.063 | 0.110 |
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 1.17 | 0.072 | 0.123 |
| | | | Cosine | 0.94 | 0.061 | 0.107 |
| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 1.56 | 0.095 | 0.166 |
| | | | Cosine | 1.27 | 0.079 | 0.142 |
| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 1.19 | 0.077 | 0.137 |
| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.12 | 0.073 | 0.129 |
| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 1.05 | 0.068 | 0.120 |


### VoxCeleb 1 Hard-Clean trial list
Expand Down Expand Up @@ -190,8 +196,10 @@ run_040_eval_be.sh --config-file config_fbank80_stmn_resnet34_arcs30m0.3_adam_lr
| | | | Cosine | 1.99 | 0.119 | 0.196 |
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp.v1.sh | Res2Net50 width=26x8 | ArcFace s=30/m=0.3 | PLDA | 2.18 | 0.127 | 0.211 |
| | | | Cosine | 1.89 | 0.112 | 0.184 |
| config_fbank80_stmn_res2net50w26s8_arcs30m0.3_adam_lr0.05_amp_swa.v1.sh | Res2Net50 width=26x8 | + SWA | PLDA | 2.14 | 0.125 | 0.209 |
| | | | Cosine | 1.84 | 0.110 | 0.186 |
| config_fbank80_stmn_spinenet49s_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49S | ArcFace s=30/m=0.3 | PLDA | 2.78 | 0.156 | 0.252 |
| | | | Cosine | 2.26 | 0.134 | 0.214 |
| config_fbank80_stmn_spinenet49_arcs30m0.3_adam_lr0.05_amp.v1.sh | SpineNet49 | ArcFace s=30/m=0.3 | Cosine | 2.24 | 0.134 | 0.221 |
| config_fbank80_stmn_spine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.20 | 0.132 | 0.219 |

| config_fbank80_stmn_tsespine2net49_arcs30m0.3_adam_lr0.05_amp.v1.sh | TSE-Spine2Net49 | ArcFace s=30/m=0.3 | Cosine | 2.02 | 0.123 | 0.203 |
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Res2Net50 w26s8 x-vector with mixed precision training and SWA
# (presumably stochastic weight averaging): opt_opt below passes --use-amp
# and the --swa-* options.
# This file only assigns shell variables; nothing here shows who reads them.

# acoustic features
feat_config=conf/fbank80_stmn_16k.yaml
feat_type=fbank80_stmn

# voice activity detection (VAD)
vad_config=conf/vad_16k.yaml

# x-vector training
nnet_data=voxceleb2cat_train
nnet_num_augs=6
# The same augmentation config is used for the training and validation sets.
aug_opt="--train-aug-cfg conf/reverb_noise_aug.yaml --val-aug-cfg conf/reverb_noise_aug.yaml"

batch_size_1gpu=24
eff_batch_size=512 # effective batch size
# ipe is tied to the number of augmentations (presumably "iterations per
# epoch" -- TODO confirm against the training script).
ipe=$nnet_num_augs
# min_chunk == max_chunk, so the chunk length is fixed at 4 (units are not
# shown in this file; presumably seconds).
min_chunk=4
max_chunk=4
lr=0.05

nnet_type=res2net50
dropout=0
embed_dim=256
# width_factor and scale are forwarded through nnet_opt; ws_tag is only used to
# build nnet_name. "w26s8" presumably abbreviates width 26 (8 * 3.25) and
# scale 8 -- keep it in sync by hand if either value changes.
width_factor=3.25
scale=8
ws_tag=w26s8

# s and margin appear in nnet_name as "arcs${s}m${margin}"; margin_warmup is not
# referenced again in this file.
s=30
margin_warmup=20
margin=0.3

nnet_opt="--resnet-type $nnet_type --in-feats 80 --in-channels 1 --in-kernel-size 3 --in-stride 1 --no-maxpool --res2net-width-factor $width_factor --res2net-scale $scale"

# Optimizer options: Adam with AMSGrad, mixed precision (--use-amp), and SWA
# starting at epoch 70 with its own learning rate and anneal length.
opt_opt="--optim.opt-type adam --optim.lr $lr --optim.beta1 0.9 --optim.beta2 0.95 --optim.weight-decay 1e-5 --optim.amsgrad --use-amp --swa-start 70 --swa-lr 1e-3 --swa-anneal-epochs 5"
# Learning-rate schedule options (exponential decay with warmup and hold).
lrs_opt="--lrsched.lrsch-type exp_lr --lrsched.decay-rate 0.5 --lrsched.decay-steps 8000 --lrsched.hold-steps 40000 --lrsched.min-lr 1e-5 --lrsched.warmup-steps 1000 --lrsched.update-lr-on-opt-step"

# The experiment name is assembled from the hyperparameters above, so changing
# any of them also changes nnet_dir.
nnet_name=${feat_type}_${nnet_type}${ws_tag}_e${embed_dim}_arcs${s}m${margin}_do${dropout}_adam_lr${lr}_b${eff_batch_size}_amp_swa.v1
nnet_num_epochs=90
nnet_dir=exp/xvector_nnets/$nnet_name
# Points at the SWA checkpoint. The hard-coded index 0091 is one past
# nnet_num_epochs=90 (presumably how the trainer numbers the final SWA model --
# verify); it must be edited by hand if nnet_num_epochs changes.
nnet=$nnet_dir/swa_model_ep0091.pth


# back-end
plda_aug_config=conf/reverb_noise_aug.yaml
plda_num_augs=6
# Use the plain training set when no augmentation is requested; otherwise use
# the data set whose name carries the augmentation count as a suffix.
if [ $plda_num_augs -eq 0 ]; then
plda_data=voxceleb2cat_train
else
plda_data=voxceleb2cat_train_augx${plda_num_augs}
fi
plda_type=splda
lda_dim=200
plda_y_dim=150
plda_z_dim=200

4 changes: 2 additions & 2 deletions egs/voxceleb/v1/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@

# For every recording directory of the current speaker, build one concatenated
# utterance: write an ffmpeg concat list of the recording's .m4a files, then
# print the decoding pipeline to the WAV handle and the utterance-to-speaker
# mapping to the SPKR handle.
# Uses $spkr_id, $dataset_path, $out_dir, $fs and the WAV/SPKR handles, all of
# which are set up outside this loop.
for my $rec_id (@rec_dirs) {
    # Declare the utterance id once, before the list file name that depends on
    # it. A list named after $rec_id alone would be overwritten whenever two
    # speakers have a recording directory with the same name.
    my $utt_id = "$spkr_id-$rec_id";
    my $file_list = "$out_dir/lists_cat/$utt_id.txt";

    # NOTE(review): the paths are interpolated into a shell command unquoted;
    # this assumes they contain no whitespace or shell metacharacters.
    if (system("find $dataset_path/$spkr_id/$rec_id -name \"*.m4a\" -printf \"file %p\\n\" > $file_list") != 0) {
        die "Error creating $file_list";
    }

    # The entry is a command ending in '|', not a file path: ffmpeg reads the
    # concat list and writes 16-bit PCM wav to stdout.
    my $wav = "ffmpeg -v 8 -f concat -safe 0 -i $file_list -f wav -acodec pcm_s16le -|";
    if ($fs == 8) {
        # When $fs is 8, chain sox to resample the stream to 8 kHz.
        $wav = $wav." sox -t wav - -t wav -r 8k - |";
    }

    print WAV "$utt_id", " $wav", "\n";
    print SPKR "$utt_id", " $spkr_id", "\n";
}
Expand Down
2 changes: 1 addition & 1 deletion hyp_utils/xvectors/extract_xvectors_from_wav.sh
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ if [ $stage -le 0 ];then
--part-idx JOB --num-parts $nj \
--input $data_dir/wav.scp \
--model-path $nnet_file --chunk-length $chunk_length \
--output ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp || exit 1;
--output ark,scp:$output_dir/xvector.JOB.ark,$output_dir/xvector.JOB.scp
set -e
fi

Expand Down

0 comments on commit c77f0ef

Please sign in to comment.