Skip to content

Commit

Permalink
Merge pull request #57 from hyperion-ml/lachesis
Browse files Browse the repository at this point in the history
Lachesis
  • Loading branch information
jesus-villalba authored Jun 10, 2021
2 parents b8a0948 + fe2e1b8 commit 6e32c91
Show file tree
Hide file tree
Showing 1,106 changed files with 53,876 additions and 15,150 deletions.
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,15 @@ tools/ibm_art/adversarial-robustness-toolbox/
tools/cudnn/cudnn-*
tools/kaldi/kaldi
tools/nccl/nccl_*
tools/path.sh

rirs_noises.zip
RIRS_NOISES
egs/*/*/exp
egs/*/*/data
kk
kk

egs/voxceleb/*/*.o*
egs/voxceleb/*/*.e*
egs/voxceleb/*/scores
egs/voxceleb/*/q
15 changes: 15 additions & 0 deletions egs/chime5_spkdet/v0/cmd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Exports train_cmd, the job-launcher command line for this recipe.
#
# Adapt this file to the type of queue you are using:
#   - queue.pl works with GridEngine (qsub).
#   - slurm.pl works with slurm.
#   - run.pl is for a local machine without a queueing system: replace all
#     instances of 'queue.pl' with run.pl (but be careful and run commands
#     one by one: most recipes will exhaust the memory on your machine).
# Different queues are configured differently, with different queue names and
# different ways of specifying things like memory. To account for these
# differences, create and edit the file conf/queue.conf to match your queue's
# configuration. Search for conf/queue.conf in
# http://kaldi-asr.org/doc/queue.html for more information, or search for the
# string 'default_config' in utils/queue.pl or utils/slurm.pl.

# Single quotes keep the inner double quotes and the [bc][01]* host pattern
# literal, so they reach the launcher unchanged.
train_cmd='queue.pl --mem 4G -l hostname="[bc][01]*"'
export train_cmd


File renamed without changes.
1 change: 1 addition & 0 deletions egs/chime5_spkdet/v0/hyp_utils
File renamed without changes.
File renamed without changes.
48 changes: 48 additions & 0 deletions egs/chime5_spkdet/v0/local/calibrate_chime5_spkdet_v1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
# Copyright 2019 Johns Hopkins University (Jesus Villalba)
# Apache 2.0.
#
# Trains a calibration model on the chime5 BIN.SUM trials and then applies it
# to the chime5 speaker-detection scores. Calibrated scores, the model and the
# logs are written to <score-dir>_cal_v1.
#
# Usage: calibrate_chime5_spkdet_v1.sh <score-dir>
set -e

# Defaults. parse_options.sh (sourced below) presumably lets callers override
# them with --cmd / --prior -- TODO confirm against that script.
cmd=run.pl
prior=0.01

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;


if [ $# -ne 1 ]; then
  echo "Usage: $0 <score-dir>"
  exit 1;
fi

score_dir=$1

cal_score_dir=${score_dir}_cal_v1

mkdir -p "$cal_score_dir"

echo "$0 calibrate on chime5 close-talk"

model_file=$cal_score_dir/cal_chime5.h5
train_scores=$score_dir/chime5_spkdet_scores
train_key=data/chime5_spkdet_test/trials_BIN.SUM

# $cmd is intentionally left unquoted: a launcher command may carry its own
# options (e.g. "queue.pl --mem 4G") and must be word-split. Everything derived
# from the user-supplied score dir is quoted so paths with spaces survive.
# shellcheck disable=SC2086
$cmd "$cal_score_dir/train_cal_chime5.log" \
  steps_be/train-calibration-v1.py --score-file "$train_scores" \
  --key-file "$train_key" --model-file "$model_file" --prior "$prior"



echo "$0 eval calibration for all chime5 conditions"

scores_i=chime5_spkdet_scores
scores_in=$score_dir/$scores_i
scores_out=$cal_score_dir/$scores_i
ndx=data/chime5_spkdet_test/trials
# shellcheck disable=SC2086
$cmd "$cal_score_dir/eval_cal_chime5.log" \
  steps_be/eval-calibration-v1.py --in-score-file "$scores_in" \
  --ndx-file "$ndx" --model-file "$model_file" --out-score-file "$scores_out"



94 changes: 94 additions & 0 deletions egs/chime5_spkdet/v0/local/make_chime5_spkdet.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/bin/bash
# Copyright 2019 Johns Hopkins University (Jesus Villalba)
# Apache 2.0
#
# Builds the chime5 speaker-detection data directories under <output_path>:
#   chime5_spkdet_enroll: utt2spk, spk2utt, wav.scp, diarization_segments, rttm
#   chime5_spkdet_test:   utt2spk, spk2utt, wav.scp, diarization_segments,
#                         trials and one trials_<cond> file per condition
#
# Usage: make_chime5_spkdet.sh <db-path> <output_path>

if [ $# != 2 ]; then
  echo "$0 <db-path> <output_path>"
  exit 1
fi

input_path=$1
output_path=$2

echo "$0 making chime5 spkdet enroll"
enroll_segm=$input_path/dgr_chime5_enroll_segments
data_out=$output_path/chime5_spkdet_enroll
audio_dir=$input_path/enroll/BIN.SUM
mkdir -p "$data_out"

# Field 2 of the segments file is used as the utterance id; the speaker id is
# its first "_"-separated token.
awk '{ split($2,f,"_"); s=f[1]; print $2,s }' \
  "$enroll_segm" | sort -u > "$data_out/utt2spk"
utils/utt2spk_to_spk2utt.pl "$data_out/utt2spk" > "$data_out/spk2utt"

# Shell values are handed to awk with -v instead of being spliced into the
# program text, so unusual characters in the path cannot alter the program.
awk -v audio_dir="$audio_dir" '{ print $1, audio_dir "/" $1 ".wav" }' \
  "$data_out/utt2spk" > "$data_out/wav.scp"

awk '{ print $0 }' "$enroll_segm" > "$data_out/diarization_segments"
# RTTM duration is end minus start ($4-$3).
awk '{ split($2,f,"_"); spk=f[1]; print "SPEAKER",$2,"1",$3,$4-$3,"<NA> <NA>",spk,"<NA> <NA>"}' \
  "$data_out/diarization_segments" > "$data_out/rttm"

utils/fix_data_dir.sh "$data_out"

# The enrollment spk2utt doubles as the model list when building trials.
models=$data_out/spk2utt

echo "$0 making chime5 spkdet test"

test_segm=$input_path/dgr_chime5_test_segments
data_out=$output_path/chime5_spkdet_test
audio_dir=$input_path/test
mkdir -p "$data_out"
# Lines matching P20_S07_.*_0024 are excluded from the test set.
awk '!/P20_S07_.*_0024/ {
       split($2,f,"_"); s=f[1]; print $2,s }' \
  "$test_segm" | sort -u > "$data_out/utt2spk"
utils/utt2spk_to_spk2utt.pl "$data_out/utt2spk" > "$data_out/spk2utt"

# Test audio lives in a sub-directory named after the third "_"-separated
# token of the utterance id.
awk -v audio_dir="$audio_dir" '{ split($1,f,"_"); dir=f[3];
       print $1, audio_dir "/" dir "/" $1 ".wav" }' \
  "$data_out/utt2spk" > "$data_out/wav.scp"

awk '!/P20_S07_.*_0024/ { print $0 }' "$test_segm" > "$data_out/diarization_segments"

# Trial list: every test utterance is paired with every enrollment model.
#   - same speaker id                      -> target
#   - different speaker, different session -> nontarget
#   - different speaker, same (merged) session -> no trial is written
awk -v fm="$models" '
function merge_sessions(sess_id) {
  #merge sessions with same spks
  sub(/04/,"03", sess_id);
  sub(/06/,"05", sess_id);
  sub(/17/,"07", sess_id);
  sub(/16/,"08", sess_id);
  sub(/13/,"12", sess_id);
  sub(/20/,"19", sess_id);
  sub(/22/,"18", sess_id);
  sub(/24/,"23", sess_id);
  return sess_id
}
BEGIN{
  n_models=0;
  # getline returns -1 on a read error; comparing with > 0 stops the loop
  # instead of spinning forever when the model file cannot be read.
  while((getline < fm) > 0)
  {
    # $1 is the model (speaker) id; its session is taken from the second
    # token of the first enrollment utterance id ($2).
    split($2,f,"_"); sess=f[2];
    sess=merge_sessions(sess);
    v_mod[n_models]=$1;
    v_sess[n_models]=sess;
    n_models++;
  }
  close(fm);
}
{
  split($1,f,"_"); spk=f[1]; sess=f[2];
  sess=merge_sessions(sess);
  for(i=0;i<n_models;i++)
  {
    if(spk==v_mod[i]){
      print v_mod[i],$1,"target";
    }
    else{
      if (sess!=v_sess[i])
        print v_mod[i],$1,"nontarget";
    }
  }
}' "$data_out/utt2spk" | sort -k1,2 > "$data_out/trials"

# Per-condition trial lists: all nontarget trials plus the trials whose test
# utterance id matches the condition name. cond is used as a regular
# expression, exactly as before (so "." matches any character).
for cond in BIN.SUM U01.CH1 U02.CH1 U04.CH1 U06.CH1; do
  awk -v cond="$cond" '$3=="nontarget" || $2 ~ cond' \
    "$data_out/trials" > "$data_out/trials_$cond"
done

utils/fix_data_dir.sh "$data_out"
1 change: 1 addition & 0 deletions egs/chime5_spkdet/v0/local/make_musan.py
1 change: 1 addition & 0 deletions egs/chime5_spkdet/v0/local/make_musan.sh
1 change: 1 addition & 0 deletions egs/chime5_spkdet/v0/local/make_mx6.sh
1 change: 1 addition & 0 deletions egs/chime5_spkdet/v0/local/make_sitw_train.sh
1 change: 1 addition & 0 deletions egs/chime5_spkdet/v0/local/make_voxceleb1cat.pl
1 change: 1 addition & 0 deletions egs/chime5_spkdet/v0/local/make_voxceleb2cat.pl
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import print_function
from six.moves import xrange

import sys
import os
Expand All @@ -15,7 +12,7 @@

def write_vad(f, file_id, vad):
f.write('%s [ ' % (file_id))
for i in xrange(len(vad)):
for i in range(len(vad)):
f.write('%d ' % vad[i])
f.write(']\n')

Expand All @@ -40,12 +37,12 @@ def rttm2vad_file(file_id, rttm, num_frames, fvad, fu2o, fseg, min_dur):


total_dur = np.zeros((num_spks,), dtype=float)
for i in xrange(num_spks):
for i in range(num_spks):
idx = spk_ids == i
total_dur[i] = np.sum(rttm.tdur.loc[idx])

do_all = np.all(total_dur < min_dur)
for i in xrange(num_spks):
for i in range(num_spks):
if total_dur[i] >= min_dur or do_all:
vad = np.zeros((num_frames,), dtype=int)
idx = spk_ids == i
Expand All @@ -54,7 +51,7 @@ def rttm2vad_file(file_id, rttm, num_frames, fvad, fu2o, fseg, min_dur):
fbeg = np.round(tbeg/frame_shift).astype('int')
fend = np.round(tend/frame_shift).astype('int')
file_dir_id = '%s-d%03d' % (file_id, i)
for j in xrange(len(tbeg)):
for j in range(len(tbeg)):
vad[fbeg[j]:fend[j]+1] = 1
if fseg is not None:
fseg.write('%s-%03d %s %.3f %.3f %s\n' % (file_dir_id,j,file_id,tbeg[j],tend[j], file_dir_id))
Expand Down
32 changes: 32 additions & 0 deletions egs/chime5_spkdet/v0/local/score_chime5_spkdet.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
# Copyright 2019 Johns Hopkins University (Jesus Villalba)
# Apache 2.0.
#
# Runs local/score_dcf.py on the chime5 speaker-detection scores, once for the
# global trial list and once per condition, all in parallel. Exits non-zero if
# any of the scoring jobs fails.
#
# Usage: score_chime5_spkdet.sh <data-root> <score-dir>
if [ $# -ne 2 ]; then
  echo "Usage: $0 <data-root> <score-dir>"
  exit 1;
fi

set -e

data_dir=$1
score_dir=$2

# chime5 trials
trials=$data_dir/trials
scores=$score_dir/chime5_spkdet_scores

# PIDs of the background scoring jobs. A bare `wait` always returns 0, so each
# job is waited for individually to pick up its exit status.
pids=()

echo "Chime5 global"
python local/score_dcf.py --key-file "$trials" --score-file "$scores" \
  --output-path "$score_dir/chime5_spkdet" &
pids+=("$!")

for cond in BIN.SUM U01.CH1 U02.CH1 U04.CH1 U06.CH1; do
  echo "Chime5 $cond"
  key=${trials}_$cond

  #Compute performance
  python local/score_dcf.py --key-file "$key" --score-file "$scores" \
    --output-path "$score_dir/chime5_spkdet_${cond}" &
  pids+=("$!")
done

# Reap every job before deciding the exit code, so one failure does not leave
# the remaining jobs orphaned (`|| status=1` also keeps set -e from aborting
# in the middle of the loop).
status=0
for pid in "${pids[@]}"; do
  wait "$pid" || status=1
done
exit "$status"



67 changes: 67 additions & 0 deletions egs/chime5_spkdet/v0/local/score_dcf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/env python
"""
Copyright 2018 Johns Hopkins University (Author: Jesus Villalba)
Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
"""

import sys
import os
import argparse
import time
import logging

import numpy as np

from hyperion.hyp_defs import float_cpu, config_logger
from hyperion.utils.trial_scores import TrialScores
from hyperion.utils.trial_key import TrialKey
from hyperion.metrics import fast_eval_dcf_eer as fast_eval


def score_dcf(key_file, score_file, output_path):
    """Computes EER and min/actual DCF for a score file and saves them.

    The results are written as a single line to ``output_path + '_results'``
    and also logged at INFO level.

    Args:
      key_file: trial key file, loaded with TrialKey.load_txt.
      score_file: score file, loaded with TrialScores.load_txt.
      output_path: prefix of the output file; its parent directory is
        created when it does not exist.
    """
    logging.info('Load key: %s' % key_file)
    key = TrialKey.load_txt(key_file)
    logging.info('Load scores: %s' % score_file)
    scr = TrialScores.load_txt(score_file)
    tar, non = scr.get_tar_non(key)

    # Target priors at which the DCFs are evaluated; the report below lists
    # them from the largest prior (index 3) to the smallest (index 0).
    priors = np.array([0.001, 0.005, 0.01, 0.05 ])
    min_dcf, act_dcf, eer, _ = fast_eval(tar, non, priors)

    # dirname() is '' when output_path has no directory component, and
    # makedirs('') raises, so only create a directory when there is one.
    # exist_ok avoids a check-then-create race when several scoring processes
    # are started in parallel on the same output directory.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    output_file = output_path + '_results'
    with open(output_file, 'w') as f:
        s = 'EER: {0:.2f} DCF5e-2: {1:.3f} / {2:.3f} DCF1e-2: {3:.3f} / {4:.3f} DCF5e-3: {5:.3f} / {6:.3f} DCF1e-3: {7:.3f} / {8:.3f}'.format(
            eer * 100, min_dcf[3], act_dcf[3],
            min_dcf[2], act_dcf[2],
            min_dcf[1], act_dcf[1],
            min_dcf[0], act_dcf[0])
        f.write(s)
        logging.info(s)


if __name__ == "__main__":
    # Command-line entry point: parse the options, configure logging and
    # forward the remaining options to score_dcf().
    parser = argparse.ArgumentParser(
        description='Computes EER and DCF',
        fromfile_prefix_chars='@',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # The three mandatory options map one-to-one onto score_dcf() keywords.
    for dest in ('key_file', 'score_file', 'output_path'):
        parser.add_argument('--' + dest.replace('_', '-'), dest=dest,
                            required=True)
    parser.add_argument('-v', '--verbose', dest='verbose', type=int,
                        choices=[0, 1, 2, 3], default=1)

    args = parser.parse_args()
    config_logger(args.verbose)
    # verbose is not a score_dcf() argument, so drop it before forwarding.
    del args.verbose
    logging.debug(args)

    score_dcf(**vars(args))


33 changes: 33 additions & 0 deletions egs/chime5_spkdet/v0/path.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

# Environment setup for this recipe; meant to be sourced from the recipe
# directory. Derives the repository and tools locations from the current
# directory, loads Kaldi's common_path.sh and extends PATH, PYTHONPATH and
# LD_LIBRARY_PATH.

# Assignment and export are split so a readlink failure is not masked, and the
# expansions are quoted so a checkout path containing spaces still works.
HYP_ROOT=$(readlink -f "$(pwd -P)/../../..")
export HYP_ROOT
export TOOLS_ROOT=$HYP_ROOT/tools

export KALDI_ROOT=$TOOLS_ROOT/kaldi/kaldi
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH
# This file is sourced, so the exit below terminates the calling script.
if [ ! -f "$KALDI_ROOT/tools/config/common_path.sh" ]; then
  echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!"
  exit 1
fi
. "$KALDI_ROOT/tools/config/common_path.sh"
export LC_ALL=C

LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
LD_LIBRARY_PATH=/usr/local/cuda/lib:$LD_LIBRARY_PATH
# When the system-wide CUDA directory is absent, also add the copy under $HOME.
if [ ! -d /usr/local/cuda/lib64 ]; then
  LD_LIBRARY_PATH=$HOME/usr/local/cuda/lib64:$LD_LIBRARY_PATH
fi

export MPLBACKEND="agg"
export PATH=$HYP_ROOT/hyperion/bin:/usr/local/cuda/bin:$PATH
export PYTHONPATH=$HYP_ROOT:$PYTHONPATH
export LD_LIBRARY_PATH

#######################################
# Polls once per second until a regular file exists or the timeout elapses.
# Arguments:
#   $1 - path of the file to wait for
#   $2 - timeout in seconds (optional, default 30)
# Returns:
#   1 if the file was found, 0 if the timeout elapsed without it appearing.
#   NOTE(review): this is the reverse of the usual shell convention; it is
#   kept as-is because callers outside this file may depend on it.
#######################################
wait_file() {
  local file=$1
  # Read the timeout from the second argument. Previously a `shift` ran before
  # "${2:-30}" was expanded, so the timeout passed by the caller was ignored
  # and the default was always used.
  local wait_seconds=${2:-30}
  # Keep the loop counter local so a caller's own $i is not overwritten.
  local i
  for ((i = 0; i < wait_seconds; i++)); do
    [ -f "$file" ] && return 1
    sleep 1
  done
  return 0
}

export -f wait_file
Loading

0 comments on commit 6e32c91

Please sign in to comment.