-
Notifications
You must be signed in to change notification settings - Fork 46
/
main.m
93 lines (82 loc) · 6.21 KB
/
main.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Sample program for blind source separation using independent low-rank %
% matrix analysis (ILRMA) %
% %
% Coded by D. Kitamura (d-kitamura@ieee.org) %
% %
% Copyright 2020 Daichi Kitamura %
% %
% These programs are distributed only for academic research at %
% universities and research institutions. %
% It is not allowed to use or modify these programs for commercial or %
% industrial purpose without our permission. %
% When you use or modify these programs and write research articles, %
% cite the following references: %
% %
% # Original paper (The algorithm was called "Rank-1 MNMF" in this paper) %
% D. Kitamura, N. Ono, H. Sawada, H. Kameoka, H. Saruwatari, "Determined %
% blind source separation unifying independent vector analysis and %
% nonnegative matrix factorization," IEEE/ACM Trans. ASLP, vol. 24, %
% no. 9, pp. 1626-1641, September 2016. %
% %
% # Book chapter (The algorithm was renamed as "ILRMA") %
% D. Kitamura, N. Ono, H. Sawada, H. Kameoka, H. Saruwatari, "Determined %
% blind source separation with independent low-rank matrix analysis," %
% Audio Source Separation. Signals and Communication Technology., %
% S. Makino, Ed. Springer, Cham, pp. 125-155, March 2018. %
% %
% See also: %
% http://d-kitamura.net %
% http://d-kitamura.net/demo-ILRMA_en.html %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Start from an empty workspace and close every open figure
clear;
close all;

%% Parameters
% --- General ---
% pseudo random seed
seed = 1;
% reference microphone for back projection
refMic = 1;
% resampling frequency [Hz]
resampFreq = 16000;
% number of sources
nSrc = 2;

% --- STFT ---
% window length in STFT [points]
fftSize = 4096;
% shift length in STFT [points]
shiftSize = 2048;
% window function used in STFT
windowType = "hamming";

% --- Low-rank source model ---
% number of bases
%   ilrmaType=1: nBases is the number of bases for "each" source
%   ilrmaType=2: nBases is the number of bases for "all" sources
nBases = 10;
% number of iterations (choose it by checking the convergence behavior
% with drawConv=true)
nIter = 100;
% 1 or 2
%   1: ILRMA without partitioning function
%   2: ILRMA with partitioning function
ilrmaType = 1;

% --- t-ILRMA only ---
% degree-of-freedom parameter of Student's t distribution
% (positive value, 1: Cauchy)
dofParam = 1;
% domain of signal for low-rank source model
% (positive value, 2: power, 1: amplitude)
sigDom = 2;

% --- Processing options ---
% 0, 1, or 2
%   0: do not apply normalization in each iteration
%   1: apply average-power-based normalization in each iteration to
%      improve numerical stability (the monotonic decrease of the cost
%      function may be lost)
%   2: apply back projection in each iteration
applyNormalize = 1;
% true: apply whitening to the observed multichannel spectrograms
applyWhitening = false;
% true:  plot cost function values in each iteration and show the
%        convergence behavior
% false: faster, cost function values are not plotted
drawConv = true;

%% Fix random seed
% Replace the global random stream with a Mersenne Twister stream seeded
% by "seed" so that repeated runs draw the same random numbers.
RandStream.setGlobalStream(RandStream('mt19937ar','Seed',seed))
% Input data and resample
% srcSig is indexed as signal x channel x source (source image).
[srcSig(:,:,1), sampFreq] = audioread('./input/drums.wav');
[srcSig(:,:,2), sampFreqSrc2] = audioread('./input/piano.wav');

% Both files are resampled from "sampFreq" below, so they must share one
% sampling frequency; otherwise the first source would silently be
% resampled with the wrong rate.
if sampFreqSrc2 ~= sampFreq
    error('Sampling frequencies of the input files differ (%d Hz vs. %d Hz).', sampFreq, sampFreqSrc2);
end

% Resampling for reducing computational cost (the last argument, 100, is
% handed to resample() unchanged).
srcSigResample(:,:,1) = resample(srcSig(:,:,1), resampFreq, sampFreq, 100);
srcSigResample(:,:,2) = resample(srcSig(:,:,2), resampFreq, sampFreq, 100);

% Mix source images of each channel to produce observed mixture signal
mixSig(:,1) = srcSigResample(:,1,1) + srcSigResample(:,1,2);
mixSig(:,2) = srcSigResample(:,2,1) + srcSigResample(:,2,2);

% Check clipping: the largest magnitude over all samples and channels must
% not exceed 1. The absolute value is taken BEFORE the maximum so that
% negative peaks below -1 are detected as well.
if max(abs(mixSig(:))) > 1
    error('Clipping detected while mixing.');
end
% Blind source separation based on ILRMA
% (exactly one of the separation calls in this section should be active;
%  the alternatives below are kept commented out)
[estSig, cost] = ILRMA( ...
    mixSig, nSrc, resampFreq, nBases, ...
    fftSize, shiftSize, windowType, ...
    nIter, ilrmaType, refMic, ...
    applyNormalize, applyWhitening, drawConv);

% Blind source separation based on t-ILRMA
% [estSig, cost] = tILRMA(mixSig, nSrc, resampFreq, nBases, dofParam, sigDom, fftSize, shiftSize, windowType, nIter, refMic, applyNormalize, applyWhitening, drawConv);

% Blind source separation based on consistent ILRMA
% [estSig, cost] = consistentILRMA(mixSig, nSrc, resampFreq, nBases, fftSize, shiftSize, windowType, nIter, refMic, applyWhitening, drawConv);

% Blind source separation based on ILRMA-ISS
% [estSig, cost] = ILRMAISS(mixSig, nSrc, resampFreq, nBases, fftSize, shiftSize, windowType, nIter, ilrmaType, refMic, applyNormalize, applyWhitening, drawConv);
% Output separated signals
outputDir = './output';

% Create the output folder on first use. exist(...,'dir') returns 7 for an
% existing folder; it is used instead of isdir, which is no longer
% recommended.
if exist(outputDir, 'dir') ~= 7
    mkdir(outputDir);
end

% All files are written at the resampled rate "resampFreq".
audiowrite(fullfile(outputDir, 'observedMixture.wav'), mixSig, resampFreq); % observed signal
audiowrite(fullfile(outputDir, 'originalSource1.wav'), srcSigResample(:,refMic,1), resampFreq); % source signal 1 (reference microphone channel)
audiowrite(fullfile(outputDir, 'originalSource2.wav'), srcSigResample(:,refMic,2), resampFreq); % source signal 2 (reference microphone channel)
audiowrite(fullfile(outputDir, 'estimatedSignal1.wav'), estSig(:,1), resampFreq); % estimated signal 1
audiowrite(fullfile(outputDir, 'estimatedSignal2.wav'), estSig(:,2), resampFreq); % estimated signal 2

% Report the folder actually used rather than a hard-coded path
fprintf('The files are saved in "%s".\n', outputDir);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%