MLSpeech · iskunk · Jan 17, 2020
diff --git a/README.md b/README.md
@@ -24,9 +24,7 @@ Download the code. The code is based on signal processing package in Python call
 Dependencies:
 Run these lines in a terminal to install everything necessary for feature extraction.
 ```
-sudo apt-get install python-numpy python-scipy python-nose python-pip
-
-sudo pip install scikits.talkbox 
+sudo apt-get install python3-numpy python3-scipy python3-nose
 ```
 Next for the installation of Torch for loading the models run this.
 ```
@@ -37,30 +35,31 @@ cd ~/torch; bash install-deps;
 ./install.sh
 ```
 ```
-luarocks install rnn
+git clone https://github.com/Element-Research/rnn.git old-rnn
+cd old-rnn; luarocks make rocks/rnn-scm-1.rockspec
 ```
-The Estimation model can be downloaded here and because of size constraints the Tracking model can be abtained by download from this link
-[tracking_model.mat] (https://drive.google.com/open?id=0Bxkc5_D0JjpiZWx4eTU1d0hsVXc)
+The Estimation model can be downloaded here; because of size constraints, the Tracking model can be downloaded separately from this link:
+[tracking_model.mat](https://drive.google.com/open?id=0Bxkc5_D0JjpiZWx4eTU1d0hsVXc)
 
 ## How to use:
 
 For vowel formant estimation, call the main script in a terminal with the following inputs: wav file, formant output filename, and the vowel begin and end times:
 
 ```
-python formants.py data/Example.wav data/ExamplePredictions.csv --begin 1.2 --end 1.3
+python3 formants.py data/Example.wav data/ExamplePredictions.csv --begin 1.2 --end 1.3
 ```
 
 or the vowel begin and end times can be taken from a TextGrid file (here the name of the TextGrid is Example.TextGrid and the vowel is taken from a tier called "VOWEL"):
 
 ```
-python formants.py data/Example.wav data/examplePredictions.csv --textgrid_filename data/Example.TextGrid \
+python3 formants.py data/Example.wav data/examplePredictions.csv --textgrid_filename data/Example.TextGrid \
           --textgrid_tier VOWEL
 ```
 
 For formant tracking, just call the script with the wav file and output filename:
 
 ```
-python formants.py data/Example.wav data/ExamplePredictions.csv
+python3 formants.py data/Example.wav data/ExamplePredictions.csv
 ```
 
 

diff --git a/extract_features.py b/extract_features.py
@@ -9,9 +9,9 @@
 import math
 from scipy.fftpack.realtransforms import dct
 from scipy.signal import lfilter, hamming
-from copy import deepcopy
 from scipy.fftpack import fft, ifft
-from scikits.talkbox.linpred import lpc
+# scikits.talkbox.linpred.lpc is obsolete; lpc is imported from helpers.conch_lpc instead
+from helpers.conch_lpc import lpc
 import shutil
 from helpers.utilities import *
 
@@ -88,9 +88,9 @@ def periodogram(x, nfft=None, fs=1):
 
     pxx = np.abs(fft(x, nfft)) ** 2
     if nfft % 2 == 0:
-        pn = nfft / 2 + 1
+        pn = nfft // 2 + 1
     else:
-        pn = (nfft + 1 )/ 2
+        pn = (nfft + 1) // 2
 
     fgrid = np.linspace(0, fs * 0.5, pn)
     return pxx[:pn] / (n * fs), fgrid
@@ -137,9 +137,9 @@ def arspec(x, order, nfft=None, fs=1):
 
     # This is not enough to deal correctly with even/odd size
     if nfft % 2 == 0:
-        pn = nfft / 2 + 1
+        pn = nfft // 2 + 1
     else:
-        pn = (nfft + 1 )/ 2
+        pn = (nfft + 1) // 2
 
     px = 1 / np.fft.fft(a, nfft)[:pn]
     pxx = np.real(np.conj(px) * px)
@@ -200,7 +200,6 @@ def preemp(input, p):
 
 
 def arspecs(input_wav,order,Atal=False):
-    epsilon = 0.0000000001
     data = input_wav
     if Atal:
         ar = atal(data, order, 30)
@@ -211,8 +210,10 @@ def arspecs(input_wav,order,Atal=False):
         for k, l in zip(ars[0], ars[1]):
             ar.append(math.log(math.sqrt((k**2)+(l**2))))
         for val in range(0,len(ar)):
-            if ar[val] == 0.0:
-                ar[val] = deepcopy(epsilon)
+            if ar[val] < 0.0:
+                ar[val] = np.nan
+            elif ar[val] == 0.0:
+                ar[val] = epsilon
         mspec1 = np.log10(ar)
         # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
         ar = dct(mspec1, type=2, norm='ortho', axis=-1)
@@ -221,10 +222,10 @@ def arspecs(input_wav,order,Atal=False):
 
 def specPS(input_wav,pitch):
         N = len(input_wav)
-        samps = N/pitch
+        samps = N // pitch
         if samps == 0:
             samps = 1
-        frames = N/samps
+        frames = N // samps
         data = input_wav[0:frames]
         specs = periodogram(data,nfft=4096)
         for i in range(1,int(samps)):
@@ -236,10 +237,11 @@ def specPS(input_wav,pitch):
             specs[0][s] /= float(samps)
         peri = []
         for k, l in zip(specs[0], specs[1]):
-            if k == 0 and l == 0:
-                peri.append(epsilon)
-            else:
-                peri.append(math.log(math.sqrt((k ** 2) + (l ** 2))))
+            m = math.sqrt((k ** 2) + (l ** 2))
+            if m > 0: m = math.log(m)
+            if m == 0: m = epsilon
+            elif m < 0: m = np.nan
+            peri.append(m)
         # Filter the spectrum through the triangle filterbank
         mspec = np.log10(peri)
         # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)

diff --git a/formants.py b/formants.py
@@ -9,19 +9,19 @@
 
 def predict_from_times(wav_filename, preds_filename, begin, end):
     tmp_features_filename = tempfile._get_default_tempdir() + "/" + next(tempfile._get_candidate_names()) + ".txt"
-    print tmp_features_filename
+    print(tmp_features_filename)
 
     if begin > 0.0 or end > 0.0:
         features.create_features(wav_filename, tmp_features_filename, begin, end)
-        easy_call("th load_estimation_model.lua " + tmp_features_filename + ' ' + preds_filename)
+        easy_call("luajit load_estimation_model.lua " + tmp_features_filename + ' ' + preds_filename)
     else:
         features.create_features(wav_filename, tmp_features_filename)
-        easy_call("th load_tracking_model.lua " + tmp_features_filename + ' ' + preds_filename)
+        easy_call("luajit load_tracking_model.lua " + tmp_features_filename + ' ' + preds_filename)
 
 
 def predict_from_textgrid(wav_filename, preds_filename, textgrid_filename, textgrid_tier):
 
-    print wav_filename
+    print(wav_filename)
 
     if os.path.exists(preds_filename):
         os.remove(preds_filename)

diff --git a/formants.sh b/formants.sh
@@ -4,12 +4,12 @@ if [ $# -eq 2 ]
 then
     tempfile=`mktemp -t txt`
     python extract_features.py $1 $tempfile
-    th load_estimation_model.lua $tempfile $2
+    luajit load_estimation_model.lua $tempfile $2
 elif [ $# -eq 4 ]
 then
     tempfile=`mktemp -t txt`
     python extract_features.py $1 $tempfile --begin $3 --end $4
-    th load_estimation_model.lua $tempfile $2
+    luajit load_estimation_model.lua $tempfile $2
 else
     echo "$0 wav_filename pred_csv_filename [begin_time end_time]"
 fi