-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
76 lines (70 loc) · 3.36 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# -*- coding: utf-8 -*-
import cPickle
import numpy as np
from scipy.io.wavfile import read
from sklearn.mixture import GaussianMixture as GMM
from sklearn import preprocessing
import warnings
warnings.filterwarnings("ignore")
node = True
import os
import python_speech_features as mfcc
modelsPath = "models/" #path to our model file
source = 'trainingData/' #Path of the audio files we will use to create the model
def calculate(array): #I don't need to explain more basic array and things
rows,cols = array.shape
deltas = np.zeros((rows,20))
N = 2
for i in range(rows):
index = []
j = 1
while j <= N:
if i-j < 0:
first =0
else:
first = i-j
if i+j > rows-1:
second = rows-1
else:
second = i+j
index.append((second,first))
j+=1
deltas[i] = ( array[index[0][0]]-array[index[0][1]] + (2 * (array[index[1][0]]-array[index[1][1]])) ) / 10
return deltas
def extract(audio,rate): #Our function to extract the attribute of the audio.
mfcc_feature = mfcc.mfcc(audio,rate, 0.025, 0.01,20,nfft = 1200, appendEnergy = True) #We called our mfcc function from the #python_speech_features module. And we added diagnostic features
mfcc_feature = preprocessing.scale(mfcc_feature) #preprocessing The package contains several common helper functions and substitution of transformer classes for a representative raw feature vectors that are more suitable for prediction.
delta = calculate(mfcc_feature) #calculate_delta We calculate the variable we specified with mfcc.
combined = np.hstack((mfcc_feature,delta)) #Sort arrays horizontally (as columns).
return combined
# Extraction features for each speaker
features = np.asarray(()) #we created Array
sourceFolder = [os.path.join(name)
for name in os.listdir(source)] #We got the folders in the TrainingData folder.
print("Source Folders: ",sourceFolder)
sources = [] #create a new list. We will take the .wav files in the folders in the training data folder into this list.
for x in sourceFolder:
for name in os.listdir(source + x): #TrainingData/x where x is the folder in it. This function will work for each folder.
if name.endswith('.wav'): #If it is a wav file in TrainingData/x;
nn = "{}".format(x)+"/"+"{}".format(name) #Path
sources.append(nn) #Adding it to our list.
for path in sources:
path = path.strip()
print(path)
# Read the voice
sr,audio = read(source + path)
print(source + path)
# Let's explain the 40-dimensional MFCC and delta MFCC properties
vector = extract(audio,sr)
if features.size == 0: #If we doesn't have any data
features = vector #Features will equal to vector and program ends.
else:
features = np.vstack((features, vector)) #We stack arrays vertically (on a row basis) sequentially.
if node == True:
gmm = GMM(n_components = 16, max_iter = 200, covariance_type='diag',n_init = 3) #We are calling gmm function.
gmm.fit(features)
# We save the models we calculated to the folder
picklefile = path.split("-")[0]+".gmm"
cPickle.dump(gmm,open(modelsPath + picklefile,'w'))
print " >> Modeling complete for file: ",picklefile,' ',"| Data Point = ",features.shape
features = np.asarray(())