Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Audio sampling rate fix #210

Merged
merged 5 commits into from
Dec 2, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pliers/converters/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,7 @@ class VideoToAudioConverter(Converter):
VERSION = '1.0'

def _convert(self, video):
return AudioStim(clip=video.clip.audio, onset=video.onset)
fps = AudioStim.get_sampling_rate(video.filename)
return AudioStim(sampling_rate=fps,
clip=video.clip.audio,
onset=video.onset)
51 changes: 47 additions & 4 deletions pliers/stimuli/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
from .base import Stim
from moviepy.audio.io.AudioFileClip import AudioFileClip

import os
import re
import subprocess


class AudioStim(Stim):

Expand All @@ -18,14 +22,17 @@ class AudioStim(Stim):

_default_file_extension = '.wav'

def __init__(self, filename=None, onset=None, sampling_rate=44100, url=None, clip=None):
def __init__(self, filename=None, onset=None, sampling_rate=None, url=None, clip=None):
if url is not None:
filename = url
self.filename = filename

self.sampling_rate = sampling_rate
self.clip = clip
if not self.sampling_rate:
self.sampling_rate = self.get_sampling_rate(self.filename)

if self.clip is None:
self.clip = clip
if not self.clip:
self._load_clip()

# Small default buffer isn't ideal, but moviepy has persistent issues
Expand All @@ -41,6 +48,42 @@ def __init__(self, filename=None, onset=None, sampling_rate=44100, url=None, cli
super(AudioStim, self).__init__(
filename, onset=onset, duration=duration)

@staticmethod
def get_sampling_rate(filename):
    ''' Use FFMPEG to get the audio sampling rate of a media file.

    Most of this code was adapted from the moviepy codebase.

    Args:
        filename (str): Path of the audio/video file handed to
            ``ffmpeg -i``. If None, nothing is probed and the default
            rate is returned.

    Returns:
        int: The rate (in Hz) found on the first ' Audio: ' line of
            ffmpeg's stderr output, or 44100 if no rate can be parsed.

    Raises:
        IOError: If ffmpeg reports that the file does not exist.
    '''
    default_rate = 44100

    # Stims built from an in-memory clip have no file to probe; handing
    # None to Popen would raise a TypeError, so fall back to the default.
    if filename is None:
        return default_rate

    cmd = ['ffmpeg', '-i', filename]
    # 0x08000000 is CREATE_NO_WINDOW: keeps a console window from
    # flashing up when ffmpeg is spawned on Windows.
    creationflags = 0x08000000 if os.name == 'nt' else 0

    with open(os.devnull, 'rb') as devnull:
        proc = subprocess.Popen(cmd,
                                stdin=devnull,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                creationflags=creationflags)
        _, p_err = proc.communicate()

    # The stream description is read from stderr. Decode leniently so
    # that non-UTF-8 metadata (titles, tags) cannot abort the probe.
    lines = p_err.decode('utf8', 'replace').splitlines()

    # Guard against empty output before looking at the last line.
    if lines and 'No such file or directory' in lines[-1]:
        raise IOError(('Error: the file %s could not be found.\n'
                       'Please check that you entered the correct '
                       'path.') % filename)

    for line in lines:
        if ' Audio: ' not in line:
            continue
        # Require at least one digit so ' Hz' alone is not a match.
        match = re.search(r' ([0-9]+) Hz', line)
        if match:
            return int(match.group(1))
        # Only the first audio stream line is consulted.
        break

    # Return a sensible default
    return default_rate

def _load_clip(self):
    ''' Build the audio clip for this stim from its file, passing the
    stim's sampling rate to the clip as its fps. '''
    source = self.filename
    rate = self.sampling_rate
    self.clip = AudioFileClip(source, fps=rate)

Expand All @@ -54,4 +97,4 @@ def __setstate__(self, d):
self._load_clip()

def save(self, path):
self.clip.write_audiofile(path)
self.clip.write_audiofile(path, fps=self.sampling_rate)
4 changes: 3 additions & 1 deletion pliers/stimuli/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from math import ceil
from moviepy.video.io.VideoFileClip import VideoFileClip
from .base import Stim
from .audio import AudioStim
from .image import ImageStim


Expand Down Expand Up @@ -68,7 +69,8 @@ def __init__(self, filename=None, frame_index=None, onset=None, url=None):
duration=duration)

def _load_clip(self):
self.clip = VideoFileClip(self.filename)
audio_fps = AudioStim.get_sampling_rate(self.filename)
self.clip = VideoFileClip(self.filename, audio_fps=audio_fps)

def __iter__(self):
""" Frame iteration. """
Expand Down
10 changes: 10 additions & 0 deletions pliers/tests/converters/test_video_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,14 @@ def test_video_to_audio_converter():
assert audio.history.source_class == 'VideoStim'
assert audio.history.source_file == filename
assert audio.onset == 4.2
assert audio.sampling_rate == 48000
assert np.isclose(video.duration, audio.duration, 1e-2)

filename = join(VIDEO_DIR, 'obama_speech.mp4')
video = VideoStim(filename, onset=1.0)
audio = conv.transform(video)
assert audio.history.source_class == 'VideoStim'
assert audio.history.source_file == filename
assert audio.onset == 1.0
assert audio.sampling_rate == 24000
assert np.isclose(video.duration, audio.duration, 1e-2)
Binary file removed pliers/tests/data/video/obama_speech.wav
Binary file not shown.
Binary file removed pliers/tests/data/video/small.wav
Binary file not shown.
142 changes: 71 additions & 71 deletions pliers/tests/extractors/test_audio_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,163 +47,163 @@ def test_spectral_extractors():
audio = AudioStim(join(AUDIO_DIR, "barber.wav"))
ext = SpectralCentroidExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 3)
assert np.isclose(df['onset'][1], 0.01161)
assert np.isclose(df['duration'][0], 0.01161)
assert np.isclose(df['spectral_centroid'][0], 817.53095)
assert df.shape == (1221, 3)
assert np.isclose(df['onset'][1], 0.04644)
assert np.isclose(df['duration'][0], 0.04644)
assert np.isclose(df['spectral_centroid'][0], 1144.98145)

ext2 = SpectralCentroidExtractor(n_fft=1024, hop_length=256)
df = ext2.transform(audio).to_df()
assert df.shape == (9763, 3)
assert np.isclose(df['onset'][1], 0.005805)
assert np.isclose(df['duration'][0], 0.005805)
assert np.isclose(df['spectral_centroid'][0], 1492.00515)
assert df.shape == (2441, 3)
assert np.isclose(df['onset'][1], 0.02322)
assert np.isclose(df['duration'][0], 0.02322)
assert np.isclose(df['spectral_centroid'][0], 866.20176)

ext = SpectralBandwidthExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 3)
assert np.isclose(df['spectral_bandwidth'][0], 1056.66227)
assert df.shape == (1221, 3)
assert np.isclose(df['spectral_bandwidth'][0], 1172.96090)

ext = SpectralContrastExtractor()
ext = SpectralContrastExtractor(fmin=100.0)
df = ext.transform(audio).to_df()
assert df.shape == (4882, 9)
assert np.isclose(df['spectral_contrast_band_4'][0], 25.09001)
assert df.shape == (1221, 9)
assert np.isclose(df['spectral_contrast_band_4'][0], 25.637166)

ext = SpectralRolloffExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 3)
assert np.isclose(df['spectral_rolloff'][0], 1550.39063)
assert df.shape == (1221, 3)
assert np.isclose(df['spectral_rolloff'][0], 2492.46826)


def test_polyfeatures_extractor():
audio = AudioStim(join(AUDIO_DIR, "barber.wav"))
ext = PolyFeaturesExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 4)
assert np.isclose(df['onset'][1], 0.01161)
assert np.isclose(df['duration'][0], 0.01161)
assert np.isclose(df['coefficient_0'][0], -7.795e-5)
assert df.shape == (1221, 4)
assert np.isclose(df['onset'][1], 0.04644)
assert np.isclose(df['duration'][0], 0.04644)
assert np.isclose(df['coefficient_0'][0], -0.00172077)

ext2 = PolyFeaturesExtractor(order=3)
df = ext2.transform(audio).to_df()
assert df.shape == (4882, 6)
assert np.isclose(df['coefficient_3'][2], 20.77778)
assert df.shape == (1221, 6)
assert np.isclose(df['coefficient_3'][2], 12.32108)


def test_rmse_extractor():
audio = AudioStim(join(AUDIO_DIR, "barber.wav"),
onset=1.0)
ext = RMSEExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 3)
assert np.isclose(df['onset'][1], 1.01161)
assert np.isclose(df['duration'][0], 0.01161)
assert np.isclose(df['rmse'][0], 0.226572)
assert df.shape == (1221, 3)
assert np.isclose(df['onset'][1], 1.04644)
assert np.isclose(df['duration'][0], 0.04644)
assert np.isclose(df['rmse'][0], 0.25663)

ext2 = RMSEExtractor(frame_length=1024, hop_length=256, center=False)
df = ext2.transform(audio).to_df()
assert df.shape == (9759, 3)
assert np.isclose(df['onset'][1], 1.005805)
assert np.isclose(df['duration'][0], 0.005805)
assert np.isclose(df['rmse'][0], 0.22648)
assert df.shape == (2437, 3)
assert np.isclose(df['onset'][1], 1.02322)
assert np.isclose(df['duration'][0], 0.02322)
assert np.isclose(df['rmse'][0], 0.25649)


def test_zcr_extractor():
audio = AudioStim(join(AUDIO_DIR, "barber.wav"),
onset=2.0)
ext = ZeroCrossingRateExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 3)
assert np.isclose(df['onset'][1], 2.01161)
assert np.isclose(df['duration'][0], 0.01161)
assert np.isclose(df['zero_crossing_rate'][0], 0.0234375)
assert df.shape == (1221, 3)
assert np.isclose(df['onset'][1], 2.04644)
assert np.isclose(df['duration'][0], 0.04644)
assert np.isclose(df['zero_crossing_rate'][0], 0.069824)

ext2 = ZeroCrossingRateExtractor(frame_length=1024, hop_length=256,
center=False, pad=True)
df = ext2.transform(audio).to_df()
assert df.shape == (9759, 3)
assert np.isclose(df['onset'][1], 2.005805)
assert np.isclose(df['duration'][0], 0.005805)
assert np.isclose(df['zero_crossing_rate'][0], 0.047852)
assert df.shape == (2437, 3)
assert np.isclose(df['onset'][1], 2.02322)
assert np.isclose(df['duration'][0], 0.02322)
assert np.isclose(df['zero_crossing_rate'][0], 0.140625)


def test_chroma_extractors():
audio = AudioStim(join(AUDIO_DIR, "barber.wav"))
ext = ChromaSTFTExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 14)
assert np.isclose(df['onset'][1], 0.01161)
assert np.isclose(df['duration'][0], 0.01161)
assert np.isclose(df['chroma_2'][0], 0.417595)
assert df.shape == (1221, 14)
assert np.isclose(df['onset'][1], 0.04644)
assert np.isclose(df['duration'][0], 0.04644)
assert np.isclose(df['chroma_2'][0], 0.53129)

ext2 = ChromaSTFTExtractor(n_chroma=6, n_fft=1024, hop_length=256)
df = ext2.transform(audio).to_df()
assert df.shape == (9763, 8)
assert np.isclose(df['onset'][1], 0.005805)
assert np.isclose(df['duration'][0], 0.005805)
assert np.isclose(df['chroma_5'][0], 0.732480)
assert df.shape == (2441, 8)
assert np.isclose(df['onset'][1], 0.02322)
assert np.isclose(df['duration'][0], 0.02322)
assert np.isclose(df['chroma_5'][0], 0.86870)

ext = ChromaCQTExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 14)
assert np.isclose(df['chroma_cqt_2'][0], 0.286443)
assert df.shape == (1221, 14)
assert np.isclose(df['chroma_cqt_2'][0], 0.355324)

ext = ChromaCENSExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 14)
assert np.isclose(df['chroma_cens_2'][0], 0.217814)
assert df.shape == (1221, 14)
assert np.isclose(df['chroma_cens_2'][0], 0.137765)


def test_melspectrogram_extractor():
audio = AudioStim(join(AUDIO_DIR, "barber.wav"))
ext = MelspectrogramExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 130)
assert np.isclose(df['onset'][1], 0.01161)
assert np.isclose(df['duration'][0], 0.01161)
assert np.isclose(df['mel_3'][0], 0.553125)
assert df.shape == (1221, 130)
assert np.isclose(df['onset'][1], 0.04644)
assert np.isclose(df['duration'][0], 0.04644)
assert np.isclose(df['mel_3'][0], 0.82194)

ext2 = MelspectrogramExtractor(n_mels=15)
df = ext2.transform(audio).to_df()
assert df.shape == (4882, 17)
assert np.isclose(df['mel_4'][2], 3.24429)
assert df.shape == (1221, 17)
assert np.isclose(df['mel_4'][2], 7.40387)


def test_mfcc_extractor():
audio = AudioStim(join(AUDIO_DIR, "barber.wav"))
ext = MFCCExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 22)
assert np.isclose(df['onset'][1], 0.01161)
assert np.isclose(df['duration'][0], 0.01161)
assert np.isclose(df['mfcc_3'][0], 5.98247)
assert df.shape == (1221, 22)
assert np.isclose(df['onset'][1], 0.04644)
assert np.isclose(df['duration'][0], 0.04644)
assert np.isclose(df['mfcc_3'][0], 20.84870)

ext2 = MFCCExtractor(n_mfcc=15)
df = ext2.transform(audio).to_df()
assert df.shape == (4882, 17)
assert np.isclose(df['mfcc_14'][2], -7.41533)
assert df.shape == (1221, 17)
assert np.isclose(df['mfcc_14'][2], -22.39406)


def test_tonnetz_extractor():
audio = AudioStim(join(AUDIO_DIR, "barber.wav"))
ext = TonnetzExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 8)
assert np.isclose(df['onset'][1], 0.01161)
assert np.isclose(df['duration'][0], 0.01161)
assert np.isclose(df['tonal_centroid_0'][0], -0.0264436)
assert df.shape == (1221, 8)
assert np.isclose(df['onset'][1], 0.04644)
assert np.isclose(df['duration'][0], 0.04644)
assert np.isclose(df['tonal_centroid_0'][0], -0.031784)


def test_tempogram_extractor():
audio = AudioStim(join(AUDIO_DIR, "barber.wav"))
ext = TempogramExtractor()
df = ext.transform(audio).to_df()
assert df.shape == (4882, 386)
assert np.isclose(df['onset'][1], 0.01161)
assert np.isclose(df['duration'][0], 0.01161)
assert np.isclose(df['tempo_1'][0], 0.773760)
assert df.shape == (1221, 386)
assert np.isclose(df['onset'][1], 0.04644)
assert np.isclose(df['duration'][0], 0.04644)
assert np.isclose(df['tempo_1'][0], 0.75708)

ext2 = TempogramExtractor(win_length=300)
df = ext2.transform(audio).to_df()
assert df.shape == (4882, 302)
assert np.isclose(df['tempo_1'][2], 0.756967)
assert df.shape == (1221, 302)
assert np.isclose(df['tempo_1'][2], 0.74917)
8 changes: 6 additions & 2 deletions pliers/tests/test_stims.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,16 @@ def test_video_stim():
assert f3.data.shape == (240, 320, 3)


def test_audio_stim(dummy_iter_extractor):
def test_audio_stim():
audio_dir = join(get_test_data_path(), 'audio')
stim = AudioStim(join(audio_dir, 'barber.wav'), sampling_rate=11025)
stim = AudioStim(join(audio_dir, 'barber.wav'))
assert round(stim.duration) == 57
assert stim.sampling_rate == 11025

stim = AudioStim(join(audio_dir, 'homer.wav'))
assert round(stim.duration) == 3
assert stim.sampling_rate == 11025


def test_audio_formats():
audio_dir = join(get_test_data_path(), 'audio')
Expand Down