Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parsing of low-sample-rate MP3 files. #196

Merged
merged 4 commits into from
Jan 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions pedalboard/juce_overrides/juce_PatchedMP3AudioFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,18 @@ struct MP3Frame {
return ParseSuccessful::yes;
}

// Returns the number of samples carried by one frame, selected from this
// frame's `layer` and `lsf` fields:
//   layer 1                -> 384
//   layer 3 with lsf set   -> 576
//   every other combination -> 1152
int numSamples() const {
  if (layer == 1)
    return 384;

  // Only layer 3 frames with the low-sampling-frequency flag set use the
  // shorter 576-sample frame; layer 3 without it is 1152 like the rest.
  const bool shortLayer3Frame = (layer == 3) && lsf;
  return shortLayer3Frame ? 576 : 1152;
}

int layer, frameSize, numChannels, single;
int lsf; // 0 = mpeg-1, 1 = mpeg-2/LSF
bool mpeg25; // true = mpeg-2.5, false = mpeg-1/2
Expand Down Expand Up @@ -3282,6 +3294,7 @@ class PatchedMP3Reader : public AudioFormatReader {
usesFloatingPointData = true;
sampleRate = stream.frame.getFrequency();
numChannels = (unsigned int)stream.frame.numChannels;
samplesPerFrame = stream.frame.numSamples();
lengthInSamples = findLength(streamPos);
}
}
Expand All @@ -3295,12 +3308,12 @@ class PatchedMP3Reader : public AudioFormatReader {
}

if (currentPosition != startSampleInFile) {
if (!stream.seek((int)(startSampleInFile / 1152 - 1))) {
if (!stream.seek((int)(startSampleInFile / samplesPerFrame - 1))) {
currentPosition = -1;
createEmptyDecodedData();
} else {
decodedStart = decodedEnd = 0;
const int64 streamPos = stream.currentFrameIndex * 1152;
const int64 streamPos = stream.currentFrameIndex * samplesPerFrame;
int toSkip = (int)(startSampleInFile - streamPos);
jassert(toSkip >= 0);

Expand Down Expand Up @@ -3356,6 +3369,7 @@ class PatchedMP3Reader : public AudioFormatReader {
private:
PatchedMP3Stream stream;
int64 currentPosition;
int samplesPerFrame;
enum { decodedDataSize = 1152 };
float decoded0[decodedDataSize], decoded1[decodedDataSize];
int decodedStart, decodedEnd;
Expand All @@ -3370,6 +3384,12 @@ class PatchedMP3Reader : public AudioFormatReader {
bool readNextBlock() {
for (int attempts = 10; --attempts >= 0;) {
int samplesDone = 0;

if (stream.stream.isExhausted()) {
createEmptyDecodedData();
return true;
}

const int result =
stream.decodeNextBlock(decoded0, decoded1, samplesDone);

Expand Down Expand Up @@ -3426,7 +3446,7 @@ class PatchedMP3Reader : public AudioFormatReader {
}
}

return numFrames * 1152;
return numFrames * samplesPerFrame;
}

JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR(PatchedMP3Reader)
Expand Down
Binary file added tests/audio/correct/sample_mono_22050Hz.mp3
Binary file not shown.
Binary file not shown.
68 changes: 67 additions & 1 deletion tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@

EXPECTED_DURATION_SECONDS = 5
EXPECT_LENGTH_TO_BE_EXACT = {"wav", "aiff", "caf", "ogg", "m4a", "mp4"}
MP3_FRAME_LENGTH_SAMPLES = 1152

# Maps each sample rate to the files under audio/correct/ (relative to this
# file's directory) whose names contain that sample rate.
TEST_AUDIO_FILES = {
    rate: glob.glob(os.path.join(os.path.dirname(__file__), "audio", "correct", f"*{rate}*"))
    for rate in (22050, 44100, 48000)
}
Expand Down Expand Up @@ -830,7 +832,7 @@ def test_write_empty_file(extension: str, samplerate: float, num_channels: int):
# The built-in JUCE MP3 reader (only used on Linux and Windows)
# reads zero-length MP3 files as having exactly one frame.
if "mp3" in extension and platform.system() != "Darwin":
assert af.frames <= 1152
assert af.frames <= MP3_FRAME_LENGTH_SAMPLES
contents = af.read(af.frames)
np.testing.assert_allclose(np.zeros_like(contents), contents)
else:
Expand Down Expand Up @@ -1068,3 +1070,67 @@ def test_seek_accuracy(quality: int, chunk_duration: int, granularity: int, exte
f" {offset:,}"
),
)


@pytest.mark.parametrize(
    "audio_filename,samplerate",
    [(a, s) for a, s in FILENAMES_AND_SAMPLERATES if s == 22050 and ".mp3" in a],
)
def test_22050Hz_mono_mp3(audio_filename: str, samplerate: float):
    """
    File size estimation was broken for 22kHz mono MP3 files.
    This test should catch that kind of problem.

    Reads the whole file in one call, then re-reads it from the start in
    chunks of MP3_FRAME_LENGTH_SAMPLES frames, and checks that both reads
    produce the same samples.
    """
    # Use the reader as a context manager so it is closed even when an
    # assertion below fails.
    with pedalboard.io.ReadableAudioFile(
        audio_filename, cross_platform_formats_only=True
    ) as af:
        assert af.duration < 30.5
        assert af.samplerate == samplerate
        data_read_all_at_once = af.read(af.frames)

        chunk_size = MP3_FRAME_LENGTH_SAMPLES
        chunks = []
        af.seek(0)
        while af.tell() < af.frames:
            chunk = af.read(chunk_size)
            # Fail fast instead of looping: presumably tell() only advances
            # by the number of frames read, so an empty read here would mean
            # the loop can never reach af.frames.
            assert chunk.shape[1] > 0, (
                f"read() returned no frames at position {af.tell():,} "
                f"of {af.frames:,} reported frames"
            )
            chunks.append(chunk)

    # Frames are on axis 1, so the chunks are joined along that axis.
    data_read_in_chunks = np.concatenate(chunks, axis=1)
    np.testing.assert_allclose(data_read_all_at_once, data_read_in_chunks)


@pytest.mark.parametrize("quality", [f"V{x}" for x in range(0, 10)] + [320, 64])
@pytest.mark.parametrize(
    "samplerate", [8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000]
)
@pytest.mark.parametrize("num_channels", [1, 2])
def test_mp3_at_all_samplerates(quality: str, samplerate: float, num_channels: int):
    """
    Write a noise-then-silence signal to an in-memory MP3 at the given
    quality, sample rate and channel count, read it back, and check that the
    reported length is within two MP3 frames of the input and that the
    decoded audio is loud where the input was noise and quiet where it was
    silent.
    """
    secs = 2
    half_length = samplerate * secs

    # Make an audio signal that is equal parts noise and silence to make sure
    # we end up with a mixture of bitrates in the file:
    noise = np.random.rand(half_length) - 0.5
    silence = np.zeros(half_length)
    mono = np.concatenate([noise, silence]).astype(np.float32)
    signal = np.stack([mono] * num_channels) if num_channels == 2 else np.expand_dims(mono, 0)

    # Encode into a named in-memory buffer.
    encoded = io.BytesIO()
    encoded.name = "test.mp3"
    with pedalboard.io.AudioFile(
        encoded, "w", samplerate, num_channels=num_channels, quality=quality
    ) as f:
        f.write(signal)

    # Decode from a fresh buffer holding the encoded bytes.
    read_buf = io.BytesIO(encoded.getvalue())

    with pedalboard.io.ReadableAudioFile(read_buf, cross_platform_formats_only=True) as af:
        input_length = signal.shape[-1]
        # Allow for up to two MP3 frames of padding:
        assert af.frames <= (input_length + MP3_FRAME_LENGTH_SAMPLES * 2)
        assert af.frames >= input_length

        # MP3 is lossy, so we can't expect the waveforms to be comparable;
        # but at least make sure that the first half of the signal is loud
        # and the second half is silent:
        decoded_noise = af.read(half_length)
        assert np.amax(np.mean(decoded_noise, axis=0)) >= np.amax(signal[:, :half_length])

        # skip a couple MP3 frames:
        af.read(MP3_FRAME_LENGTH_SAMPLES * 2)

        decoded_silence = af.read(half_length)
        assert np.amax(np.mean(decoded_silence, axis=0)) < 0.01