feat: add support for STEM files in the engine

This will allow the engine to read the multiple stereo audio channels (stems) of a file and mix them together as part of the deck audio processing.
mixxxdj · Apr 16, 2024 · 37573c7 · 37573c7
1 parent 4dc066e
commit 37573c7
Show file tree

Hide file tree

Showing 61 changed files with 1,051 additions and 318 deletions.
diff --git a/src/analyzer/analyzersilence.cpp b/src/analyzer/analyzersilence.cpp
@@ -74,10 +74,11 @@ SINT AnalyzerSilence::findLastSoundInChunk(std::span<const CSAMPLE> samples) {
 // static
 bool AnalyzerSilence::verifyFirstSound(
         std::span<const CSAMPLE> samples,
-        mixxx::audio::FramePos firstSoundFrame) {
+        mixxx::audio::FramePos firstSoundFrame,
+        mixxx::audio::ChannelCount channelCount) {
     const SINT firstSoundSample = findFirstSoundInChunk(samples);
     if (firstSoundSample < static_cast<SINT>(samples.size())) {
-        return mixxx::audio::FramePos::fromEngineSamplePos(firstSoundSample)
+        return mixxx::audio::FramePos::fromEngineSamplePos(firstSoundSample, channelCount)
                        .toLowerFrameBoundary() == firstSoundFrame.toLowerFrameBoundary();
     }
     return false;

diff --git a/src/analyzer/analyzersilence.h b/src/analyzer/analyzersilence.h
@@ -46,7 +46,8 @@ class AnalyzerSilence : public Analyzer {
     /// last analysis run and is an indicator for file edits or decoder
     /// changes/issues
     static bool verifyFirstSound(std::span<const CSAMPLE> samples,
-            mixxx::audio::FramePos firstSoundFrame);
+            mixxx::audio::FramePos firstSoundFrame,
+            mixxx::audio::ChannelCount channelCount);
 
   private:
     UserSettingsPointer m_pConfig;

diff --git a/src/analyzer/constants.h b/src/analyzer/constants.h
@@ -8,7 +8,7 @@ namespace mixxx {
 // depending on the track length. A block size of 4096 frames per block
 // seems to do fine. Signal processing during analysis uses the same,
 // fixed number of channels like the engine does, usually 2 = stereo.
-constexpr audio::ChannelCount kAnalysisChannels = mixxx::kEngineChannelCount;
+constexpr audio::ChannelCount kAnalysisChannels = mixxx::kEngineChannelOutputCount;
 constexpr SINT kAnalysisFramesPerChunk = 4096;
 constexpr SINT kAnalysisSamplesPerChunk =
         kAnalysisFramesPerChunk * kAnalysisChannels;

diff --git a/src/audio/frame.h b/src/audio/frame.h
@@ -35,16 +35,19 @@ class FramePos final {
     /// Return a `FramePos` from a given engine sample position. To catch
     /// "invalid" positions (e.g. when parsing values from control objects),
     /// use `FramePos::fromEngineSamplePosMaybeInvalid` instead.
-    static constexpr FramePos fromEngineSamplePos(double engineSamplePos) {
-        return FramePos(engineSamplePos / mixxx::kEngineChannelCount);
+    static constexpr FramePos fromEngineSamplePos(double engineSamplePos,
+            mixxx::audio::ChannelCount channelCount =
+                    mixxx::kEngineChannelOutputCount) {
+        return FramePos(engineSamplePos / channelCount);
     }
 
     /// Return an engine sample position. The `FramePos` is expected to be
     /// valid. If invalid positions are possible (e.g. for control object
     /// values), use `FramePos::toEngineSamplePosMaybeInvalid` instead.
-    double toEngineSamplePos() const {
+    double toEngineSamplePos(mixxx::audio::ChannelCount channelCount =
+                                     mixxx::kEngineChannelOutputCount) const {
         DEBUG_ASSERT(isValid());
-        double engineSamplePos = value() * mixxx::kEngineChannelCount;
+        double engineSamplePos = value() * channelCount;
         // In the rare but possible instance that the position is valid but
         // the engine sample position is exactly -1.0, we nudge the position
         // because otherwise fromEngineSamplePosMaybeInvalid() will think
@@ -63,11 +66,14 @@ class FramePos final {
     /// for compatibility with our control objects and legacy parts of the code
     /// base. Using a different code path based on the output of `isValid()` is
     /// preferable.
-    static constexpr FramePos fromEngineSamplePosMaybeInvalid(double engineSamplePos) {
+    static constexpr FramePos fromEngineSamplePosMaybeInvalid(
+            double engineSamplePos,
+            mixxx::audio::ChannelCount channelCount =
+                    mixxx::kEngineChannelOutputCount) {
         if (engineSamplePos == kLegacyInvalidEnginePosition) {
             return {};
         }
-        return fromEngineSamplePos(engineSamplePos);
+        return fromEngineSamplePos(engineSamplePos, channelCount);
     }
 
     /// Return an engine sample position. If the `FramePos` is invalid,
@@ -77,11 +83,13 @@ class FramePos final {
     /// for compatibility with our control objects and legacy parts of the code
     /// base. Using a different code path based on the output of `isValid()` is
     /// preferable.
-    double toEngineSamplePosMaybeInvalid() const {
+    double toEngineSamplePosMaybeInvalid(
+            mixxx::audio::ChannelCount channelCount =
+                    mixxx::kEngineChannelOutputCount) const {
         if (!isValid()) {
             return kLegacyInvalidEnginePosition;
         }
-        return toEngineSamplePos();
+        return toEngineSamplePos(channelCount);
     }
 
     /// Return true if the frame position is valid. Any finite value is

diff --git a/src/audio/types.h b/src/audio/types.h
@@ -80,6 +80,14 @@ class ChannelCount {
         return ChannelCount(valueFromInt(value));
     }
 
+    static ChannelCount fromDouble(double value) {
+        const auto channelCount = ChannelCount(static_cast<value_t>(value));
+        // The channel count should always be an integer value
+        // and this conversion is supposed to be lossless.
+        DEBUG_ASSERT(channelCount.toDouble() == value);
+        return channelCount;
+    }
+
     static constexpr ChannelCount mono() {
         return ChannelCount(static_cast<value_t>(1));
     }
@@ -88,6 +96,10 @@ class ChannelCount {
         return ChannelCount(static_cast<value_t>(2));
     }
 
+    static constexpr ChannelCount stem() {
+        return ChannelCount(static_cast<value_t>(8)); // 4 stereo channels
+    }
+
     explicit constexpr ChannelCount(
             value_t value = kValueDefault)
             : m_value(value) {
@@ -115,6 +127,11 @@ class ChannelCount {
         return value();
     }
 
+    // Helper cast for COs
+    constexpr double toDouble() const {
+        return static_cast<double>(value());
+    }
+
   private:
     value_t m_value;
 };

diff --git a/src/engine/bufferscalers/enginebufferscale.cpp b/src/engine/bufferscalers/enginebufferscale.cpp
@@ -2,11 +2,12 @@
 
 #include "engine/engine.h"
 #include "moc_enginebufferscale.cpp"
+#include "soundio/soundmanagerconfig.h"
 
 EngineBufferScale::EngineBufferScale()
         : m_outputSignal(
                   mixxx::audio::SignalInfo(
-                          mixxx::kEngineChannelCount,
+                          mixxx::kEngineChannelOutputCount,
                           mixxx::audio::SampleRate())),
           m_dBaseRate(1.0),
           m_bSpeedAffectsPitch(false),
@@ -16,12 +17,22 @@ EngineBufferScale::EngineBufferScale()
     DEBUG_ASSERT(!m_outputSignal.isValid());
 }
 
-void EngineBufferScale::setSampleRate(
-        mixxx::audio::SampleRate sampleRate) {
+void EngineBufferScale::setOutputSignal(
+        mixxx::audio::SampleRate sampleRate,
+        mixxx::audio::ChannelCount channelCount) {
     DEBUG_ASSERT(sampleRate.isValid());
+    DEBUG_ASSERT(channelCount.isValid());
+    bool changed = false;
     if (sampleRate != m_outputSignal.getSampleRate()) {
         m_outputSignal.setSampleRate(sampleRate);
-        onSampleRateChanged();
+        changed = true;
+    }
+    if (channelCount != m_outputSignal.getChannelCount()) {
+        m_outputSignal.setChannelCount(channelCount);
+        changed = true;
+    }
+    if (changed) {
+        onOutputSignalChanged();
     }
     DEBUG_ASSERT(m_outputSignal.isValid());
 }
diff --git a/src/engine/bufferscalers/enginebufferscale.h b/src/engine/bufferscalers/enginebufferscale.h
@@ -42,9 +42,10 @@ class EngineBufferScale : public QObject {
         m_dPitchRatio = *pPitchRatio;
     }
 
-    // Set the desired output sample rate.
-    void setSampleRate(
-            mixxx::audio::SampleRate sampleRate);
+    // Set the desired output signal.
+    void setOutputSignal(
+            mixxx::audio::SampleRate sampleRate,
+            mixxx::audio::ChannelCount channelCout);
 
     const mixxx::audio::SignalInfo& getOutputSignal() const {
         return m_outputSignal;
@@ -66,7 +67,7 @@ class EngineBufferScale : public QObject {
   private:
     mixxx::audio::SignalInfo m_outputSignal;
 
-    virtual void onSampleRateChanged() = 0;
+    virtual void onOutputSignalChanged() = 0;
 
   protected:
     double m_dBaseRate;

diff --git a/src/engine/bufferscalers/enginebufferscalelinear.cpp b/src/engine/bufferscalers/enginebufferscalelinear.cpp
@@ -17,15 +17,19 @@ EngineBufferScaleLinear::EngineBufferScaleLinear(ReadAheadManager *pReadAheadMan
       m_dOldRate(1.0),
       m_dCurrentFrame(0.0),
       m_dNextFrame(0.0) {
-    m_floorSampleOld[0] = 0.0;
-    m_floorSampleOld[1] = 0.0;
+    onOutputSignalChanged();
     SampleUtil::clear(m_bufferInt, kiLinearScaleReadAheadLength);
 }
 
 EngineBufferScaleLinear::~EngineBufferScaleLinear() {
     SampleUtil::free(m_bufferInt);
 }
 
+void EngineBufferScaleLinear::onOutputSignalChanged() {
+    m_floorSampleOld.resize(getOutputSignal().getChannelCount());
+    std::fill(m_floorSampleOld.begin(), m_floorSampleOld.end(), 0.0);
+}
+
 void EngineBufferScaleLinear::setScaleParameters(double base_rate,
                                                  double* pTempoRatio,
                                                  double* pPitchRatio) {
@@ -40,8 +44,7 @@ void EngineBufferScaleLinear::clear() {
     // Clear out buffer and saved sample data
     m_bufferIntSize = 0;
     m_dNextFrame = 0;
-    m_floorSampleOld[0] = 0;
-    m_floorSampleOld[1] = 0;
+    onOutputSignalChanged();
 }
 
 // laurent de soras - punked from musicdsp.org (mad props)
@@ -85,9 +88,11 @@ double EngineBufferScaleLinear::scaleBuffer(
         // reset m_floorSampleOld in a way as we were coming from
         // the other direction
         SINT iNextSample = getOutputSignal().frames2samples(static_cast<SINT>(ceil(m_dNextFrame)));
-        if (iNextSample + 1 < m_bufferIntSize) {
-            m_floorSampleOld[0] = m_bufferInt[iNextSample];
-            m_floorSampleOld[1] = m_bufferInt[iNextSample + 1];
+        int chCount = getOutputSignal().getChannelCount();
+        if (iNextSample + chCount <= m_bufferIntSize) {
+            for (int c = 0; c < chCount; c++) {
+                m_floorSampleOld[c] = m_bufferInt[iNextSample + c];
+            }
         }
 
         // if the buffer has extra samples, do a read so RAMAN ends up back where
@@ -103,7 +108,7 @@ double EngineBufferScaleLinear::scaleBuffer(
             //qDebug() << "extra samples" << extra_samples;
 
             SINT next_samples_read = m_pReadAheadManager->getNextSamples(
-                    rate_add_new, m_bufferInt, extra_samples);
+                    rate_add_new, m_bufferInt, extra_samples, getOutputSignal().getChannelCount());
             frames_read += getOutputSignal().samples2frames(next_samples_read);
         }
         // force a buffer read:
@@ -145,8 +150,10 @@ SINT EngineBufferScaleLinear::do_copy(CSAMPLE* buf, SINT buf_size) {
     // to call getNextSamples until you receive the number of samples you
     // wanted.
     while (samples_needed > 0) {
-        SINT read_size = m_pReadAheadManager->getNextSamples(m_dRate, write_buf,
-                samples_needed);
+        SINT read_size = m_pReadAheadManager->getNextSamples(m_dRate,
+                write_buf,
+                samples_needed,
+                getOutputSignal().getChannelCount());
         if (read_size == 0) {
             if (++read_failed_count > 1) {
                 break;
@@ -168,9 +175,11 @@ SINT EngineBufferScaleLinear::do_copy(CSAMPLE* buf, SINT buf_size) {
     // blow away the fractional sample position here
     m_bufferIntSize = 0; // force buffer read
     m_dNextFrame = 0;
-    if (read_samples > 1) {
-        m_floorSampleOld[0] = buf[read_samples - 2];
-        m_floorSampleOld[1] = buf[read_samples - 1];
+    int chCount = getOutputSignal().getChannelCount();
+    if (read_samples > chCount - 1) {
+        for (int c = 0; c < chCount; c++) {
+            m_floorSampleOld[c] = buf[read_samples - chCount + c];
+        }
     }
     return read_samples;
 }
@@ -219,13 +228,12 @@ double EngineBufferScaleLinear::do_scale(CSAMPLE* buf, SINT buf_size) {
     SINT unscaled_frames_needed = static_cast<SINT>(frames +
             m_dNextFrame - floor(m_dNextFrame));
 
-    CSAMPLE floor_sample[2];
-    CSAMPLE ceil_sample[2];
+    int chCount = getOutputSignal().getChannelCount();
+    std::vector<CSAMPLE> floor_sample(chCount);
+    std::vector<CSAMPLE> ceil_sample(chCount);
 
-    floor_sample[0] = 0;
-    floor_sample[1] = 0;
-    ceil_sample[0] = 0;
-    ceil_sample[1] = 0;
+    std::fill(floor_sample.begin(), floor_sample.end(), 0.0);
+    std::fill(ceil_sample.begin(), ceil_sample.end(), 0.0);
 
     double startFrame = m_dNextFrame;
     SINT i = 0;
@@ -248,27 +256,29 @@ double EngineBufferScaleLinear::do_scale(CSAMPLE* buf, SINT buf_size) {
 
         SINT currentFrameFloor = static_cast<SINT>(floor(m_dCurrentFrame));
 
+        int sampleCount = getOutputSignal().frames2samples(currentFrameFloor);
         if (currentFrameFloor < 0) {
             // we have advanced to a new buffer in the previous run,
             // but the floor still points to the old buffer
             // so take the cached sample, this happens on slow rates
-            floor_sample[0] = m_floorSampleOld[0];
-            floor_sample[1] = m_floorSampleOld[1];
-            ceil_sample[0] = m_bufferInt[0];
-            ceil_sample[1] = m_bufferInt[1];
-        } else if (getOutputSignal().frames2samples(currentFrameFloor) + 3 < m_bufferIntSize) {
+            for (int c = 0; c < chCount; c++) {
+                floor_sample[c] = m_floorSampleOld[c];
+                ceil_sample[c] = m_bufferInt[c];
+            }
+        } else if (sampleCount + 2 * chCount - 1 < m_bufferIntSize) {
             // take floor_sample form the buffer of the previous run
-            floor_sample[0] = m_bufferInt[getOutputSignal().frames2samples(currentFrameFloor)];
-            floor_sample[1] = m_bufferInt[getOutputSignal().frames2samples(currentFrameFloor) + 1];
-            ceil_sample[0] = m_bufferInt[getOutputSignal().frames2samples(currentFrameFloor) + 2];
-            ceil_sample[1] = m_bufferInt[getOutputSignal().frames2samples(currentFrameFloor) + 3];
+            for (int c = 0; c < chCount; c++) {
+                floor_sample[c] = m_bufferInt[sampleCount + c];
+                ceil_sample[c] = m_bufferInt[sampleCount + chCount + c];
+            }
         } else {
             // if we don't have the ceil_sample in buffer, load some more
 
-            if (getOutputSignal().frames2samples(currentFrameFloor) + 1 < m_bufferIntSize) {
+            if (sampleCount + chCount - 1 < m_bufferIntSize) {
                 // take floor_sample form the buffer of the previous run
-                floor_sample[0] = m_bufferInt[getOutputSignal().frames2samples(currentFrameFloor)];
-                floor_sample[1] = m_bufferInt[getOutputSignal().frames2samples(currentFrameFloor) + 1];
+                for (int c = 0; c < chCount; c++) {
+                    floor_sample[c] = m_bufferInt[sampleCount + c];
+                }
             }
 
             do {
@@ -285,7 +295,9 @@ double EngineBufferScaleLinear::do_scale(CSAMPLE* buf, SINT buf_size) {
 
                 m_bufferIntSize = m_pReadAheadManager->getNextSamples(
                         rate_new == 0 ? rate_old : rate_new,
-                        m_bufferInt, samples_to_read);
+                        m_bufferInt,
+                        samples_to_read,
+                        getOutputSignal().getChannelCount());
                 // Note we may get 0 samples once if we just hit a loop trigger,
                 // e.g. when reloop_toggle jumps back to loop_in, or when
                 // moving a loop causes the play position to be moved along.
@@ -297,29 +309,32 @@ double EngineBufferScaleLinear::do_scale(CSAMPLE* buf, SINT buf_size) {
                 startFrame -= oldBufferFrames;
                 currentFrameFloor -= oldBufferFrames;
 
-            } while (getOutputSignal().frames2samples(currentFrameFloor) + 3 >= m_bufferIntSize);
+                sampleCount = getOutputSignal().frames2samples(currentFrameFloor);
+            } while (sampleCount + 2 * chCount - 1 >= m_bufferIntSize);
 
             // Now that the buffer is up to date, we can get the value of the sample
             // at the floor of our position.
             if (currentFrameFloor >= 0) {
                 // the previous position is in the new buffer
-                floor_sample[0] = m_bufferInt[getOutputSignal().frames2samples(currentFrameFloor)];
-                floor_sample[1] = m_bufferInt[getOutputSignal().frames2samples(currentFrameFloor) + 1];
+                for (int c = 0; c < chCount; c++) {
+                    floor_sample[c] = m_bufferInt[sampleCount + c];
+                }
+            }
+            for (int c = 0; c < chCount; c++) {
+                ceil_sample[c] = m_bufferInt[sampleCount + chCount + c];
             }
-            ceil_sample[0] = m_bufferInt[getOutputSignal().frames2samples(currentFrameFloor) + 2];
-            ceil_sample[1] = m_bufferInt[getOutputSignal().frames2samples(currentFrameFloor) + 3];
         }
 
         // For the current index, what percentage is it
         // between the previous and the next?
         CSAMPLE frac = static_cast<CSAMPLE>(m_dCurrentFrame) - currentFrameFloor;
 
         // Perform linear interpolation
-        buf[i] = floor_sample[0] + frac * (ceil_sample[0] - floor_sample[0]);
-        buf[i + 1] = floor_sample[1] + frac * (ceil_sample[1] - floor_sample[1]);
+        for (int c = 0; c < chCount; c++) {
+            buf[i + c] = floor_sample[c] + frac * (ceil_sample[c] - floor_sample[c]);
+        }
 
-        m_floorSampleOld[0] = floor_sample[0];
-        m_floorSampleOld[1] = floor_sample[1];
+        m_floorSampleOld = floor_sample;
 
         // increment the index for the next loop
         m_dNextFrame = m_dCurrentFrame + rate_add;
@@ -328,7 +343,7 @@ double EngineBufferScaleLinear::do_scale(CSAMPLE* buf, SINT buf_size) {
         // samples. This prevents the change from being discontinuous and helps
         // improve sound quality.
         rate_add += rate_delta_abs;
-        i += getOutputSignal().getChannelCount();
+        i += chCount;
     }
 
     SampleUtil::clear(&buf[i], buf_size - i);