From b136993969f60af503c10fd477aafad97f153b6e Mon Sep 17 00:00:00 2001 From: MIRIMIRIM <59959583+MIRIMIRIM@users.noreply.github.com> Date: Wed, 3 Aug 2022 13:20:10 +0800 Subject: [PATCH] Spectrum display improvement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://github.com/TypesettingTools/Aegisub/pull/94 – Selection of different frequency mappings for the spectrum display, from linear (current default) to logarithmic. – Field added in the preference page to select the frequency mapping – New preference key to remember this setting: Audio/Renderer/Spectrum/FreqCurve – Consistent display with high sampling rates (> 48 kHz) – Fixed time shift with high quality settings Co-Authored-By: EleonoreMizo <9513144+EleonoreMizo@users.noreply.github.com> --- src/audio_display.cpp | 10 ++ src/audio_renderer_spectrum.cpp | 157 +++++++++++++++++++++------ src/audio_renderer_spectrum.h | 33 ++++++ src/libresrc/default_config.json | 3 +- src/libresrc/osx/default_config.json | 3 +- src/preferences.cpp | 4 + 6 files changed, 172 insertions(+), 38 deletions(-) diff --git a/src/audio_display.cpp b/src/audio_display.cpp index cdc942692e..fd7db878db 100644 --- a/src/audio_display.cpp +++ b/src/audio_display.cpp @@ -767,6 +767,15 @@ void AudioDisplay::ReloadRenderingSettings() spectrum_width[spectrum_quality], spectrum_distance[spectrum_quality]); + // Frequency curve + int64_t spectrum_freq_curve = OPT_GET("Audio/Renderer/Spectrum/FreqCurve")->GetInt(); + spectrum_freq_curve = mid(0, spectrum_freq_curve, 4); + const float spectrum_fref_pos[] = { 0.001f, 0.125f, 0.333f, 0.425f, 0.999f }; + + audio_spectrum_renderer->set_reference_frequency_position( + spectrum_fref_pos[spectrum_freq_curve] + ); + audio_renderer_provider = std::move(audio_spectrum_renderer); } else @@ -1236,6 +1245,7 @@ void AudioDisplay::OnAudioOpen(agi::AudioProvider *provider) OPT_SUB("Colour/Audio Display/Spectrum", &AudioDisplay::ReloadRenderingSettings, this), 
OPT_SUB("Colour/Audio Display/Waveform", &AudioDisplay::ReloadRenderingSettings, this), OPT_SUB("Audio/Renderer/Spectrum/Quality", &AudioDisplay::ReloadRenderingSettings, this), + OPT_SUB("Audio/Renderer/Spectrum/FreqCurve", &AudioDisplay::ReloadRenderingSettings, this), }); OnTimingController(); } diff --git a/src/audio_renderer_spectrum.cpp b/src/audio_renderer_spectrum.cpp index 563ffadc3d..d5d2347bc2 100644 --- a/src/audio_renderer_spectrum.cpp +++ b/src/audio_renderer_spectrum.cpp @@ -143,20 +143,28 @@ void AudioSpectrumRenderer::OnSetProvider() void AudioSpectrumRenderer::SetResolution(size_t _derivation_size, size_t _derivation_dist) { - if (derivation_dist != _derivation_dist) + if (derivation_dist_user != _derivation_dist) { - derivation_dist = _derivation_dist; - if (cache) - cache->Age(0); + derivation_dist_user = _derivation_dist; + update_derivation_values(); + AgeCache(0); } - if (derivation_size != _derivation_size) + if (derivation_size_user != _derivation_size) { - derivation_size = _derivation_size; + derivation_size_user = _derivation_size; RecreateCache(); } } +void AudioSpectrumRenderer::set_reference_frequency_position(float pos_fref_) +{ + assert(pos_fref_ > 0.f); + assert(pos_fref_ < 1.f); + + pos_fref = pos_fref_; +} + template void AudioSpectrumRenderer::ConvertToFloat(size_t count, T *dest) { for (size_t si = 0; si < count; ++si) @@ -165,6 +173,32 @@ void AudioSpectrumRenderer::ConvertToFloat(size_t count, T *dest) { } } +void AudioSpectrumRenderer::update_derivation_values() +{ + // Below this sampling rate (Hz), the derivation values are identical to + // the user-provided ones. Otherwise, they are scaled according to the + // ratio between the sampling rates. + // The threshold is set at 50 kHz so with standard rates like 48 kHz, + // the values are kept identical, and scaled with higher standard rates + // like 88.2 or 96 kHz. 
+ constexpr float sample_rate_ref = 50000.f; + + derivation_dist = derivation_dist_user; + derivation_size = derivation_size_user; + + if (provider != nullptr) + { + const int sample_rate = provider->GetSampleRate(); + float mult = float(sample_rate) / sample_rate_ref; + while (mult > 1) + { + ++derivation_dist; + ++derivation_size; + mult *= 0.5f; + } + } +} + void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block) { assert(cache); @@ -173,12 +207,19 @@ void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block) int64_t first_sample = (((int64_t)block_index) << derivation_dist) - ((int64_t)1 << derivation_size); provider->GetInt16MonoAudio(audio_scratch.data(), first_sample, 2 << derivation_size); + // Because the FFTs used here are unnormalized DFTs, we have to compensate + // the possible length difference between derivation_size used in the + // calculations and its user-provided counterpart. Thus, the display is + // kept independent of the sampling rate. + const float scale_fix = + 1.f / sqrtf(float(1 << (derivation_size - derivation_size_user))); + #ifdef WITH_FFTW3 ConvertToFloat(2 << derivation_size, dft_input); fftw_execute(dft_plan); - double scale_factor = 9 / sqrt(2 << (derivation_size + 1)); + double scale_factor = scale_fix * 9 / sqrt(2 << (derivation_size + 1)); fftw_complex *o = dft_output; for (size_t si = (size_t)1< 0; --si) @@ -196,7 +237,7 @@ void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block) FFT fft; fft.Transform(2< 0; --si) { @@ -211,6 +252,10 @@ void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block) void AudioSpectrumRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle style) { + // Misc. 
utility functions + auto floor_int = [](float val) { return int(floorf(val)); }; + auto round_int = [](float val) { return int(floorf(val + 0.5f)); }; + if (!cache) return; @@ -229,9 +274,34 @@ void AudioSpectrumRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle const AudioColorScheme *pal = &colors[style]; - /// @todo Make minband and maxband configurable - int minband = 0; - int maxband = 1 << derivation_size; + // Sampling rate, in Hz. + const float sample_rate = float(provider->GetSampleRate()); + + // Number of FFT bins, excluding the "Nyquist" one + const int nbr_bins = 1 << derivation_size; + + // minband and maxband define an half-open range. + int minband = 1; // Starts at 1, we don't care about showing the DC. + int maxband = std::min( + round_int(nbr_bins * max_freq / (sample_rate * 0.5f)), + nbr_bins + ); + assert(minband < maxband); + + // Precomputes this once, this will be useful for the log curve. + const float scale_log = logf(maxband / minband); + + // Turns the user-specified 1 kHz position into a ratio between the linear + // and logarithmic curves that we can directly use in the following + // calculations. + assert(pos_fref > 0); + assert(pos_fref < 1); + float b_fref = nbr_bins * freq_ref / (sample_rate * 0.5f); + b_fref = mid(1.f, b_fref, float(maxband - 1)); + const float clin = minband + (maxband - minband) * pos_fref; + const float clog = minband * expf(pos_fref * scale_log); + float log_ratio_calc = (b_fref - clin) / (clog - clin); + log_ratio_calc = mid(0.f, log_ratio_calc, 1.f); // ax = absolute x, absolute to the virtual spectrum bitmap for (int ax = start; ax < end; ++ax) @@ -243,36 +313,51 @@ void AudioSpectrumRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle // Prepare bitmap writing unsigned char *px = imgdata + (imgheight-1) * stride + (ax - start) * 3; - // Scale up or down vertically? 
- if (imgheight > 1<= imgdata); - assert(px < imgdata + imgheight*stride); - auto ideal = (double)(y+1.)/imgheight * (maxband-minband) + minband; - float sample1 = power[(int)floor(ideal)+minband]; - float sample2 = power[(int)ceil(ideal)+minband]; - float frac = ideal - floor(ideal); - float val = (1-frac)*sample1 + frac*sample2; - pal->map(val*amplitude_scale, px); - px -= stride; + // Bin index is an interpolation between the linear and log curves. + const float pos_rel = float(y + 1) / float(imgheight); + const float b_lin = minband + pos_rel * (maxband - minband); + const float b_log = minband * expf(pos_rel * scale_log); + bin_nxt = b_lin + log_ratio_calc * (b_log - b_lin); } - } - else - { - // Pick greatest - for (int y = 0; y < imgheight; ++y) + + float val = 0; + + // Interpolate between consecutive bins + if (bin_nxt - bin_prv < 2) { - assert(px >= imgdata); - assert(px < imgdata + imgheight*stride); - int sample1 = std::max(0, maxband * y/imgheight + minband); - int sample2 = std::min((1<map(maxval*amplitude_scale, px); - px -= stride; + const int bin_0 = floor_int(bin_cur); + const int bin_1 = std::min(bin_0 + 1, nbr_bins - 1); + const float frac = bin_cur - float(bin_0); + const float v0 = power[bin_0]; + const float v1 = power[bin_1]; + val = v0 + frac * (v1 - v0); } + + // Pick the greatest bin on the interval + else + { + int bin_inf = floor_int((bin_prv + bin_cur) * 0.5f); + int bin_sup = floor_int((bin_cur + bin_nxt) * 0.5f); + bin_inf = std::min(bin_inf, nbr_bins - 2); + bin_sup = std::min(bin_sup, nbr_bins - 1); + assert(bin_inf < bin_sup); + val = *std::max_element(&power[bin_inf], &power[bin_sup]); + } + + pal->map(val * amplitude_scale, px); + + px -= stride; + bin_prv = bin_cur; + bin_cur = bin_nxt; } } diff --git a/src/audio_renderer_spectrum.h b/src/audio_renderer_spectrum.h index d4641f37de..3bc9847770 100644 --- a/src/audio_renderer_spectrum.h +++ b/src/audio_renderer_spectrum.h @@ -61,10 +61,34 @@ class AudioSpectrumRenderer final : 
public AudioRendererBitmapProvider { /// Colour tables used for rendering std::vector colors; + /// User-provided value for derivation_size + size_t derivation_size_user = 0; + + /// User-provided value for derivation_dist + size_t derivation_dist_user = 0; + + /// Maximum audible, displayed frequency. Avoids wasting the display space + /// with ultrasonic content at sampling rates > 40 kHz. + float max_freq = 20000.f; + + /// Relative vertical position of the 1 kHz frequency, in (0 ; 1) open range + /// 0 = bottom of the display zone, 1 = top + /// The actual position, as displayed, is limited by the available mapping + /// curves (linear and log). + /// Values close to 0 will give a linear curve, and close to 1 a log curve. + float pos_fref = 1.0f / 3; + + /// Reference frequency whose vertical position is constant, in Hz. + const float freq_ref = 1000.0f; + /// Binary logarithm of number of samples to use in deriving frequency-power data + /// This could differ from the user-provided value because the actual value + /// used in computations may be scaled, depending on the sampling rate. size_t derivation_size = 0; /// Binary logarithm of number of samples between the start of derivations + /// This could differ from the user-provided value because the actual value + /// used in computations may be scaled, depending on the sampling rate. size_t derivation_dist = 0; /// @brief Reset in response to changing audio provider @@ -90,6 +114,9 @@ class AudioSpectrumRenderer final : public AudioRendererBitmapProvider { template void ConvertToFloat(size_t count, T *dest); + /// @brief Updates the derivation_* after a derivation_*_user change. + void update_derivation_values(); + #ifdef WITH_FFTW3 /// FFTW plan data fftw_plan dft_plan = nullptr; @@ -133,6 +160,12 @@ class AudioSpectrumRenderer final : public AudioRendererBitmapProvider { /// is specified too large, it will be clamped to the size. 
void SetResolution(size_t derivation_size, size_t derivation_dist); + /// @brief Set the vertical relative position of the reference frequency (1 kHz) + /// @param pos_fref_ Vertical position of the 1 kHz frequency. Between 0 and 1, boundaries excluded. + /// + /// A value close to 0 gives a linear display, and close to 1 a logarithmic display. + void set_reference_frequency_position(float pos_fref_); + /// @brief Cleans up the cache /// @param max_size Maximum size in bytes for the cache void AgeCache(size_t max_size) override; diff --git a/src/libresrc/default_config.json b/src/libresrc/default_config.json index 8a648fa47c..3ea90c8de6 100644 --- a/src/libresrc/default_config.json +++ b/src/libresrc/default_config.json @@ -71,7 +71,8 @@ "Spectrum" : { "Cutoff" : 0, "Memory Max" : 128, - "Quality" : 1 + "Quality" : 1, + "FreqCurve" : 0 } }, "Snap" : { diff --git a/src/libresrc/osx/default_config.json b/src/libresrc/osx/default_config.json index 82223cf484..4d55817884 100644 --- a/src/libresrc/osx/default_config.json +++ b/src/libresrc/osx/default_config.json @@ -71,7 +71,8 @@ "Spectrum" : { "Cutoff" : 0, "Memory Max" : 128, - "Quality" : 1 + "Quality" : 1, + "FreqCurve" : 0 } }, "Snap" : { diff --git a/src/preferences.cpp b/src/preferences.cpp index 1a315ed50a..79b5b2b932 100644 --- a/src/preferences.cpp +++ b/src/preferences.cpp @@ -394,6 +394,10 @@ void Advanced_Audio(wxTreebook *book, Preferences *parent) { wxArrayString sq_choice(4, sq_arr); p->OptionChoice(spectrum, _("Quality"), sq_choice, "Audio/Renderer/Spectrum/Quality"); + const wxString sc_arr[5] = { _("Linear"), _("Extended"), _("Medium"), _("Compressed"), _("Logarithmic") }; + wxArrayString sc_choice(5, sc_arr); + p->OptionChoice(spectrum, _("Frequency mapping"), sc_choice, "Audio/Renderer/Spectrum/FreqCurve"); + p->OptionAdd(spectrum, _("Cache memory max (MB)"), "Audio/Renderer/Spectrum/Memory Max", 2, 1024); #ifdef WITH_AVISYNTH