From b136993969f60af503c10fd477aafad97f153b6e Mon Sep 17 00:00:00 2001
From: MIRIMIRIM <59959583+MIRIMIRIM@users.noreply.github.com>
Date: Wed, 3 Aug 2022 13:20:10 +0800
Subject: [PATCH] Spectrum display improvement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

https://github.com/TypesettingTools/Aegisub/pull/94

– Selection of different frequency mappings for the spectrum display,
from linear (current default) to logarithmic.
– Field added in the preference page to select the frequency mapping
– New preference key to remember this setting:
Audio/Renderer/Spectrum/FreqCurve
– Consistent display with high sampling rates (> 48 kHz)
– Fixed time shift with high quality settings

Co-Authored-By: EleonoreMizo <9513144+EleonoreMizo@users.noreply.github.com>
---
 src/audio_display.cpp                |  10 ++
 src/audio_renderer_spectrum.cpp      | 157 +++++++++++++++++++++------
 src/audio_renderer_spectrum.h        |  33 ++++++
 src/libresrc/default_config.json     |   3 +-
 src/libresrc/osx/default_config.json |   3 +-
 src/preferences.cpp                  |   4 +
 6 files changed, 172 insertions(+), 38 deletions(-)
diff --git a/src/audio_display.cpp b/src/audio_display.cpp
index cdc942692e..fd7db878db 100644
--- a/src/audio_display.cpp
+++ b/src/audio_display.cpp
@@ -767,6 +767,15 @@ void AudioDisplay::ReloadRenderingSettings()
 			spectrum_width[spectrum_quality],
 			spectrum_distance[spectrum_quality]);
 
+		// Frequency curve
+		int64_t spectrum_freq_curve = OPT_GET("Audio/Renderer/Spectrum/FreqCurve")->GetInt();
+		spectrum_freq_curve = mid<int64_t>(0, spectrum_freq_curve, 4);
+		const float spectrum_fref_pos[] = { 0.001f, 0.125f, 0.333f, 0.425f, 0.999f };
+
+		audio_spectrum_renderer->set_reference_frequency_position(
+			spectrum_fref_pos[spectrum_freq_curve]
+		);
+
 		audio_renderer_provider = std::move(audio_spectrum_renderer);
 	}
 	else
@@ -1236,6 +1245,7 @@ void AudioDisplay::OnAudioOpen(agi::AudioProvider *provider)
 				OPT_SUB("Colour/Audio Display/Spectrum", &AudioDisplay::ReloadRenderingSettings, this),
 				OPT_SUB("Colour/Audio Display/Waveform", &AudioDisplay::ReloadRenderingSettings, this),
 				OPT_SUB("Audio/Renderer/Spectrum/Quality", &AudioDisplay::ReloadRenderingSettings, this),
+				OPT_SUB("Audio/Renderer/Spectrum/FreqCurve", &AudioDisplay::ReloadRenderingSettings, this),
 			});
 			OnTimingController();
 		}
diff --git a/src/audio_renderer_spectrum.cpp b/src/audio_renderer_spectrum.cpp
index 563ffadc3d..d5d2347bc2 100644
--- a/src/audio_renderer_spectrum.cpp
+++ b/src/audio_renderer_spectrum.cpp
@@ -143,20 +143,28 @@ void AudioSpectrumRenderer::OnSetProvider()
 
 void AudioSpectrumRenderer::SetResolution(size_t _derivation_size, size_t _derivation_dist)
 {
-	if (derivation_dist != _derivation_dist)
+	if (derivation_dist_user != _derivation_dist)
 	{
-		derivation_dist = _derivation_dist;
-		if (cache)
-			cache->Age(0);
+		derivation_dist_user = _derivation_dist;
+		update_derivation_values();
+		AgeCache(0);
 	}
 
-	if (derivation_size != _derivation_size)
+	if (derivation_size_user != _derivation_size)
 	{
-		derivation_size = _derivation_size;
+		derivation_size_user = _derivation_size;
 		RecreateCache();
 	}
 }
 
+void AudioSpectrumRenderer::set_reference_frequency_position(float pos_fref_)
+{
+	assert(pos_fref_ > 0.f);
+	assert(pos_fref_ < 1.f);
+
+	pos_fref = pos_fref_;
+}
+
 template<class T>
 void AudioSpectrumRenderer::ConvertToFloat(size_t count, T *dest) {
 	for (size_t si = 0; si < count; ++si)
@@ -165,6 +173,32 @@ void AudioSpectrumRenderer::ConvertToFloat(size_t count, T *dest) {
 	}
 }
 
+void AudioSpectrumRenderer::update_derivation_values()
+{
+	// Below this sampling rate (Hz), the derivation values are identical to
+	// the user-provided ones. Otherwise, they are scaled according to the
+	// ratio between the sampling rates.
+	// The threshold is set at 50 kHz so with standard rates like 48 kHz,
+	// the values are kept identical, and scaled with higher standard rates
+	// like 88.2 or 96 kHz.
+	constexpr float sample_rate_ref = 50000.f;
+
+	derivation_dist = derivation_dist_user;
+	derivation_size = derivation_size_user;
+
+	if (provider != nullptr)
+	{
+		const int sample_rate = provider->GetSampleRate();
+		float mult = float(sample_rate) / sample_rate_ref;
+		while (mult > 1)
+		{
+			++derivation_dist;
+			++derivation_size;
+			mult *= 0.5f;
+		}
+	}
+}
+
 void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block)
 {
 	assert(cache);
@@ -173,12 +207,19 @@ void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block)
 	int64_t first_sample = (((int64_t)block_index) << derivation_dist) - ((int64_t)1 << derivation_size);
 	provider->GetInt16MonoAudio(audio_scratch.data(), first_sample, 2 << derivation_size);
 
+	// Because the FFTs used here are unnormalized DFTs, we have to compensate
+	// for the possible length difference between derivation_size used in the
+	// calculations and its user-provided counterpart. Thus, the display is
+	// kept independent of the sampling rate.
+	const float scale_fix =
+		1.f / sqrtf(float(1 << (derivation_size - derivation_size_user)));
+
 #ifdef WITH_FFTW3
 	ConvertToFloat(2 << derivation_size, dft_input);
 
 	fftw_execute(dft_plan);
 
-	double scale_factor = 9 / sqrt(2 << (derivation_size + 1));
+	double scale_factor = scale_fix * 9 / sqrt(2 << (derivation_size + 1));
 
 	fftw_complex *o = dft_output;
 	for (size_t si = (size_t)1<<derivation_size; si > 0; --si)
@@ -196,7 +237,7 @@ void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block)
 	FFT fft;
 	fft.Transform(2<<derivation_size, fft_input, fft_real, fft_imag);
 
-	float scale_factor = 9 / sqrt(2 * (float)(2<<derivation_size));
+	float scale_factor = scale_fix * 9 / sqrt(2 * (float)(2<<derivation_size));
 
 	for (size_t si = 1<<derivation_size; si > 0; --si)
 	{
@@ -211,6 +252,10 @@ void AudioSpectrumRenderer::FillBlock(size_t block_index, float *block)
 
 void AudioSpectrumRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle style)
 {
+	// Misc. utility functions
+	auto floor_int = [](float val) { return int(floorf(val)); };
+	auto round_int = [](float val) { return int(floorf(val + 0.5f)); };
+
 	if (!cache)
 		return;
 
@@ -229,9 +274,34 @@ void AudioSpectrumRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle
 
 	const AudioColorScheme *pal = &colors[style];
 
-	/// @todo Make minband and maxband configurable
-	int minband = 0;
-	int maxband = 1 << derivation_size;
+	// Sampling rate, in Hz.
+	const float sample_rate = float(provider->GetSampleRate());
+
+	// Number of FFT bins, excluding the "Nyquist" one
+	const int nbr_bins = 1 << derivation_size;
+
+	// minband and maxband define a half-open range.
+	int minband = 1; // Starts at 1, we don't care about showing the DC.
+	int maxband = std::min(
+		round_int(nbr_bins * max_freq / (sample_rate * 0.5f)),
+		nbr_bins
+	);
+	assert(minband < maxband);
+
+	// Precompute this once; it is used by the log curve below.
+	const float scale_log = logf(maxband / minband);
+
+	// Turns the user-specified 1 kHz position into a ratio between the linear
+	// and logarithmic curves that we can directly use in the following
+	// calculations.
+	assert(pos_fref > 0);
+	assert(pos_fref < 1);
+	float b_fref = nbr_bins * freq_ref / (sample_rate * 0.5f);
+	b_fref = mid(1.f, b_fref, float(maxband - 1));
+	const float clin = minband + (maxband - minband) * pos_fref;
+	const float clog = minband * expf(pos_fref * scale_log);
+	float log_ratio_calc = (b_fref - clin) / (clog - clin);
+	log_ratio_calc = mid(0.f, log_ratio_calc, 1.f);
 
 	// ax = absolute x, absolute to the virtual spectrum bitmap
 	for (int ax = start; ax < end; ++ax)
@@ -243,36 +313,51 @@ void AudioSpectrumRenderer::Render(wxBitmap &bmp, int start, AudioRenderingStyle
 		// Prepare bitmap writing
 		unsigned char *px = imgdata + (imgheight-1) * stride + (ax - start) * 3;
 
-		// Scale up or down vertically?
-		if (imgheight > 1<<derivation_size)
+		float bin_prv = minband;
+		float bin_cur = minband;
+		for (int y = 0; y < imgheight; ++y)
 		{
-			// Interpolate
-			for (int y = 0; y < imgheight; ++y)
+			assert(bin_cur < float(maxband));
+
+			float       bin_nxt = maxband;
+			if (y + 1 < imgheight)
 			{
-				assert(px >= imgdata);
-				assert(px < imgdata + imgheight*stride);
-				auto ideal = (double)(y+1.)/imgheight * (maxband-minband) + minband;
-				float sample1 = power[(int)floor(ideal)+minband];
-				float sample2 = power[(int)ceil(ideal)+minband];
-				float frac = ideal - floor(ideal);
-				float val = (1-frac)*sample1 + frac*sample2;
-				pal->map(val*amplitude_scale, px);
-				px -= stride;
+				// Bin index is an interpolation between the linear and log curves.
+				const float pos_rel = float(y + 1) / float(imgheight);
+				const float b_lin = minband + pos_rel * (maxband - minband);
+				const float b_log = minband * expf(pos_rel * scale_log);
+				bin_nxt = b_lin + log_ratio_calc * (b_log - b_lin);
 			}
-		}
-		else
-		{
-			// Pick greatest
-			for (int y = 0; y < imgheight; ++y)
+
+			float val = 0;
+
+			// Interpolate between consecutive bins
+			if (bin_nxt - bin_prv < 2)
 			{
-				assert(px >= imgdata);
-				assert(px < imgdata + imgheight*stride);
-				int sample1 = std::max(0, maxband * y/imgheight + minband);
-				int sample2 = std::min((1<<derivation_size)-1, maxband * (y+1)/imgheight + minband);
-				float maxval = *std::max_element(&power[sample1], &power[sample2 + 1]);
-				pal->map(maxval*amplitude_scale, px);
-				px -= stride;
+				const int   bin_0 = floor_int(bin_cur);
+				const int   bin_1 = std::min(bin_0 + 1, nbr_bins - 1);
+				const float frac = bin_cur - float(bin_0);
+				const float v0 = power[bin_0];
+				const float v1 = power[bin_1];
+				val = v0 + frac * (v1 - v0);
 			}
+
+			// Pick the greatest bin on the interval
+			else
+			{
+				int bin_inf = floor_int((bin_prv + bin_cur) * 0.5f);
+				int bin_sup = floor_int((bin_cur + bin_nxt) * 0.5f);
+				bin_inf = std::min(bin_inf, nbr_bins - 2);
+				bin_sup = std::min(bin_sup, nbr_bins - 1);
+				assert(bin_inf < bin_sup);
+				val = *std::max_element(&power[bin_inf], &power[bin_sup]);
+			}
+
+			pal->map(val * amplitude_scale, px);
+
+			px -= stride;
+			bin_prv = bin_cur;
+			bin_cur = bin_nxt;
 		}
 	}
 
diff --git a/src/audio_renderer_spectrum.h b/src/audio_renderer_spectrum.h
index d4641f37de..3bc9847770 100644
--- a/src/audio_renderer_spectrum.h
+++ b/src/audio_renderer_spectrum.h
@@ -61,10 +61,34 @@ class AudioSpectrumRenderer final : public AudioRendererBitmapProvider {
 	/// Colour tables used for rendering
 	std::vector<AudioColorScheme> colors;
 
+	/// User-provided value for derivation_size
+	size_t derivation_size_user = 0;
+
+	/// User-provided value for derivation_dist
+	size_t derivation_dist_user = 0;
+
+	/// Maximum audible, displayed frequency. Avoids wasting the display space
+	/// with ultrasonic content at sampling rates > 40 kHz.
+	float max_freq = 20000.f;
+
+	/// Relative vertical position of the 1 kHz frequency, in (0 ; 1) open range
+	/// 0 = bottom of the display zone, 1 = top
+	/// The actual position, as displayed, is limited by the available mapping
+	/// curves (linear and log).
+	/// Values close to 0 will give a linear curve, and close to 1 a log curve.
+	float pos_fref = 1.0f / 3;
+
+	/// Reference frequency whose vertical position is constant, in Hz.
+	const float freq_ref = 1000.0f;
+
 	/// Binary logarithm of number of samples to use in deriving frequency-power data
+	/// This could differ from the user-provided value because the actual value
+	/// used in computations may be scaled, depending on the sampling rate.
 	size_t derivation_size = 0;
 
 	/// Binary logarithm of number of samples between the start of derivations
+	/// This could differ from the user-provided value because the actual value
+	/// used in computations may be scaled, depending on the sampling rate.
 	size_t derivation_dist = 0;
 
 	/// @brief Reset in response to changing audio provider
@@ -90,6 +114,9 @@ class AudioSpectrumRenderer final : public AudioRendererBitmapProvider {
 	template<class T>
 	void ConvertToFloat(size_t count, T *dest);
 
+	/// @brief Updates the derivation_* after a derivation_*_user change.
+	void update_derivation_values();
+
 #ifdef WITH_FFTW3
 	/// FFTW plan data
 	fftw_plan dft_plan = nullptr;
@@ -133,6 +160,12 @@ class AudioSpectrumRenderer final : public AudioRendererBitmapProvider {
 	/// is specified too large, it will be clamped to the size.
 	void SetResolution(size_t derivation_size, size_t derivation_dist);
 
+	/// @brief Set the vertical relative position of the reference frequency (1 kHz)
+	/// @param pos_fref_ Vertical position of the 1 kHz frequency. Between 0 and 1, boundaries excluded.
+	///
+	/// A value close to 0 gives a linear display, and close to 1 a logarithmic display.
+	void set_reference_frequency_position(float pos_fref_);
+
 	/// @brief Cleans up the cache
 	/// @param max_size Maximum size in bytes for the cache
 	void AgeCache(size_t max_size) override;
diff --git a/src/libresrc/default_config.json b/src/libresrc/default_config.json
index 8a648fa47c..3ea90c8de6 100644
--- a/src/libresrc/default_config.json
+++ b/src/libresrc/default_config.json
@@ -71,7 +71,8 @@
 			"Spectrum" : {
 				"Cutoff" : 0,
 				"Memory Max" : 128,
-				"Quality" : 1
+				"Quality" : 1,
+				"FreqCurve" : 0
 			}
 		},
 		"Snap" : {
diff --git a/src/libresrc/osx/default_config.json b/src/libresrc/osx/default_config.json
index 82223cf484..4d55817884 100644
--- a/src/libresrc/osx/default_config.json
+++ b/src/libresrc/osx/default_config.json
@@ -71,7 +71,8 @@
 			"Spectrum" : {
 				"Cutoff" : 0,
 				"Memory Max" : 128,
-				"Quality" : 1
+				"Quality" : 1,
+				"FreqCurve" : 0
 			}
 		},
 		"Snap" : {
diff --git a/src/preferences.cpp b/src/preferences.cpp
index 1a315ed50a..79b5b2b932 100644
--- a/src/preferences.cpp
+++ b/src/preferences.cpp
@@ -394,6 +394,10 @@ void Advanced_Audio(wxTreebook *book, Preferences *parent) {
 	wxArrayString sq_choice(4, sq_arr);
 	p->OptionChoice(spectrum, _("Quality"), sq_choice, "Audio/Renderer/Spectrum/Quality");
 
+	const wxString sc_arr[5] = { _("Linear"), _("Extended"), _("Medium"), _("Compressed"), _("Logarithmic") };
+	wxArrayString sc_choice(5, sc_arr);
+	p->OptionChoice(spectrum, _("Frequency mapping"), sc_choice, "Audio/Renderer/Spectrum/FreqCurve");
+
 	p->OptionAdd(spectrum, _("Cache memory max (MB)"), "Audio/Renderer/Spectrum/Memory Max", 2, 1024);
 
 #ifdef WITH_AVISYNTH