Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add some documentation to the spectrogram example #50

Merged
merged 1 commit into from
Jul 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions example/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# A command line spectrogram viewer.

Displays a [spectrogram](https://en.wikipedia.org/wiki/Spectrogram) of the input
audio, using STFT.

`dart run spectrogram.dart test.wav`
117 changes: 73 additions & 44 deletions example/spectrogram.dart
Original file line number Diff line number Diff line change
Expand Up @@ -18,68 +18,97 @@ import 'dart:typed_data';
import 'package:fftea/fftea.dart';
import 'package:wav/wav.dart';

Float64List normalizeRmsVolume(List<double> a, double target) {
final b = Float64List.fromList(a);
double squareSum = 0;
for (final x in b) {
squareSum += x * x;
}
double factor = target * math.sqrt(b.length / squareSum);
for (int i = 0; i < b.length; ++i) {
b[i] *= factor;
}
return b;
}

Uint64List linSpace(int end, int steps) {
final a = Uint64List(steps);
for (int i = 1; i < steps; ++i) {
a[i - 1] = (end * i) ~/ steps;
}
a[steps - 1] = end;
return a;
}

String gradient(double power) {
const scale = 2;
const levels = [' ', '░', '▒', '▓', '█'];
int index = math.log((power * levels.length) * scale).floor();
if (index < 0) index = 0;
if (index >= levels.length) index = levels.length - 1;
return levels[index];
}

// Displays a spectrogram of the input audio, using STFT.
//
// To generate the spectrogram, we break the audio into small chunks, FFT the
// chunk to get frequency, divide the frequencies into buckets, and print a
// character indicating the power of that frequency bucket. A darker shading
// means that group of frequencies is louder at that time. Time goes from top
// top bottom, with low frequencies on the left and high on the right.
//
// https://en.wikipedia.org/wiki/Spectrogram
void main(List<String> argv) async {
if (argv.length != 1) {
print('Wrong number of args. Usage:');
print(' dart run spectrogram.dart test.wav');
return;
}

// Load the user's WAV file and normalize its volume.
final wav = await Wav.readFile(argv[0]);
final audio = normalizeRmsVolume(wav.toMono(), 0.3);

// Set up the STFT. The chunk size is the number of audio samples in each
// chunk to be FFT'd. The buckets is the number of frequency buckets to
// split the resulting FFT into when printing.
const chunkSize = 2048;
const buckets = 120;
final stft = STFT(chunkSize, Window.hanning(chunkSize));
Uint64List? logItr;
const buckets = 120;

// STFT divides the input into chunks, and runs an FFT on all the chunks.
stft.run(
audio,
// This callback is called for each FFT'd chunk.
(Float64x2List chunk) {
// FFTs of real valued data contain a lot of redundant frequency data that
// we don't want to see in our spectrogram, so discard it. See
// [ComplexArray.discardConjugates] for more info. We also don't care
// about the phase of the frequencies, just the amplitude, so calculate
// magnitudes.
final amp = chunk.discardConjugates().magnitudes();
logItr ??= linSpace(amp.length, buckets);
int i0 = 0;
for (final i1 in logItr!) {
double power = 0;
if (i1 != i0) {
for (int i = i0; i < i1; ++i) {
power += amp[i];
}
power /= i1 - i0;
}

for (int bucket = 0; bucket < buckets; ++bucket) {
// Calculate the bucket endpoints.
int start = (amp.length * bucket) ~/ buckets;
int end = (amp.length * (bucket + 1)) ~/ buckets;

// RMS works in the frequency domain too. This is essentially
// calculating what the perceived volume would be if we were only
// listening to this frequency bucket. Technically there are some
// scaling factors I'm ignoring.
// https://en.wikipedia.org/wiki/Root_mean_square#In_frequency_domain
double power = rms(Float64List.sublistView(amp, start, end));

stdout.write(gradient(power));
i0 = i1;
}
stdout.write('\n');
},
// Stride by half the chunk size, so that the chunks overlap.
chunkSize ~/ 2,
);
}

// Calculates the RMS volume of the audio. This is a decent approxiation of
// human perception of loudness.
// https://en.wikipedia.org/wiki/Root_mean_square
double rms(List<double> audio) {
if (audio.isEmpty) {
return 0;
}
double squareSum = 0;
for (final x in audio) {
squareSum += x * x;
}
return math.sqrt(squareSum / audio.length);
}

// Returns a copy of the input audio, with the amplitude adjusted so that the
// RMS volume of the result is set to the target.
Float64List normalizeRmsVolume(List<double> audio, double target) {
double factor = target / rms(audio);
final output = Float64List.fromList(audio);
for (int i = 0; i < audio.length; ++i) {
output[i] *= factor;
}
return output;
}

// Converts audio power into a unicode gradient for printing.
String gradient(double power) {
const scale = 2;
const levels = [' ', '░', '▒', '▓', '█'];
int index = math.log((power * levels.length) * scale).floor();
if (index < 0) index = 0;
if (index >= levels.length) index = levels.length - 1;
return levels[index];
}
Loading