From a6b58824fa2fd5a67a7d7feff79ba1e01e2cc2ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= <daschuer@mixxx.org>
Date: Mon, 3 Jun 2019 00:17:56 +0200
Subject: [PATCH 1/2] Update qm-dsp to current master
 https://github.com/c4dm/qm-dsp/commit/4c406d9990bc42e5e298dc28eb8b897095275683

---
 build/depends.py                              |   2 +-
 lib/qm-dsp/CONTRIBUTING.md                    |  79 +++++
 lib/qm-dsp/README.md                          |  59 ++++
 lib/qm-dsp/README.txt                         |  35 --
 lib/qm-dsp/base/Restrict.h                    |  17 +
 lib/qm-dsp/dsp/chromagram/Chromagram.cpp      |  40 ++-
 lib/qm-dsp/dsp/chromagram/Chromagram.h        |  53 +++-
 lib/qm-dsp/dsp/chromagram/ConstantQ.cpp       |  32 +-
 lib/qm-dsp/dsp/chromagram/ConstantQ.h         |   4 +-
 lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp    | 259 ++++++++-------
 lib/qm-dsp/dsp/keydetection/GetKeyMode.h      |  12 +-
 lib/qm-dsp/dsp/rateconversion/DecimatorB.cpp  |   1 -
 lib/qm-dsp/dsp/rateconversion/Resampler.cpp   |  64 ++--
 lib/qm-dsp/dsp/rateconversion/Resampler.h     |   1 -
 .../dsp/segmentation/ClusterMeltSegmenter.cpp |  19 +-
 lib/qm-dsp/dsp/segmentation/Segmenter.cpp     |  17 +-
 lib/qm-dsp/dsp/segmentation/cluster_melt.c    |   4 +-
 .../dsp/segmentation/cluster_segmenter.c      |   2 +-
 .../dsp/signalconditioning/DFProcess.cpp      |  15 +-
 lib/qm-dsp/dsp/signalconditioning/DFProcess.h |   2 -
 .../dsp/signalconditioning/FiltFilt.cpp       |  57 ++--
 lib/qm-dsp/dsp/signalconditioning/FiltFilt.h  |  17 +-
 lib/qm-dsp/dsp/signalconditioning/Filter.cpp  | 148 +++++----
 lib/qm-dsp/dsp/signalconditioning/Filter.h    |  72 +++--
 lib/qm-dsp/dsp/tempotracking/DownBeat.cpp     |   2 +-
 lib/qm-dsp/dsp/tempotracking/TempoTrack.cpp   |  52 ++-
 lib/qm-dsp/dsp/tempotracking/TempoTrack.h     |  36 +--
 lib/qm-dsp/dsp/tonal/TCSgram.cpp              |   2 +-
 lib/qm-dsp/dsp/tonal/TonalEstimator.h         |   4 +-
 lib/qm-dsp/dsp/transforms/DCT.cpp             |  91 ++++++
 lib/qm-dsp/dsp/transforms/DCT.h               |  85 +++++
 lib/qm-dsp/dsp/transforms/FFT.cpp             |   4 +-
 lib/qm-dsp/dsp/transforms/FFT.h               |   6 +
 lib/qm-dsp/dsp/wavelet/Wavelet.cpp            |  11 +-
 lib/qm-dsp/dsp/wavelet/Wavelet.h              |   4 +-
 lib/qm-dsp/ext/kissfft/CHANGELOG              |   4 +-
 lib/qm-dsp/ext/kissfft/README                 | 134 ++++++++
 lib/qm-dsp/ext/kissfft/README.simd            |  78 +++++
 lib/qm-dsp/ext/kissfft/TIPS                   |  39 +++
 lib/qm-dsp/ext/kissfft/kiss_fft.c             |   5 +-
 lib/qm-dsp/ext/kissfft/kissfft.hh             | 300 ++++++++++++++++++
 .../ext/kissfft/{ => tools}/kiss_fftr.c       |   0
 .../ext/kissfft/{ => tools}/kiss_fftr.h       |   0
 lib/qm-dsp/key_rounding.patch                 | 253 ---------------
 lib/qm-dsp/maths/CosineDistance.cpp           |   2 +-
 lib/qm-dsp/maths/MathUtilities.cpp            |  84 +++--
 lib/qm-dsp/maths/MathUtilities.h              |  54 +++-
 lib/qm-dsp/maths/Polyfit.h                    | 103 ++----
 lib/qm-dsp/maths/pca/pca.c                    |   2 +-
 49 files changed, 1534 insertions(+), 832 deletions(-)
 create mode 100644 lib/qm-dsp/CONTRIBUTING.md
 create mode 100644 lib/qm-dsp/README.md
 delete mode 100644 lib/qm-dsp/README.txt
 create mode 100644 lib/qm-dsp/base/Restrict.h
 create mode 100644 lib/qm-dsp/dsp/transforms/DCT.cpp
 create mode 100644 lib/qm-dsp/dsp/transforms/DCT.h
 create mode 100644 lib/qm-dsp/ext/kissfft/README
 create mode 100644 lib/qm-dsp/ext/kissfft/README.simd
 create mode 100644 lib/qm-dsp/ext/kissfft/TIPS
 create mode 100644 lib/qm-dsp/ext/kissfft/kissfft.hh
 rename lib/qm-dsp/ext/kissfft/{ => tools}/kiss_fftr.c (100%)
 rename lib/qm-dsp/ext/kissfft/{ => tools}/kiss_fftr.h (100%)
 delete mode 100644 lib/qm-dsp/key_rounding.patch

diff --git a/build/depends.py b/build/depends.py
index 1390b0ef025..067a93c6ec1 100644
--- a/build/depends.py
+++ b/build/depends.py
@@ -595,7 +595,7 @@ def sources(self, build):
             "#lib/qm-dsp/dsp/transforms/FFT.cpp",
             #"#lib/qm-dsp/dsp/wavelet/Wavelet.cpp",
             "#lib/qm-dsp/ext/kissfft/kiss_fft.c",
-            "#lib/qm-dsp/ext/kissfft/kiss_fftr.c",
+            "#lib/qm-dsp/ext/kissfft/tools/kiss_fftr.c",
             #"#lib/qm-dsp/hmm/hmm.c",
             "#lib/qm-dsp/maths/Correlation.cpp",
             #"#lib/qm-dsp/maths/CosineDistance.cpp",
diff --git a/lib/qm-dsp/CONTRIBUTING.md b/lib/qm-dsp/CONTRIBUTING.md
new file mode 100644
index 00000000000..6f6229e6102
--- /dev/null
+++ b/lib/qm-dsp/CONTRIBUTING.md
@@ -0,0 +1,79 @@
+
+Contributing
+============
+
+The qm-dsp library is maintained in a Github repository at
+https://github.com/c4dm/qm-dsp.
+
+
+Reporting bugs
+--------------
+
+Please use Github issues for bug reports. Try to make them as specific
+as possible. For example, describe an input that triggers some
+particular behaviour, and tell us how that behaviour differs from what
+you expected.
+
+If your bug can be reproduced by processing an audio file using one of
+the QM Vamp Plugins (https://github.com/c4dm/qm-vamp-plugins), which
+are built using this library, that might be a good way to illustrate
+the problem.
+
+
+Pull requests
+-------------
+
+We're happy to see pull requests, and can pull them directly in some
+circumstances.
+
+ * Please make sure your change compiles without warnings and passes
+   the existing tests.
+
+ * Please follow the code style guidelines (see below).
+
+ * Please make it as easy as possible to verify the behaviour of the
+   pull request, for example by adding a new test in the `tests`
+   directory. This library has only limited test coverage, but we
+   would like to expand it, and prefer not to make changes unless they
+   are backed by tests.
+
+ * Please provide your changes under terms which permit Queen Mary
+   University of London to relicense the code for commercial
+   purposes. The qm-dsp library as a whole is provided under the GPL,
+   but QM also make commercial licences available separately, and
+   cannot accept any pull request whose copyright status would prevent
+   that. In practice, this means any non-trivial change not
+   originating from QM must be explicitly licensed using a BSD-like
+   licence text, either in the source file itself or in an
+   accompanying file. See `thread/BlockAllocator.h` for an example of
+   typical language.
+
+Please note also that fixes which change the behaviour of the existing
+QM Vamp Plugins will need particularly close scrutiny - these are
+reasonably widely used and, even where they have defects, changes may
+cause problems for users and will at least need to be documented with
+the plugins. For this reason it may take some time for such changes to
+be reviewed or integrated.
+
+
+Code style
+----------
+
+ * C++ code must compile with the C++98 standard, except for the unit
+   tests which are C++14
+
+ * Classes are named `LikeThis` - functions, methods, and local
+   variables `likeThis` - class member variables `m_likeThis`
+
+ * Indentation is four spaces at a time (no tabs)
+
+ * The opening brace for a block goes at the end of the line, except
+   at the start of a function or class definition where it gets a line
+   of its own
+
+ * Please use braces around any conditional or loop block that
+   occupies its own line
+
+Some of the older code in this library does not follow these
+guidelines - usually this means the code needs to be updated.
+
diff --git a/lib/qm-dsp/README.md b/lib/qm-dsp/README.md
new file mode 100644
index 00000000000..20bcb60f04f
--- /dev/null
+++ b/lib/qm-dsp/README.md
@@ -0,0 +1,59 @@
+
+QM-DSP library
+==============
+
+This is a C++ library of functions for Digital Signal Processing and
+Music Informatics purposes developed in the [Centre for Digital
+Music](http://c4dm.eecs.qmul.ac.uk) at Queen Mary, University of
+London.
+
+It is used by [QM Vamp Plugins](http://isophonics.net/QMVampPlugins)
+amongst other things.
+
+Despite the assertive name "qm-dsp", it is not "the official QM DSP
+library", just one library for DSP that happens to have been written
+at QM. It got this name because nothing else was using it at the time.
+
+
+Compiling the library
+---------------------
+
+ - Linux: `make -f build/linux/Makefile.linux64`
+
+ - Mac: `make -f build/osx/Makefile.osx`
+
+ - Windows (MSVC): Use the project file `build/msvc/QMDSP.vcxproj`
+
+To build and run unit tests as well, add the `test` target to your
+Make invocation, e.g. `make -f build/linux/Makefile.linux64
+test`. Tests require the Boost library.
+
+
+Licence
+-------
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at
+your option) any later version.  See the file COPYING included with
+this distribution for more information.
+
+This code is Copyright (c) 2006-2019 Queen Mary, University of London,
+with the following exceptions:
+
+ - `ext/kissfft` - Copyright (c) 2003-2010 Mark Borgerding
+
+ - `maths/pca/pca.c` - Fionn Murtagh, from StatLib, used with permission
+
+ - `maths/Polyfit.h` - by Allen Miller, David J Taylor and others;
+also for Delphi in the the JEDI Math Library, under the Mozilla Public
+License
+
+ - `thread/BlockAllocator.h` - derived from FSB Allocator by Juha
+Nieminen, under a BSD-style license
+
+See individual files for further authorship details.
+
+If you wish to use this code in a proprietary application or product
+for which the terms of the GPL are not appropriate, please contact QM
+Innovation https://www.qminnovation.co.uk/ for licensing terms.
diff --git a/lib/qm-dsp/README.txt b/lib/qm-dsp/README.txt
deleted file mode 100644
index 6927c7a1b83..00000000000
--- a/lib/qm-dsp/README.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-
-
-QM-DSP library
-==============
-
-This is a C++ library of functions for DSP and Music Informatics
-purposes developed at Queen Mary, University of London.
-It is used by the QM Vamp Plugins (q.v.) amongst other things.
-
-This code is Copyright (c) 2006-2015 Queen Mary, University of London,
-with the following exceptions:
-
-ext/kissfft -- Copyright (c) 2003-2010 Mark Borgerding
-
-maths/pca.c -- Fionn Murtagh, from StatLib; with permission
-
-maths/Polyfit.h -- Allen Miller, David J Taylor and others; also for
-Delphi in the the JEDI Math Library, under the Mozilla Public License
-
-thread/BlockAllocator.h -- derived from FSB Allocator by Juha Nieminen,
-under BSD-style license
-
-See individual files for further authorship details.
-
-
-License
-=======
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.  See the file
-COPYING included with this distribution for more information.
-
-
diff --git a/lib/qm-dsp/base/Restrict.h b/lib/qm-dsp/base/Restrict.h
new file mode 100644
index 00000000000..723a8b3b67d
--- /dev/null
+++ b/lib/qm-dsp/base/Restrict.h
@@ -0,0 +1,17 @@
+
+#ifndef QM_DSP_RESTRICT_H
+#define QM_DSP_RESTRICT_H
+
+#ifdef _MSC_VER
+#define QM_R__ __restrict
+#endif
+
+#ifdef __GNUC__
+#define QM_R__ __restrict__
+#endif
+
+#ifndef QM_R__
+#define QM_R__
+#endif
+
+#endif
diff --git a/lib/qm-dsp/dsp/chromagram/Chromagram.cpp b/lib/qm-dsp/dsp/chromagram/Chromagram.cpp
index 5ba2a762de2..9f40b1d2f1f 100644
--- a/lib/qm-dsp/dsp/chromagram/Chromagram.cpp
+++ b/lib/qm-dsp/dsp/chromagram/Chromagram.cpp
@@ -33,8 +33,9 @@ int Chromagram::initialise( ChromaConfig Config )
     m_BPO  = Config.BPO;		// bins per octave
     m_normalise = Config.normalise;     // if frame normalisation is required
 
-    // No. of constant Q bins, extended to a full octave
-    m_uK = m_BPO * (unsigned int)ceil(log(m_FMax/m_FMin)/log(2.0));
+    // Extend range to a full octave
+    double octaves = log(m_FMax / m_FMin) / log(2.0);
+    m_FMax = m_FMin * pow(2.0, ceil(octaves));
 
     // Create array for chroma result
     m_chromadata = new double[ m_BPO ];
@@ -53,6 +54,9 @@ int Chromagram::initialise( ChromaConfig Config )
     // Initialise ConstantQ operator
     m_ConstantQ = new ConstantQ( ConstantQConfig );
 
+    // No. of constant Q bins
+    m_uK = m_ConstantQ->getK();
+
     // Initialise working arrays
     m_frameSize = m_ConstantQ->getfftlength();
     m_hopSize = m_ConstantQ->gethop();
@@ -112,7 +116,7 @@ void Chromagram::unityNormalise(double *src)
 
     MathUtilities::getFrameMinMax( src, m_BPO, & min, &max );
 
-    for( unsigned int i = 0; i < m_BPO; i++ )
+    for (int i = 0; i < m_BPO; i++)
     {
 	val = src[ i ] / max;
 
@@ -121,7 +125,7 @@ void Chromagram::unityNormalise(double *src)
 }
 
 
-double* Chromagram::process( const double *data )
+double *Chromagram::process(const double *data)
 {
     if (!m_skGenerated) {
         // Generate CQ Kernel 
@@ -134,17 +138,25 @@ double* Chromagram::process( const double *data )
         m_windowbuf = new double[m_frameSize];
     }
 
-    for (unsigned int i = 0; i < m_frameSize; ++i) {
+    for (int i = 0; i < m_frameSize; ++i) {
         m_windowbuf[i] = data[i];
     }
     m_window->cut(m_windowbuf);
 
+    // The frequency-domain version expects pre-fftshifted input - so
+    // we must do the same here
+    for (int i = 0; i < m_frameSize/2; ++i) {
+        double tmp = m_windowbuf[i];
+        m_windowbuf[i] = m_windowbuf[i + m_frameSize/2];
+        m_windowbuf[i + m_frameSize/2] = tmp;
+    }
+
     m_FFT->forward(m_windowbuf, m_FFTRe, m_FFTIm);
 
     return process(m_FFTRe, m_FFTIm);
 }
 
-double* Chromagram::process( const double *real, const double *imag )
+double *Chromagram::process(const double *real, const double *imag)
 {
     if (!m_skGenerated) {
         // Generate CQ Kernel 
@@ -153,20 +165,20 @@ double* Chromagram::process( const double *real, const double *imag )
     }
 
     // initialise chromadata to 0
-    for (unsigned i = 0; i < m_BPO; i++) m_chromadata[i] = 0;
+    for (int i = 0; i < m_BPO; i++) m_chromadata[i] = 0;
 
     // Calculate ConstantQ frame
     m_ConstantQ->process( real, imag, m_CQRe, m_CQIm );
 	
     // add each octave of cq data into Chromagram
-    const unsigned octaves = m_uK / m_BPO;
-    for (unsigned octave = 0; octave < octaves; octave++)
+    const int octaves = m_uK / m_BPO;
+    for (int octave = 0; octave < octaves; octave++) 
     {
-        unsigned firstBin = octave * m_BPO;
-        for (unsigned i = 0; i < m_BPO; i++)
-        {
-            m_chromadata[i] += kabs( m_CQRe[ firstBin + i ], m_CQIm[ firstBin + i ]);
-        }
+	int firstBin = octave*m_BPO;
+	for (int i = 0; i < m_BPO; i++) 
+	{
+	    m_chromadata[i] += kabs( m_CQRe[ firstBin + i ], m_CQIm[ firstBin + i ]);
+	}
     }
 
     MathUtilities::normalise(m_chromadata, m_BPO, m_normalise);
diff --git a/lib/qm-dsp/dsp/chromagram/Chromagram.h b/lib/qm-dsp/dsp/chromagram/Chromagram.h
index b2ad72e0ac0..ca68ee9071b 100644
--- a/lib/qm-dsp/dsp/chromagram/Chromagram.h
+++ b/lib/qm-dsp/dsp/chromagram/Chromagram.h
@@ -20,11 +20,11 @@
 #include "base/Window.h"
 #include "ConstantQ.h"
 
-struct ChromaConfig{
+struct ChromaConfig {
     double FS;
     double min;
     double max;
-    unsigned int BPO;
+    int BPO;
     double CQThresh;
     MathUtilities::NormaliseType normalise;
 };
@@ -35,19 +35,44 @@ class Chromagram
 public:	
     Chromagram( ChromaConfig Config );
     ~Chromagram();
-	
-    double* process( const double *data ); // time domain
-    double* process( const double *real, const double *imag ); // frequency domain
-    void unityNormalise( double* src );
+
+    /**
+     * Process a time-domain input signal of length equal to
+     * getFrameSize().
+     * 
+     * The returned buffer contains the chromagram values indexed by
+     * bin, with the number of values corresponding to the BPO field
+     * in the ChromaConfig supplied at construction. It is owned by
+     * the Chromagram object and is reused from one process call to
+     * the next.
+     */
+    double *process(const double *data);
+    
+    /**
+     * Process a frequency-domain input signal generated from a
+     * time-domain signal of length equal to getFrameSize() that has
+     * been windowed and "fftshifted" to place the zero index in the
+     * centre of the frame. The real and imag buffers must each
+     * contain the full getFrameSize() frequency bins.
+     * 
+     * The returned buffer contains the chromagram values indexed by
+     * bin, with the number of values corresponding to the BPO field
+     * in the ChromaConfig supplied at construction. It is owned by
+     * the Chromagram object and is reused from one process call to
+     * the next.
+     */
+    double *process(const double *real, const double *imag);
+    
+    void unityNormalise(double* src);
 
     // Complex arithmetic
     double kabs( double real, double imag );
 	
     // Results
-    unsigned int getK() { return m_uK;}
-    unsigned int getFrameSize() { return m_frameSize; }
-    unsigned int getHopSize()   { return m_hopSize; }
-
+    int getK() { return m_uK;}
+    int getFrameSize() { return m_frameSize; }
+    int getHopSize()   { return m_hopSize; }
+    
 private:
     int initialise( ChromaConfig Config );
     int deInitialise();
@@ -58,13 +83,13 @@ class Chromagram
     double* m_chromadata;
     double m_FMin;
     double m_FMax;
-    unsigned int m_BPO;
-    unsigned int m_uK;
+    int m_BPO;
+    int m_uK;
 
     MathUtilities::NormaliseType m_normalise;
 
-    unsigned int m_frameSize;
-    unsigned int m_hopSize;
+    int m_frameSize;
+    int m_hopSize;
 
     FFTReal* m_FFT;
     ConstantQ* m_ConstantQ;
diff --git a/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp b/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp
index f2129f2e6ff..4585ddc236a 100644
--- a/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp
+++ b/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp
@@ -125,18 +125,17 @@ void ConstantQ::sparsekernel()
             hammingWindowIm[u] = 0;
         }
         
-        const double samplesPerCycle =
-                m_FS / (m_FMin * pow(2, (double)k / (double)m_BPO));
+	// Computing a hamming window
+	const unsigned hammingLength = (int) ceil( m_dQ * m_FS / ( m_FMin * pow(2,((double)(k))/(double)m_BPO)));
 
-        // Computing a hamming window
-        const unsigned hammingLength = (int) ceil(
-                m_dQ * samplesPerCycle);
+//        cerr << "k = " << k << ", q = " << m_dQ << ", m_FMin = " << m_FMin << ", hammingLength = " << hammingLength << " (rounded up from " << (m_dQ * m_FS / ( m_FMin * pow(2,((double)(k))/(double)m_BPO))) << ")" << endl;
+        
 
         unsigned origin = m_FFTLength/2 - hammingLength/2;
 
 	for (unsigned i=0; i<hammingLength; i++) 
 	{
-	    const double angle = 2*PI*i/samplesPerCycle;
+	    const double angle = 2*PI*m_dQ*i/hammingLength;
 	    const double real = cos(angle);
 	    const double imag = sin(angle);
 	    const double absol = hamming(hammingLength, i)/hammingLength;
@@ -144,10 +143,6 @@ void ConstantQ::sparsekernel()
 	    hammingWindowIm[ origin + i ] = absol*imag;
 	}
 
-	/* This code splits the hanning window and moves it to the beginning
-	   and the end, creating an empty gap in the middle.
-	   It is disabled, because it results in wrong results,
-	   when tested with sin waves centered on a bin frequency.
         for (unsigned i = 0; i < m_FFTLength/2; ++i) {
             double temp = hammingWindowRe[i];
             hammingWindowRe[i] = hammingWindowRe[i + m_FFTLength/2];
@@ -156,7 +151,6 @@ void ConstantQ::sparsekernel()
             hammingWindowIm[i] = hammingWindowIm[i + m_FFTLength/2];
             hammingWindowIm[i + m_FFTLength/2] = temp;
         }
-	*/
     
 	//do fft of hammingWindow
 	m_FFT.process( 0, hammingWindowRe, hammingWindowIm, transfHammingWindowRe, transfHammingWindowIm );
@@ -168,7 +162,7 @@ void ConstantQ::sparsekernel()
 	    const double squaredBin = squaredModule( transfHammingWindowRe[ j ], transfHammingWindowIm[ j ]);
 	    if (squaredBin <= squareThreshold) continue;
 		
-	    // Insert non-zero position indexes, doubled because they are floats
+	    // Insert non-zero position indexes
 	    sk->is.push_back(j);
 	    sk->js.push_back(k);
 
@@ -280,6 +274,7 @@ double* ConstantQ::process( const double* fftdata )
     {
 	const unsigned row = cqbin[i];
 	const unsigned col = fftbin[i];
+        if (col == 0) continue;
 	const double & r1  = real[i];
 	const double & i1  = imag[i];
 	const double & r2  = fftdata[ (2*m_FFTLength) - 2*col - 2 ];
@@ -301,17 +296,15 @@ void ConstantQ::initialise( CQConfig Config )
     m_BPO = Config.BPO;		// bins per octave
     m_CQThresh = Config.CQThresh;// ConstantQ threshold for kernel generation
 
-    // Work out Q value for Filter bank
-    m_dQ = 1/(pow(2,(1/(double)m_BPO))-1);
-    // No. of constant Q bins, extended to a full octave
-    m_uK = m_BPO * (unsigned int)ceil(log(m_FMax/m_FMin)/log(2.0));
+    m_dQ = 1/(pow(2,(1/(double)m_BPO))-1);	// Work out Q value for Filter bank
+    m_uK = (unsigned int) ceil(m_BPO * log(m_FMax/m_FMin)/log(2.0));	// No. of constant Q bins
 
 //    std::cerr << "ConstantQ::initialise: rate = " << m_FS << ", fmin = " << m_FMin << ", fmax = " << m_FMax << ", bpo = " << m_BPO << ", K = " << m_uK << ", Q = " << m_dQ << std::endl;
 
     // work out length of fft required for this constant Q Filter bank
     m_FFTLength = (int) pow(2, nextpow2(ceil( m_dQ*m_FS/m_FMin )));
 
-    m_hop = m_FFTLength/8; // <------ hop size is window length divided by 32
+    m_hop = m_FFTLength/8;
 
 //    std::cerr << "ConstantQ::initialise: -> fft length = " << m_FFTLength << ", hop = " << m_hop << std::endl;
 
@@ -351,10 +344,11 @@ void ConstantQ::process(const double *FFTRe, const double* FFTIm,
     {
 	const unsigned row = cqbin[i];
 	const unsigned col = fftbin[i];
+        if (col == 0) continue;
 	const double & r1  = real[i];
 	const double & i1  = imag[i];
-	const double & r2  = FFTRe[ m_FFTLength - col - 1 ];
-	const double & i2  = FFTIm[ m_FFTLength - col - 1 ];
+	const double & r2  = FFTRe[ m_FFTLength - col ];
+	const double & i2  = FFTIm[ m_FFTLength - col ];
 	// add the multiplication
 	CQRe[ row ] += (r1*r2 - i1*i2);
 	CQIm[ row ] += (r1*i2 + i1*r2);
diff --git a/lib/qm-dsp/dsp/chromagram/ConstantQ.h b/lib/qm-dsp/dsp/chromagram/ConstantQ.h
index bd666ddd6f0..7507a1f9641 100644
--- a/lib/qm-dsp/dsp/chromagram/ConstantQ.h
+++ b/lib/qm-dsp/dsp/chromagram/ConstantQ.h
@@ -20,8 +20,8 @@
 #include "maths/MathAliases.h"
 #include "maths/MathUtilities.h"
 
-struct CQConfig{
-    double FS;   // samplerate
+struct CQConfig {
+    double FS;         // samplerate
     double min;        // minimum frequency
     double max;        // maximum frequency
     unsigned int BPO;  // bins per octave
diff --git a/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp b/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
index 7407c43febd..cb8bc2477af 100644
--- a/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
+++ b/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
@@ -1,7 +1,12 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
 /*
-    Copyright (c) 2005 Centre for Digital Music ( C4DM )
-                       Queen Mary Univesrity of London
+    QM DSP Library
+
+    Centre for Digital Music, Queen Mary, University of London.
+    This file 2005-2006 Christian Landone and Katy Noland.
+
+    Fixes to correct chroma offsets and for thread safety contributed
+    by Daniel Schürmann.
 
     This program is free software; you can redistribute it and/or
     modify it under the terms of the GNU General Public License as
@@ -9,9 +14,6 @@
     License, or (at your option) any later version.  See the file
     COPYING included with this distribution for more information.
 */
-// GetKeyMode.cpp: implementation of the CGetKeyMode class.
-//
-//////////////////////////////////////////////////////////////////////
 
 #include "GetKeyMode.h"
 #include "maths/MathUtilities.h"
@@ -22,18 +24,20 @@
 #include <cstring>
 #include <cstdlib>
 
+static const int kBinsPerOctave = 36;
+
 // Chords profile
-static double MajProfile[36] = 
-{ 0.0384, 0.0629, 0.0258, 0.0121, 0.0146, 0.0106, 0.0364, 0.0610, 0.0267,
-  0.0126, 0.0121, 0.0086, 0.0364, 0.0623, 0.0279, 0.0275, 0.0414, 0.0186, 
-  0.0173, 0.0248, 0.0145, 0.0364, 0.0631, 0.0262, 0.0129, 0.0150, 0.0098,
-  0.0312, 0.0521, 0.0235, 0.0129, 0.0142, 0.0095, 0.0289, 0.0478, 0.0239};
-
-static double MinProfile[36] =
-{ 0.0375, 0.0682, 0.0299, 0.0119, 0.0138, 0.0093, 0.0296, 0.0543, 0.0257,
-  0.0292, 0.0519, 0.0246, 0.0159, 0.0234, 0.0135, 0.0291, 0.0544, 0.0248,
-  0.0137, 0.0176, 0.0104, 0.0352, 0.0670, 0.0302, 0.0222, 0.0349, 0.0164,
-  0.0174, 0.0297, 0.0166, 0.0222, 0.0401, 0.0202, 0.0175, 0.0270, 0.0146};
+static double MajProfile[kBinsPerOctave] = {
+    0.0384, 0.0629, 0.0258, 0.0121, 0.0146, 0.0106, 0.0364, 0.0610, 0.0267,
+    0.0126, 0.0121, 0.0086, 0.0364, 0.0623, 0.0279, 0.0275, 0.0414, 0.0186, 
+    0.0173, 0.0248, 0.0145, 0.0364, 0.0631, 0.0262, 0.0129, 0.0150, 0.0098,
+    0.0312, 0.0521, 0.0235, 0.0129, 0.0142, 0.0095, 0.0289, 0.0478, 0.0239};
+
+static double MinProfile[kBinsPerOctave] = { 
+    0.0375, 0.0682, 0.0299, 0.0119, 0.0138, 0.0093, 0.0296, 0.0543, 0.0257,
+    0.0292, 0.0519, 0.0246, 0.0159, 0.0234, 0.0135, 0.0291, 0.0544, 0.0248,
+    0.0137, 0.0176, 0.0104, 0.0352, 0.0670, 0.0302, 0.0222, 0.0349, 0.0164,
+    0.0174, 0.0297, 0.0166, 0.0222, 0.0401, 0.0202, 0.0175, 0.0270, 0.0146};
 //
     
 
@@ -42,7 +46,7 @@ static double MinProfile[36] =
 //////////////////////////////////////////////////////////////////////
 
 GetKeyMode::GetKeyMode( int sampleRate, float tuningFrequency,
-			double hpcpAverage, double medianAverage ) :
+                        double hpcpAverage, double medianAverage ) :
     m_hpcpAverage( hpcpAverage ),
     m_medianAverage( medianAverage ),
     m_ChrPointer(0),
@@ -51,7 +55,6 @@ GetKeyMode::GetKeyMode( int sampleRate, float tuningFrequency,
     m_MeanHPCP(0),
     m_MajCorr(0),
     m_MinCorr(0),
-    m_Keys(0),
     m_MedianFilterBuffer(0),
     m_SortedBuffer(0),
     m_keyStrengths(0)
@@ -61,16 +64,16 @@ GetKeyMode::GetKeyMode( int sampleRate, float tuningFrequency,
     // Chromagram configuration parameters
     m_ChromaConfig.normalise = MathUtilities::NormaliseUnitMax;
     m_ChromaConfig.FS = sampleRate/(double)m_DecimationFactor;
+    if (m_ChromaConfig.FS < 1) {
+        m_ChromaConfig.FS = 1;
+    }
 
     // Set C3 (= MIDI #48) as our base:
     // This implies that key = 1 => Cmaj, key = 12 => Bmaj, key = 13 => Cmin, etc.
-    m_ChromaConfig.min = Pitch::getFrequencyForPitch
-        (48, 0, tuningFrequency);
-    // C7 (= MIDI #96) is the exclusive maximum key:
-    m_ChromaConfig.max = Pitch::getFrequencyForPitch
-        (96, 0, tuningFrequency);
+    m_ChromaConfig.min = Pitch::getFrequencyForPitch( 48, 0, tuningFrequency );
+    m_ChromaConfig.max = Pitch::getFrequencyForPitch( 96, 0, tuningFrequency );
 
-    m_ChromaConfig.BPO = 36;
+    m_ChromaConfig.BPO = kBinsPerOctave;
     m_ChromaConfig.CQThresh = 0.0054;
 
     // Chromagram inst.
@@ -80,7 +83,6 @@ GetKeyMode::GetKeyMode( int sampleRate, float tuningFrequency,
     m_ChromaFrameSize = m_Chroma->getFrameSize();
     // override hopsize for this application
     m_ChromaHopSize = m_ChromaFrameSize;
-    m_BPO = m_ChromaConfig.BPO;
 
 //    std::cerr << "chroma frame size = " << m_ChromaFrameSize << ", decimation factor = " << m_DecimationFactor << " therefore block size = " << getBlockSize() << std::endl;
 
@@ -96,30 +98,38 @@ GetKeyMode::GetKeyMode( int sampleRate, float tuningFrequency,
     // Spawn objectc/arrays
     m_DecimatedBuffer = new double[m_ChromaFrameSize];
     
-    m_ChromaBuffer = new double[m_BPO * m_ChromaBuffersize];
-    memset( m_ChromaBuffer, 0, sizeof(double) * m_BPO * m_ChromaBuffersize);
+    m_ChromaBuffer = new double[kBinsPerOctave * m_ChromaBuffersize];
+    memset( m_ChromaBuffer, 0, sizeof(double) * kBinsPerOctave * m_ChromaBuffersize);
     
-    m_MeanHPCP = new double[m_BPO];
+    m_MeanHPCP = new double[kBinsPerOctave];
     
-    m_MajCorr = new double[m_BPO];
-    m_MinCorr = new double[m_BPO];
-    m_Keys  = new double[2*m_BPO];
+    m_MajCorr = new double[kBinsPerOctave];
+    m_MinCorr = new double[kBinsPerOctave];
     
+    m_MajProfileNorm = new double[kBinsPerOctave];
+    m_MinProfileNorm = new double[kBinsPerOctave];
+
+    double mMaj = MathUtilities::mean( MajProfile, kBinsPerOctave );
+    double mMin = MathUtilities::mean( MinProfile, kBinsPerOctave );
+
+    for( unsigned int i = 0; i < kBinsPerOctave; i++ ) {
+        m_MajProfileNorm[i] = MajProfile[i] - mMaj;
+        m_MinProfileNorm[i] = MinProfile[i] - mMin;
+    }
+
     m_MedianFilterBuffer = new int[ m_MedianWinsize ];
     memset( m_MedianFilterBuffer, 0, sizeof(int)*m_MedianWinsize);
     
     m_SortedBuffer = new int[ m_MedianWinsize ];
-    memset( m_SortedBuffer, 0, sizeof(int)*m_MedianWinsize);	
+    memset( m_SortedBuffer, 0, sizeof(int)*m_MedianWinsize);
     
-    m_Decimator = new Decimator
-        ( m_ChromaFrameSize*m_DecimationFactor, m_DecimationFactor );
+    m_Decimator = new Decimator( m_ChromaFrameSize*m_DecimationFactor, m_DecimationFactor );
 
     m_keyStrengths = new double[24];
 }
 
 GetKeyMode::~GetKeyMode()
 {
-
     delete m_Chroma;
     delete m_Decimator;
     
@@ -128,40 +138,40 @@ GetKeyMode::~GetKeyMode()
     delete [] m_MeanHPCP;
     delete [] m_MajCorr;
     delete [] m_MinCorr;
-    delete [] m_Keys;
+    delete [] m_MajProfileNorm;
+    delete [] m_MinProfileNorm;
     delete [] m_MedianFilterBuffer;
     delete [] m_SortedBuffer;
-
-    delete[] m_keyStrengths;
+    delete [] m_keyStrengths;
 }
 
-double GetKeyMode::krumCorr(double *pData1, double *pData2, unsigned int length)
+double GetKeyMode::krumCorr( const double *pDataNorm, const double *pProfileNorm, 
+                             int shiftProfile, unsigned int length)
 {
     double retVal= 0.0;
     
     double num = 0;
     double den = 0;
-    double mX = MathUtilities::mean( pData1, length );
-    double mY = MathUtilities::mean( pData2, length );
-    
     double sum1 = 0;
     double sum2 = 0;
     
     for( unsigned int i = 0; i <length; i++ )
     {
-        num += ( pData1[i] - mX ) * ( pData2[i] - mY );
+        int k = (i - shiftProfile + length) % length;
+
+        num += pDataNorm[i] * pProfileNorm[k];
 
-        sum1 += ( (pData1[i]-mX) * (pData1[i]-mX) );
-        sum2 += ( (pData2[i]-mY) * (pData2[i]-mY) );
+        sum1 += ( pDataNorm[i] * pDataNorm[i] );
+        sum2 += ( pProfileNorm[k] * pProfileNorm[k] );
     }
 	
     den = sqrt(sum1 * sum2);
-	
-    if( den>0 )
+
+    if( den>0 ) {
         retVal = num/den;
-    else
+    } else {
         retVal = 0;
-
+    }
 
     return retVal;
 }
@@ -169,106 +179,87 @@ double GetKeyMode::krumCorr(double *pData1, double *pData2, unsigned int length)
 int GetKeyMode::process(double *PCMData)
 {
     int key;
-
     unsigned int j,k;
 
     //////////////////////////////////////////////
     m_Decimator->process( PCMData, m_DecimatedBuffer);
 
-    m_ChrPointer = m_Chroma->process( m_DecimatedBuffer );		
+    m_ChrPointer = m_Chroma->process( m_DecimatedBuffer );
 
-    // The Cromagram has the center of C at bin 0, while the major
-    // and minor profiles have the center of C at 1. We want to have
-    // the correlation for C result also at 1.
-    // To achieve this we have to shift two times:
-    MathUtilities::circShift( m_ChrPointer, m_BPO, 2);
 /*
     std::cout << "raw chroma: ";
-    for (unsigned int ii = 0; ii < m_BPO; ++ii) {
-      if (ii % (m_BPO/12) == 0) std::cout << "\n";
+    for (int ii = 0; ii < kBinsPerOctave; ++ii) {
+      if (ii % (kBinsPerOctave/12) == 0) std::cout << "\n";
         std::cout << m_ChrPointer[ii] << " ";
     }
     std::cout << std::endl;
 */
     // populate hpcp values;
     int cbidx;
-    for( j = 0; j < m_BPO; j++ )
-    {
-        cbidx = (m_bufferindex * m_BPO) + j;
+    for( j = 0; j < kBinsPerOctave; j++ ) {
+        cbidx = (m_bufferindex * kBinsPerOctave) + j;
         m_ChromaBuffer[ cbidx ] = m_ChrPointer[j];
     }
 
     //keep track of input buffers;
-    if( m_bufferindex++ >= m_ChromaBuffersize - 1) 
+    if( m_bufferindex++ >= m_ChromaBuffersize - 1) {
         m_bufferindex = 0;
+    }
 
     // track filling of chroma matrix
-    if( m_ChromaBufferFilling++ >= m_ChromaBuffersize)
+    if( m_ChromaBufferFilling++ >= m_ChromaBuffersize) {
         m_ChromaBufferFilling = m_ChromaBuffersize;
+    }
 
-    //calculate mean 		
-    for( k = 0; k < m_BPO; k++ )
-    {
+    //calculate mean
+    for( k = 0; k < kBinsPerOctave; k++ ) {
         double mnVal = 0.0;
-        for( j = 0; j < m_ChromaBufferFilling; j++ )
-        {
-            mnVal += m_ChromaBuffer[ k + (j*m_BPO) ];
+        for( j = 0; j < m_ChromaBufferFilling; j++ ) {
+            mnVal += m_ChromaBuffer[ k + (j*kBinsPerOctave) ];
         }
 
         m_MeanHPCP[k] = mnVal/(double)m_ChromaBufferFilling;
     }
 
-
-    for( k = 0; k < m_BPO; k++ )
+    // Normalize for zero average
+    double mHPCP = MathUtilities::mean( m_MeanHPCP, kBinsPerOctave );
+    for( k = 0; k < kBinsPerOctave; k++ )
     {
-        m_MajCorr[k] = krumCorr( m_MeanHPCP, MajProfile, m_BPO );
-        m_MinCorr[k] = krumCorr( m_MeanHPCP, MinProfile, m_BPO );
-
-        MathUtilities::circShift( MajProfile, m_BPO, 1 );
-        MathUtilities::circShift( MinProfile, m_BPO, 1 );
-    }
-	
-    for( k = 0; k < m_BPO; k++ )
-    {
-        m_Keys[k] = m_MajCorr[k];
-        m_Keys[k+m_BPO] = m_MinCorr[k];
+        m_MeanHPCP[k] -= mHPCP;
     }
 
-    for (k = 0; k < 24; ++k) {
-        m_keyStrengths[k] = 0;
-    }
 
-    for( k = 0; k < m_BPO*2; k++ )
+    for( k = 0; k < kBinsPerOctave; k++ )
     {
-        int idx = k / (m_BPO/12);
-        int rem = k % (m_BPO/12);
-        if (rem == 0 || m_Keys[k] > m_keyStrengths[idx]) {
-            m_keyStrengths[idx] = m_Keys[k];
-        }
-
-//        m_keyStrengths[k/(m_BPO/12)] += m_Keys[k];
+        // The Cromagram has the center of C at bin 0, while the major
+        // and minor profiles have the center of C at 1. We want to have
+        // the correlation for C result also at 1.
+        // To achieve this we have to shift two times:
+        m_MajCorr[k] = krumCorr( m_MeanHPCP, m_MajProfileNorm, (int)k - 2, kBinsPerOctave );
+        m_MinCorr[k] = krumCorr( m_MeanHPCP, m_MinProfileNorm, (int)k - 2, kBinsPerOctave );
     }
 
 /*
   std::cout << "raw keys: ";
-  for (int ii = 0; ii < 2*m_BPO; ++ii) {
-      if (ii % (m_BPO/12) == 0) std::cout << "\n";
-      std::cout << m_Keys[ii] << " ";
+  for (int ii = 0; ii < kBinsPerOctave; ++ii) {
+      if (ii % (kBinsPerOctave/12) == 0) std::cout << "\n";
+      std::cout << m_MajCorr[ii] << " ";
   }
-  std::cout << std::endl;
-
-  std::cout << "key strengths: ";
-  for (int ii = 0; ii < 24; ++ii) {
-      if (ii % 6 == 0) std::cout << "\n";
-      std::cout << m_keyStrengths[ii] << " ";
+  for (int ii = 0; ii < kBinsPerOctave; ++ii) {
+      if (ii % (kBinsPerOctave/12) == 0) std::cout << "\n";
+      std::cout << m_MinCorr[ii] << " ";
   }
   std::cout << std::endl;
 */
-    double dummy;
-    // m_Keys[1] is C center  1 / 3 + 1 = 1
-    // m_Keys[4] is D center  4 / 3 + 1 = 2
+
+    // m_MajCorr[1] is C center  1 / 3 + 1 = 1
+    // m_MajCorr[4] is D center  4 / 3 + 1 = 2
     // '+ 1' because we number keys 1-24, not 0-23.
-    int maxBin = MathUtilities::getMax( m_Keys, 2* m_BPO, &dummy );
+    double maxMaj;
+    int maxMajBin = MathUtilities::getMax( m_MajCorr, kBinsPerOctave, &maxMaj );
+    double maxMin;
+    int maxMinBin = MathUtilities::getMax( m_MinCorr, kBinsPerOctave, &maxMin );
+    int maxBin = (maxMaj > maxMin) ? maxMajBin : (maxMinBin + kBinsPerOctave);
     key = maxBin / 3 + 1;
 
 //    std::cout << "fractional key pre-sorting: " << (maxBin + 2) / 3.0 << std::endl;
@@ -278,12 +269,12 @@ int GetKeyMode::process(double *PCMData)
     //Median filtering
 
     // track Median buffer initial filling
-    if( m_MedianBufferFilling++ >= m_MedianWinsize)
+    if( m_MedianBufferFilling++ >= m_MedianWinsize) {
         m_MedianBufferFilling = m_MedianWinsize;
-		
+    }
+
     //shift median buffer
-    for( k = 1; k < m_MedianWinsize; k++ )
-    {
+    for( k = 1; k < m_MedianWinsize; k++ ) {
         m_MedianFilterBuffer[ k - 1 ] = m_MedianFilterBuffer[ k ];
     }
 
@@ -293,8 +284,7 @@ int GetKeyMode::process(double *PCMData)
 
     //Copy median into sorting buffer, reversed
     unsigned int ijx = 0;
-    for( k = 0; k < m_MedianWinsize; k++ )
-    {
+    for( k = 0; k < m_MedianWinsize; k++ ) {
         m_SortedBuffer[k] = m_MedianFilterBuffer[m_MedianWinsize-1-ijx];
         ijx++;
     }
@@ -313,8 +303,9 @@ int GetKeyMode::process(double *PCMData)
 
 //  std::cout << "midpoint = " << midpoint << endl;
 
-    if( midpoint <= 0 )
+    if( midpoint <= 0 ) {
         midpoint = 1;
+    }
 
     key = m_SortedBuffer[midpoint-1];
 
@@ -328,3 +319,45 @@ bool GetKeyMode::isModeMinor( int key )
 { 
     return (key > 12);
 }
+
+unsigned int getChromaSize() 
+{ 
+    return kBinsPerOctave; 
+}
+
+double* GetKeyMode::getKeyStrengths() {
+    unsigned int k;
+
+    for (k = 0; k < 24; ++k) {
+        m_keyStrengths[k] = 0;
+    }
+
+    for( k = 0; k < kBinsPerOctave; k++ )
+    {
+        int idx = k / (kBinsPerOctave/12);
+        int rem = k % (kBinsPerOctave/12);
+        if (rem == 0 || m_MajCorr[k] > m_keyStrengths[idx]) {
+            m_keyStrengths[idx] = m_MajCorr[k];
+        }
+    }
+
+    for( k = 0; k < kBinsPerOctave; k++ )
+    {
+        int idx = (k + kBinsPerOctave) / (kBinsPerOctave/12);
+        int rem = k % (kBinsPerOctave/12);
+        if (rem == 0 || m_MinCorr[k] > m_keyStrengths[idx]) {
+            m_keyStrengths[idx] = m_MinCorr[k];
+        }
+    }
+
+/*
+    std::cout << "key strengths: ";
+    for (int ii = 0; ii < 24; ++ii) {
+        if (ii % 6 == 0) std::cout << "\n";
+        std::cout << m_keyStrengths[ii] << " ";
+    }
+    std::cout << std::endl;
+*/
+
+    return m_keyStrengths;
+}
diff --git a/lib/qm-dsp/dsp/keydetection/GetKeyMode.h b/lib/qm-dsp/dsp/keydetection/GetKeyMode.h
index bc192087afa..9384899abe6 100644
--- a/lib/qm-dsp/dsp/keydetection/GetKeyMode.h
+++ b/lib/qm-dsp/dsp/keydetection/GetKeyMode.h
@@ -27,17 +27,18 @@ class GetKeyMode
 
 	int process( double* PCMData );
 
-	double krumCorr( double* pData1, double* pData2, unsigned int length );
+	double krumCorr( const double *pDataNorm, const double *pProfileNorm, 
+                         int shiftProfile, unsigned int length );
 
 	unsigned int getBlockSize() { return m_ChromaFrameSize*m_DecimationFactor; }
 	unsigned int getHopSize() { return m_ChromaHopSize*m_DecimationFactor; }
 
 	double* getChroma() { return m_ChrPointer; }
-	unsigned int getChromaSize() { return m_BPO; }
+	unsigned int getChromaSize();
 
 	double* getMeanHPCP() { return m_MeanHPCP; }
 
-	double *getKeyStrengths() { return m_keyStrengths; }
+	double* getKeyStrengths();
 
 	bool isModeMinor( int key ); 
 
@@ -63,8 +64,6 @@ class GetKeyMode
 	unsigned int m_ChromaFrameSize;
 	//Hop
 	unsigned int m_ChromaHopSize;
-	//Bins per octave
-	unsigned int m_BPO;
 
 
 	unsigned int m_ChromaBuffersize;
@@ -79,9 +78,10 @@ class GetKeyMode
 	double* m_ChromaBuffer;
 	double* m_MeanHPCP;
 
+	double* m_MajProfileNorm;
+	double* m_MinProfileNorm;
 	double* m_MajCorr;
 	double* m_MinCorr;
-	double* m_Keys;
 	int* m_MedianFilterBuffer;
 	int* m_SortedBuffer;
 
diff --git a/lib/qm-dsp/dsp/rateconversion/DecimatorB.cpp b/lib/qm-dsp/dsp/rateconversion/DecimatorB.cpp
index 55df1e9fc0a..5f27130c49f 100644
--- a/lib/qm-dsp/dsp/rateconversion/DecimatorB.cpp
+++ b/lib/qm-dsp/dsp/rateconversion/DecimatorB.cpp
@@ -112,7 +112,6 @@ void DecimatorB::doProcess()
 {
     int filteridx = 0;
     int factorDone = 1;
-    int factorRemaining = m_decFactor;
 
     while (factorDone < m_decFactor) {
 
diff --git a/lib/qm-dsp/dsp/rateconversion/Resampler.cpp b/lib/qm-dsp/dsp/rateconversion/Resampler.cpp
index f0598cab2ca..eb88b0f0473 100644
--- a/lib/qm-dsp/dsp/rateconversion/Resampler.cpp
+++ b/lib/qm-dsp/dsp/rateconversion/Resampler.cpp
@@ -17,12 +17,12 @@
 #include "maths/MathUtilities.h"
 #include "base/KaiserWindow.h"
 #include "base/SincWindow.h"
-#include "thread/Thread.h"
 
 #include <iostream>
 #include <vector>
 #include <map>
 #include <cassert>
+#include <algorithm>
 
 using std::vector;
 using std::map;
@@ -36,6 +36,9 @@ Resampler::Resampler(int sourceRate, int targetRate) :
     m_sourceRate(sourceRate),
     m_targetRate(targetRate)
 {
+#ifdef DEBUG_RESAMPLER
+    cerr << "Resampler::Resampler(" <<  sourceRate << "," << targetRate << ")" << endl;
+#endif
     initialise(100, 0.02);
 }
 
@@ -52,13 +55,6 @@ Resampler::~Resampler()
     delete[] m_phaseData;
 }
 
-// peakToPole -> length -> beta -> window
-static map<double, map<int, map<double, vector<double> > > >
-knownFilters;
-
-static Mutex
-knownFilterMutex;
-
 void
 Resampler::initialise(double snr, double bandwidth)
 {
@@ -85,25 +81,15 @@ Resampler::initialise(double snr, double bandwidth)
     m_filterLength = params.length;
 
     vector<double> filter;
-    knownFilterMutex.lock();
-
-    if (knownFilters[m_peakToPole][m_filterLength].find(params.beta) ==
-	knownFilters[m_peakToPole][m_filterLength].end()) {
-
-	KaiserWindow kw(params);
-	SincWindow sw(m_filterLength, m_peakToPole * 2);
-
-	filter = vector<double>(m_filterLength, 0.0);
-	for (int i = 0; i < m_filterLength; ++i) filter[i] = 1.0;
-	sw.cut(filter.data());
-	kw.cut(filter.data());
-
-	knownFilters[m_peakToPole][m_filterLength][params.beta] = filter;
-    }
 
-    filter = knownFilters[m_peakToPole][m_filterLength][params.beta];
-    knownFilterMutex.unlock();
+    KaiserWindow kw(params);
+    SincWindow sw(m_filterLength, m_peakToPole * 2);
 
+    filter = vector<double>(m_filterLength, 0.0);
+    for (int i = 0; i < m_filterLength; ++i) filter[i] = 1.0;
+    sw.cut(filter.data());
+    kw.cut(filter.data());
+    
     int inputSpacing = m_targetRate / m_gcd;
     int outputSpacing = m_sourceRate / m_gcd;
 
@@ -293,10 +279,22 @@ Resampler::reconstructOne()
     double v = 0.0;
     int n = pd.filter.size();
 
-    assert(n + m_bufferOrigin <= (int)m_buffer.size());
+    if (n + m_bufferOrigin > (int)m_buffer.size()) {
+        cerr << "ERROR: n + m_bufferOrigin > m_buffer.size() [" << n << " + "
+             << m_bufferOrigin << " > " << m_buffer.size() << "]" << endl;
+        throw std::logic_error("n + m_bufferOrigin > m_buffer.size()");
+    }
+
+#if defined(__MSVC__)
+#define R__ __restrict
+#elif defined(__GNUC__)
+#define R__ __restrict__
+#else
+#define R__
+#endif
 
-    const double *const __restrict__ buf = m_buffer.data() + m_bufferOrigin;
-    const double *const __restrict__ filt = pd.filter.data();
+    const double *const R__ buf(m_buffer.data() + m_bufferOrigin);
+    const double *const R__ filt(pd.filter.data());
 
     for (int i = 0; i < n; ++i) {
 	// NB gcc can only vectorize this with -ffast-math
@@ -311,9 +309,7 @@ Resampler::reconstructOne()
 int
 Resampler::process(const double *src, double *dst, int n)
 {
-    for (int i = 0; i < n; ++i) {
-	m_buffer.push_back(src[i]);
-    }
+    m_buffer.insert(m_buffer.end(), src, src + n);
 
     int maxout = int(ceil(double(n) * m_targetRate / m_sourceRate));
     int outidx = 0;
@@ -330,6 +326,12 @@ Resampler::process(const double *src, double *dst, int n)
 	outidx++;
     }
 
+    if (m_bufferOrigin > (int)m_buffer.size()) {
+        cerr << "ERROR: m_bufferOrigin > m_buffer.size() [" 
+             << m_bufferOrigin << " > " << m_buffer.size() << "]" << endl;
+        throw std::logic_error("m_bufferOrigin > m_buffer.size()");
+    }
+
     m_buffer = vector<double>(m_buffer.begin() + m_bufferOrigin, m_buffer.end());
     m_bufferOrigin = 0;
     
diff --git a/lib/qm-dsp/dsp/rateconversion/Resampler.h b/lib/qm-dsp/dsp/rateconversion/Resampler.h
index 92c0169ba0a..c9cee89d841 100644
--- a/lib/qm-dsp/dsp/rateconversion/Resampler.h
+++ b/lib/qm-dsp/dsp/rateconversion/Resampler.h
@@ -79,7 +79,6 @@ class Resampler
     int m_targetRate;
     int m_gcd;
     int m_filterLength;
-    int m_bufferLength;
     int m_latency;
     double m_peakToPole;
     
diff --git a/lib/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp b/lib/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp
index 22835f71162..2011c744716 100644
--- a/lib/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp
+++ b/lib/qm-dsp/dsp/segmentation/ClusterMeltSegmenter.cpp
@@ -329,23 +329,27 @@ void ClusterMeltSegmenter::segment()
     delete decimator;
     decimator = 0;
 
-    if (features.size() < histogramLength) return;
+    int sz = features.size();
+    
+    if (sz < histogramLength) return;
 /*    
     std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
               << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
 */
     // copy the features to a native array and use the existing C segmenter...
     double** arrFeatures = new double*[features.size()];	
-    for (int i = 0; i < features.size(); i++)
+    for (int i = 0; i < sz; i++)
     {
         if (featureType == FEATURE_TYPE_UNKNOWN) {
             arrFeatures[i] = new double[features[0].size()];
-            for (int j = 0; j < features[0].size(); j++)
-                arrFeatures[i][j] = features[i][j];	
+            for (int j = 0; j < int(features[0].size()); j++) {
+                arrFeatures[i][j] = features[i][j];
+            }
         } else {
             arrFeatures[i] = new double[ncoeff+1];	// allow space for the normalised envelope
-            for (int j = 0; j < ncoeff; j++)
-                arrFeatures[i][j] = features[i][j];	
+            for (int j = 0; j < ncoeff; j++) {
+                arrFeatures[i][j] = features[i][j];
+            }
         }
     }
 	
@@ -364,8 +368,7 @@ void ClusterMeltSegmenter::segment()
 	
     // de-allocate arrays
     delete [] q;
-    for (int i = 0; i < features.size(); i++)
-        delete [] arrFeatures[i];
+    for (int i = 0; i < int(features.size()); i++) delete [] arrFeatures[i];
     delete [] arrFeatures;
 	
     // clear the features
diff --git a/lib/qm-dsp/dsp/segmentation/Segmenter.cpp b/lib/qm-dsp/dsp/segmentation/Segmenter.cpp
index 538eaacc78d..00c4d2070ed 100644
--- a/lib/qm-dsp/dsp/segmentation/Segmenter.cpp
+++ b/lib/qm-dsp/dsp/segmentation/Segmenter.cpp
@@ -18,14 +18,13 @@
 
 ostream& operator<<(ostream& os, const Segmentation& s)
 {
-	os << "structure_name : begin_time end_time\n";
+    os << "structure_name : begin_time end_time\n";
 	
-	for (int i = 0; i < s.segments.size(); i++)
-	{
-		Segment seg = s.segments[i];
-		os << std::fixed << seg.type << ':' << '\t' << std::setprecision(6) << seg.start / static_cast<double>(s.samplerate) 
-			<< '\t' << std::setprecision(6) << seg.end / static_cast<double>(s.samplerate) << "\n";
-	}
-	
-	return os;
+    for (int i = 0; i < int(s.segments.size()); i++) {
+	Segment seg = s.segments[i];
+	os << std::fixed << seg.type << ':' << '\t' << std::setprecision(6) << seg.start / static_cast<double>(s.samplerate) 
+	   << '\t' << std::setprecision(6) << seg.end / static_cast<double>(s.samplerate) << "\n";
+    }
+    
+    return os;
 }
diff --git a/lib/qm-dsp/dsp/segmentation/cluster_melt.c b/lib/qm-dsp/dsp/segmentation/cluster_melt.c
index 1441b394c2c..5241a160965 100644
--- a/lib/qm-dsp/dsp/segmentation/cluster_melt.c
+++ b/lib/qm-dsp/dsp/segmentation/cluster_melt.c
@@ -43,7 +43,7 @@ double kldist(double* a, double* b, int n) {
 
 void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) {
 	double lambda, sum, beta, logsumexp, maxlp;
-	int i, j, a, b, b0, b1, limit, B, it, maxiter, maxiter0, maxiter1;
+	int i, j, a, b, b0, b1, limit, /* B, */ it, maxiter, maxiter0, maxiter1;
 	double** cl;	/* reference histograms for each cluster */
 	int** nc;	/* neighbour counts for each histogram */
 	double** lp;	/* soft assignment probs for each histogram */
@@ -57,7 +57,7 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
 		limit = l;
 	else
 		limit = DEFAULT_LIMIT;		/* use default if no valid neighbourhood limit supplied */
-	B = 2 * limit + 1;
+//	B = 2 * limit + 1;
 	maxiter0 = 20;	/* number of iterations at initial temperature */
 	maxiter1 = 5;	/* number of iterations at subsequent temperatures */
 	
diff --git a/lib/qm-dsp/dsp/segmentation/cluster_segmenter.c b/lib/qm-dsp/dsp/segmentation/cluster_segmenter.c
index 796c787aa3b..2a6b196921f 100644
--- a/lib/qm-dsp/dsp/segmentation/cluster_segmenter.c
+++ b/lib/qm-dsp/dsp/segmentation/cluster_segmenter.c
@@ -227,7 +227,7 @@ void constq_segment(int* q, double** features, int frames_read, int bins, int nc
 		int ncomponents = 20;
 		pca_project(features, frames_read, ncoeff, ncomponents);
 		
-		/* copy the envelope so that it immediately follows the chosen components */
+		/* copy the envelope so that it immediatly follows the chosen components */
 		for (i = 0; i < frames_read; i++)
 			features[i][ncomponents] = features[i][ncoeff];	
 		
diff --git a/lib/qm-dsp/dsp/signalconditioning/DFProcess.cpp b/lib/qm-dsp/dsp/signalconditioning/DFProcess.cpp
index 4ca215934dc..d756b44d900 100644
--- a/lib/qm-dsp/dsp/signalconditioning/DFProcess.cpp
+++ b/lib/qm-dsp/dsp/signalconditioning/DFProcess.cpp
@@ -27,6 +27,7 @@
 #include <cstring>
 #include <algorithm>
 
+
 //////////////////////////////////////////////////////////////////////
 // Construction/Destruction
 //////////////////////////////////////////////////////////////////////
@@ -60,13 +61,11 @@ void DFProcess::initialise( DFProcConfig Config )
     filtSrc = new double[ m_length ];
     filtDst = new double[ m_length ];
 
-	
-    //Low Pass Smoothing Filter Config
-    m_FilterConfigParams.ord = Config.LPOrd;
-    m_FilterConfigParams.ACoeffs = Config.LPACoeffs;
-    m_FilterConfigParams.BCoeffs = Config.LPBCoeffs;
-	
-    m_FiltFilt = new FiltFilt( m_FilterConfigParams );
+    Filter::Parameters params;
+    params.a = std::vector<double>(Config.LPACoeffs, Config.LPACoeffs + Config.LPOrd + 1);
+    params.b = std::vector<double>(Config.LPBCoeffs, Config.LPBCoeffs + Config.LPOrd + 1);
+    
+    m_FiltFilt = new FiltFilt(params);
 	
     //add delta threshold
     m_delta = Config.delta;
@@ -194,7 +193,7 @@ void DFProcess::removeDCNormalize( double *src, double*dst )
 
     MathUtilities::getAlphaNorm( src, m_length, m_alphaNormParam, &DFAlphaNorm );
 
-    for( unsigned int i = 0; i< m_length; i++)
+    for (int i = 0; i < m_length; i++)
     {
 	dst[ i ] = ( src[ i ] - DFMin ) / DFAlphaNorm; 
     }
diff --git a/lib/qm-dsp/dsp/signalconditioning/DFProcess.h b/lib/qm-dsp/dsp/signalconditioning/DFProcess.h
index b93b5355184..b256eca16e1 100644
--- a/lib/qm-dsp/dsp/signalconditioning/DFProcess.h
+++ b/lib/qm-dsp/dsp/signalconditioning/DFProcess.h
@@ -81,8 +81,6 @@ class DFProcess
     double* m_filtScratchIn;
     double* m_filtScratchOut;
 
-    FilterConfig m_FilterConfigParams;
-
     FiltFilt* m_FiltFilt;
 
     bool m_isMedianPositive;
diff --git a/lib/qm-dsp/dsp/signalconditioning/FiltFilt.cpp b/lib/qm-dsp/dsp/signalconditioning/FiltFilt.cpp
index 89076c150f2..714d5755d39 100644
--- a/lib/qm-dsp/dsp/signalconditioning/FiltFilt.cpp
+++ b/lib/qm-dsp/dsp/signalconditioning/FiltFilt.cpp
@@ -19,36 +19,16 @@
 // Construction/Destruction
 //////////////////////////////////////////////////////////////////////
 
-FiltFilt::FiltFilt( FilterConfig Config )
+FiltFilt::FiltFilt(Filter::Parameters parameters) :
+    m_filter(parameters)
 {
-    m_filtScratchIn = NULL;
-    m_filtScratchOut = NULL;
-    m_ord = 0;
-	
-    initialise( Config );
+    m_ord = m_filter.getOrder();
 }
 
 FiltFilt::~FiltFilt()
 {
-    deInitialise();
-}
-
-void FiltFilt::initialise( FilterConfig Config )
-{
-    m_ord = Config.ord;
-    m_filterConfig.ord = Config.ord;
-    m_filterConfig.ACoeffs = Config.ACoeffs;
-    m_filterConfig.BCoeffs = Config.BCoeffs;
-	
-    m_filter = new Filter( m_filterConfig );
-}
-
-void FiltFilt::deInitialise()
-{
-    delete m_filter;
 }
 
-
 void FiltFilt::process(double *src, double *dst, unsigned int length)
 {	
     unsigned int i;
@@ -59,14 +39,13 @@ void FiltFilt::process(double *src, double *dst, unsigned int length)
     unsigned int nFact = 3 * ( nFilt - 1);
     unsigned int nExt	= length + 2 * nFact;
 
-    m_filtScratchIn = new double[ nExt ];
-    m_filtScratchOut = new double[ nExt ];
-
+    double *filtScratchIn = new double[ nExt ];
+    double *filtScratchOut = new double[ nExt ];
 	
     for( i = 0; i< nExt; i++ ) 
     {
-	m_filtScratchIn[ i ] = 0.0;
-	m_filtScratchOut[ i ] = 0.0;
+	filtScratchIn[ i ] = 0.0;
+	filtScratchOut[ i ] = 0.0;
     }
 
     // Edge transients reflection
@@ -76,51 +55,51 @@ void FiltFilt::process(double *src, double *dst, unsigned int length)
     unsigned int index = 0;
     for( i = nFact; i > 0; i-- )
     {
-	m_filtScratchIn[ index++ ] = sample0 - src[ i ];
+	filtScratchIn[ index++ ] = sample0 - src[ i ];
     }
     index = 0;
     for( i = 0; i < nFact; i++ )
     {
-	m_filtScratchIn[ (nExt - nFact) + index++ ] = sampleN - src[ (length - 2) - i ];
+	filtScratchIn[ (nExt - nFact) + index++ ] = sampleN - src[ (length - 2) - i ];
     }
 
     index = 0;
     for( i = 0; i < length; i++ )
     {
-	m_filtScratchIn[ i + nFact ] = src[ i ];
+	filtScratchIn[ i + nFact ] = src[ i ];
     }
 	
     ////////////////////////////////
     // Do  0Ph filtering
-    m_filter->process( m_filtScratchIn, m_filtScratchOut, nExt);
+    m_filter.process( filtScratchIn, filtScratchOut, nExt);
 	
     // reverse the series for FILTFILT 
     for ( i = 0; i < nExt; i++)
     { 
-	m_filtScratchIn[ i ] = m_filtScratchOut[ nExt - i - 1];
+	filtScratchIn[ i ] = filtScratchOut[ nExt - i - 1];
     }
 
     // do FILTER again 
-    m_filter->process( m_filtScratchIn, m_filtScratchOut, nExt);
+    m_filter.process( filtScratchIn, filtScratchOut, nExt);
 	
     // reverse the series back 
     for ( i = 0; i < nExt; i++)
     {
-	m_filtScratchIn[ i ] = m_filtScratchOut[ nExt - i - 1 ];
+	filtScratchIn[ i ] = filtScratchOut[ nExt - i - 1 ];
     }
     for ( i = 0;i < nExt; i++)
     {
-	m_filtScratchOut[ i ] = m_filtScratchIn[ i ];
+	filtScratchOut[ i ] = filtScratchIn[ i ];
     }
 
     index = 0;
     for( i = 0; i < length; i++ )
     {
-	dst[ index++ ] = m_filtScratchOut[ i + nFact ];
+	dst[ index++ ] = filtScratchOut[ i + nFact ];
     }	
 
-    delete [] m_filtScratchIn;
-    delete [] m_filtScratchOut;
+    delete [] filtScratchIn;
+    delete [] filtScratchOut;
 
 }
 
diff --git a/lib/qm-dsp/dsp/signalconditioning/FiltFilt.h b/lib/qm-dsp/dsp/signalconditioning/FiltFilt.h
index e5a38124cf7..9fc43950685 100644
--- a/lib/qm-dsp/dsp/signalconditioning/FiltFilt.h
+++ b/lib/qm-dsp/dsp/signalconditioning/FiltFilt.h
@@ -20,30 +20,21 @@
 
 /**
  * Zero-phase digital filter, implemented by processing the data
- * through a filter specified by the given FilterConfig structure (see
+ * through a filter specified by the given filter parameters (see
  * Filter) and then processing it again in reverse.
  */
 class FiltFilt  
 {
 public:
-    FiltFilt( FilterConfig Config );
+    FiltFilt(Filter::Parameters);
     virtual ~FiltFilt();
 
     void reset();
     void process( double* src, double* dst, unsigned int length );
 
 private:
-    void initialise( FilterConfig Config );
-    void deInitialise();
-
-    unsigned int m_ord;
-
-    Filter* m_filter;
-
-    double* m_filtScratchIn;
-    double* m_filtScratchOut;
-
-    FilterConfig m_filterConfig;
+    Filter m_filter;
+    int m_ord;
 };
 
 #endif
diff --git a/lib/qm-dsp/dsp/signalconditioning/Filter.cpp b/lib/qm-dsp/dsp/signalconditioning/Filter.cpp
index fcc12e590ae..c82d22b77b6 100644
--- a/lib/qm-dsp/dsp/signalconditioning/Filter.cpp
+++ b/lib/qm-dsp/dsp/signalconditioning/Filter.cpp
@@ -4,7 +4,6 @@
     QM DSP Library
 
     Centre for Digital Music, Queen Mary, University of London.
-    This file 2005-2006 Christian Landone.
 
     This program is free software; you can redistribute it and/or
     modify it under the terms of the GNU General Public License as
@@ -15,73 +14,112 @@
 
 #include "Filter.h"
 
-//////////////////////////////////////////////////////////////////////
-// Construction/Destruction
-//////////////////////////////////////////////////////////////////////
+#include <stdexcept>
 
-Filter::Filter( FilterConfig Config )
-{
-    m_ord = 0;
-    m_outBuffer = NULL;
-    m_inBuffer = NULL;
+using namespace std;
 
-    initialise( Config );
+Filter::Filter(Parameters params)
+{
+    if (params.a.empty()) {
+        m_fir = true;
+        if (params.b.empty()) {
+            throw logic_error("Filter must have at least one pair of coefficients");
+        }
+    } else {
+        m_fir = false;
+        if (params.a.size() != params.b.size()) {
+            throw logic_error("Inconsistent numbers of filter coefficients");
+        }
+    }
+    
+    m_sz = int(params.b.size());
+    m_order = m_sz - 1;
+
+    m_a = params.a;
+    m_b = params.b;
+    
+    // We keep some empty space at the start of the buffer, and
+    // encroach gradually into it as we add individual sample
+    // calculations at the start. Then when we run out of space, we
+    // move the buffer back to the end and begin again. This is
+    // significantly faster than moving the whole buffer along in
+    // 1-sample steps every time.
+
+    m_offmax = 20;
+    m_offa = m_offmax;
+    m_offb = m_offmax;
+
+    if (!m_fir) {
+        m_bufa.resize(m_order + m_offmax);
+    }
+
+    m_bufb.resize(m_sz + m_offmax);
 }
 
 Filter::~Filter()
 {
-    deInitialise();
 }
 
-void Filter::initialise( FilterConfig Config )
+void
+Filter::reset()
 {
-    m_ord = Config.ord;
-    m_ACoeffs = Config.ACoeffs;
-    m_BCoeffs = Config.BCoeffs;
+    m_offb = m_offmax;
+    m_offa = m_offmax;
 
-    m_inBuffer = new double[ m_ord + 1 ];
-    m_outBuffer = new double[ m_ord + 1 ];
-
-    reset();
-}
+    if (!m_fir) {
+        m_bufa.assign(m_bufa.size(), 0.0);
+    }
 
-void Filter::deInitialise()
-{
-    delete[] m_inBuffer;
-    delete[] m_outBuffer;
+    m_bufb.assign(m_bufb.size(), 0.0);
 }
 
-void Filter::reset()
+void
+Filter::process(const double *const QM_R__ in,
+		double *const QM_R__ out,
+		const int n)
 {
-    for( unsigned int i = 0; i < m_ord+1; i++ ){ m_inBuffer[ i ] = 0.0; }
-    for(unsigned int  i = 0; i < m_ord+1; i++ ){ m_outBuffer[ i ] = 0.0; }
+    for (int s = 0; s < n; ++s) {
+
+        if (m_offb > 0) --m_offb;
+        else {
+            for (int i = m_sz - 2; i >= 0; --i) {
+                m_bufb[i + m_offmax + 1] = m_bufb[i];
+            }
+            m_offb = m_offmax;
+        }
+        m_bufb[m_offb] = in[s];
+
+        double b_sum = 0.0;
+        for (int i = 0; i < m_sz; ++i) {
+            b_sum += m_b[i] * m_bufb[i + m_offb];
+        }
+
+        double outval;
+
+        if (m_fir) {
+
+            outval = b_sum;
+
+        } else {
+
+            double a_sum = 0.0;
+            for (int i = 0; i < m_order; ++i) {
+                a_sum += m_a[i + 1] * m_bufa[i + m_offa];
+            }
+
+            outval = b_sum - a_sum;
+
+            if (m_offa > 0) --m_offa;
+            else {
+                for (int i = m_order - 2; i >= 0; --i) {
+                    m_bufa[i + m_offmax + 1] = m_bufa[i];
+                }
+                m_offa = m_offmax;
+            }
+            m_bufa[m_offa] = outval;
+        }
+        
+	out[s] = outval;
+    }
 }
 
-void Filter::process( double *src, double *dst, unsigned int length )
-{
-    unsigned int SP,i,j;
-
-    double xin,xout;
-
-    for (SP=0;SP<length;SP++)
-    {
-        xin=src[SP];
-        /* move buffer */
-        for ( i = 0; i < m_ord; i++) {m_inBuffer[ m_ord - i ]=m_inBuffer[ m_ord - i - 1 ];}
-        m_inBuffer[0]=xin;
-
-        xout=0.0;
-        for (j=0;j< m_ord + 1; j++)
-	    xout = xout + m_BCoeffs[ j ] * m_inBuffer[ j ];
-        for (j = 0; j < m_ord; j++)
-	    xout= xout - m_ACoeffs[ j + 1 ] * m_outBuffer[ j ];
-
-        dst[ SP ] = xout;
-        for ( i = 0; i < m_ord - 1; i++ ) { m_outBuffer[ m_ord - i - 1 ] = m_outBuffer[ m_ord - i - 2 ];}
-        m_outBuffer[0]=xout;
-
-    } /* end of SP loop */
-}
-
-
-
diff --git a/lib/qm-dsp/dsp/signalconditioning/Filter.h b/lib/qm-dsp/dsp/signalconditioning/Filter.h
index 9f25945fc1a..78984fba777 100644
--- a/lib/qm-dsp/dsp/signalconditioning/Filter.h
+++ b/lib/qm-dsp/dsp/signalconditioning/Filter.h
@@ -4,7 +4,6 @@
     QM DSP Library
 
     Centre for Digital Music, Queen Mary, University of London.
-    This file 2005-2006 Christian Landone.
 
     This program is free software; you can redistribute it and/or
     modify it under the terms of the GNU General Public License as
@@ -16,46 +15,55 @@
 #ifndef FILTER_H
 #define FILTER_H
 
-#ifndef NULL
-#define NULL 0
-#endif
+#include "base/Restrict.h"
 
-/**
- * Filter specification. For a filter of order ord, the ACoeffs and
- * BCoeffs arrays must point to ord+1 values each. ACoeffs provides
- * the denominator and BCoeffs the numerator coefficients of the
- * filter.
- */
-struct FilterConfig{
-    unsigned int ord;
-    double* ACoeffs;
-    double* BCoeffs;
-};
+#include <vector>
 
-/**
- * Digital filter specified through FilterConfig structure.
- */
-class Filter  
+class Filter
 {
 public:
-    Filter( FilterConfig Config );
-    virtual ~Filter();
+    struct Parameters {
+        std::vector<double> a;
+        std::vector<double> b;
+    };
+
+    /**
+     * Construct an IIR filter with numerators b and denominators
+     * a. The filter will have order b.size()-1. To make an FIR
+     * filter, leave the vector a in the param struct empty.
+     * Otherwise, a and b must have the same number of values.
+     */
+    Filter(Parameters params);
+    
+    ~Filter();
 
     void reset();
 
-    void process( double *src, double *dst, unsigned int length );
+    /**
+     * Filter the input sequence \arg in of length \arg n samples, and
+     * write the resulting \arg n samples into \arg out. There must be
+     * enough room in \arg out for \arg n samples to be written.
+     */
+    void process(const double *const QM_R__ in,
+                 double *const QM_R__ out,
+                 const int n);
 
+    int getOrder() const { return m_order; }
+    
 private:
-    void initialise( FilterConfig Config );
-    void deInitialise();
+    int m_order;
+    int m_sz;
+    std::vector<double> m_a;
+    std::vector<double> m_b;
+    std::vector<double> m_bufa;
+    std::vector<double> m_bufb;
+    int m_offa;
+    int m_offb;
+    int m_offmax;
+    bool m_fir;
 
-    unsigned int m_ord;
-
-    double* m_inBuffer;
-    double* m_outBuffer;
-
-    double* m_ACoeffs;
-    double* m_BCoeffs;
+    Filter(const Filter &); // not supplied
+    Filter &operator=(const Filter &); // not supplied
 };
-
+    
 #endif
diff --git a/lib/qm-dsp/dsp/tempotracking/DownBeat.cpp b/lib/qm-dsp/dsp/tempotracking/DownBeat.cpp
index 95256c55d6b..8570da6a17e 100644
--- a/lib/qm-dsp/dsp/tempotracking/DownBeat.cpp
+++ b/lib/qm-dsp/dsp/tempotracking/DownBeat.cpp
@@ -149,7 +149,7 @@ DownBeat::findDownBeats(const float *audio,
                         i_vec_t &downbeats)
 {
     // FIND DOWNBEATS BY PARTITIONING THE INPUT AUDIO FILE INTO BEAT SEGMENTS
-    // WHERE THE AUDIO FRAMES ARE DOWNSAMPLED BY A FACTOR OF 16 (fs ~= 2700Hz)
+    // WHERE THE AUDIO FRAMES ARE DOWNSAMPLED  BY A FACTOR OF 16 (fs ~= 2700Hz)
     // THEN TAKING THE JENSEN-SHANNON DIVERGENCE BETWEEN BEAT SYNCHRONOUS SPECTRAL FRAMES
 
     // IMPLEMENTATION (MOSTLY) FOLLOWS:
diff --git a/lib/qm-dsp/dsp/tempotracking/TempoTrack.cpp b/lib/qm-dsp/dsp/tempotracking/TempoTrack.cpp
index 389403edaa8..7129b379660 100644
--- a/lib/qm-dsp/dsp/tempotracking/TempoTrack.cpp
+++ b/lib/qm-dsp/dsp/tempotracking/TempoTrack.cpp
@@ -70,10 +70,6 @@ void TempoTrack::initialise( TTParams Params )
     m_tempoScratch = new double[ m_lagLength ];
 	m_smoothRCF = new double[ m_lagLength ];
 
-
-    unsigned int winPre = Params.WinT.pre;
-    unsigned int winPost = Params.WinT.post;
-
     m_DFFramer.configure( m_winLength, m_lagLength );
 	
     m_DFPParams.length = m_winLength;
@@ -120,9 +116,9 @@ void TempoTrack::deInitialise()
 
 }
 
-void TempoTrack::createCombFilter(double* Filter, unsigned int winLength, unsigned int TSig, double beatLag)
+void TempoTrack::createCombFilter(double* Filter, int winLength, int /* TSig */, double beatLag)
 {
-    unsigned int i;
+    int i;
 
     if( beatLag == 0 )
     {
@@ -147,15 +143,15 @@ double TempoTrack::tempoMM(double* ACF, double* weight, int tsig)
 
     double period = 0;
     double maxValRCF = 0.0;
-    unsigned int maxIndexRCF = 0;
+    int maxIndexRCF = 0;
 
     double* pdPeaks;
 
-    unsigned int maxIndexTemp;
-    double	maxValTemp;
-    unsigned int count; 
+    int maxIndexTemp;
+    double maxValTemp;
+    int count; 
 	
-    unsigned int numelem,i,j;
+    int numelem,i,j;
     int a, b;
 
     for( i = 0; i < m_lagLength; i++ )
@@ -476,7 +472,7 @@ void TempoTrack::constDetect( double* periodP, int currentIdx, int* flag )
     }
 }
 
-int TempoTrack::findMeter(double *ACF, unsigned int len, double period)
+int TempoTrack::findMeter(double *ACF, int len, double period)
 {
     int i;
     int p = (int)MathUtilities::round( period );
@@ -491,7 +487,7 @@ int TempoTrack::findMeter(double *ACF, unsigned int len, double period)
     double temp4B = 0.0;
 
     double* dbf = new double[ len ]; int t = 0;
-    for( unsigned int u = 0; u < len; u++ ){ dbf[ u ] = 0.0; }
+    for( int u = 0; u < len; u++ ){ dbf[ u ] = 0.0; }
 
     if( (double)len < 6 * p + 2 )
     {
@@ -548,7 +544,7 @@ int TempoTrack::findMeter(double *ACF, unsigned int len, double period)
     return tsig;
 }
 
-void TempoTrack::createPhaseExtractor(double *Filter, unsigned int winLength, double period, unsigned int fsp, unsigned int lastBeat)
+void TempoTrack::createPhaseExtractor(double *Filter, int /* winLength */, double period, int fsp, int lastBeat)
 {	
     int p = (int)MathUtilities::round( period );
     int predictedOffset = 0;
@@ -584,7 +580,7 @@ void TempoTrack::createPhaseExtractor(double *Filter, unsigned int winLength, do
 	double sigma = (double)p/8;
 	double PhaseMin = 0.0;
 	double PhaseMax = 0.0;
-	unsigned int scratchLength = p*2;
+	int scratchLength = p*2;
 	double temp = 0.0;
 
 	for(  int i = 0; i < scratchLength; i++ )
@@ -604,7 +600,7 @@ void TempoTrack::createPhaseExtractor(double *Filter, unsigned int winLength, do
         std::cerr << "predictedOffset = " << predictedOffset << std::endl;
 #endif
 
-	unsigned int index = 0;
+	int index = 0;
 	for (int i = p - ( predictedOffset - 1); i < p + ( p - predictedOffset) + 1; i++)
 	{
 #ifdef DEBUG_TEMPO_TRACK
@@ -624,7 +620,7 @@ void TempoTrack::createPhaseExtractor(double *Filter, unsigned int winLength, do
     delete [] phaseScratch;
 }
 
-int TempoTrack::phaseMM(double *DF, double *weighting, unsigned int winLength, double period)
+int TempoTrack::phaseMM(double *DF, double *weighting, int winLength, double period)
 {
     int alignment = 0;
     int p = (int)MathUtilities::round( period );
@@ -667,7 +663,7 @@ int TempoTrack::phaseMM(double *DF, double *weighting, unsigned int winLength, d
     return alignment;
 }
 
-int TempoTrack::beatPredict(unsigned int FSP0, double alignment, double period, unsigned int step )
+int TempoTrack::beatPredict(int FSP0, double alignment, double period, int step )
 {
     int beat = 0;
 
@@ -712,39 +708,39 @@ vector<int> TempoTrack::process( vector <double> DF,
     causalDF = DF;
 
     //Prepare Causal Extension DFData
-    unsigned int DFCLength = m_dataLength + m_winLength;
+//    int DFCLength = m_dataLength + m_winLength;
 	
-    for( unsigned int j = 0; j < m_winLength; j++ )
+    for( int j = 0; j < m_winLength; j++ )
     {
 	causalDF.push_back( 0 );
     }
 	
 	
     double* RW = new double[ m_lagLength ];
-    for( unsigned int clear = 0; clear < m_lagLength; clear++){ RW[ clear ] = 0.0;}
+    for (int clear = 0; clear < m_lagLength; clear++){ RW[ clear ] = 0.0;}
 
     double* GW = new double[ m_lagLength ];
-    for(unsigned int clear = 0; clear < m_lagLength; clear++){ GW[ clear ] = 0.0;}
+    for (int clear = 0; clear < m_lagLength; clear++){ GW[ clear ] = 0.0;}
 
     double* PW = new double[ m_lagLength ];
-    for(unsigned clear = 0; clear < m_lagLength; clear++){ PW[ clear ] = 0.0;}
+    for(int clear = 0; clear < m_lagLength; clear++){ PW[ clear ] = 0.0;}
 
     m_DFFramer.setSource( &causalDF[0], m_dataLength );
 
-    unsigned int TTFrames = m_DFFramer.getMaxNoFrames();
+    int TTFrames = m_DFFramer.getMaxNoFrames();
 
 #ifdef DEBUG_TEMPO_TRACK
     std::cerr << "TTFrames = " << TTFrames << std::endl;
 #endif
 	
     double* periodP = new double[ TTFrames ];
-    for(unsigned clear = 0; clear < TTFrames; clear++){ periodP[ clear ] = 0.0;}
+    for(int clear = 0; clear < TTFrames; clear++){ periodP[ clear ] = 0.0;}
 	
     double* periodG = new double[ TTFrames ];
-    for(unsigned clear = 0; clear < TTFrames; clear++){ periodG[ clear ] = 0.0;}
+    for(int clear = 0; clear < TTFrames; clear++){ periodG[ clear ] = 0.0;}
 	
     double* alignment = new double[ TTFrames ];
-    for(unsigned clear = 0; clear < TTFrames; clear++){ alignment[ clear ] = 0.0;}
+    for(int clear = 0; clear < TTFrames; clear++){ alignment[ clear ] = 0.0;}
 
     m_beats.clear();
 
@@ -752,7 +748,7 @@ vector<int> TempoTrack::process( vector <double> DF,
 
     int TTLoopIndex = 0;
 
-    for( unsigned int i = 0; i < TTFrames; i++ )
+    for( int i = 0; i < TTFrames; i++ )
     {
 	m_DFFramer.getFrame( m_rawDFFrame );
 
diff --git a/lib/qm-dsp/dsp/tempotracking/TempoTrack.h b/lib/qm-dsp/dsp/tempotracking/TempoTrack.h
index c973eba6ecb..72c2f756efa 100644
--- a/lib/qm-dsp/dsp/tempotracking/TempoTrack.h
+++ b/lib/qm-dsp/dsp/tempotracking/TempoTrack.h
@@ -30,16 +30,16 @@ using std::vector;
 
 struct WinThresh
 {
-    unsigned int pre;
-    unsigned int post;
+    int pre;
+    int post;
 };
 
 struct TTParams
 {
-    unsigned int winLength; //Analysis window length
-    unsigned int lagLength; //Lag & Stride size
-    unsigned int alpha; //alpha-norm parameter
-    unsigned int LPOrd; // low-pass Filter order
+    int winLength; //Analysis window length
+    int lagLength; //Lag & Stride size
+    int alpha; //alpha-norm parameter
+    int LPOrd; // low-pass Filter order
     double* LPACoeffs; //low pass Filter den coefficients
     double* LPBCoeffs; //low pass Filter num coefficients
     WinThresh WinT;//window size in frames for adaptive thresholding [pre post]:
@@ -59,22 +59,22 @@ class TempoTrack
     void initialise( TTParams Params );
     void deInitialise();
 
-    int beatPredict( unsigned int FSP, double alignment, double period, unsigned int step);
-    int phaseMM( double* DF, double* weighting, unsigned int winLength, double period );
-    void createPhaseExtractor( double* Filter, unsigned int winLength,  double period,  unsigned int fsp, unsigned int lastBeat );
-    int findMeter( double* ACF,  unsigned int len, double period );
+    int beatPredict( int FSP, double alignment, double period, int step);
+    int phaseMM( double* DF, double* weighting, int winLength, double period );
+    void createPhaseExtractor( double* Filter, int winLength,  double period,  int fsp, int lastBeat );
+    int findMeter( double* ACF,  int len, double period );
     void constDetect( double* periodP, int currentIdx, int* flag );
     void stepDetect( double* periodP, double* periodG, int currentIdx, int* flag );
-    void createCombFilter( double* Filter, unsigned int winLength, unsigned int TSig, double beatLag );
+    void createCombFilter( double* Filter, int winLength, int TSig, double beatLag );
     double tempoMM( double* ACF, double* weight, int sig );
 	
-    unsigned int m_dataLength;
-    unsigned int m_winLength;
-    unsigned int m_lagLength;
+    int m_dataLength;
+    int m_winLength;
+    int m_lagLength;
 
-    double		 m_rayparam;
-    double		 m_sigma;
-    double		 m_DFWVNnorm;
+    double m_rayparam;
+    double m_sigma;
+    double m_DFWVNnorm;
 
     vector<int>	 m_beats; // Vector of detected beats
 
@@ -92,7 +92,7 @@ class TempoTrack
     double* m_ACoeffs;
     double* m_BCoeffs;
 	
-    // Objects/operators declaration
+    // Objetcs/operators declaration
     Framer m_DFFramer;
     DFProcess* m_DFConditioning;
     Correlation m_correlator;
diff --git a/lib/qm-dsp/dsp/tonal/TCSgram.cpp b/lib/qm-dsp/dsp/tonal/TCSgram.cpp
index 8840872576c..8730dcf1107 100644
--- a/lib/qm-dsp/dsp/tonal/TCSgram.cpp
+++ b/lib/qm-dsp/dsp/tonal/TCSgram.cpp
@@ -36,7 +36,7 @@ void TCSGram::getTCSVector(int iPosition, TCSVector& rTCSVector) const
 {
 	if (iPosition < 0) 
 		rTCSVector = TCSVector();
-	else if (iPosition >= m_VectorList.size())
+	else if (iPosition >= int(m_VectorList.size()))
 		rTCSVector = TCSVector();
 	else
 		rTCSVector = m_VectorList[iPosition].second;
diff --git a/lib/qm-dsp/dsp/tonal/TonalEstimator.h b/lib/qm-dsp/dsp/tonal/TonalEstimator.h
index f555ef9e3b7..b3c9c7b235a 100644
--- a/lib/qm-dsp/dsp/tonal/TonalEstimator.h
+++ b/lib/qm-dsp/dsp/tonal/TonalEstimator.h
@@ -32,7 +32,7 @@ class ChromaVector : public std::valarray<double>
 	
 	void printDebug()
 	{
-		for (int i = 0; i < size(); i++)
+		for (int i = 0; i < int(size()); i++)
 		{
 			std::cout <<  (*this)[i] << ";";
 		}
@@ -68,7 +68,7 @@ class TCSVector : public std::valarray<double>
 
 	void printDebug()
 	{
-		for (int i = 0; i < size(); i++)
+		for (int i = 0; i < int(size()); i++)
 		{
 			std::cout <<  (*this)[i] << ";";
 		}
diff --git a/lib/qm-dsp/dsp/transforms/DCT.cpp b/lib/qm-dsp/dsp/transforms/DCT.cpp
new file mode 100644
index 00000000000..7636af02acd
--- /dev/null
+++ b/lib/qm-dsp/dsp/transforms/DCT.cpp
@@ -0,0 +1,91 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    QM DSP Library
+
+    Centre for Digital Music, Queen Mary, University of London.
+    This file by Chris Cannam.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+#include "DCT.h"
+
+#include <cmath>
+
+DCT::DCT(int n) :
+    m_n(n),
+    m_scaled(n, 0.0),
+    m_time2(n*4, 0.0),
+    m_freq2r(n*4, 0.0),
+    m_freq2i(n*4, 0.0),
+    m_fft(n*4)
+{
+    m_scale = m_n * sqrt(2.0 / m_n);
+}
+
+DCT::~DCT()
+{
+}
+
+void
+DCT::forward(const double *in, double *out)
+{
+    for (int i = 0; i < m_n; ++i) {
+	m_time2[i*2 + 1] = in[i];
+	m_time2[m_n*4 - i*2 - 1] = in[i];
+    }
+
+    m_fft.forward(m_time2.data(), m_freq2r.data(), m_freq2i.data());
+
+    for (int i = 0; i < m_n; ++i) {
+	out[i] = m_freq2r[i];
+    }
+}
+
+void
+DCT::forwardUnitary(const double *in, double *out)
+{
+    forward(in, out);
+    for (int i = 0; i < m_n; ++i) {
+	out[i] /= m_scale;
+    }
+    out[0] /= sqrt(2.0);
+}
+
+void
+DCT::inverse(const double *in, double *out)
+{
+    for (int i = 0; i < m_n; ++i) {
+	m_freq2r[i] = in[i];
+    }
+    for (int i = 0; i < m_n; ++i) {
+	m_freq2r[m_n*2 - i] = -in[i];
+    }
+    m_freq2r[m_n] = 0.0;
+
+    for (int i = 0; i <= m_n*2; ++i) {
+	m_freq2i[i] = 0.0;
+    }
+    
+    m_fft.inverse(m_freq2r.data(), m_freq2i.data(), m_time2.data());
+
+    for (int i = 0; i < m_n; ++i) {
+	out[i] = m_time2[i*2 + 1];
+    }
+}
+
+void
+DCT::inverseUnitary(const double *in, double *out)
+{
+    for (int i = 0; i < m_n; ++i) {
+	m_scaled[i] = in[i] * m_scale;
+    }
+    m_scaled[0] *= sqrt(2.0);
+    inverse(m_scaled.data(), out);
+}
+
diff --git a/lib/qm-dsp/dsp/transforms/DCT.h b/lib/qm-dsp/dsp/transforms/DCT.h
new file mode 100644
index 00000000000..2ebc83fe26d
--- /dev/null
+++ b/lib/qm-dsp/dsp/transforms/DCT.h
@@ -0,0 +1,85 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    QM DSP Library
+
+    Centre for Digital Music, Queen Mary, University of London.
+    This file by Chris Cannam.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+#ifndef DCT_H
+#define DCT_H
+
+#include "FFT.h"
+
+#include <vector>
+
+class DCT
+{
+public:
+    /**
+     * Construct a DCT object to calculate the Discrete Cosine
+     * Transform given input of size n samples. The transform is
+     * implemented using an FFT of size 4n, for simplicity.
+     */
+    DCT(int n);
+
+    ~DCT();
+
+    /**
+     * Carry out a type-II DCT of size n, where n is the value
+     * provided to the constructor above.
+     *
+     * The in and out pointers must point to (enough space for) n
+     * values.
+     */
+    void forward(const double *in, double *out);
+
+    /**
+     * Carry out a type-II unitary DCT of size n, where n is the value
+     * provided to the constructor above. This is a scaled version of
+     * the type-II DCT corresponding to a transform with an orthogonal
+     * matrix. This is the transform implemented by the dct() function
+     * in MATLAB.
+     *
+     * The in and out pointers must point to (enough space for) n
+     * values.
+     */
+    void forwardUnitary(const double *in, double *out);
+
+    /**
+     * Carry out a type-III (inverse) DCT of size n, where n is the
+     * value provided to the constructor above.
+     *
+     * The in and out pointers must point to (enough space for) n
+     * values.
+     */
+    void inverse(const double *in, double *out);
+
+    /**
+     * Carry out a type-III (inverse) unitary DCT of size n, where n
+     * is the value provided to the constructor above. This is the
+     * inverse of forwardUnitary().
+     *
+     * The in and out pointers must point to (enough space for) n
+     * values.
+     */
+    void inverseUnitary(const double *in, double *out);
+
+private:
+    int m_n;
+    double m_scale;
+    std::vector<double> m_scaled;
+    std::vector<double> m_time2;
+    std::vector<double> m_freq2r;
+    std::vector<double> m_freq2i;
+    FFTReal m_fft;
+};
+
+#endif
diff --git a/lib/qm-dsp/dsp/transforms/FFT.cpp b/lib/qm-dsp/dsp/transforms/FFT.cpp
index f5bbc3d77f2..da476b8a8b9 100644
--- a/lib/qm-dsp/dsp/transforms/FFT.cpp
+++ b/lib/qm-dsp/dsp/transforms/FFT.cpp
@@ -10,8 +10,8 @@
 
 #include "maths/MathUtilities.h"
 
-#include "ext/kissfft/kiss_fft.h"
-#include "ext/kissfft/kiss_fftr.h"
+#include "kiss_fft.h"
+#include "kiss_fftr.h"
 
 #include <cmath>
 
diff --git a/lib/qm-dsp/dsp/transforms/FFT.h b/lib/qm-dsp/dsp/transforms/FFT.h
index c8956ac7f24..73af5616e7e 100644
--- a/lib/qm-dsp/dsp/transforms/FFT.h
+++ b/lib/qm-dsp/dsp/transforms/FFT.h
@@ -4,6 +4,12 @@
     QM DSP Library
 
     Centre for Digital Music, Queen Mary, University of London.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
 */
 
 #ifndef FFT_H
diff --git a/lib/qm-dsp/dsp/wavelet/Wavelet.cpp b/lib/qm-dsp/dsp/wavelet/Wavelet.cpp
index 764c84b24a2..99f4bf9ef5f 100644
--- a/lib/qm-dsp/dsp/wavelet/Wavelet.cpp
+++ b/lib/qm-dsp/dsp/wavelet/Wavelet.cpp
@@ -71,8 +71,8 @@ Wavelet::getWaveletName(Type wavelet)
 
 void
 Wavelet::createDecompositionFilters(Type wavelet,
-                                    std::vector<float> &lpd,
-                                    std::vector<float> &hpd)
+                                    std::vector<double> &lpd,
+                                    std::vector<double> &hpd)
 {
     lpd.clear();
     hpd.clear();
@@ -1843,7 +1843,10 @@ Wavelet::createDecompositionFilters(Type wavelet,
         break;
     }
 
-    assert(flength == lpd.size());
-    assert(flength == hpd.size());
+    // avoid compiler warning for unused value if assert is not compiled in:
+    (void)flength;
+
+    assert(flength == int(lpd.size()));
+    assert(flength == int(hpd.size()));
 }
 
diff --git a/lib/qm-dsp/dsp/wavelet/Wavelet.h b/lib/qm-dsp/dsp/wavelet/Wavelet.h
index c07549584a9..131c3e79f49 100644
--- a/lib/qm-dsp/dsp/wavelet/Wavelet.h
+++ b/lib/qm-dsp/dsp/wavelet/Wavelet.h
@@ -73,8 +73,8 @@ class Wavelet
     static std::string getWaveletName(Type);
 
     static void createDecompositionFilters(Type,
-                                           std::vector<float> &lpd,
-                                           std::vector<float> &hpd);
+                                           std::vector<double> &lpd,
+                                           std::vector<double> &hpd);
 };
 
 #endif
diff --git a/lib/qm-dsp/ext/kissfft/CHANGELOG b/lib/qm-dsp/ext/kissfft/CHANGELOG
index 34feea539b6..2dd3603755c 100644
--- a/lib/qm-dsp/ext/kissfft/CHANGELOG
+++ b/lib/qm-dsp/ext/kissfft/CHANGELOG
@@ -58,7 +58,7 @@
     Replaced fixed point division with multiply&shift.  Thanks to Jean-Marc Valin for 
     discussions regarding.  Considerable speedup for fixed-point.
 
-    Corrected overflow protection in real fft routines when using fixed point.
+    Corrected overflow protection in real fft routines  when using fixed point.
     Finder's Credit goes to Robert Oschler of robodance for pointing me at the bug.
     This also led to the CHECK_OVERFLOW_OP macro.
 
@@ -78,7 +78,7 @@
 
 1.2 (Feb 23, 2004)
     interface change -- cfg object is forward declaration of struct instead of void*
-    This maintains type safety and lets the compiler warn/error about stupid mistakes.
+    This maintains type saftey and lets the compiler warn/error about stupid mistakes.
             (prompted by suggestion from Erik de Castro Lopo)
 
     small speed improvements
diff --git a/lib/qm-dsp/ext/kissfft/README b/lib/qm-dsp/ext/kissfft/README
new file mode 100644
index 00000000000..03b2e7a9c14
--- /dev/null
+++ b/lib/qm-dsp/ext/kissfft/README
@@ -0,0 +1,134 @@
+KISS FFT - A mixed-radix Fast Fourier Transform based up on the principle, 
+"Keep It Simple, Stupid."
+
+    There are many great fft libraries already around.  Kiss FFT is not trying
+to be better than any of them.  It only attempts to be a reasonably efficient, 
+moderately useful FFT that can use fixed or floating data types and can be 
+incorporated into someone's C program in a few minutes with trivial licensing.
+
+USAGE:
+
+    The basic usage for 1-d complex FFT is:
+
+        #include "kiss_fft.h"
+
+        kiss_fft_cfg cfg = kiss_fft_alloc( nfft ,is_inverse_fft ,0,0 );
+
+        while ...
+        
+            ... // put kth sample in cx_in[k].r and cx_in[k].i
+            
+            kiss_fft( cfg , cx_in , cx_out );
+            
+            ... // transformed. DC is in cx_out[0].r and cx_out[0].i 
+            
+        free(cfg);
+
+    Note: frequency-domain data is stored from dc up to 2pi.
+    so cx_out[0] is the dc bin of the FFT
+    and cx_out[nfft/2] is the Nyquist bin (if exists)
+
+    Declarations are in "kiss_fft.h", along with a brief description of the 
+functions you'll need to use. 
+
+Code definitions for 1d complex FFTs are in kiss_fft.c.
+
+You can do other cool stuff with the extras you'll find in tools/
+
+    * multi-dimensional FFTs 
+    * real-optimized FFTs  (returns the positive half-spectrum: (nfft/2+1) complex frequency bins)
+    * fast convolution FIR filtering (not available for fixed point)
+    * spectrum image creation
+
+The core fft and most tools/ code can be compiled to use float, double,
+ Q15 short or Q31 samples. The default is float.
+
+
+BACKGROUND:
+
+    I started coding this because I couldn't find a fixed point FFT that didn't 
+use assembly code.  I started with floating point numbers so I could get the 
+theory straight before working on fixed point issues.  In the end, I had a 
+little bit of code that could be recompiled easily to do ffts with short, float
+or double (other types should be easy too).  
+
+    Once I got my FFT working, I was curious about the speed compared to
+a well respected and highly optimized fft library.  I don't want to criticize 
+this great library, so let's call it FFT_BRANDX.
+During this process, I learned:
+
+    1. FFT_BRANDX has more than 100K lines of code. The core of kiss_fft is about 500 lines (cpx 1-d).
+    2. It took me an embarrassingly long time to get FFT_BRANDX working.
+    3. A simple program using FFT_BRANDX is 522KB. A similar program using kiss_fft is 18KB (without optimizing for size).
+    4. FFT_BRANDX is roughly twice as fast as KISS FFT in default mode.
+
+    It is wonderful that free, highly optimized libraries like FFT_BRANDX exist.
+But such libraries carry a huge burden of complexity necessary to extract every 
+last bit of performance.
+
+    Sometimes simpler is better, even if it's not better.
+
+FREQUENTLY ASKED QUESTIONS:
+	Q: Can I use kissfft in a project with a ___ license?
+	A: Yes.  See LICENSE below.
+
+	Q: Why don't I get the output I expect?
+	A: The two most common causes of this are 
+		1) scaling : is there a constant multiplier between what you got and what you want?
+		2) mixed build environment -- all code must be compiled with same preprocessor 
+		definitions for FIXED_POINT and kiss_fft_scalar
+
+	Q: Will you write/debug my code for me?
+	A: Probably not unless you pay me.  I am happy to answer pointed and topical questions, but 
+	I may refer you to a book, a forum, or some other resource.
+
+
+PERFORMANCE:
+    (on Athlon XP 2100+, with gcc 2.96, float data type)
+
+    Kiss performed 10000 1024-pt cpx ffts in .63 s of cpu time.
+    For comparison, it took md5sum twice as long to process the same amount of data.
+
+    Transforming 5 minutes of CD quality audio takes less than a second (nfft=1024). 
+
+DO NOT:
+    ... use Kiss if you need the Fastest Fourier Transform in the World
+    ... ask me to add features that will bloat the code
+
+UNDER THE HOOD:
+
+    Kiss FFT uses a time decimation, mixed-radix, out-of-place FFT. If you give it an input buffer  
+    and output buffer that are the same, a temporary buffer will be created to hold the data.
+
+    No static data is used.  The core routines of kiss_fft are thread-safe (but not all of the tools directory).
+
+    No scaling is done for the floating point version (for speed).  
+    Scaling is done both ways for the fixed-point version (for overflow prevention).
+
+    Optimized butterflies are used for factors 2,3,4, and 5. 
+
+    The real (i.e. not complex) optimization code only works for even length ffts.  It does two half-length
+    FFTs in parallel (packed into real&imag), and then combines them via twiddling.  The result is 
+    nfft/2+1 complex frequency bins from DC to Nyquist.  If you don't know what this means, search the web.
+
+    The fast convolution filtering uses the overlap-scrap method, slightly 
+    modified to put the scrap at the tail.
+
+LICENSE:
+    Revised BSD License, see COPYING for verbiage. 
+    Basically, "free to use&change, give credit where due, no guarantees"
+    Note this license is compatible with GPL at one end of the spectrum and closed, commercial software at 
+    the other end.  See http://www.fsf.org/licensing/licenses
+
+    A commercial license is available which removes the requirement for attribution.  Contact me for details.
+
+  
+TODO:
+    *) Add real optimization for odd length FFTs 
+    *) Document/revisit the input/output fft scaling
+    *) Make doc describing the overlap (tail) scrap fast convolution filtering in kiss_fastfir.c
+    *) Test all the ./tools/ code with fixed point (kiss_fastfir.c doesn't work, maybe others)
+
+AUTHOR:
+    Mark Borgerding
+    Mark@Borgerding.net
diff --git a/lib/qm-dsp/ext/kissfft/README.simd b/lib/qm-dsp/ext/kissfft/README.simd
new file mode 100644
index 00000000000..b0fdac55064
--- /dev/null
+++ b/lib/qm-dsp/ext/kissfft/README.simd
@@ -0,0 +1,78 @@
+If you are reading this, it means you think you may be interested in using the SIMD extensions in kissfft 
+to do 4 *separate* FFTs at once.
+
+Beware! Beyond here there be dragons!
+
+This API is not easy to use, is not well documented, and breaks the KISS principle.  
+
+
+Still reading? Okay, you may get rewarded for your patience with a considerable speedup 
+(2-3x) on intel x86 machines with SSE if you are willing to jump through some hoops.
+
+The basic idea is to use the packed 4 float __m128 data type as a scalar element.  
+This means that the format is pretty convoluted. It performs 4 FFTs per fft call on signals A,B,C,D.
+
+For complex data, the data is interlaced as follows:
+rA0,rB0,rC0,rD0,      iA0,iB0,iC0,iD0,   rA1,rB1,rC1,rD1, iA1,iB1,iC1,iD1 ...
+where "rA0" is the real part of the zeroth sample for signal A
+
+Real-only data is laid out:
+rA0,rB0,rC0,rD0,     rA1,rB1,rC1,rD1,      ... 
+
+Compile with gcc flags something like
+-O3 -mpreferred-stack-boundary=4  -DUSE_SIMD=1 -msse 
+
+Be aware of SIMD alignment.  This is the most likely cause of segfaults.  
+The code within kissfft uses scratch variables on the stack.  
+With SIMD, these must have addresses on 16 byte boundaries.  
+Search on "SIMD alignment" for more info.
+
+
+
+Robin at Divide Concept was kind enough to share his code for formatting to/from the SIMD kissfft.  
+I have not run it -- use it at your own risk.  It appears to do 4xN and Nx4 transpositions 
+(out of place).
+
+void SSETools::pack128(float* target, float* source, unsigned long size128)
+{
+   __m128* pDest = (__m128*)target;
+   __m128* pDestEnd = pDest+size128;
+   float* source0=source;
+   float* source1=source0+size128;
+   float* source2=source1+size128;
+   float* source3=source2+size128;
+
+   while(pDest<pDestEnd)
+   {
+       *pDest=_mm_set_ps(*source3,*source2,*source1,*source0);
+       source0++;
+       source1++;
+       source2++;
+       source3++;
+       pDest++;
+   }
+}
+
+void SSETools::unpack128(float* target, float* source, unsigned long size128)
+{
+
+   float* pSrc = source;
+   float* pSrcEnd = pSrc+size128*4;
+   float* target0=target;
+   float* target1=target0+size128;
+   float* target2=target1+size128;
+   float* target3=target2+size128;
+
+   while(pSrc<pSrcEnd)
+   {
+       *target0=pSrc[0];
+       *target1=pSrc[1];
+       *target2=pSrc[2];
+       *target3=pSrc[3];
+       target0++;
+       target1++;
+       target2++;
+       target3++;
+       pSrc+=4;
+   }
+} 
diff --git a/lib/qm-dsp/ext/kissfft/TIPS b/lib/qm-dsp/ext/kissfft/TIPS
new file mode 100644
index 00000000000..6a9579d6d4f
--- /dev/null
+++ b/lib/qm-dsp/ext/kissfft/TIPS
@@ -0,0 +1,39 @@
+Speed:
+    * If you want to use multiple cores, then compile with -openmp or -fopenmp (see your compiler docs).
+	Realize that larger FFTs will reap more benefit than smaller FFTs. This generally uses more CPU time, but
+	less wall time.
+
+    * experiment with compiler flags
+        Special thanks to Oscar Lesta. He suggested some compiler flags 
+        for gcc that make a big difference. They shave 10-15% off
+        execution time on some systems.  Try some combination of:
+                -march=pentiumpro
+                -ffast-math
+                -fomit-frame-pointer
+
+    * If the input data has no imaginary component, use the kiss_fftr code under tools/.
+      Real ffts are roughly twice as fast as complex.
+
+    * If you can rearrange your code to do 4 FFTs in parallel and you are on a recent Intel or AMD machine,
+    then you might want to experiment with the USE_SIMD code.  See README.simd
+
+
+Reducing code size:
+    * remove some of the butterflies. There are currently butterflies optimized for radices
+        2,3,4,5.  It is worth mentioning that you can still use FFT sizes that contain 
+        other factors, they just won't be quite as fast.  You can decide for yourself 
+        whether to keep radix 2 or 4.  If you do some work in this area, let me 
+        know what you find.
+
+    * For platforms where ROM/code space is more plentiful than RAM,
+     consider creating a hardcoded kiss_fft_state. In other words, decide which 
+     FFT size(s) you want and make a structure with the correct factors and twiddles.
+
+    * Frank van der Hulst offered numerous suggestions for smaller code size and correct operation 
+    on embedded targets.  "I'm happy to help anyone who is trying to implement KISSFFT on a micro"
+
+    Some of these were rolled into the mainline code base:
+        - using long casts to promote intermediate results of short*short multiplication
+        - delaying allocation of buffers that are sometimes unused.
+    In some cases, it may be desirable to limit capability in order to better suit the target:
+        - predefining the twiddle tables for the desired fft size.  
diff --git a/lib/qm-dsp/ext/kissfft/kiss_fft.c b/lib/qm-dsp/ext/kissfft/kiss_fft.c
index 846549b1cd6..7824d34521f 100644
--- a/lib/qm-dsp/ext/kissfft/kiss_fft.c
+++ b/lib/qm-dsp/ext/kissfft/kiss_fft.c
@@ -15,7 +15,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 
 #include "_kiss_fft_guts.h"
 /* The guts header contains all the multiplication and addition macros that are defined for
- fixed or floating point complex numbers.  It also declares the kf_ internal functions.
+ fixed or floating point complex numbers.  It also delares the kf_ internal functions.
  */
 
 static void kf_bfly2(
@@ -275,7 +275,8 @@ void kf_work(
 
     if (m==1) {
         do{
-            *Fout = *f;
+            Fout->r = f->r;
+            Fout->i = f->i;
             f += fstride*in_stride;
         }while(++Fout != Fout_end );
     }else{
diff --git a/lib/qm-dsp/ext/kissfft/kissfft.hh b/lib/qm-dsp/ext/kissfft/kissfft.hh
new file mode 100644
index 00000000000..4f6ac92f3b1
--- /dev/null
+++ b/lib/qm-dsp/ext/kissfft/kissfft.hh
@@ -0,0 +1,300 @@
+#ifndef KISSFFT_CLASS_HH
+#define KISSFFT_CLASS_HH
+#include <complex>
+#include <vector>
+
+namespace kissfft_utils {
+
+template <typename T_scalar>
+struct traits
+{
+    typedef T_scalar scalar_type;
+    typedef std::complex<scalar_type> cpx_type;
+    void fill_twiddles( std::complex<T_scalar> * dst ,int nfft,bool inverse)
+    {
+        T_scalar phinc =  (inverse?2:-2)* acos( (T_scalar) -1)  / nfft;
+        for (int i=0;i<nfft;++i)
+            dst[i] = exp( std::complex<T_scalar>(0,i*phinc) );
+    }
+
+    void prepare(
+            std::vector< std::complex<T_scalar> > & dst,
+            int nfft,bool inverse, 
+            std::vector<int> & stageRadix, 
+            std::vector<int> & stageRemainder )
+    {
+        _twiddles.resize(nfft);
+        fill_twiddles( &_twiddles[0],nfft,inverse);
+        dst = _twiddles;
+
+        //factorize
+        //start factoring out 4's, then 2's, then 3,5,7,9,...
+        int n= nfft;
+        int p=4;
+        do {
+            while (n % p) {
+                switch (p) {
+                    case 4: p = 2; break;
+                    case 2: p = 3; break;
+                    default: p += 2; break;
+                }
+                if (p*p>n)
+                    p=n;// no more factors
+            }
+            n /= p;
+            stageRadix.push_back(p);
+            stageRemainder.push_back(n);
+        }while(n>1);
+    }
+    std::vector<cpx_type> _twiddles;
+
+
+    const cpx_type twiddle(int i) { return _twiddles[i]; }
+};
+
+}
+
+template <typename T_Scalar,
+         typename T_traits=kissfft_utils::traits<T_Scalar> 
+         >
+class kissfft
+{
+    public:
+        typedef T_traits traits_type;
+        typedef typename traits_type::scalar_type scalar_type;
+        typedef typename traits_type::cpx_type cpx_type;
+
+        kissfft(int nfft,bool inverse,const traits_type & traits=traits_type() ) 
+            :_nfft(nfft),_inverse(inverse),_traits(traits)
+        {
+            _traits.prepare(_twiddles, _nfft,_inverse ,_stageRadix, _stageRemainder);
+        }
+
+        void transform(const cpx_type * src , cpx_type * dst)
+        {
+            kf_work(0, dst, src, 1,1);
+        }
+
+    private:
+        void kf_work( int stage,cpx_type * Fout, const cpx_type * f, size_t fstride,size_t in_stride)
+        {
+            int p = _stageRadix[stage];
+            int m = _stageRemainder[stage];
+            cpx_type * Fout_beg = Fout;
+            cpx_type * Fout_end = Fout + p*m;
+
+            if (m==1) {
+                do{
+                    *Fout = *f;
+                    f += fstride*in_stride;
+                }while(++Fout != Fout_end );
+            }else{
+                do{
+                    // recursive call:
+                    // DFT of size m*p performed by doing
+                    // p instances of smaller DFTs of size m, 
+                    // each one takes a decimated version of the input
+                    kf_work(stage+1, Fout , f, fstride*p,in_stride);
+                    f += fstride*in_stride;
+                }while( (Fout += m) != Fout_end );
+            }
+
+            Fout=Fout_beg;
+
+            // recombine the p smaller DFTs 
+            switch (p) {
+                case 2: kf_bfly2(Fout,fstride,m); break;
+                case 3: kf_bfly3(Fout,fstride,m); break;
+                case 4: kf_bfly4(Fout,fstride,m); break;
+                case 5: kf_bfly5(Fout,fstride,m); break;
+                default: kf_bfly_generic(Fout,fstride,m,p); break;
+            }
+        }
+
+        // these were #define macros in the original kiss_fft
+        void C_ADD( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a+b;}
+        void C_MUL( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a*b;}
+        void C_SUB( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a-b;}
+        void C_ADDTO( cpx_type & c,const cpx_type & a) { c+=a;}
+        void C_FIXDIV( cpx_type & ,int ) {} // NO-OP for float types
+        scalar_type S_MUL( const scalar_type & a,const scalar_type & b) { return a*b;}
+        scalar_type HALF_OF( const scalar_type & a) { return a*.5;}
+        void C_MULBYSCALAR(cpx_type & c,const scalar_type & a) {c*=a;}
+
+        void kf_bfly2( cpx_type * Fout, const size_t fstride, int m)
+        {
+            for (int k=0;k<m;++k) {
+                cpx_type t = Fout[m+k] * _traits.twiddle(k*fstride);
+                Fout[m+k] = Fout[k] - t;
+                Fout[k] += t;
+            }
+        }
+
+        void kf_bfly4( cpx_type * Fout, const size_t fstride, const size_t m)
+        {
+            cpx_type scratch[7];
+            int negative_if_inverse = _inverse * -2 +1;
+            for (size_t k=0;k<m;++k) {
+                scratch[0] = Fout[k+m] * _traits.twiddle(k*fstride);
+                scratch[1] = Fout[k+2*m] * _traits.twiddle(k*fstride*2);
+                scratch[2] = Fout[k+3*m] * _traits.twiddle(k*fstride*3);
+                scratch[5] = Fout[k] - scratch[1];
+
+                Fout[k] += scratch[1];
+                scratch[3] = scratch[0] + scratch[2];
+                scratch[4] = scratch[0] - scratch[2];
+                scratch[4] = cpx_type( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
+
+                Fout[k+2*m]  = Fout[k] - scratch[3];
+                Fout[k] += scratch[3];
+                Fout[k+m] = scratch[5] + scratch[4];
+                Fout[k+3*m] = scratch[5] - scratch[4];
+            }
+        }
+
+        void kf_bfly3( cpx_type * Fout, const size_t fstride, const size_t m)
+        {
+            size_t k=m;
+            const size_t m2 = 2*m;
+            cpx_type *tw1,*tw2;
+            cpx_type scratch[5];
+            cpx_type epi3;
+            epi3 = _twiddles[fstride*m];
+
+            tw1=tw2=&_twiddles[0];
+
+            do{
+                C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
+
+                C_MUL(scratch[1],Fout[m] , *tw1);
+                C_MUL(scratch[2],Fout[m2] , *tw2);
+
+                C_ADD(scratch[3],scratch[1],scratch[2]);
+                C_SUB(scratch[0],scratch[1],scratch[2]);
+                tw1 += fstride;
+                tw2 += fstride*2;
+
+                Fout[m] = cpx_type( Fout->real() - HALF_OF(scratch[3].real() ) , Fout->imag() - HALF_OF(scratch[3].imag() ) );
+
+                C_MULBYSCALAR( scratch[0] , epi3.imag() );
+
+                C_ADDTO(*Fout,scratch[3]);
+
+                Fout[m2] = cpx_type(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
+
+                C_ADDTO( Fout[m] , cpx_type( -scratch[0].imag(),scratch[0].real() ) );
+                ++Fout;
+            }while(--k);
+        }
+
+        void kf_bfly5( cpx_type * Fout, const size_t fstride, const size_t m)
+        {
+            cpx_type *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+            size_t u;
+            cpx_type scratch[13];
+            cpx_type * twiddles = &_twiddles[0];
+            cpx_type *tw;
+            cpx_type ya,yb;
+            ya = twiddles[fstride*m];
+            yb = twiddles[fstride*2*m];
+
+            Fout0=Fout;
+            Fout1=Fout0+m;
+            Fout2=Fout0+2*m;
+            Fout3=Fout0+3*m;
+            Fout4=Fout0+4*m;
+
+            tw=twiddles;
+            for ( u=0; u<m; ++u ) {
+                C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
+                scratch[0] = *Fout0;
+
+                C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+                C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+                C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+                C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+                C_ADD( scratch[7],scratch[1],scratch[4]);
+                C_SUB( scratch[10],scratch[1],scratch[4]);
+                C_ADD( scratch[8],scratch[2],scratch[3]);
+                C_SUB( scratch[9],scratch[2],scratch[3]);
+
+                C_ADDTO( *Fout0, scratch[7]);
+                C_ADDTO( *Fout0, scratch[8]);
+
+                scratch[5] = scratch[0] + cpx_type(
+                        S_MUL(scratch[7].real(),ya.real() ) + S_MUL(scratch[8].real() ,yb.real() ),
+                        S_MUL(scratch[7].imag(),ya.real()) + S_MUL(scratch[8].imag(),yb.real())
+                        );
+
+                scratch[6] =  cpx_type( 
+                        S_MUL(scratch[10].imag(),ya.imag()) + S_MUL(scratch[9].imag(),yb.imag()),
+                        -S_MUL(scratch[10].real(),ya.imag()) - S_MUL(scratch[9].real(),yb.imag()) 
+                        );
+
+                C_SUB(*Fout1,scratch[5],scratch[6]);
+                C_ADD(*Fout4,scratch[5],scratch[6]);
+
+                scratch[11] = scratch[0] + 
+                    cpx_type(
+                            S_MUL(scratch[7].real(),yb.real()) + S_MUL(scratch[8].real(),ya.real()),
+                            S_MUL(scratch[7].imag(),yb.real()) + S_MUL(scratch[8].imag(),ya.real())
+                            );
+
+                scratch[12] = cpx_type(
+                        -S_MUL(scratch[10].imag(),yb.imag()) + S_MUL(scratch[9].imag(),ya.imag()),
+                        S_MUL(scratch[10].real(),yb.imag()) - S_MUL(scratch[9].real(),ya.imag())
+                        );
+
+                C_ADD(*Fout2,scratch[11],scratch[12]);
+                C_SUB(*Fout3,scratch[11],scratch[12]);
+
+                ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+            }
+        }
+
+        /* perform the butterfly for one stage of a mixed radix FFT */
+        void kf_bfly_generic(
+                cpx_type * Fout,
+                const size_t fstride,
+                int m,
+                int p
+                )
+        {
+            int u,k,q1,q;
+            cpx_type * twiddles = &_twiddles[0];
+            cpx_type t;
+            int Norig = _nfft;
+            cpx_type scratchbuf[p];
+
+            for ( u=0; u<m; ++u ) {
+                k=u;
+                for ( q1=0 ; q1<p ; ++q1 ) {
+                    scratchbuf[q1] = Fout[ k  ];
+                    C_FIXDIV(scratchbuf[q1],p);
+                    k += m;
+                }
+
+                k=u;
+                for ( q1=0 ; q1<p ; ++q1 ) {
+                    int twidx=0;
+                    Fout[ k ] = scratchbuf[0];
+                    for (q=1;q<p;++q ) {
+                        twidx += fstride * k;
+                        if (twidx>=Norig) twidx-=Norig;
+                        C_MUL(t,scratchbuf[q] , twiddles[twidx] );
+                        C_ADDTO( Fout[ k ] ,t);
+                    }
+                    k += m;
+                }
+            }
+        }
+
+        int _nfft;
+        bool _inverse;
+        std::vector<cpx_type> _twiddles;
+        std::vector<int> _stageRadix;
+        std::vector<int> _stageRemainder;
+        traits_type _traits;
+};
+#endif
diff --git a/lib/qm-dsp/ext/kissfft/kiss_fftr.c b/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.c
similarity index 100%
rename from lib/qm-dsp/ext/kissfft/kiss_fftr.c
rename to lib/qm-dsp/ext/kissfft/tools/kiss_fftr.c
diff --git a/lib/qm-dsp/ext/kissfft/kiss_fftr.h b/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.h
similarity index 100%
rename from lib/qm-dsp/ext/kissfft/kiss_fftr.h
rename to lib/qm-dsp/ext/kissfft/tools/kiss_fftr.h
diff --git a/lib/qm-dsp/key_rounding.patch b/lib/qm-dsp/key_rounding.patch
deleted file mode 100644
index c07e2d5f41e..00000000000
--- a/lib/qm-dsp/key_rounding.patch
+++ /dev/null
@@ -1,253 +0,0 @@
-diff --git a/lib/qm-dsp/dsp/chromagram/Chromagram.cpp b/lib/qm-dsp/dsp/chromagram/Chromagram.cpp
-index a8597a5..3e83367 100644
---- a/lib/qm-dsp/dsp/chromagram/Chromagram.cpp
-+++ b/lib/qm-dsp/dsp/chromagram/Chromagram.cpp
-@@ -33,8 +33,8 @@ int Chromagram::initialise( ChromaConfig Config )
-     m_BPO  = Config.BPO;		// bins per octave
-     m_normalise = Config.normalise;     // if frame normalisation is required
- 
--    // No. of constant Q bins
--    m_uK = ( unsigned int ) ceil( m_BPO * log(m_FMax/m_FMin)/log(2.0));	
-+    // No. of constant Q bins, extended to a full cotave
-+    m_uK = m_BPO * (unsigned int)ceil(log(m_FMax/m_FMin)/log(2.0));
- 
-     // Create array for chroma result
-     m_chromadata = new double[ m_BPO ];
-@@ -44,7 +44,7 @@ int Chromagram::initialise( ChromaConfig Config )
- 
-     // Populate CQ config structure with parameters
-     // inherited from the Chroma config
--    ConstantQConfig.FS	 = Config.FS;
-+    ConstantQConfig.FS = Config.FS;
-     ConstantQConfig.min = m_FMin;
-     ConstantQConfig.max = m_FMax;
-     ConstantQConfig.BPO = m_BPO;
-@@ -134,7 +134,7 @@ double* Chromagram::process( const double *data )
-         m_windowbuf = new double[m_frameSize];
-     }
- 
--    for (int i = 0; i < m_frameSize; ++i) {
-+    for (unsigned int i = 0; i < m_frameSize; ++i) {
-         m_windowbuf[i] = data[i];
-     }
-     m_window->cut(m_windowbuf);
-@@ -155,20 +155,18 @@ double* Chromagram::process( const double *real, const double *imag )
-     // initialise chromadata to 0
-     for (unsigned i = 0; i < m_BPO; i++) m_chromadata[i] = 0;
- 
--    double cmax = 0.0;
--    double cval = 0;
-     // Calculate ConstantQ frame
-     m_ConstantQ->process( real, imag, m_CQRe, m_CQIm );
- 	
-     // add each octave of cq data into Chromagram
--    const unsigned octaves = (int)floor(double( m_uK/m_BPO))-1;
--    for (unsigned octave = 0; octave <= octaves; octave++) 
-+    const unsigned octaves = m_uK / m_BPO;
-+    for (unsigned octave = 0; octave < octaves; octave++)
-     {
--	unsigned firstBin = octave*m_BPO;
--	for (unsigned i = 0; i < m_BPO; i++) 
--	{
--	    m_chromadata[i] += kabs( m_CQRe[ firstBin + i ], m_CQIm[ firstBin + i ]);
--	}
-+        unsigned firstBin = octave * m_BPO;
-+        for (unsigned i = 0; i < m_BPO; i++)
-+        {
-+            m_chromadata[i] += kabs( m_CQRe[ firstBin + i ], m_CQIm[ firstBin + i ]);
-+        }
-     }
- 
-     MathUtilities::normalise(m_chromadata, m_BPO, m_normalise);
-diff --git a/lib/qm-dsp/dsp/chromagram/Chromagram.h b/lib/qm-dsp/dsp/chromagram/Chromagram.h
-index bd928f5..b2ad72e 100644
---- a/lib/qm-dsp/dsp/chromagram/Chromagram.h
-+++ b/lib/qm-dsp/dsp/chromagram/Chromagram.h
-@@ -21,7 +21,7 @@
- #include "ConstantQ.h"
- 
- struct ChromaConfig{
--    unsigned int FS;
-+    double FS;
-     double min;
-     double max;
-     unsigned int BPO;
-diff --git a/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp b/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp
-index b764235..aab4848 100644
---- a/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp
-+++ b/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp
-@@ -125,14 +125,18 @@ void ConstantQ::sparsekernel()
-             hammingWindowIm[u] = 0;
-         }
-         
--	// Computing a hamming window
--	const unsigned hammingLength = (int) ceil( m_dQ * m_FS / ( m_FMin * pow(2,((double)(k))/(double)m_BPO)));
-+        const double samplesPerCycle =
-+                m_FS / (m_FMin * pow(2, (double)k / (double)m_BPO));
-+
-+        // Computing a hamming window
-+        const unsigned hammingLength = (int) ceil(
-+                m_dQ * samplesPerCycle);
- 
-         unsigned origin = m_FFTLength/2 - hammingLength/2;
- 
- 	for (unsigned i=0; i<hammingLength; i++) 
- 	{
--	    const double angle = 2*PI*m_dQ*i/hammingLength;
-+	    const double angle = 2*PI*i/samplesPerCycle;
- 	    const double real = cos(angle);
- 	    const double imag = sin(angle);
- 	    const double absol = hamming(hammingLength, i)/hammingLength;
-@@ -140,6 +144,10 @@ void ConstantQ::sparsekernel()
- 	    hammingWindowIm[ origin + i ] = absol*imag;
- 	}
- 
-+	/* This code splits the hanning widow and moves it to the beginning
-+	   and the end, creating an epty gap in the middle. 
-+	   Is disables, because it results in wrong results,
-+	   when tested with sin waves centered on a bin frequency.
-         for (unsigned i = 0; i < m_FFTLength/2; ++i) {
-             double temp = hammingWindowRe[i];
-             hammingWindowRe[i] = hammingWindowRe[i + m_FFTLength/2];
-@@ -148,6 +156,7 @@ void ConstantQ::sparsekernel()
-             hammingWindowIm[i] = hammingWindowIm[i + m_FFTLength/2];
-             hammingWindowIm[i + m_FFTLength/2] = temp;
-         }
-+	*/
-     
- 	//do fft of hammingWindow
- 	m_FFT.process( 0, hammingWindowRe, hammingWindowIm, transfHammingWindowRe, transfHammingWindowIm );
-@@ -292,8 +301,10 @@ void ConstantQ::initialise( CQConfig Config )
-     m_BPO = Config.BPO;		// bins per octave
-     m_CQThresh = Config.CQThresh;// ConstantQ threshold for kernel generation
- 
--    m_dQ = 1/(pow(2,(1/(double)m_BPO))-1);	// Work out Q value for Filter bank
--    m_uK = (unsigned int) ceil(m_BPO * log(m_FMax/m_FMin)/log(2.0));	// No. of constant Q bins
-+    // Work out Q value for Filter bank
-+    m_dQ = 1/(pow(2,(1/(double)m_BPO))-1);
-+    // No. of constant Q bins, extended to a full cotave
-+    m_uK = m_BPO * (unsigned int)ceil(log(m_FMax/m_FMin)/log(2.0));
- 
- //    std::cerr << "ConstantQ::initialise: rate = " << m_FS << ", fmin = " << m_FMin << ", fmax = " << m_FMax << ", bpo = " << m_BPO << ", K = " << m_uK << ", Q = " << m_dQ << std::endl;
- 
-diff --git a/lib/qm-dsp/dsp/chromagram/ConstantQ.h b/lib/qm-dsp/dsp/chromagram/ConstantQ.h
-index c06f60a..bd666dd 100644
---- a/lib/qm-dsp/dsp/chromagram/ConstantQ.h
-+++ b/lib/qm-dsp/dsp/chromagram/ConstantQ.h
-@@ -21,7 +21,7 @@
- #include "maths/MathUtilities.h"
- 
- struct CQConfig{
--    unsigned int FS;   // samplerate
-+    double FS;   // samplerate
-     double min;        // minimum frequency
-     double max;        // maximum frequency
-     unsigned int BPO;  // bins per octave
-@@ -58,7 +58,7 @@ private:
-     void deInitialise();
- 	
-     double* m_CQdata;
--    unsigned int m_FS;
-+    double m_FS;
-     double m_FMin;
-     double m_FMax;
-     double m_dQ;
-diff --git a/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp b/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
-index cf3580e..7407c43 100644
---- a/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
-+++ b/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
-@@ -60,13 +60,13 @@ GetKeyMode::GetKeyMode( int sampleRate, float tuningFrequency,
-         
-     // Chromagram configuration parameters
-     m_ChromaConfig.normalise = MathUtilities::NormaliseUnitMax;
--    m_ChromaConfig.FS = lrint(sampleRate/(double)m_DecimationFactor);
--    if (m_ChromaConfig.FS < 1) m_ChromaConfig.FS = 1;
-+    m_ChromaConfig.FS = sampleRate/(double)m_DecimationFactor;
- 
--    // Set C (= MIDI #12) as our base :
-+    // Set C3 (= MIDI #48) as our base:
-     // This implies that key = 1 => Cmaj, key = 12 => Bmaj, key = 13 => Cmin, etc.
-     m_ChromaConfig.min = Pitch::getFrequencyForPitch
-         (48, 0, tuningFrequency);
-+    // C7 (= MIDI #96) is the exclusive maximum key:
-     m_ChromaConfig.max = Pitch::getFrequencyForPitch
-         (96, 0, tuningFrequency);
- 
-@@ -177,15 +177,14 @@ int GetKeyMode::process(double *PCMData)
- 
-     m_ChrPointer = m_Chroma->process( m_DecimatedBuffer );		
- 
--	
--    // Move bins such that the centre of the base note is in the
--    // middle of its three bins :
--    // Added 21.11.07 by Chris Sutton based on debugging with Katy
--    // Noland + comparison with Matlab equivalent.
--    MathUtilities::circShift( m_ChrPointer, m_BPO, 1);
-+    // The Cromagram has the center of C at bin 0, while the major
-+    // and minor profiles have the center of C at 1. We want to have
-+    // the correlation for C result also at 1.
-+    // To achieve this we have to shift two times:
-+    MathUtilities::circShift( m_ChrPointer, m_BPO, 2);
- /*
-     std::cout << "raw chroma: ";
--    for (int ii = 0; ii < m_BPO; ++ii) {
-+    for (unsigned int ii = 0; ii < m_BPO; ++ii) {
-       if (ii % (m_BPO/12) == 0) std::cout << "\n";
-         std::cout << m_ChrPointer[ii] << " ";
-     }
-@@ -266,9 +265,13 @@ int GetKeyMode::process(double *PCMData)
-   std::cout << std::endl;
- */
-     double dummy;
--    // '1 +' because we number keys 1-24, not 0-23.
--    key = 1 + (int)ceil( (double)MathUtilities::getMax( m_Keys, 2* m_BPO, &dummy )/3 );
-+    // m_Keys[1] is C center  1 / 3 + 1 = 1
-+    // m_Keys[4] is D center  4 / 3 + 1 = 2
-+    // '+ 1' because we number keys 1-24, not 0-23.
-+    int maxBin = MathUtilities::getMax( m_Keys, 2* m_BPO, &dummy );
-+    key = maxBin / 3 + 1;
- 
-+//    std::cout << "fractional key pre-sorting: " << (maxBin + 2) / 3.0 << std::endl;
- //    std::cout << "key pre-sorting: " << key << std::endl;
- 
- 
-diff --git a/lib/qm-dsp/key_rounding.patch b/lib/qm-dsp/key_rounding.patch
-new file mode 100644
-index 0000000..dd1442e
---- /dev/null
-+++ b/lib/qm-dsp/key_rounding.patch
-@@ -0,0 +1,34 @@
-+diff --git a/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp b/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
-+index 55a1333..cf3580e 100644
-+--- a/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
-++++ b/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
-+@@ -177,11 +177,12 @@ int GetKeyMode::process(double *PCMData)
-+ 
-+     m_ChrPointer = m_Chroma->process( m_DecimatedBuffer );		
-+ 
-+-    // The Cromagram has the center of C at bin 0, while the major
-+-    // and minor profiles have the center of C at 1. We want to have
-+-    // the correlation for C result also at 1.
-+-    // To achieve this we have to shift two times:
-+-    MathUtilities::circShift( m_ChrPointer, m_BPO, 2);
-++	
-++    // Move bins such that the centre of the base note is in the
-++    // middle of its three bins :
-++    // Added 21.11.07 by Chris Sutton based on debugging with Katy
-++    // Noland + comparison with Matlab equivalent.
-++    MathUtilities::circShift( m_ChrPointer, m_BPO, 1);
-+ /*
-+     std::cout << "raw chroma: ";
-+     for (int ii = 0; ii < m_BPO; ++ii) {
-+@@ -265,10 +266,8 @@ int GetKeyMode::process(double *PCMData)
-+   std::cout << std::endl;
-+ */
-+     double dummy;
-+-    // m_Keys[1] is C center  1 / 3 + 1 = 1
-+-    // m_Keys[4] is D center  4 / 3 + 1 = 2
-+-    // '+ 1' because we number keys 1-24, not 0-23.
-+-    key = MathUtilities::getMax( m_Keys, 2* m_BPO, &dummy ) / 3 + 1;
-++    // '1 +' because we number keys 1-24, not 0-23.
-++    key = 1 + (int)ceil( (double)MathUtilities::getMax( m_Keys, 2* m_BPO, &dummy )/3 );
-+ 
-+ //    std::cout << "key pre-sorting: " << key << std::endl;
diff --git a/lib/qm-dsp/maths/CosineDistance.cpp b/lib/qm-dsp/maths/CosineDistance.cpp
index 13ab9ce0e85..4b06a7ad2ac 100644
--- a/lib/qm-dsp/maths/CosineDistance.cpp
+++ b/lib/qm-dsp/maths/CosineDistance.cpp
@@ -34,7 +34,7 @@ double CosineDistance::distance(const vector<double> &v1,
     }
     else
     {
-        for(int i=0; i<v1.size(); i++)
+        for(int i=0; i<int(v1.size()); i++)
         {
             dSum1 += v1[i]*v2[i];
             dDen1 += v1[i]*v1[i];
diff --git a/lib/qm-dsp/maths/MathUtilities.cpp b/lib/qm-dsp/maths/MathUtilities.cpp
index cb35a2a7e0b..a9dabf372dc 100644
--- a/lib/qm-dsp/maths/MathUtilities.cpp
+++ b/lib/qm-dsp/maths/MathUtilities.cpp
@@ -20,6 +20,7 @@
 #include <vector>
 #include <cmath>
 
+using namespace std;
 
 double MathUtilities::mod(double x, double y)
 {
@@ -38,9 +39,9 @@ double MathUtilities::princarg(double ang)
     return ValOut;
 }
 
-void MathUtilities::getAlphaNorm(const double *data, unsigned int len, unsigned int alpha, double* ANorm)
+void MathUtilities::getAlphaNorm(const double *data, int len, int alpha, double* ANorm)
 {
-    unsigned int i;
+    int i;
     double temp = 0.0;
     double a=0.0;
 	
@@ -56,17 +57,16 @@ void MathUtilities::getAlphaNorm(const double *data, unsigned int len, unsigned
     *ANorm = a;
 }
 
-double MathUtilities::getAlphaNorm( const std::vector <double> &data, unsigned int alpha )
+double MathUtilities::getAlphaNorm( const vector <double> &data, int alpha )
 {
-    unsigned int i;
-    unsigned int len = data.size();
+    int i;
+    int len = data.size();
     double temp = 0.0;
     double a=0.0;
 	
     for( i = 0; i < len; i++)
     {
 	temp = data[ i ];
-		
 	a  += ::pow( fabs(temp), double(alpha) );
     }
     a /= ( double )len;
@@ -84,13 +84,13 @@ double MathUtilities::round(double x)
     }
 }
 
-double MathUtilities::median(const double *src, unsigned int len)
+double MathUtilities::median(const double *src, int len)
 {
     if (len == 0) return 0;
     
-    std::vector<double> scratch;
+    vector<double> scratch;
     for (int i = 0; i < len; ++i) scratch.push_back(src[i]);
-    std::sort(scratch.begin(), scratch.end());
+    sort(scratch.begin(), scratch.end());
 
     int middle = len/2;
     if (len % 2 == 0) {
@@ -100,9 +100,9 @@ double MathUtilities::median(const double *src, unsigned int len)
     }
 }
 
-double MathUtilities::sum(const double *src, unsigned int len)
+double MathUtilities::sum(const double *src, int len)
 {
-    unsigned int i ;
+    int i ;
     double retVal =0.0;
 
     for(  i = 0; i < len; i++)
@@ -113,7 +113,7 @@ double MathUtilities::sum(const double *src, unsigned int len)
     return retVal;
 }
 
-double MathUtilities::mean(const double *src, unsigned int len)
+double MathUtilities::mean(const double *src, int len)
 {
     double retVal =0.0;
 
@@ -126,9 +126,9 @@ double MathUtilities::mean(const double *src, unsigned int len)
     return retVal;
 }
 
-double MathUtilities::mean(const std::vector<double> &src,
-                           unsigned int start,
-                           unsigned int count)
+double MathUtilities::mean(const vector<double> &src,
+                           int start,
+                           int count)
 {
     double sum = 0.;
 	
@@ -142,9 +142,9 @@ double MathUtilities::mean(const std::vector<double> &src,
     return sum / count;
 }
 
-void MathUtilities::getFrameMinMax(const double *data, unsigned int len, double *min, double *max)
+void MathUtilities::getFrameMinMax(const double *data, int len, double *min, double *max)
 {
-    unsigned int i;
+    int i;
     double temp = 0.0;
 
     if (len == 0) {
@@ -171,10 +171,10 @@ void MathUtilities::getFrameMinMax(const double *data, unsigned int len, double
     }
 }
 
-int MathUtilities::getMax( double* pData, unsigned int Length, double* pMax )
+int MathUtilities::getMax( double* pData, int Length, double* pMax )
 {
-	unsigned int index = 0;
-	unsigned int i;
+	int index = 0;
+	int i;
 	double temp = 0.0;
 	
 	double max = pData[0];
@@ -197,15 +197,15 @@ int MathUtilities::getMax( double* pData, unsigned int Length, double* pMax )
 	return index;
 }
 
-int MathUtilities::getMax( const std::vector<double> & data, double* pMax )
+int MathUtilities::getMax( const vector<double> & data, double* pMax )
 {
-	unsigned int index = 0;
-	unsigned int i;
+	int index = 0;
+	int i;
 	double temp = 0.0;
 	
 	double max = data[0];
 
-	for( i = 0; i < data.size(); i++)
+	for( i = 0; i < int(data.size()); i++)
 	{
 		temp = data[ i ];
 
@@ -286,7 +286,7 @@ void MathUtilities::normalise(double *data, int length, NormaliseType type)
     }
 }
 
-void MathUtilities::normalise(std::vector<double> &data, NormaliseType type)
+void MathUtilities::normalise(vector<double> &data, NormaliseType type)
 {
     switch (type) {
 
@@ -317,20 +317,46 @@ void MathUtilities::normalise(std::vector<double> &data, NormaliseType type)
     }
 }
 
-void MathUtilities::adaptiveThreshold(std::vector<double> &data)
+double MathUtilities::getLpNorm(const vector<double> &data, int p)
+{
+    double tot = 0.0;
+    for (int i = 0; i < int(data.size()); ++i) {
+        tot += abs(pow(data[i], p));
+    }
+    return pow(tot, 1.0 / p);
+}
+
+vector<double> MathUtilities::normaliseLp(const vector<double> &data,
+                                               int p,
+                                               double threshold)
+{
+    int n = int(data.size());
+    if (n == 0 || p == 0) return data;
+    double norm = getLpNorm(data, p);
+    if (norm < threshold) {
+        return vector<double>(n, 1.0 / pow(n, 1.0 / p)); // unit vector
+    }
+    vector<double> out(n);
+    for (int i = 0; i < n; ++i) {
+        out[i] = data[i] / norm;
+    }
+    return out;
+}
+    
+void MathUtilities::adaptiveThreshold(vector<double> &data)
 {
     int sz = int(data.size());
     if (sz == 0) return;
 
-    std::vector<double> smoothed(sz);
+    vector<double> smoothed(sz);
 	
     int p_pre = 8;
     int p_post = 7;
 
     for (int i = 0; i < sz; ++i) {
 
-        int first = std::max(0,      i - p_pre);
-        int last  = std::min(sz - 1, i + p_post);
+        int first = max(0,      i - p_pre);
+        int last  = min(sz - 1, i + p_post);
 
         smoothed[i] = mean(data, first, last - first + 1);
     }
diff --git a/lib/qm-dsp/maths/MathUtilities.h b/lib/qm-dsp/maths/MathUtilities.h
index fac710af9aa..ec86efcf2fa 100644
--- a/lib/qm-dsp/maths/MathUtilities.h
+++ b/lib/qm-dsp/maths/MathUtilities.h
@@ -35,32 +35,32 @@ class MathUtilities
      * Return through min and max pointers the highest and lowest
      * values in the given array of the given length.
      */
-    static void	  getFrameMinMax( const double* data, unsigned int len,  double* min, double* max );
+    static void	  getFrameMinMax( const double* data, int len,  double* min, double* max );
 
     /**
      * Return the mean of the given array of the given length.
      */
-    static double mean( const double* src, unsigned int len );
+    static double mean( const double* src, int len );
 
     /**
      * Return the mean of the subset of the given vector identified by
      * start and count.
      */
     static double mean( const std::vector<double> &data,
-                        unsigned int start, unsigned int count );
+                        int start, int count );
     
     /**
      * Return the sum of the values in the given array of the given
      * length.
      */
-    static double sum( const double* src, unsigned int len );
+    static double sum( const double* src, int len );
 
     /**
      * Return the median of the values in the given array of the given
      * length. If the array is even in length, the returned value will
      * be half-way between the two values adjacent to median.
      */
-    static double median( const double* src, unsigned int len );
+    static double median( const double* src, int len );
 
     /**
      * The principle argument function. Map the phase angle ang into
@@ -73,14 +73,21 @@ class MathUtilities
      */
     static double mod( double x, double y);
 
-    static void	  getAlphaNorm(const double *data, unsigned int len, unsigned int alpha, double* ANorm);
-    static double getAlphaNorm(const std::vector <double> &data, unsigned int alpha );
-
-    static void   circShift( double* data, int length, int shift);
+    /**
+     * The alpha norm is the alpha'th root of the mean alpha'th power
+     * magnitude. For example if alpha = 2 this corresponds to the RMS
+     * of the input data, and when alpha = 1 this is the mean
+     * magnitude.
+     */
+    static void	  getAlphaNorm(const double *data, int len, int alpha, double* ANorm);
 
-    static int	  getMax( double* data, unsigned int length, double* max = 0 );
-    static int	  getMax( const std::vector<double> &data, double* max = 0 );
-    static int    compareInt(const void * a, const void * b);
+    /**
+     * The alpha norm is the alpha'th root of the mean alpha'th power
+     * magnitude. For example if alpha = 2 this corresponds to the RMS
+     * of the input data, and when alpha = 1 this is the mean
+     * magnitude.
+     */
+    static double getAlphaNorm(const std::vector <double> &data, int alpha );
 
     enum NormaliseType {
         NormaliseNone,
@@ -94,12 +101,35 @@ class MathUtilities
     static void normalise(std::vector<double> &data,
                           NormaliseType n = NormaliseUnitMax);
 
+    /**
+     * Calculate the L^p norm of a vector. Equivalent to MATLAB's
+     * norm(data, p).
+     */
+    static double getLpNorm(const std::vector<double> &data,
+                            int p);
+
+    /**
+     * Normalise a vector by dividing through by its L^p norm. If the
+     * norm is below the given threshold, the unit vector for that
+     * norm is returned. p may be 0, in which case no normalisation
+     * happens and the data is returned unchanged.
+     */
+    static std::vector<double> normaliseLp(const std::vector<double> &data,
+                                           int p,
+                                           double threshold = 1e-6);
+    
     /**
      * Threshold the input/output vector data against a moving-mean
      * average filter.
      */
     static void adaptiveThreshold(std::vector<double> &data);
 
+    static void   circShift( double* data, int length, int shift);
+
+    static int	  getMax( double* data, int length, double* max = 0 );
+    static int	  getMax( const std::vector<double> &data, double* max = 0 );
+    static int    compareInt(const void * a, const void * b);
+
     /** 
      * Return true if x is 2^n for some integer n >= 0.
      */
diff --git a/lib/qm-dsp/maths/Polyfit.h b/lib/qm-dsp/maths/Polyfit.h
index 5cf97d9df0f..3405b5cb70b 100644
--- a/lib/qm-dsp/maths/Polyfit.h
+++ b/lib/qm-dsp/maths/Polyfit.h
@@ -80,22 +80,36 @@ class TPolyFit
 
 // some utility functions
 
-namespace NSUtility
+struct NSUtility
 {
-    inline void swap(double &a, double &b) {double t = a; a = b; b = t;}
-    void zeroise(vector<double> &array, int n);
-    void zeroise(vector<int> &array, int n);
-    void zeroise(vector<vector<double> > &matrix, int m, int n);
-    void zeroise(vector<vector<int> > &matrix, int m, int n);
-    inline double sqr(const double &x) {return x * x;}
+    static void swap(double &a, double &b) {double t = a; a = b; b = t;}
+    // fills a vector with zeros.
+    static void zeroise(vector<double> &array, int n) { 
+        array.clear();
+        for(int j = 0; j < n; ++j) array.push_back(0);
+    }
+    // fills a vector with zeros.
+    static void zeroise(vector<int> &array, int n) {
+        array.clear();
+        for(int j = 0; j < n; ++j) array.push_back(0);
+    }
+    // fills a (m by n) matrix with zeros.
+    static void zeroise(vector<vector<double> > &matrix, int m, int n) {
+        vector<double> zero;
+        zeroise(zero, n);
+        matrix.clear();
+        for(int j = 0; j < m; ++j) matrix.push_back(zero);
+    }
+    // fills a (m by n) matrix with zeros.
+    static void zeroise(vector<vector<int> > &matrix, int m, int n) {
+        vector<int> zero;
+        zeroise(zero, n);
+        matrix.clear();
+        for(int j = 0; j < m; ++j) matrix.push_back(zero);
+    }
+    static double sqr(const double &x) {return x * x;}
 };
 
-//---------------------------------------------------------------------------
-// Implementation
-//---------------------------------------------------------------------------
-using namespace NSUtility;
-//------------------------------------------------------------------------------------------
-
 
 // main PolyFit routine
 
@@ -113,9 +127,9 @@ double TPolyFit::PolyFit2 (const vector<double> &x,
     const int npoints(x.size());
     const int nterms(coefs.size());
     double correl_coef;
-    zeroise(g, nterms);
-    zeroise(a, nterms, nterms);
-    zeroise(xmatr, npoints, nterms);
+    NSUtility::zeroise(g, nterms);
+    NSUtility::zeroise(a, nterms, nterms);
+    NSUtility::zeroise(xmatr, npoints, nterms);
     if (nterms < 1) {
         std::cerr << "ERROR: PolyFit called with less than one term" << std::endl;
         return 0;
@@ -148,13 +162,13 @@ double TPolyFit::PolyFit2 (const vector<double> &x,
 	yc = 0.0;
 	for(j = 0; j < nterms; ++j)
 	    yc += coefs [j] * xmatr [i][j];
-	srs += sqr (yc - yi);
+	srs += NSUtility::sqr (yc - yi);
 	sum_y += yi;
 	sum_y2 += yi * yi;
     }
 
     // If all Y values are the same, avoid dividing by zero
-    correl_coef = sum_y2 - sqr (sum_y) / npoints;
+    correl_coef = sum_y2 - NSUtility::sqr (sum_y) / npoints;
     // Either return 0 or the correct value of correlation coefficient
     if (correl_coef != 0)
 	correl_coef = srs / correl_coef;
@@ -229,8 +243,8 @@ bool TPolyFit::GaussJordan (Matrix &b,
     vector<vector<int> >index;
     Matrix w;
 
-    zeroise(w, ncol, ncol);
-    zeroise(index, ncol, 3);
+    NSUtility::zeroise(w, ncol, ncol);
+    NSUtility::zeroise(index, ncol, 3);
 
     if(!GaussJordan2(b, y, w, index))
 	return false;
@@ -278,7 +292,7 @@ bool TPolyFit::GaussJordan2(Matrix &b,
     double big, t;
     double pivot;
     double determ;
-    int irow, icol;
+    int irow = 0, icol = 0;
     int ncol(b.size());
     int nv = 1;                  // single constant vector
     for(int i = 0; i < ncol; ++i)
@@ -355,53 +369,6 @@ bool TPolyFit::GaussJordan2(Matrix &b,
     } // { i-loop }
     return true;
 }
-//----------------------------------------------------------------------------------------------
-
-//------------------------------------------------------------------------------------
-
-// Utility functions
-//--------------------------------------------------------------------------
-
-// fills a vector with zeros.
-void NSUtility::zeroise(vector<double> &array, int n)
-{
-    array.clear();
-    for(int j = 0; j < n; ++j)
-	array.push_back(0);
-}
-//--------------------------------------------------------------------------
-
-// fills a vector with zeros.
-void NSUtility::zeroise(vector<int> &array, int n)
-{
-    array.clear();
-    for(int j = 0; j < n; ++j)
-	array.push_back(0);
-}
-//--------------------------------------------------------------------------
-
-// fills a (m by n) matrix with zeros.
-void NSUtility::zeroise(vector<vector<double> > &matrix, int m, int n)
-{
-    vector<double> zero;
-    zeroise(zero, n);
-    matrix.clear();
-    for(int j = 0; j < m; ++j)
-	matrix.push_back(zero);
-}
-//--------------------------------------------------------------------------
-
-// fills a (m by n) matrix with zeros.
-void NSUtility::zeroise(vector<vector<int> > &matrix, int m, int n)
-{
-    vector<int> zero;
-    zeroise(zero, n);
-    matrix.clear();
-    for(int j = 0; j < m; ++j)
-	matrix.push_back(zero);
-}
-//--------------------------------------------------------------------------
-
 
 #endif
  
diff --git a/lib/qm-dsp/maths/pca/pca.c b/lib/qm-dsp/maths/pca/pca.c
index 9dadb8687dd..1a7bfdd549c 100644
--- a/lib/qm-dsp/maths/pca/pca.c
+++ b/lib/qm-dsp/maths/pca/pca.c
@@ -252,7 +252,7 @@ void tqli(double* d, double* e, int n, double** z)
 void pca_project(double** data, int n, int m, int ncomponents)
 {
 	int  i, j, k, k2;
-	double  **symmat, **symmat2, *evals, *interm;
+	double  **symmat, /* **symmat2, */ *evals, *interm;
 	
 	//TODO: assert ncomponents < m
 	

From 2c42cfca62834d8ca88da5f98c500a02eb63d904 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= <daschuer@mixxx.org>
Date: Mon, 3 Jun 2019 01:48:53 +0200
Subject: [PATCH 2/2] Apply mixxx-changes.patch

---
 lib/qm-dsp/dsp/chromagram/ConstantQ.cpp    | 132 +++++-----
 lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp |  35 +++
 lib/qm-dsp/dsp/transforms/FFT.cpp          |   4 +-
 lib/qm-dsp/ext/kissfft/tools/kiss_fftr.c   |   2 +-
 lib/qm-dsp/ext/kissfft/tools/kiss_fftr.h   |   2 +-
 lib/qm-dsp/mixxx-changes.patch             | 272 +++++++++++++++++++++
 6 files changed, 376 insertions(+), 71 deletions(-)
 create mode 100644 lib/qm-dsp/mixxx-changes.patch

diff --git a/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp b/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp
index 4585ddc236a..a5b1c12105a 100644
--- a/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp
+++ b/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp
@@ -117,31 +117,29 @@ void ConstantQ::sparsekernel()
 
     FFT m_FFT(m_FFTLength);
 	
-    for (unsigned k = m_uK; k--; ) 
-    {
-        for (unsigned u=0; u < m_FFTLength; u++) 
-        {
+    for (unsigned k = m_uK; k--;) {
+        for (unsigned u=0; u < m_FFTLength; u++) {
             hammingWindowRe[u] = 0;
             hammingWindowIm[u] = 0;
         }
         
-	// Computing a hamming window
-	const unsigned hammingLength = (int) ceil( m_dQ * m_FS / ( m_FMin * pow(2,((double)(k))/(double)m_BPO)));
+        const double samplesPerCycle =
+                m_FS / (m_FMin * pow(2, (double)k / (double)m_BPO));
 
-//        cerr << "k = " << k << ", q = " << m_dQ << ", m_FMin = " << m_FMin << ", hammingLength = " << hammingLength << " (rounded up from " << (m_dQ * m_FS / ( m_FMin * pow(2,((double)(k))/(double)m_BPO))) << ")" << endl;
-        
+        // Computing a hamming window
+        const unsigned hammingLength = (int) ceil(
+                m_dQ * samplesPerCycle);
 
         unsigned origin = m_FFTLength/2 - hammingLength/2;
 
-	for (unsigned i=0; i<hammingLength; i++) 
-	{
-	    const double angle = 2*PI*m_dQ*i/hammingLength;
-	    const double real = cos(angle);
-	    const double imag = sin(angle);
-	    const double absol = hamming(hammingLength, i)/hammingLength;
-	    hammingWindowRe[ origin + i ] = absol*real;
-	    hammingWindowIm[ origin + i ] = absol*imag;
-	}
+        for (unsigned i=0; i<hammingLength; i++) {
+            const double angle = 2*PI*i/samplesPerCycle;
+            const double real = cos(angle);
+            const double imag = sin(angle);
+            const double absol = hamming(hammingLength, i)/hammingLength;
+            hammingWindowRe[ origin + i ] = absol*real;
+            hammingWindowIm[ origin + i ] = absol*imag;
+        }
 
         for (unsigned i = 0; i < m_FFTLength/2; ++i) {
             double temp = hammingWindowRe[i];
@@ -152,24 +150,24 @@ void ConstantQ::sparsekernel()
             hammingWindowIm[i + m_FFTLength/2] = temp;
         }
     
-	//do fft of hammingWindow
-	m_FFT.process( 0, hammingWindowRe, hammingWindowIm, transfHammingWindowRe, transfHammingWindowIm );
-
-		
-	for (unsigned j=0; j<( m_FFTLength ); j++) 
-	{
-	    // perform thresholding
-	    const double squaredBin = squaredModule( transfHammingWindowRe[ j ], transfHammingWindowIm[ j ]);
-	    if (squaredBin <= squareThreshold) continue;
-		
-	    // Insert non-zero position indexes
-	    sk->is.push_back(j);
-	    sk->js.push_back(k);
-
-	    // take conjugate, normalise and add to array sparkernel
-	    sk->real.push_back( transfHammingWindowRe[ j ]/m_FFTLength);
-	    sk->imag.push_back(-transfHammingWindowIm[ j ]/m_FFTLength);
-	}
+        //do fft of hammingWindow
+        m_FFT.process( 0, hammingWindowRe, hammingWindowIm, transfHammingWindowRe, transfHammingWindowIm );
+
+
+        for (unsigned j=0; j<( m_FFTLength ); j++) {
+            // perform thresholding
+            const double squaredBin = squaredModule( transfHammingWindowRe[ j ], transfHammingWindowIm[ j ]);
+            if (squaredBin <= squareThreshold) {
+                continue;
+            }
+            // Insert non-zero position indexes
+            sk->is.push_back(j);
+            sk->js.push_back(k);
+
+            // take conjugate, normalise and add to array sparkernel
+            sk->real.push_back( transfHammingWindowRe[ j ]/m_FFTLength);
+            sk->imag.push_back(-transfHammingWindowIm[ j ]/m_FFTLength);
+        }
 
     }
 
@@ -259,10 +257,9 @@ double* ConstantQ::process( const double* fftdata )
 
     SparseKernel *sk = m_sparseKernel;
 
-    for (unsigned row=0; row<2*m_uK; row++) 
-    {
-	m_CQdata[ row ] = 0;
-	m_CQdata[ row+1 ] = 0;
+    for (unsigned row=0; row<2*m_uK; row++) {
+        m_CQdata[ row ] = 0;
+        m_CQdata[ row+1 ] = 0;
     }
     const unsigned *fftbin = &(sk->is[0]);
     const unsigned *cqbin  = &(sk->js[0]);
@@ -270,18 +267,19 @@ double* ConstantQ::process( const double* fftdata )
     const double   *imag   = &(sk->imag[0]);
     const unsigned int sparseCells = sk->real.size();
 	
-    for (unsigned i = 0; i<sparseCells; i++)
-    {
-	const unsigned row = cqbin[i];
-	const unsigned col = fftbin[i];
-        if (col == 0) continue;
-	const double & r1  = real[i];
-	const double & i1  = imag[i];
-	const double & r2  = fftdata[ (2*m_FFTLength) - 2*col - 2 ];
-	const double & i2  = fftdata[ (2*m_FFTLength) - 2*col - 2 + 1 ];
-	// add the multiplication
-	m_CQdata[ 2*row  ] += (r1*r2 - i1*i2);
-	m_CQdata[ 2*row+1] += (r1*i2 + i1*r2);
+    for (unsigned i = 0; i<sparseCells; i++) {
+        const unsigned row = cqbin[i];
+        const unsigned col = fftbin[i];
+        if (col == 0) {
+            continue;
+        }
+        const double & r1  = real[i];
+        const double & i1  = imag[i];
+        const double & r2  = fftdata[ (2*m_FFTLength) - 2*col - 2 ];
+        const double & i2  = fftdata[ (2*m_FFTLength) - 2*col - 2 + 1 ];
+        // add the multiplication
+        m_CQdata[ 2*row  ] += (r1*r2 - i1*i2);
+        m_CQdata[ 2*row+1] += (r1*i2 + i1*r2);
     }
 
     return m_CQdata;
@@ -328,10 +326,9 @@ void ConstantQ::process(const double *FFTRe, const double* FFTIm,
 
     SparseKernel *sk = m_sparseKernel;
 
-    for (unsigned row=0; row<m_uK; row++) 
-    {
-	CQRe[ row ] = 0;
-	CQIm[ row ] = 0;
+    for (unsigned row=0; row<m_uK; row++) {
+        CQRe[ row ] = 0;
+        CQIm[ row ] = 0;
     }
 
     const unsigned *fftbin = &(sk->is[0]);
@@ -340,17 +337,18 @@ void ConstantQ::process(const double *FFTRe, const double* FFTIm,
     const double   *imag   = &(sk->imag[0]);
     const unsigned int sparseCells = sk->real.size();
 	
-    for (unsigned i = 0; i<sparseCells; i++)
-    {
-	const unsigned row = cqbin[i];
-	const unsigned col = fftbin[i];
-        if (col == 0) continue;
-	const double & r1  = real[i];
-	const double & i1  = imag[i];
-	const double & r2  = FFTRe[ m_FFTLength - col ];
-	const double & i2  = FFTIm[ m_FFTLength - col ];
-	// add the multiplication
-	CQRe[ row ] += (r1*r2 - i1*i2);
-	CQIm[ row ] += (r1*i2 + i1*r2);
+    for (unsigned i = 0; i<sparseCells; i++) {
+        const unsigned row = cqbin[i];
+        const unsigned col = fftbin[i];
+        if (col == 0) {
+            continue;
+        }
+        const double & r1  = real[i];
+        const double & i1  = imag[i];
+        const double & r2  = FFTRe[ m_FFTLength - col ];
+        const double & i2  = FFTIm[ m_FFTLength - col ];
+        // add the multiplication
+        CQRe[ row ] += (r1*r2 - i1*i2);
+        CQIm[ row ] += (r1*i2 + i1*r2);
     }
 }
diff --git a/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp b/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
index cb8bc2477af..a4abcbca85b 100644
--- a/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
+++ b/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
@@ -229,6 +229,41 @@ int GetKeyMode::process(double *PCMData)
     }
 
 
+/*
+    // filtered and normalized chromagram
+    for (unsigned int ii = 0; ii < m_BPO; ++ii) {
+      double value = m_MeanHPCP[(ii+m_BPO-1) % m_BPO];
+      if (value > 0 && maxNoteValue > 0.01) {
+          if (value > 0.99) {
+              std::cout << "Î";
+          } else if (value > 0.66) {
+              std::cout << "I";
+          } else if (value > 0.33) {
+              std::cout << "i";
+          } else {
+              std::cout << ";";
+          }
+      }
+      else
+      {
+          if (ii == 3 || ii == 9  || ii == 18 ||  ii == 24 || ii == 30 ||
+              ii == 4 || ii == 10 || ii == 19 ||  ii == 25 || ii == 31 ||
+              ii == 5 || ii == 11 || ii == 20 ||  ii == 26 || ii == 32) {
+              // Mark black keys
+              std::cout << "-";
+          }
+          else {
+              std::cout << "_";
+          }
+      }
+      if (ii % 3 == 2) {
+          std::cout << " ";
+      }
+    }
+*/
+
+
+
     for( k = 0; k < kBinsPerOctave; k++ )
     {
         // The Cromagram has the center of C at bin 0, while the major
diff --git a/lib/qm-dsp/dsp/transforms/FFT.cpp b/lib/qm-dsp/dsp/transforms/FFT.cpp
index da476b8a8b9..6f96b0ee47f 100644
--- a/lib/qm-dsp/dsp/transforms/FFT.cpp
+++ b/lib/qm-dsp/dsp/transforms/FFT.cpp
@@ -10,8 +10,8 @@
 
 #include "maths/MathUtilities.h"
 
-#include "kiss_fft.h"
-#include "kiss_fftr.h"
+#include "ext/kissfft/kiss_fft.h"
+#include "ext/kissfft/tools/kiss_fftr.h"
 
 #include <cmath>
 
diff --git a/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.c b/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.c
index b8e238b1e2e..8adb0f0b747 100644
--- a/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.c
+++ b/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.c
@@ -13,7 +13,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 */
 
 #include "kiss_fftr.h"
-#include "_kiss_fft_guts.h"
+#include "../_kiss_fft_guts.h"
 
 struct kiss_fftr_state{
     kiss_fft_cfg substate;
diff --git a/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.h b/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.h
index 72e5a57714d..81d8a8ec171 100644
--- a/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.h
+++ b/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.h
@@ -1,7 +1,7 @@
 #ifndef KISS_FTR_H
 #define KISS_FTR_H
 
-#include "kiss_fft.h"
+#include "../kiss_fft.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/lib/qm-dsp/mixxx-changes.patch b/lib/qm-dsp/mixxx-changes.patch
new file mode 100644
index 00000000000..5e0db497597
--- /dev/null
+++ b/lib/qm-dsp/mixxx-changes.patch
@@ -0,0 +1,272 @@
+diff --git a/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp b/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp
+index 4585ddc..a5b1c12 100644
+--- a/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp
++++ b/lib/qm-dsp/dsp/chromagram/ConstantQ.cpp
+@@ -117,31 +117,29 @@ void ConstantQ::sparsekernel()
+ 
+     FFT m_FFT(m_FFTLength);
+ 	
+-    for (unsigned k = m_uK; k--; ) 
+-    {
+-        for (unsigned u=0; u < m_FFTLength; u++) 
+-        {
++    for (unsigned k = m_uK; k--;) {
++        for (unsigned u=0; u < m_FFTLength; u++) {
+             hammingWindowRe[u] = 0;
+             hammingWindowIm[u] = 0;
+         }
+         
+-	// Computing a hamming window
+-	const unsigned hammingLength = (int) ceil( m_dQ * m_FS / ( m_FMin * pow(2,((double)(k))/(double)m_BPO)));
++        const double samplesPerCycle =
++                m_FS / (m_FMin * pow(2, (double)k / (double)m_BPO));
+ 
+-//        cerr << "k = " << k << ", q = " << m_dQ << ", m_FMin = " << m_FMin << ", hammingLength = " << hammingLength << " (rounded up from " << (m_dQ * m_FS / ( m_FMin * pow(2,((double)(k))/(double)m_BPO))) << ")" << endl;
+-        
++        // Computing a hamming window
++        const unsigned hammingLength = (int) ceil(
++                m_dQ * samplesPerCycle);
+ 
+         unsigned origin = m_FFTLength/2 - hammingLength/2;
+ 
+-	for (unsigned i=0; i<hammingLength; i++) 
+-	{
+-	    const double angle = 2*PI*m_dQ*i/hammingLength;
+-	    const double real = cos(angle);
+-	    const double imag = sin(angle);
+-	    const double absol = hamming(hammingLength, i)/hammingLength;
+-	    hammingWindowRe[ origin + i ] = absol*real;
+-	    hammingWindowIm[ origin + i ] = absol*imag;
+-	}
++        for (unsigned i=0; i<hammingLength; i++) {
++            const double angle = 2*PI*i/samplesPerCycle;
++            const double real = cos(angle);
++            const double imag = sin(angle);
++            const double absol = hamming(hammingLength, i)/hammingLength;
++            hammingWindowRe[ origin + i ] = absol*real;
++            hammingWindowIm[ origin + i ] = absol*imag;
++        }
+ 
+         for (unsigned i = 0; i < m_FFTLength/2; ++i) {
+             double temp = hammingWindowRe[i];
+@@ -152,24 +150,24 @@ void ConstantQ::sparsekernel()
+             hammingWindowIm[i + m_FFTLength/2] = temp;
+         }
+     
+-	//do fft of hammingWindow
+-	m_FFT.process( 0, hammingWindowRe, hammingWindowIm, transfHammingWindowRe, transfHammingWindowIm );
+-
+-		
+-	for (unsigned j=0; j<( m_FFTLength ); j++) 
+-	{
+-	    // perform thresholding
+-	    const double squaredBin = squaredModule( transfHammingWindowRe[ j ], transfHammingWindowIm[ j ]);
+-	    if (squaredBin <= squareThreshold) continue;
+-		
+-	    // Insert non-zero position indexes
+-	    sk->is.push_back(j);
+-	    sk->js.push_back(k);
+-
+-	    // take conjugate, normalise and add to array sparkernel
+-	    sk->real.push_back( transfHammingWindowRe[ j ]/m_FFTLength);
+-	    sk->imag.push_back(-transfHammingWindowIm[ j ]/m_FFTLength);
+-	}
++        //do fft of hammingWindow
++        m_FFT.process( 0, hammingWindowRe, hammingWindowIm, transfHammingWindowRe, transfHammingWindowIm );
++
++
++        for (unsigned j=0; j<( m_FFTLength ); j++) {
++            // perform thresholding
++            const double squaredBin = squaredModule( transfHammingWindowRe[ j ], transfHammingWindowIm[ j ]);
++            if (squaredBin <= squareThreshold) {
++                continue;
++            }
++            // Insert non-zero position indexes
++            sk->is.push_back(j);
++            sk->js.push_back(k);
++
++            // take conjugate, normalise and add to array sparkernel
++            sk->real.push_back( transfHammingWindowRe[ j ]/m_FFTLength);
++            sk->imag.push_back(-transfHammingWindowIm[ j ]/m_FFTLength);
++        }
+ 
+     }
+ 
+@@ -259,10 +257,9 @@ double* ConstantQ::process( const double* fftdata )
+ 
+     SparseKernel *sk = m_sparseKernel;
+ 
+-    for (unsigned row=0; row<2*m_uK; row++) 
+-    {
+-	m_CQdata[ row ] = 0;
+-	m_CQdata[ row+1 ] = 0;
++    for (unsigned row=0; row<2*m_uK; row++) {
++        m_CQdata[ row ] = 0;
++        m_CQdata[ row+1 ] = 0;
+     }
+     const unsigned *fftbin = &(sk->is[0]);
+     const unsigned *cqbin  = &(sk->js[0]);
+@@ -270,18 +267,19 @@ double* ConstantQ::process( const double* fftdata )
+     const double   *imag   = &(sk->imag[0]);
+     const unsigned int sparseCells = sk->real.size();
+ 	
+-    for (unsigned i = 0; i<sparseCells; i++)
+-    {
+-	const unsigned row = cqbin[i];
+-	const unsigned col = fftbin[i];
+-        if (col == 0) continue;
+-	const double & r1  = real[i];
+-	const double & i1  = imag[i];
+-	const double & r2  = fftdata[ (2*m_FFTLength) - 2*col - 2 ];
+-	const double & i2  = fftdata[ (2*m_FFTLength) - 2*col - 2 + 1 ];
+-	// add the multiplication
+-	m_CQdata[ 2*row  ] += (r1*r2 - i1*i2);
+-	m_CQdata[ 2*row+1] += (r1*i2 + i1*r2);
++    for (unsigned i = 0; i<sparseCells; i++) {
++        const unsigned row = cqbin[i];
++        const unsigned col = fftbin[i];
++        if (col == 0) {
++            continue;
++        }
++        const double & r1  = real[i];
++        const double & i1  = imag[i];
++        const double & r2  = fftdata[ (2*m_FFTLength) - 2*col - 2 ];
++        const double & i2  = fftdata[ (2*m_FFTLength) - 2*col - 2 + 1 ];
++        // add the multiplication
++        m_CQdata[ 2*row  ] += (r1*r2 - i1*i2);
++        m_CQdata[ 2*row+1] += (r1*i2 + i1*r2);
+     }
+ 
+     return m_CQdata;
+@@ -328,10 +326,9 @@ void ConstantQ::process(const double *FFTRe, const double* FFTIm,
+ 
+     SparseKernel *sk = m_sparseKernel;
+ 
+-    for (unsigned row=0; row<m_uK; row++) 
+-    {
+-	CQRe[ row ] = 0;
+-	CQIm[ row ] = 0;
++    for (unsigned row=0; row<m_uK; row++) {
++        CQRe[ row ] = 0;
++        CQIm[ row ] = 0;
+     }
+ 
+     const unsigned *fftbin = &(sk->is[0]);
+@@ -340,17 +337,18 @@ void ConstantQ::process(const double *FFTRe, const double* FFTIm,
+     const double   *imag   = &(sk->imag[0]);
+     const unsigned int sparseCells = sk->real.size();
+ 	
+-    for (unsigned i = 0; i<sparseCells; i++)
+-    {
+-	const unsigned row = cqbin[i];
+-	const unsigned col = fftbin[i];
+-        if (col == 0) continue;
+-	const double & r1  = real[i];
+-	const double & i1  = imag[i];
+-	const double & r2  = FFTRe[ m_FFTLength - col ];
+-	const double & i2  = FFTIm[ m_FFTLength - col ];
+-	// add the multiplication
+-	CQRe[ row ] += (r1*r2 - i1*i2);
+-	CQIm[ row ] += (r1*i2 + i1*r2);
++    for (unsigned i = 0; i<sparseCells; i++) {
++        const unsigned row = cqbin[i];
++        const unsigned col = fftbin[i];
++        if (col == 0) {
++            continue;
++        }
++        const double & r1  = real[i];
++        const double & i1  = imag[i];
++        const double & r2  = FFTRe[ m_FFTLength - col ];
++        const double & i2  = FFTIm[ m_FFTLength - col ];
++        // add the multiplication
++        CQRe[ row ] += (r1*r2 - i1*i2);
++        CQIm[ row ] += (r1*i2 + i1*r2);
+     }
+ }
+diff --git a/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp b/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
+index cb8bc24..a4abcbc 100644
+--- a/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
++++ b/lib/qm-dsp/dsp/keydetection/GetKeyMode.cpp
+@@ -229,6 +229,41 @@ int GetKeyMode::process(double *PCMData)
+     }
+ 
+ 
++/*
++    // filtered and normalized chromagram
++    for (unsigned int ii = 0; ii < m_BPO; ++ii) {
++      double value = m_MeanHPCP[(ii+m_BPO-1) % m_BPO];
++      if (value > 0 && maxNoteValue > 0.01) {
++          if (value > 0.99) {
++              std::cout << "Î";
++          } else if (value > 0.66) {
++              std::cout << "I";
++          } else if (value > 0.33) {
++              std::cout << "i";
++          } else {
++              std::cout << ";";
++          }
++      }
++      else
++      {
++          if (ii == 3 || ii == 9  || ii == 18 ||  ii == 24 || ii == 30 ||
++              ii == 4 || ii == 10 || ii == 19 ||  ii == 25 || ii == 31 ||
++              ii == 5 || ii == 11 || ii == 20 ||  ii == 26 || ii == 32) {
++              // Mark black keys
++              std::cout << "-";
++          }
++          else {
++              std::cout << "_";
++          }
++      }
++      if (ii % 3 == 2) {
++          std::cout << " ";
++      }
++    }
++*/
++
++
++
+     for( k = 0; k < kBinsPerOctave; k++ )
+     {
+         // The Cromagram has the center of C at bin 0, while the major
+diff --git a/lib/qm-dsp/dsp/transforms/FFT.cpp b/lib/qm-dsp/dsp/transforms/FFT.cpp
+index da476b8..6f96b0e 100644
+--- a/lib/qm-dsp/dsp/transforms/FFT.cpp
++++ b/lib/qm-dsp/dsp/transforms/FFT.cpp
+@@ -10,8 +10,8 @@
+ 
+ #include "maths/MathUtilities.h"
+ 
+-#include "kiss_fft.h"
+-#include "kiss_fftr.h"
++#include "ext/kissfft/kiss_fft.h"
++#include "ext/kissfft/tools/kiss_fftr.h"
+ 
+ #include <cmath>
+ 
+diff --git a/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.c b/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.c
+index b8e238b..8adb0f0 100644
+--- a/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.c
++++ b/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.c
+@@ -13,7 +13,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ */
+ 
+ #include "kiss_fftr.h"
+-#include "_kiss_fft_guts.h"
++#include "../_kiss_fft_guts.h"
+ 
+ struct kiss_fftr_state{
+     kiss_fft_cfg substate;
+diff --git a/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.h b/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.h
+index 72e5a57..81d8a8e 100644
+--- a/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.h
++++ b/lib/qm-dsp/ext/kissfft/tools/kiss_fftr.h
+@@ -1,7 +1,7 @@
+ #ifndef KISS_FTR_H
+ #define KISS_FTR_H
+ 
+-#include "kiss_fft.h"
++#include "../kiss_fft.h"
+ #ifdef __cplusplus
+ extern "C" {
+ #endif