Skip to content

Commit

Permalink
google_rtc_audio_processing: Support multiple sample formats
Browse files Browse the repository at this point in the history
The processing code is now almost entirely sample-format-independent,
so finish the job:

Add kconfigs to control the sample formats of both the reference and
capture streams.  S32_LE and S16_LE are currently supported.

Use the float32 variant of the AEC library instead of the int16
wrapper.  The internal API has always been floating point; this just
moves the conversion into the wrapper code.  Longer term this is
probably a good thing for multiprocessor performance, as it allows the
conversions to be decoupled from the analysis code and moved to
another component on another core.  (Not in this version, though: it's
still synchronous here.)

Signed-off-by: Andy Ross <andyross@google.com>
  • Loading branch information
andyross committed Dec 10, 2023
1 parent 6157326 commit 0400784
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 41 deletions.
14 changes: 14 additions & 0 deletions src/audio/google/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,18 @@ config GOOGLE_RTC_AUDIO_PROCESSING_MOCK
Mock Google real-time communication audio processing.
It allows for compilation check and basic audio flow checking.

config GOOGLE_RTC_AUDIO_PROCESSING_MIC_BITS
int "Sample format for capture input/output"
default 16
help
Bits in the signed integer sample format used for the
capture stream. Must be 16 or 32; any value other than 16
selects 32-bit samples.

config GOOGLE_RTC_AUDIO_PROCESSING_REF_BITS
int "Sample format for reference input"
default 16
help
Bits in the signed integer sample format used for the playback
reference stream. Must be 16 or 32; any value other than 16
selects 32-bit samples.

endmenu
139 changes: 98 additions & 41 deletions src/audio/google/google_rtc_audio_processing.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@ uint8_t aec_mem_blob[CONFIG_COMP_GOOGLE_RTC_AUDIO_PROCESSING_MEMORY_BUFFER_SIZE_
#define REFOUT_CHAN MAX(REF_CHAN_MAX, MIC_CHAN_MAX)

static __aligned(PLATFORM_DCACHE_ALIGN)
int16_t refoutbuf[sizeof(uint16_t) * NUM_FRAMES * REF_CHAN_MAX];
float refoutbuf[REFOUT_CHAN][NUM_FRAMES];

static __aligned(PLATFORM_DCACHE_ALIGN)
int16_t micbuf[sizeof(uint16_t) * NUM_FRAMES * REFOUT_CHAN];
float micbuf[MIC_CHAN_MAX][NUM_FRAMES];

struct google_rtc_audio_processing_comp_data {
#if CONFIG_IPC_MAJOR_4
Expand All @@ -84,9 +84,9 @@ struct google_rtc_audio_processing_comp_data {
int num_capture_channels;
GoogleRtcAudioProcessingState *state;
int aec_reference_frame_index;
int16_t *raw_mic_buffer;
float *raw_mic_buffers[MIC_CHAN_MAX];
int raw_mic_buffer_frame_index;
int16_t *refout_buffer;
float *refout_buffers[REFOUT_CHAN];
int output_buffer_frame_index;
struct comp_data_blob_handler *tuning_handler;
bool reconfigure;
Expand All @@ -95,6 +95,37 @@ struct google_rtc_audio_processing_comp_data {
struct comp_buffer *ref_comp_buffer;
};

/*
 * Integer sample types and full-scale factors for the capture ("mic")
 * and reference ("ref") streams, selected at build time.  A *_BITS
 * setting of 16 selects int16_t; any other value selects int32_t.
 *
 * The *_SCALE factors relate full-scale integer samples to +/-1.0f.
 * Note that with IEEE-754 single precision (float)INT_MAX is not
 * exactly representable and rounds up to 2^31, one more than the
 * largest int32_t value, so float -> integer conversion must saturate
 * (see float_to_mic()).
 */
#if CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_MIC_BITS == 16
typedef int16_t mic_sample_t;
#define MIC_SCALE ((float)SHRT_MAX)
#define MIC_SAMPLE_MAX SHRT_MAX
#define MIC_SAMPLE_MIN SHRT_MIN
#else
typedef int32_t mic_sample_t;
#define MIC_SCALE ((float)INT_MAX)
#define MIC_SAMPLE_MAX INT_MAX
#define MIC_SAMPLE_MIN INT_MIN
#endif

#if CONFIG_GOOGLE_RTC_AUDIO_PROCESSING_REF_BITS == 16
typedef int16_t ref_sample_t;
#define REF_SCALE ((float)SHRT_MAX)
#else
typedef int32_t ref_sample_t;
#define REF_SCALE ((float)INT_MAX)
#endif

/* Convert one capture-format integer sample to float (x / MIC_SCALE). */
static inline float mic_to_float(mic_sample_t x)
{
	return (1.0f / MIC_SCALE) * (float)x;
}

/*
 * Convert one float sample to the capture integer format
 * (MIC_SCALE * x, truncated toward zero).
 *
 * The result saturates at MIC_SAMPLE_MIN/MIC_SAMPLE_MAX.  Converting a
 * float whose value does not fit the destination integer type is
 * undefined behaviour in C, and the input is not guaranteed to stay
 * within [-1.0, 1.0].  In the 32-bit configuration even x == 1.0f would
 * overflow, because MIC_SCALE is 2^31.  NaN is mapped to 0.
 */
static inline mic_sample_t float_to_mic(float x)
{
	float scaled = MIC_SCALE * x;

	if (scaled >= (float)MIC_SAMPLE_MAX)
		return MIC_SAMPLE_MAX;
	if (scaled <= (float)MIC_SAMPLE_MIN)
		return MIC_SAMPLE_MIN;
	/* Only NaN fails both range checks above and compares unequal to itself */
	if (scaled != scaled)
		return 0;

	return (mic_sample_t)scaled;
}

/* Convert one reference-format integer sample to float (x / REF_SCALE). */
static inline float ref_to_float(ref_sample_t x)
{
	return (1.0f / REF_SCALE) * (float)x;
}

void *GoogleRtcMalloc(size_t size)
{
return rballoc(0, SOF_MEM_CAPS_RAM, size);
Expand Down Expand Up @@ -398,7 +429,7 @@ static int google_rtc_audio_processing_init(struct processing_module *mod)
struct module_data *md = &mod->priv;
struct comp_dev *dev = mod->dev;
struct google_rtc_audio_processing_comp_data *cd;
int ret;
int ret, i;

comp_info(dev, "google_rtc_audio_processing_init()");

Expand Down Expand Up @@ -466,13 +497,10 @@ static int google_rtc_audio_processing_init(struct processing_module *mod)
goto fail;
}

cd->raw_mic_buffer = &micbuf[0];
cd->refout_buffer = &refoutbuf[0];

#ifdef __ZEPHYR__
cd->raw_mic_buffer = arch_xtensa_cached_ptr(cd->raw_mic_buffer);
cd->refout_buffer = &refoutbuf[0];
#endif
for (i = 0; i < MIC_CHAN_MAX; i++)
cd->raw_mic_buffers[i] = arch_xtensa_cached_ptr(&micbuf[i][0]);
for (i = 0; i < REFOUT_CHAN; i++)
cd->refout_buffers[i] = arch_xtensa_cached_ptr(&refoutbuf[i][0]);

cd->raw_mic_buffer_frame_index = 0;
cd->aec_reference_frame_index = 0;
Expand Down Expand Up @@ -583,15 +611,12 @@ static int google_rtc_audio_processing_prepare(struct processing_module *mod,
rate = audio_stream_get_rate(&output->stream);
output_stream_channels = audio_stream_get_channels(&output->stream);

if (cd->num_capture_channels > microphone_stream_channels) {
comp_err(dev, "unsupported number of microphone channels: %d",
microphone_stream_channels);
if (microphone_stream_channels != output_stream_channels)
return -EINVAL;
}

if (cd->num_capture_channels > output_stream_channels) {
comp_err(dev, "unsupported number of output channels: %d",
output_stream_channels);
if (microphone_stream_channels > MIC_CHAN_MAX) {
comp_warn(dev, "Too many mic channels: %d (max %d), truncating",
microphone_stream_channels, MIC_CHAN_MAX);
return -EINVAL;
}

Expand Down Expand Up @@ -649,60 +674,92 @@ static int google_rtc_audio_processing_reset(struct processing_module *mod)
/* FunctionMostlyExistsToKeepLineLengthsUnderControl */
static inline void execute_aec(struct google_rtc_audio_processing_comp_data *cd)
{
/* FIXME: sample/frame format is platform dependent, these are
* hard-configured format APIs and need indirection. Note
* that the calling code in process() is format-independent.
*/
/* Note that reference input and mic output share the same
* buffer for efficiency
*/
GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state,
cd->refout_buffer);
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
cd->raw_mic_buffer,
cd->refout_buffer);
GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
(const float **)cd->refout_buffers);
GoogleRtcAudioProcessingProcessCapture_float32(cd->state,
(const float **)cd->raw_mic_buffers,
cd->refout_buffers);
cd->raw_mic_buffer_frame_index = 0;
}

static void source_copy(struct sof_source *src, int frames, int16_t *dst)
static void mic_in_copy(struct sof_source *src, int frames, float **dst_bufs, int frame0)
{
size_t chan = source_get_channels(src);
size_t chan = MIN(MIC_CHAN_MAX, source_get_channels(src));
size_t samples = frames * chan;
size_t bytes = samples * sizeof(int16_t);
const int16_t *buf, *bufstart, *bufend;
size_t bytes = samples * sizeof(mic_sample_t);
const mic_sample_t *buf, *bufstart, *bufend;
float *dst[MIC_CHAN_MAX];
int i, c, err;
size_t bufsz;

for (i = 0; i < chan; i++)
dst[i] = &dst_bufs[i][frame0];

err = source_get_data(src, bytes, (void *)&buf, (void *)&bufstart, &bufsz);
assert(err == 0);
bufend = &bufstart[bufsz];

for (i = 0; i < frames; i++) {
for (c = 0; c < chan; c++) {
*dst++ = *buf++;
*dst[c]++ = mic_to_float(*buf++);
if (buf >= bufend)
buf = bufstart;
}
}
source_release_data(src, bytes);
}

static void sink_copy(struct sof_sink *sink, int frames, int16_t *src)
/* Nearly verbatim except for types. Needs macro/inlining attention */
static void ref_copy(struct sof_source *src, int frames, float **dst_bufs, int frame0)
{
size_t chan = sink_get_channels(sink);
size_t chan = MIN(REF_CHAN_MAX, source_get_channels(src));
size_t samples = frames * chan;
size_t bytes = samples * sizeof(int16_t);
int16_t *buf, *bufstart, *bufend;
size_t bytes = samples * sizeof(ref_sample_t);
const ref_sample_t *buf, *bufstart, *bufend;
float *dst[REF_CHAN_MAX];
int i, c, err;
size_t bufsz;

for (i = 0; i < chan; i++)
dst[i] = &dst_bufs[i][frame0];

err = source_get_data(src, bytes, (void *)&buf, (void *)&bufstart, &bufsz);
assert(err == 0);
bufend = &bufstart[bufsz];

for (i = 0; i < frames; i++) {
for (c = 0; c < chan; c++) {
*dst[c]++ = ref_to_float(*buf++);
if (buf >= bufend)
buf = bufstart;
}
}
source_release_data(src, bytes);
}

static void mic_out_copy(struct sof_sink *sink, int frames, float **src_bufs)
{
size_t chan = MIN(MIC_CHAN_MAX, sink_get_channels(sink));
size_t samples = frames * chan;
size_t bytes = samples * sizeof(mic_sample_t);
mic_sample_t *buf, *bufstart, *bufend;
int i, c, err;
size_t bufsz;
float *src[MIC_CHAN_MAX];

for (i = 0; i < chan; i++)
src[i] = src_bufs[i];

err = sink_get_buffer(sink, bytes, (void *)&buf, (void *)&bufstart, &bufsz);
assert(err == 0);
bufend = &bufstart[bufsz];

for (i = 0; i < frames; i++) {
for (c = 0; c < chan; c++) {
*buf++ = *src++;
*buf++ = float_to_mic(*src[c]++);
if (buf >= bufend)
buf = bufstart;
}
Expand Down Expand Up @@ -750,16 +807,16 @@ static int mod_process(struct processing_module *mod, struct sof_source **source
int smic = cd->raw_mic_buffer_frame_index * source_get_channels(mic);
int sref = cd->aec_reference_frame_index * source_get_channels(ref);

source_copy(mic, n, &cd->raw_mic_buffer[smic]);
mic_in_copy(mic, n, cd->raw_mic_buffers, smic);

if (ref_ok)
source_copy(ref, n, &cd->refout_buffer[sref]);
ref_copy(ref, n, cd->refout_buffers, sref);

cd->raw_mic_buffer_frame_index += n;

if (cd->raw_mic_buffer_frame_index >= cd->num_frames) {
execute_aec(cd);
sink_copy(out, n, cd->refout_buffer);
mic_out_copy(out, n, cd->refout_buffers);
}
}
return 0;
Expand Down

0 comments on commit 0400784

Please sign in to comment.