Closed #589: Update Speex AEC to the latest version to get multichannel EC

git-svn-id: https://svn.pjsip.org/repos/pjproject/trunk@6129 74dad513-b988-da41-8d7b-12977e46ad98
This commit is contained in:
Sauw Ming 2020-01-09 09:05:50 +00:00
parent 3147bc0235
commit e98fe43314
25 changed files with 1467 additions and 547 deletions

View File

@ -99,7 +99,7 @@ static struct ec_operations echo_supp_op =
#if defined(PJMEDIA_HAS_SPEEX_AEC) && PJMEDIA_HAS_SPEEX_AEC!=0
static struct ec_operations speex_aec_op =
{
"AEC",
"Speex AEC",
&speex_aec_create,
&speex_aec_destroy,
&speex_aec_reset,

View File

@ -35,10 +35,12 @@
typedef struct speex_ec
{
SpeexEchoState *state;
SpeexPreprocessState *preprocess;
SpeexDecorrState *decorr;
SpeexPreprocessState **preprocess;
unsigned samples_per_frame;
unsigned prefetch;
unsigned channel_count;
unsigned spf_per_channel;
unsigned options;
pj_int16_t *tmp_frame;
} speex_ec;
@ -57,18 +59,20 @@ PJ_DEF(pj_status_t) speex_aec_create(pj_pool_t *pool,
void **p_echo )
{
speex_ec *echo;
int sampling_rate;
int i, sampling_rate;
*p_echo = NULL;
echo = PJ_POOL_ZALLOC_T(pool, speex_ec);
PJ_ASSERT_RETURN(echo != NULL, PJ_ENOMEM);
echo->channel_count = channel_count;
echo->samples_per_frame = samples_per_frame;
echo->spf_per_channel = samples_per_frame / channel_count;
echo->options = options;
#if 0
echo->state = speex_echo_state_init_mc(echo->samples_per_frame,
#if 1
echo->state = speex_echo_state_init_mc(echo->spf_per_channel,
clock_rate * tail_ms / 1000,
channel_count, channel_count);
#else
@ -79,57 +83,63 @@ PJ_DEF(pj_status_t) speex_aec_create(pj_pool_t *pool,
echo->state = speex_echo_state_init(echo->samples_per_frame,
clock_rate * tail_ms / 1000);
#endif
if (echo->state == NULL) {
if (echo->state == NULL)
return PJ_ENOMEM;
echo->decorr = speex_decorrelate_new(clock_rate, channel_count,
echo->spf_per_channel);
if (echo->decorr == NULL)
return PJ_ENOMEM;
}
/* Set sampling rate */
sampling_rate = clock_rate;
speex_echo_ctl(echo->state, SPEEX_ECHO_SET_SAMPLING_RATE,
&sampling_rate);
echo->preprocess = speex_preprocess_state_init(echo->samples_per_frame,
clock_rate);
if (echo->preprocess == NULL) {
speex_echo_state_destroy(echo->state);
return PJ_ENOMEM;
}
/* Disable all preprocessing, we only want echo cancellation */
#if 0
disabled = 0;
enabled = 1;
speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_DENOISE,
&enabled);
speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_AGC,
&disabled);
speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_VAD,
&disabled);
speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_DEREVERB,
&enabled);
#endif
/* Enable/disable AGC & denoise */
{
/* We need to create one state per channel processed. */
echo->preprocess = PJ_POOL_ZALLOC_T(pool, SpeexPreprocessState *);
for (i = 0; i < channel_count; i++) {
spx_int32_t enabled;
echo->preprocess[i] = speex_preprocess_state_init(
echo->spf_per_channel, clock_rate);
if (echo->preprocess[i] == NULL) {
speex_aec_destroy(echo);
return PJ_ENOMEM;
}
/* Enable/disable AGC & denoise */
enabled = PJMEDIA_SPEEX_AEC_USE_AGC;
speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_AGC,
speex_preprocess_ctl(echo->preprocess[i], SPEEX_PREPROCESS_SET_AGC,
&enabled);
enabled = PJMEDIA_SPEEX_AEC_USE_DENOISE;
speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_DENOISE,
speex_preprocess_ctl(echo->preprocess[i],
SPEEX_PREPROCESS_SET_DENOISE, &enabled);
/* Currently, VAD and dereverb are set at default setting. */
/*
enabled = 1;
speex_preprocess_ctl(echo->preprocess[i], SPEEX_PREPROCESS_SET_VAD,
&enabled);
speex_preprocess_ctl(echo->preprocess[i],
SPEEX_PREPROCESS_SET_DEREVERB,
&enabled);
*/
/* Control echo cancellation in the preprocessor */
speex_preprocess_ctl(echo->preprocess[i],
SPEEX_PREPROCESS_SET_ECHO_STATE, echo->state);
}
/* Control echo cancellation in the preprocessor */
speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_ECHO_STATE,
echo->state);
/* Create temporary frame for echo cancellation */
echo->tmp_frame = (pj_int16_t*) pj_pool_zalloc(pool, 2*samples_per_frame);
PJ_ASSERT_RETURN(echo->tmp_frame != NULL, PJ_ENOMEM);
echo->tmp_frame = (pj_int16_t*) pj_pool_zalloc(pool, sizeof(pj_int16_t) *
channel_count *
samples_per_frame);
if (!echo->tmp_frame) {
speex_aec_destroy(echo);
return PJ_ENOMEM;
}
/* Done */
*p_echo = echo;
@ -144,6 +154,7 @@ PJ_DEF(pj_status_t) speex_aec_create(pj_pool_t *pool,
PJ_DEF(pj_status_t) speex_aec_destroy(void *state )
{
speex_ec *echo = (speex_ec*) state;
unsigned i;
PJ_ASSERT_RETURN(echo && echo->state, PJ_EINVAL);
@ -152,8 +163,18 @@ PJ_DEF(pj_status_t) speex_aec_destroy(void *state )
echo->state = NULL;
}
if (echo->decorr) {
speex_decorrelate_destroy(echo->decorr);
echo->decorr = NULL;
}
if (echo->preprocess) {
speex_preprocess_state_destroy(echo->preprocess);
for (i = 0; i < echo->channel_count; i++) {
if (echo->preprocess[i]) {
speex_preprocess_state_destroy(echo->preprocess[i]);
echo->preprocess[i] = NULL;
}
}
echo->preprocess = NULL;
}
@ -181,6 +202,7 @@ PJ_DEF(pj_status_t) speex_aec_cancel_echo( void *state,
void *reserved )
{
speex_ec *echo = (speex_ec*) state;
unsigned i;
/* Sanity checks */
PJ_ASSERT_RETURN(echo && rec_frm && play_frm && options==0 &&
@ -192,8 +214,27 @@ PJ_DEF(pj_status_t) speex_aec_cancel_echo( void *state,
(spx_int16_t*)echo->tmp_frame);
/* Preprocess output */
speex_preprocess_run(echo->preprocess, (spx_int16_t*)echo->tmp_frame);
/* Preprocess output per channel */
for (i = 0; i < echo->channel_count; i++) {
spx_int16_t *buf = (spx_int16_t*)echo->tmp_frame;
unsigned j;
/* De-interleave each channel. */
if (echo->channel_count > 1) {
for (j = 0; j < echo->spf_per_channel; j++) {
rec_frm[j] = echo->tmp_frame[j * echo->channel_count + i];
}
buf = (spx_int16_t*)rec_frm;
}
speex_preprocess_run(echo->preprocess[i], buf);
if (echo->channel_count > 1) {
for (j = 0; j < echo->spf_per_channel; j++) {
echo->tmp_frame[j * echo->channel_count + i] = rec_frm[j];
}
}
}
/* Copy temporary buffer back to original rec_frm */
pjmedia_copy_samples(rec_frm, echo->tmp_frame, echo->samples_per_frame);
@ -213,6 +254,15 @@ PJ_DEF(pj_status_t) speex_aec_playback( void *state,
/* Sanity checks */
PJ_ASSERT_RETURN(echo && play_frm, PJ_EINVAL);
/* Channel decorrelation algorithm is useful for multi-channel echo
* cancellation only .
*/
if (echo->channel_count > 1) {
pjmedia_copy_samples(echo->tmp_frame, play_frm, echo->samples_per_frame);
speex_decorrelate(echo->decorr, (spx_int16_t*)echo->tmp_frame,
(spx_int16_t*)play_frm, 100);
}
speex_echo_playback(echo->state, (spx_int16_t*)play_frm);
return PJ_SUCCESS;
@ -227,6 +277,7 @@ PJ_DEF(pj_status_t) speex_aec_capture( void *state,
unsigned options )
{
speex_ec *echo = (speex_ec*) state;
unsigned i;
/* Sanity checks */
PJ_ASSERT_RETURN(echo && rec_frm, PJ_EINVAL);
@ -234,13 +285,33 @@ PJ_DEF(pj_status_t) speex_aec_capture( void *state,
PJ_UNUSED_ARG(options);
/* Cancel echo */
pjmedia_copy_samples(echo->tmp_frame, rec_frm, echo->samples_per_frame);
speex_echo_capture(echo->state,
(spx_int16_t*)echo->tmp_frame,
(spx_int16_t*)rec_frm);
(spx_int16_t*)rec_frm,
(spx_int16_t*)echo->tmp_frame);
/* Apply preprocessing */
speex_preprocess_run(echo->preprocess, (spx_int16_t*)rec_frm);
/* Apply preprocessing per channel. */
for (i = 0; i < echo->channel_count; i++) {
spx_int16_t *buf = (spx_int16_t*)echo->tmp_frame;
unsigned j;
/* De-interleave each channel. */
if (echo->channel_count > 1) {
for (j = 0; j < echo->spf_per_channel; j++) {
rec_frm[j] = echo->tmp_frame[j * echo->channel_count + i];
}
buf = (spx_int16_t*)rec_frm;
}
speex_preprocess_run(echo->preprocess[i], buf);
if (echo->channel_count > 1) {
for (j = 0; j < echo->spf_per_channel; j++) {
echo->tmp_frame[j * echo->channel_count + i] = rec_frm[j];
}
}
}
pjmedia_copy_samples(rec_frm, echo->tmp_frame, echo->samples_per_frame);
return PJ_SUCCESS;
}

View File

@ -39,7 +39,7 @@ export SPEEX_OBJS = bits.o cb_search.o exc_10_16_table.o \
nb_celp.o preprocess.o \
quant_lsp.o resample.o sb_celp.o smallft.o \
speex.o speex_callbacks.o speex_header.o \
stereo.o vbr.o vq.o window.o
scal.o stereo.o vbr.o vq.o window.o
export SPEEX_CFLAGS = -DHAVE_CONFIG_H $(_CFLAGS)
export SPEEX_LDFLAGS := $(PJLIB_LDLIB) $(_LDFLAGS)

View File

@ -34,7 +34,7 @@
#ifndef SPEEX_BUFFER_H
#define SPEEX_BUFFER_H
#include "speex/speex_types.h"
#include "speexdsp_types.h"
#ifdef __cplusplus
extern "C" {

View File

@ -37,7 +37,7 @@
* This is the acoustic echo canceller module.
* @{
*/
#include "speex/speex_types.h"
#include "speexdsp_types.h"
#ifdef __cplusplus
extern "C" {

View File

@ -41,7 +41,7 @@
* @{
*/
#include "speex/speex_types.h"
#include "speexdsp_types.h"
#ifdef __cplusplus
extern "C" {

View File

@ -43,7 +43,7 @@
* @{
*/
#include "speex/speex_types.h"
#include "speexdsp_types.h"
#ifdef __cplusplus
extern "C" {

View File

@ -82,9 +82,11 @@
#define spx_uint16_t unsigned short
#define spx_uint32_t unsigned int
#define speex_assert(cond)
#else /* OUTSIDE_SPEEX */
#include "speex/speex_types.h"
#include "speexdsp_types.h"
#endif /* OUTSIDE_SPEEX */
@ -104,6 +106,7 @@ enum {
RESAMPLER_ERR_BAD_STATE = 2,
RESAMPLER_ERR_INVALID_ARG = 3,
RESAMPLER_ERR_PTR_OVERLAP = 4,
RESAMPLER_ERR_OVERFLOW = 5,
RESAMPLER_ERR_MAX_ERROR
};
@ -302,12 +305,12 @@ void speex_resampler_set_output_stride(SpeexResamplerState *st,
void speex_resampler_get_output_stride(SpeexResamplerState *st,
spx_uint32_t *stride);
/** Get the latency in input samples introduced by the resampler.
/** Get the latency introduced by the resampler measured in input samples.
* @param st Resampler state
*/
int speex_resampler_get_input_latency(SpeexResamplerState *st);
/** Get the latency in output samples introduced by the resampler.
/** Get the latency introduced by the resampler measured in output samples.
* @param st Resampler state
*/
int speex_resampler_get_output_latency(SpeexResamplerState *st);

View File

@ -0,0 +1,126 @@
/* speexdsp_types.h taken from libogg */
/********************************************************************
* *
* THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function: #ifdef jail to whip a few platforms into the UNIX ideal.
last mod: $Id$
********************************************************************/
/**
@file speexdsp_types.h
@brief Speex types
*/
#ifndef _SPEEX_TYPES_H
#define _SPEEX_TYPES_H
#if defined(_WIN32)
# if defined(__CYGWIN__)
# include <_G_config.h>
typedef _G_int32_t spx_int32_t;
typedef _G_uint32_t spx_uint32_t;
typedef _G_int16_t spx_int16_t;
typedef _G_uint16_t spx_uint16_t;
# elif defined(__MINGW32__)
typedef short spx_int16_t;
typedef unsigned short spx_uint16_t;
typedef int spx_int32_t;
typedef unsigned int spx_uint32_t;
# elif defined(__MWERKS__)
typedef int spx_int32_t;
typedef unsigned int spx_uint32_t;
typedef short spx_int16_t;
typedef unsigned short spx_uint16_t;
# else
/* MSVC/Borland */
typedef __int32 spx_int32_t;
typedef unsigned __int32 spx_uint32_t;
typedef __int16 spx_int16_t;
typedef unsigned __int16 spx_uint16_t;
# endif
#elif defined(__MACOS__)
# include <sys/types.h>
typedef SInt16 spx_int16_t;
typedef UInt16 spx_uint16_t;
typedef SInt32 spx_int32_t;
typedef UInt32 spx_uint32_t;
#elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */
# include <sys/types.h>
typedef int16_t spx_int16_t;
typedef u_int16_t spx_uint16_t;
typedef int32_t spx_int32_t;
typedef u_int32_t spx_uint32_t;
#elif defined(__BEOS__)
/* Be */
# include <inttypes.h>
typedef int16_t spx_int16_t;
typedef u_int16_t spx_uint16_t;
typedef int32_t spx_int32_t;
typedef u_int32_t spx_uint32_t;
#elif defined (__EMX__)
/* OS/2 GCC */
typedef short spx_int16_t;
typedef unsigned short spx_uint16_t;
typedef int spx_int32_t;
typedef unsigned int spx_uint32_t;
#elif defined (DJGPP)
/* DJGPP */
typedef short spx_int16_t;
typedef int spx_int32_t;
typedef unsigned int spx_uint32_t;
#elif defined(R5900)
/* PS2 EE */
typedef int spx_int32_t;
typedef unsigned spx_uint32_t;
typedef short spx_int16_t;
#elif defined(__SYMBIAN32__)
/* Symbian GCC */
typedef signed short spx_int16_t;
typedef unsigned short spx_uint16_t;
typedef signed int spx_int32_t;
typedef unsigned int spx_uint32_t;
#elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
typedef short spx_int16_t;
typedef unsigned short spx_uint16_t;
typedef long spx_int32_t;
typedef unsigned long spx_uint32_t;
#elif defined(CONFIG_TI_C6X)
typedef short spx_int16_t;
typedef unsigned short spx_uint16_t;
typedef int spx_int32_t;
typedef unsigned int spx_uint32_t;
#else
#include "speexdsp_config_types.h"
#endif
#endif /* _SPEEX_TYPES_H */

View File

@ -7,18 +7,18 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -49,7 +49,7 @@
#ifdef FLOATING_POINT
#error You cannot compile as floating point and fixed point at the same time
#endif
#ifdef _USE_SSE
#ifdef USE_SSE
#error SSE is only for floating-point
#endif
#if ((defined (ARM4_ASM)||defined (ARM4_ASM)) && defined(BFIN_ASM)) || (defined (ARM4_ASM)&&defined(ARM5E_ASM))
@ -75,7 +75,7 @@
#endif
#ifndef OUTSIDE_SPEEX
#include "speex/speex_types.h"
#include "speex/speexdsp_types.h"
#endif
#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */
@ -89,7 +89,7 @@
#ifdef FIXED_POINT
typedef spx_int16_t spx_word16_t;
typedef spx_int32_t spx_word32_t;
typedef spx_int32_t spx_word32_t;
typedef spx_word32_t spx_mem_t;
typedef spx_word16_t spx_coef_t;
typedef spx_word16_t spx_lsp_t;
@ -109,6 +109,8 @@ typedef spx_word32_t spx_sig_t;
#define SIG_SHIFT 14
#define GAIN_SHIFT 6
#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
#define VERY_SMALL 0
#define VERY_LARGE32 ((spx_word32_t)2147483647)
#define VERY_LARGE16 ((spx_word16_t)32767)
@ -171,6 +173,7 @@ typedef float spx_word32_t;
#define VSHR32(a,shift) (a)
#define SATURATE16(x,a) (x)
#define SATURATE32(x,a) (x)
#define SATURATE32PSHR(x,shift,a) (x)
#define PSHR(a,shift) (a)
#define SHR(a,shift) (a)
@ -210,18 +213,19 @@ typedef float spx_word32_t;
#define DIV32(a,b) (((spx_word32_t)(a))/(spx_word32_t)(b))
#define PDIV32(a,b) (((spx_word32_t)(a))/(spx_word32_t)(b))
#define WORD2INT(x) ((x) < -32767.5f ? -32768 : \
((x) > 32766.5f ? 32767 : (spx_int16_t)floor(.5 + (x))))
#endif
#if defined (CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
/* 2 on TI C5x DSP */
#define BYTES_PER_CHAR 2
#define BYTES_PER_CHAR 2
#define BITS_PER_CHAR 16
#define LOG2_BITS_PER_CHAR 4
#else
#else
#define BYTES_PER_CHAR 1
#define BITS_PER_CHAR 8

15
third_party/speex/libspeex/bfin.h vendored Normal file
View File

@ -0,0 +1,15 @@
/* Common Blackfin assembly defines
*
* Copyright (C) 2005-2009 Analog Devices
*/
#if __GNUC__ <= 3
/* GCC-3.4 and older did not use hardware loops and thus did not have
* register constraints for declaring clobbers.
*/
# define BFIN_HWLOOP0_REGS
# define BFIN_HWLOOP1_REGS
#else
# define BFIN_HWLOOP0_REGS , "LB0", "LT0", "LC0"
# define BFIN_HWLOOP1_REGS , "LB1", "LT1", "LC1"
#endif

View File

@ -1,5 +1,5 @@
/* Copyright (C) 2007 Jean-Marc Valin
File: buffer.c
This is a very simple ring buffer implementation. It is not thread-safe
so you need to do your own locking.
@ -38,7 +38,7 @@
#include "os_support.h"
#include "arch.h"
#include <speex/speex_buffer.h>
#include "speex/speex_buffer.h"
struct SpeexBuffer_ {
char *data;
@ -99,7 +99,7 @@ EXPORT int speex_buffer_write(SpeexBuffer *st, void *_data, int len)
EXPORT int speex_buffer_writezeros(SpeexBuffer *st, int len)
{
/* This is almost the same as for speex_buffer_write() but using
/* This is almost the same as for speex_buffer_write() but using
SPEEX_MEMSET() instead of SPEEX_COPY(). Update accordingly. */
int end;
int end1;
@ -135,7 +135,7 @@ EXPORT int speex_buffer_read(SpeexBuffer *st, void *_data, int len)
char *data = _data;
if (len > st->available)
{
SPEEX_MEMSET(data+st->available, 0, st->size-st->available);
SPEEX_MEMSET(data+st->available, 0, len - st->available);
len = st->available;
}
end = st->read_ptr + len;

View File

@ -1,23 +1,23 @@
/* Copyright (C) 2005-2006 Jean-Marc Valin
/* Copyright (C) 2005-2006 Jean-Marc Valin
File: fftwrap.c
Wrapper for various FFTs
Wrapper for various FFTs
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -62,7 +62,7 @@ static int maximize_range(spx_word16_t *in, spx_word16_t *out, spx_word16_t boun
for (i=0;i<len;i++)
{
out[i] = SHL16(in[i], shift);
}
}
return shift;
}
@ -165,6 +165,57 @@ void spx_ifft(void *table, spx_word16_t *in, spx_word16_t *out)
DftiComputeBackward(t->desc, in, out);
}
#elif defined(USE_INTEL_IPP)
#include <ipps.h>
struct ipp_fft_config
{
IppsDFTSpec_R_32f *dftSpec;
Ipp8u *buffer;
};
void *spx_fft_init(int size)
{
int bufferSize = 0;
int hint;
struct ipp_fft_config *table;
table = (struct ipp_fft_config *)speex_alloc(sizeof(struct ipp_fft_config));
/* there appears to be no performance difference between ippAlgHintFast and
ippAlgHintAccurate when using the with the floating point version
of the fft. */
hint = ippAlgHintAccurate;
ippsDFTInitAlloc_R_32f(&table->dftSpec, size, IPP_FFT_DIV_FWD_BY_N, hint);
ippsDFTGetBufSize_R_32f(table->dftSpec, &bufferSize);
table->buffer = ippsMalloc_8u(bufferSize);
return table;
}
void spx_fft_destroy(void *table)
{
struct ipp_fft_config *t = (struct ipp_fft_config *)table;
ippsFree(t->buffer);
ippsDFTFree_R_32f(t->dftSpec);
speex_free(t);
}
void spx_fft(void *table, spx_word16_t *in, spx_word16_t *out)
{
struct ipp_fft_config *t = (struct ipp_fft_config *)table;
ippsDFTFwd_RToPack_32f(in, out, t->dftSpec, t->buffer);
}
void spx_ifft(void *table, spx_word16_t *in, spx_word16_t *out)
{
struct ipp_fft_config *t = (struct ipp_fft_config *)table;
ippsDFTInv_PackToR_32f(in, out, t->dftSpec, t->buffer);
}
#elif defined(USE_GPL_FFTW3)
#include <fftw3.h>
@ -219,7 +270,7 @@ void spx_fft(void *table, spx_word16_t *in, spx_word16_t *out)
out[i] = optr[i+1];
}
void spx_ifft(void *table, spx_word16_t *in, spx_word16_t *out)
void spx_ifft(void *table, spx_word16_t *in, spx_word16_t *out)
{
int i;
struct fftw_config *t = (struct fftw_config *) table;
@ -234,7 +285,7 @@ void spx_ifft(void *table, spx_word16_t *in, spx_word16_t *out)
iptr[N+1] = 0.0f;
fftwf_execute(t->ifft);
for(i=0;i<N;++i)
out[i] = optr[i];
}

View File

@ -8,18 +8,18 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -36,6 +36,8 @@
#ifndef FIXED_BFIN_H
#define FIXED_BFIN_H
#include "bfin.h"
#undef PDIV32_16
static inline spx_word16_t PDIV32_16(spx_word32_t a, spx_word16_t b)
{
@ -57,7 +59,7 @@ static inline spx_word16_t PDIV32_16(spx_word32_t a, spx_word16_t b)
"%0 = R0;\n\t"
: "=m" (res)
: "m" (a), "m" (bb)
: "P0", "R0", "R1", "cc");
: "P0", "R0", "R1", "ASTAT" BFIN_HWLOOP0_REGS);
return res;
}
@ -66,9 +68,9 @@ static inline spx_word16_t DIV32_16(spx_word32_t a, spx_word16_t b)
{
spx_word32_t res, bb;
bb = b;
/* Make the roundinf consistent with the C version
/* Make the roundinf consistent with the C version
(do we need to do that?)*/
if (a<0)
if (a<0)
a += (b-1);
__asm__ (
"P0 = 15;\n\t"
@ -84,7 +86,7 @@ static inline spx_word16_t DIV32_16(spx_word32_t a, spx_word16_t b)
"%0 = R0;\n\t"
: "=m" (res)
: "m" (a), "m" (bb)
: "P0", "R0", "R1", "cc");
: "P0", "R0", "R1", "ASTAT" BFIN_HWLOOP0_REGS);
return res;
}
@ -98,6 +100,7 @@ static inline spx_word16_t MAX16(spx_word16_t a, spx_word16_t b)
"%0 = MAX(%1,%2);"
: "=d" (res)
: "%d" (a), "d" (b)
: "ASTAT"
);
return res;
}
@ -113,7 +116,7 @@ static inline spx_word32_t MULT16_32_Q15(spx_word16_t a, spx_word32_t b)
"%0 = (A1 += %2.L*%1.H) ;\n\t"
: "=&W" (res), "=&d" (b)
: "d" (a), "1" (b)
: "A1"
: "A1", "ASTAT"
);
return res;
}
@ -130,7 +133,7 @@ static inline spx_word32_t MAC16_32_Q15(spx_word32_t c, spx_word16_t a, spx_word
"%0 = %0 + %4;\n\t"
: "=&W" (res), "=&d" (b)
: "d" (a), "1" (b), "d" (c)
: "A1"
: "A1", "ASTAT"
);
return res;
}
@ -147,7 +150,7 @@ static inline spx_word32_t MULT16_32_Q14(spx_word16_t a, spx_word32_t b)
"%0 = (A1 += %1.L*%2.H);\n\t"
: "=W" (res), "=d" (a), "=d" (b)
: "1" (a), "2" (b)
: "A1"
: "A1", "ASTAT"
);
return res;
}
@ -165,7 +168,7 @@ static inline spx_word32_t MAC16_32_Q14(spx_word32_t c, spx_word16_t a, spx_word
"%0 = %0 + %4;\n\t"
: "=&W" (res), "=&d" (b)
: "d" (a), "1" (b), "d" (c)
: "A1"
: "A1", "ASTAT"
);
return res;
}

View File

@ -7,18 +7,18 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -52,6 +52,10 @@
#define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
#define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
#define SATURATE32PSHR(x,shift,a) (((x)>=(SHL32(a,shift))) ? (a) : \
(x)<=-(SHL32(a,shift)) ? -(a) : \
(PSHR32(x, shift)))
#define SHR(a,shift) ((a) >> (shift))
#define SHL(a,shift) ((spx_word32_t)(a) << (shift))
#define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift))

View File

@ -55,8 +55,6 @@ TODO:
#include "arch.h"
#include <speex/speex.h>
#include <speex/speex_bits.h>
#include <speex/speex_jitter.h>
#include "os_support.h"
@ -466,7 +464,6 @@ EXPORT int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, s
{
int i;
unsigned int j;
int incomplete = 0;
spx_int16_t opt;
if (start_offset != NULL)
@ -571,7 +568,6 @@ EXPORT int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, s
if (found)
{
i=besti;
incomplete = 1;
/*fprintf (stderr, "incomplete: %d %d %d %d\n", jitter->packets[i].timestamp, jitter->pointer_timestamp, chunk_size, jitter->packets[i].span);*/
}
}

View File

@ -7,18 +7,18 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -168,11 +168,11 @@ static inline spx_word16_t spx_acos(spx_word16_t x)
x = NEG16(x);
}
x = SUB16(16384,x);
x = x >> 1;
sq = MULT16_16_Q13(x, ADD16(A1, MULT16_16_Q13(x, ADD16(A2, MULT16_16_Q13(x, (A3))))));
ret = spx_sqrt(SHL32(EXTEND32(sq),13));
/*ret = spx_sqrt(67108864*(-1.6129e-04 + 2.0104e+00*f + 2.7373e-01*f*f + 1.8136e-01*f*f*f));*/
if (s)
ret = SUB16(25736,ret);
@ -208,7 +208,7 @@ static inline spx_word16_t spx_cos(spx_word16_t x)
static inline spx_word16_t _spx_cos_pi_2(spx_word16_t x)
{
spx_word16_t x2;
x2 = MULT16_16_P15(x,x);
return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2))))))));
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2003-2006 Jean-Marc Valin
/* Copyright (C) 2003-2008 Jean-Marc Valin
File: mdf.c
Echo canceller based on the MDF algorithm (see below)
@ -33,36 +33,36 @@
/*
The echo canceller is based on the MDF algorithm described in:
J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
February 1990.
We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
double-talk is achieved using a variable learning rate as described in:
Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
Cancellation With Double-Talk. IEEE Transactions on Audio,
Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
There is no explicit double-talk detection, but a continuous variation
in the learning rate based on residual echo, double-talk and background
noise.
About the fixed-point version:
All the signals are represented with 16-bit words. The filter weights
All the signals are represented with 16-bit words. The filter weights
are represented with 32-bit words, but only the top 16 bits are used
in most cases. The lower 16 bits are completely unreliable (due to the
fact that the update is done only on the top bits), but help in the
adaptation -- probably by removing a "threshold effect" due to
quantization (rounding going to zero) when the gradient is small.
Another kludge that seems to work good: when performing the weight
update, we only move half the way toward the "goal" this seems to
reduce the effect of quantization noise in the update phase. This
can be seen as applying a gradient descent on a "soft constraint"
instead of having a hard constraint.
*/
#ifdef HAVE_CONFIG_H
@ -131,13 +131,15 @@ struct SpeexEchoState_ {
int adapted;
int saturated;
int screwed_up;
int C; /** Number of input channels (microphones) */
int K; /** Number of output channels (loudspeakers) */
spx_int32_t sampling_rate;
spx_word16_t spec_average;
spx_word16_t beta0;
spx_word16_t beta_max;
spx_word32_t sum_adapt;
spx_word16_t leak_estimate;
spx_word16_t *e; /* scratch */
spx_word16_t *x; /* Far-end input buffer (2N) */
spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */
@ -171,10 +173,10 @@ struct SpeexEchoState_ {
spx_word16_t *window;
spx_word16_t *prop;
void *fft_table;
spx_word16_t memX, memD, memE;
spx_word16_t *memX, *memD, *memE;
spx_word16_t preemph;
spx_word16_t notch_radius;
spx_mem_t notch_mem[2];
spx_mem_t *notch_mem;
/* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
spx_int16_t *play_buf;
@ -182,7 +184,7 @@ struct SpeexEchoState_ {
int play_buf_started;
};
static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem)
static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
{
int i;
spx_word16_t den2;
@ -190,11 +192,11 @@ static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius,
den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
#else
den2 = radius*radius + .7*(1-radius)*(1-radius);
#endif
#endif
/*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
for (i=0;i<len;i++)
{
spx_word16_t vin = in[i];
spx_word16_t vin = in[i*stride];
spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
#ifdef FIXED_POINT
mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
@ -234,6 +236,18 @@ static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N
ps[j]=MULT16_16(X[i],X[i]);
}
/** Compute power spectrum of a half-complex (packed) vector and accumulate */
static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
{
int i, j;
ps[0]+=MULT16_16(X[0],X[0]);
for (i=1,j=1;i<N-1;i+=2,j++)
{
ps[j] += MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
}
ps[j]+=MULT16_16(X[i],X[i]);
}
/** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
#ifdef FIXED_POINT
static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
@ -330,16 +344,17 @@ static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_fl
prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
}
static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop)
static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
{
int i, j;
int i, j, p;
spx_word16_t max_sum = 1;
spx_word32_t prop_sum = 1;
for (i=0;i<M;i++)
{
spx_word32_t tmp = 1;
for (j=0;j<N;j++)
tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18)));
for (p=0;p<P;p++)
for (j=0;j<N;j++)
tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
#ifdef FIXED_POINT
/* Just a security in case an overflow were to occur */
tmp = MIN32(ABS32(tmp), 536870912);
@ -378,11 +393,20 @@ static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const sp
#endif
/** Creates a new echo canceller state */
SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
{
int i,N,M;
return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
}
EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
{
int i,N,M, C, K;
SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
st->K = nb_speakers;
st->C = nb_mic;
C=st->C;
K=st->K;
#ifdef DUMP_ECHO_CANCEL_DATA
if (rFile || pFile || oFile)
speex_fatal("Opening dump files twice");
@ -390,7 +414,7 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
pFile = fopen("aec_play.sw", "wb");
oFile = fopen("aec_out.sw", "wb");
#endif
st->frame_size = frame_size;
st->window_size = 2*frame_size;
N = st->window_size;
@ -412,24 +436,24 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
st->leak_estimate = 0;
st->fft_table = spx_fft_init(N);
st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t));
st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t));
st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t));
st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
#ifdef TWO_PATH
st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t));
st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
#endif
st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
@ -450,7 +474,7 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
#endif
for (i=0;i<=st->frame_size;i++)
st->power_1[i] = FLOAT_ONE;
for (i=0;i<N*M;i++)
for (i=0;i<N*M*K*C;i++)
st->W[i] = 0;
{
spx_word32_t sum = 0;
@ -465,11 +489,13 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
}
for (i=M-1;i>=0;i--)
{
st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum);
st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
}
}
st->memX=st->memD=st->memE=0;
st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
st->preemph = QCONST16(.9,15);
if (st->sampling_rate<12000)
st->notch_radius = QCONST16(.9, 15);
@ -478,30 +504,32 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
else
st->notch_radius = QCONST16(.992, 15);
st->notch_mem[0] = st->notch_mem[1] = 0;
st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
st->adapted = 0;
st->Pey = st->Pyy = FLOAT_ONE;
#ifdef TWO_PATH
st->Davg1 = st->Davg2 = 0;
st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
#endif
st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
st->play_buf_started = 0;
return st;
}
/** Resets echo canceller state */
void speex_echo_state_reset(SpeexEchoState *st)
EXPORT void speex_echo_state_reset(SpeexEchoState *st)
{
int i, M, N;
int i, M, N, C, K;
st->cancel_count=0;
st->screwed_up = 0;
N = st->window_size;
M = st->M;
C=st->C;
K=st->K;
for (i=0;i<N*M;i++)
st->W[i] = 0;
#ifdef TWO_PATH
@ -521,13 +549,20 @@ void speex_echo_state_reset(SpeexEchoState *st)
{
st->last_y[i] = 0;
}
for (i=0;i<N;i++)
for (i=0;i<N*C;i++)
{
st->E[i] = 0;
}
for (i=0;i<N*K;i++)
{
st->x[i] = 0;
}
st->notch_mem[0] = st->notch_mem[1] = 0;
st->memX=st->memD=st->memE=0;
for (i=0;i<2*C;i++)
st->notch_mem[i] = 0;
for (i=0;i<C;i++)
st->memD[i]=st->memE[i]=0;
for (i=0;i<K;i++)
st->memX[i]=0;
st->saturated = 0;
st->adapted = 0;
@ -545,7 +580,7 @@ void speex_echo_state_reset(SpeexEchoState *st)
}
/** Destroys an echo canceller state */
void speex_echo_state_destroy(SpeexEchoState *st)
EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
{
spx_fft_destroy(st->fft_table);
@ -576,9 +611,14 @@ void speex_echo_state_destroy(SpeexEchoState *st)
#ifdef FIXED_POINT
speex_free(st->wtmp2);
#endif
speex_free(st->memX);
speex_free(st->memD);
speex_free(st->memE);
speex_free(st->notch_mem);
speex_free(st->play_buf);
speex_free(st);
#ifdef DUMP_ECHO_CANCEL_DATA
fclose(rFile);
fclose(pFile);
@ -587,7 +627,7 @@ void speex_echo_state_destroy(SpeexEchoState *st)
#endif
}
void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
{
int i;
/*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
@ -610,7 +650,7 @@ void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t
}
}
void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
{
/*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
if (!st->play_buf_started)
@ -637,16 +677,16 @@ void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
}
/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
{
speex_echo_cancellation(st, in, far_end, out);
}
/** Performs echo cancellation on a frame */
void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
{
int i,j;
int N,M;
int i,j, chan, speak;
int N,M, C, K;
spx_word32_t Syy,See,Sxx,Sdd, Sff;
#ifdef TWO_PATH
spx_word32_t Dbf;
@ -658,9 +698,12 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
spx_float_t alpha, alpha_1;
spx_word16_t RER;
spx_word32_t tmp32;
N = st->window_size;
M = st->M;
C = st->C;
K = st->K;
st->cancel_count++;
#ifdef FIXED_POINT
ss=DIV32_16(11469,M);
@ -670,157 +713,198 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
ss_1 = 1-ss;
#endif
/* Apply a notch filter to make sure DC doesn't end up causing problems */
filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem);
/* Copy input data to buffer and apply pre-emphasis */
for (i=0;i<st->frame_size;i++)
for (chan = 0; chan < C; chan++)
{
spx_word32_t tmp32;
tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX)));
/* Apply a notch filter to make sure DC doesn't end up causing problems */
filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
/* Copy input data to buffer and apply pre-emphasis */
/* Copy input data to buffer */
for (i=0;i<st->frame_size;i++)
{
spx_word32_t tmp32;
/* FIXME: This core has changed a bit, need to merge properly */
tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
#ifdef FIXED_POINT
/* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */
if (tmp32 > 32767)
{
tmp32 = 32767;
st->saturated = M+1;
}
if (tmp32 < -32767)
{
tmp32 = -32767;
st->saturated = M+1;
}
if (tmp32 > 32767)
{
tmp32 = 32767;
if (st->saturated == 0)
st->saturated = 1;
}
if (tmp32 < -32767)
{
tmp32 = -32767;
if (st->saturated == 0)
st->saturated = 1;
}
#endif
st->x[i+st->frame_size] = EXTRACT16(tmp32);
st->memX = far_end[i];
tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD)));
#ifdef FIXED_POINT
if (tmp32 > 32767)
{
tmp32 = 32767;
if (st->saturated == 0)
st->saturated = 1;
}
if (tmp32 < -32767)
{
tmp32 = -32767;
if (st->saturated == 0)
st->saturated = 1;
st->memD[chan] = st->input[chan*st->frame_size+i];
st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
}
#endif
st->memD = st->input[i];
st->input[i] = tmp32;
}
/* Shift memory: this could be optimized eventually*/
for (j=M-1;j>=0;j--)
for (speak = 0; speak < K; speak++)
{
for (i=0;i<N;i++)
st->X[(j+1)*N+i] = st->X[j*N+i];
for (i=0;i<st->frame_size;i++)
{
spx_word32_t tmp32;
st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
#ifdef FIXED_POINT
/*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
if (tmp32 > 32767)
{
tmp32 = 32767;
st->saturated = M+1;
}
if (tmp32 < -32767)
{
tmp32 = -32767;
st->saturated = M+1;
}
#endif
st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
st->memX[speak] = far_end[i*K+speak];
}
}
/* Convert x (far end) to frequency domain */
spx_fft(st->fft_table, st->x, &st->X[0]);
for (i=0;i<N;i++)
st->last_y[i] = st->x[i];
Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size);
for (i=0;i<st->frame_size;i++)
st->x[i] = st->x[i+st->frame_size];
/* From here on, the top part of x is used as scratch space */
for (speak = 0; speak < K; speak++)
{
/* Shift memory: this could be optimized eventually*/
for (j=M-1;j>=0;j--)
{
for (i=0;i<N;i++)
st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
}
/* Convert x (echo input) to frequency domain */
spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
}
Sxx = 0;
for (speak = 0; speak < K; speak++)
{
Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
power_spectrum_accum(st->X+speak*N, st->Xf, N);
}
Sff = 0;
for (chan = 0; chan < C; chan++)
{
#ifdef TWO_PATH
/* Compute foreground filter */
spectral_mul_accum16(st->X, st->foreground, st->Y, N, M);
spx_ifft(st->fft_table, st->Y, st->e);
for (i=0;i<st->frame_size;i++)
st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]);
Sff = mdf_inner_prod(st->e, st->e, st->frame_size);
/* Compute foreground filter */
spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
for (i=0;i<st->frame_size;i++)
st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
#endif
}
/* Adjust proportional adaption rate */
mdf_adjust_prop (st->W, N, M, st->prop);
/* FIXME: Adjust that for C, K*/
if (st->adapted)
mdf_adjust_prop (st->W, N, M, C*K, st->prop);
/* Compute weight gradient */
if (st->saturated == 0)
{
for (j=M-1;j>=0;j--)
for (chan = 0; chan < C; chan++)
{
weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N);
for (i=0;i<N;i++)
st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]);
for (speak = 0; speak < K; speak++)
{
for (j=M-1;j>=0;j--)
{
weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
for (i=0;i<N;i++)
st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
}
}
}
} else {
st->saturated--;
}
/* FIXME: MC conversion required */
/* Update weight to prevent circular convolution (MDF / AUMDF) */
for (j=0;j<M;j++)
for (chan = 0; chan < C; chan++)
{
/* This is a variant of the Alternatively Updated MDF (AUMDF) */
/* Remove the "if" to make this an MDF filter */
if (j==0 || st->cancel_count%(M-1) == j-1)
for (speak = 0; speak < K; speak++)
{
for (j=0;j<M;j++)
{
/* This is a variant of the Alternatively Updated MDF (AUMDF) */
/* Remove the "if" to make this an MDF filter */
if (j==0 || st->cancel_count%(M-1) == j-1)
{
#ifdef FIXED_POINT
for (i=0;i<N;i++)
st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16));
spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
for (i=0;i<st->frame_size;i++)
{
st->wtmp[i]=0;
}
for (i=st->frame_size;i<N;i++)
{
st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
}
spx_fft(st->fft_table, st->wtmp, st->wtmp2);
/* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
for (i=0;i<N;i++)
st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
for (i=0;i<N;i++)
st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
for (i=0;i<st->frame_size;i++)
{
st->wtmp[i]=0;
}
for (i=st->frame_size;i<N;i++)
{
st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
}
spx_fft(st->fft_table, st->wtmp, st->wtmp2);
/* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
for (i=0;i<N;i++)
st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
#else
spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
for (i=st->frame_size;i<N;i++)
{
st->wtmp[i]=0;
}
spx_fft(st->fft_table, st->wtmp, &st->W[j*N]);
spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
for (i=st->frame_size;i<N;i++)
{
st->wtmp[i]=0;
}
spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
#endif
}
}
}
}
/* Compute filter response Y */
spectral_mul_accum(st->X, st->W, st->Y, N, M);
spx_ifft(st->fft_table, st->Y, st->y);
/* So we can use power_spectrum_accum */
for (i=0;i<=st->frame_size;i++)
st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
Dbf = 0;
See = 0;
#ifdef TWO_PATH
/* Difference in response, this is used to estimate the variance of our residual power estimate */
for (i=0;i<st->frame_size;i++)
st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]);
Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size);
for (chan = 0; chan < C; chan++)
{
spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
for (i=0;i<st->frame_size;i++)
st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
for (i=0;i<st->frame_size;i++)
st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
}
#endif
for (i=0;i<st->frame_size;i++)
st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]);
See = mdf_inner_prod(st->e, st->e, st->frame_size);
#ifndef TWO_PATH
Sff = See;
#endif
#ifdef TWO_PATH
/* Logic for updating the foreground filter */
/* For two time windows, compute the mean of the energy difference, as well as the variance */
st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
/* Equivalent float code:
st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
*/
update_foreground = 0;
/* Check if we have a statistically significant reduction in the residual echo */
/* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
@ -830,18 +914,19 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
update_foreground = 1;
else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
update_foreground = 1;
/* Do we update? */
if (update_foreground)
{
st->Davg1 = st->Davg2 = 0;
st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
/* Copy background filter to foreground filter */
for (i=0;i<N*M;i++)
for (i=0;i<N*M*C*K;i++)
st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
/* Apply a smooth transition so as to not introduce blocking artifacts */
for (i=0;i<st->frame_size;i++)
st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]);
for (chan = 0; chan < C; chan++)
for (i=0;i<st->frame_size;i++)
st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
} else {
int reset_background=0;
/* Otherwise, check if the background filter is significantly worse */
@ -854,13 +939,16 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
if (reset_background)
{
/* Copy foreground filter to background filter */
for (i=0;i<N*M;i++)
for (i=0;i<N*M*C*K;i++)
st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
/* We also need to copy the output so as to get correct adaptation */
for (i=0;i<st->frame_size;i++)
st->y[i+st->frame_size] = st->e[i+st->frame_size];
for (i=0;i<st->frame_size;i++)
st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]);
for (chan = 0; chan < C; chan++)
{
for (i=0;i<st->frame_size;i++)
st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
for (i=0;i<st->frame_size;i++)
st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
}
See = Sff;
st->Davg1 = st->Davg2 = 0;
st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
@ -868,50 +956,60 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
}
#endif
/* Compute error signal (for the output with de-emphasis) */
for (i=0;i<st->frame_size;i++)
Sey = Syy = Sdd = 0;
for (chan = 0; chan < C; chan++)
{
spx_word32_t tmp_out;
#ifdef TWO_PATH
tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size]));
#else
tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size]));
#endif
/* Saturation */
if (tmp_out>32767)
tmp_out = 32767;
else if (tmp_out<-32768)
tmp_out = -32768;
tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE)));
/* This is an arbitrary test for saturation in the microphone signal */
if (in[i] <= -32000 || in[i] >= 32000)
/* Compute error signal (for the output with de-emphasis) */
for (i=0;i<st->frame_size;i++)
{
tmp_out = 0;
spx_word32_t tmp_out;
#ifdef TWO_PATH
tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
#else
tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
#endif
tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
/* This is an arbitrary test for saturation in the microphone signal */
if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
{
if (st->saturated == 0)
st->saturated = 1;
}
out[i*C+chan] = WORD2INT(tmp_out);
st->memE[chan] = tmp_out;
}
out[i] = (spx_int16_t)tmp_out;
st->memE = tmp_out;
}
#ifdef DUMP_ECHO_CANCEL_DATA
dump_audio(in, far_end, out, st->frame_size);
dump_audio(in, far_end, out, st->frame_size);
#endif
/* Compute error signal (filter update version) */
for (i=0;i<st->frame_size;i++)
{
st->e[i+st->frame_size] = st->e[i];
st->e[i] = 0;
/* Compute error signal (filter update version) */
for (i=0;i<st->frame_size;i++)
{
st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
st->e[chan*N+i] = 0;
}
/* Compute a bunch of correlations */
/* FIXME: bad merge */
Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
/* Convert error to frequency domain */
spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
for (i=0;i<st->frame_size;i++)
st->y[i+chan*N] = 0;
spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
/* Compute power spectrum of echo (X), error (E) and filter response (Y) */
power_spectrum_accum(st->E+chan*N, st->Rf, N);
power_spectrum_accum(st->Y+chan*N, st->Yf, N);
}
/* Compute a bunch of correlations */
Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size);
Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
Sdd = mdf_inner_prod(st->input, st->input, st->frame_size);
/*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
/* Do some sanity check */
if (!(Syy>=0 && Sxx>=0 && See >= 0)
#ifndef FIXED_POINT
@ -921,7 +1019,7 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
{
/* Things have gone really bad */
st->screwed_up += 50;
for (i=0;i<st->frame_size;i++)
for (i=0;i<st->frame_size*C;i++)
out[i] = 0;
} else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
{
@ -941,35 +1039,16 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
/* Add a small noise floor to make sure not to have problems when dividing */
See = MAX32(See, SHR32(MULT16_16(N, 100),6));
/* Convert error to frequency domain */
spx_fft(st->fft_table, st->e, st->E);
for (i=0;i<st->frame_size;i++)
st->y[i] = 0;
spx_fft(st->fft_table, st->y, st->Y);
for (speak = 0; speak < K; speak++)
{
Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
power_spectrum_accum(st->X+speak*N, st->Xf, N);
}
/* Compute power spectrum of far end (X), error (E) and filter response (Y) */
power_spectrum(st->E, st->Rf, N);
power_spectrum(st->Y, st->Yf, N);
power_spectrum(st->X, st->Xf, N);
/* Smooth far end energy estimate over time */
for (j=0;j<=st->frame_size;j++)
st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
/* Enable this to compute the power based only on the tail (would need to compute more
efficiently to make this really useful */
if (0)
{
float scale2 = .5f/M;
for (j=0;j<=st->frame_size;j++)
st->power[j] = 100;
for (i=0;i<M;i++)
{
power_spectrum(&st->X[i*N], st->Xf, N);
for (j=0;j<=st->frame_size;j++)
st->power[j] += scale2*st->Xf[j];
}
}
/* Compute filtered spectra and (cross-)correlations */
for (j=st->frame_size;j>=0;j--)
@ -987,7 +1066,7 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
#endif
}
Pyy = FLOAT_SQRT(Pyy);
Pey = FLOAT_DIVU(Pey,Pyy);
@ -1015,7 +1094,7 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
else
st->leak_estimate = SHL16(st->leak_estimate,1);
/*printf ("%f\n", st->leak_estimate);*/
/* Compute Residual to Error Ratio */
#ifdef FIXED_POINT
tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
@ -1071,7 +1150,7 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
/* Temporary adaption rate if filter is not yet adapted enough */
spx_word16_t adapt_rate=0;
if (Sxx > SHR32(MULT16_16(N, 1000),6))
if (Sxx > SHR32(MULT16_16(N, 1000),6))
{
tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
#ifdef FIXED_POINT
@ -1091,12 +1170,12 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
}
/* Save residual echo so it can be used by the nonlinear processor */
/* FIXME: MC conversion required */
for (i=0;i<st->frame_size;i++)
st->last_y[i] = st->last_y[st->frame_size+i];
if (st->adapted)
{
/* If the filter is adapted, take the filtered echo */
for (i=0;i<st->frame_size;i++)
st->last_y[i] = st->last_y[st->frame_size+i];
for (i=0;i<st->frame_size;i++)
st->last_y[st->frame_size+i] = in[i]-out[i];
} else {
@ -1113,17 +1192,17 @@ void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, in
int i;
spx_word16_t leak2;
int N;
N = st->window_size;
/* Apply hanning window (should pre-compute it)*/
for (i=0;i<N;i++)
st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
/* Compute power spectrum of the echo */
spx_fft(st->fft_table, st->y, st->Y);
power_spectrum(st->Y, residual_echo, N);
#ifdef FIXED_POINT
if (st->leak_estimate > 16383)
leak2 = 32767;
@ -1138,14 +1217,14 @@ void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, in
/* Estimate residual echo */
for (i=0;i<=st->frame_size;i++)
residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
}
int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
{
switch(request)
{
case SPEEX_ECHO_GET_FRAME_SIZE:
(*(int*)ptr) = st->frame_size;
break;
@ -1169,6 +1248,29 @@ int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
case SPEEX_ECHO_GET_SAMPLING_RATE:
(*(int*)ptr) = st->sampling_rate;
break;
case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
/*FIXME: Implement this for multiple channels */
*((spx_int32_t *)ptr) = st->M * st->frame_size;
break;
case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
{
int M = st->M, N = st->window_size, n = st->frame_size, i, j;
spx_int32_t *filt = (spx_int32_t *) ptr;
for(j=0;j<M;j++)
{
/*FIXME: Implement this for multiple channels */
#ifdef FIXED_POINT
for (i=0;i<N;i++)
st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
#else
spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
#endif
for(i=0;i<n;i++)
filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
}
}
break;
default:
speex_warning_int("Unknown speex_echo_ctl request: ", request);
return -1;

View File

@ -1,25 +1,25 @@
/* Copyright (C) 2005 Analog Devices */
/**
@file misc_bfin.h
@author Jean-Marc Valin
@author Jean-Marc Valin
@brief Various compatibility routines for Speex (Blackfin version)
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -33,6 +33,8 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "bfin.h"
#define OVERRIDE_SPEEX_MOVE
void *speex_move (void *dest, void *src, int n)
{
@ -48,7 +50,7 @@ void *speex_move (void *dest, void *src, int n)
"[%1++] = R0;\n\t"
: "=a" (src), "=a" (dest)
: "a" ((n>>2)-1), "0" (src), "1" (dest)
: "R0", "I0", "L0", "memory"
: "R0", "I0", "L0", "memory" BFIN_HWLOOP0_REGS
);
return dest;
}

View File

@ -1,5 +1,5 @@
/* Copyright (C) 2007 Jean-Marc Valin
File: os_support.h
This is the (tiny) OS abstraction layer. Aside from math.h, this is the
only place where system headers are allowed.
@ -45,12 +45,12 @@
#include "os_support_custom.h"
#endif
/** Speex wrapper for calloc. To do your own dynamic allocation, all you need to do is replace this function, speex_realloc and speex_free
/** Speex wrapper for calloc. To do your own dynamic allocation, all you need to do is replace this function, speex_realloc and speex_free
NOTE: speex_alloc needs to CLEAR THE MEMORY */
#ifndef OVERRIDE_SPEEX_ALLOC
static inline void *speex_alloc (int size)
{
/* WARNING: this is not equivalent to malloc(). If you want to use malloc()
/* WARNING: this is not equivalent to malloc(). If you want to use malloc()
or your own allocator, YOU NEED TO CLEAR THE MEMORY ALLOCATED. Otherwise
you will experience strange bugs */
return calloc(size,1);
@ -90,18 +90,18 @@ static inline void speex_free_scratch (void *ptr)
}
#endif
/** Copy n bytes of memory from src to dst. The 0* term provides compile-time type checking */
/** Copy n elements from src to dst. The 0* term provides compile-time type checking */
#ifndef OVERRIDE_SPEEX_COPY
#define SPEEX_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
#endif
/** Copy n bytes of memory from src to dst, allowing overlapping regions. The 0* term
/** Copy n elements from src to dst, allowing overlapping regions. The 0* term
provides compile-time type checking */
#ifndef OVERRIDE_SPEEX_MOVE
#define SPEEX_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
#endif
/** Set n bytes of memory to value of c, starting at address s */
/** For n elements worth of memory, set every byte to the value of c, starting at address dst */
#ifndef OVERRIDE_SPEEX_MEMSET
#define SPEEX_MEMSET(dst, c, n) (memset((dst), (c), (n)*sizeof(*(dst))))
#endif

View File

@ -1,6 +1,6 @@
/* Copyright (C) 2007-2008 Jean-Marc Valin
Copyright (C) 2008 Thorvald Natvig
File: resample.c
Arbitrary resampling code
@ -38,22 +38,22 @@
- Low memory requirement
- Good *perceptual* quality (and not best SNR)
Warning: This resampler is relatively new. Although I think I got rid of
Warning: This resampler is relatively new. Although I think I got rid of
all the major bugs and I don't expect the API to change anymore, there
may be something I've missed. So use with caution.
This algorithm is based on this original resampling algorithm:
Smith, Julius O. Digital Audio Resampling Home Page
Center for Computer Research in Music and Acoustics (CCRMA),
Center for Computer Research in Music and Acoustics (CCRMA),
Stanford University, 2007.
Web published at http://www-ccrma.stanford.edu/~jos/resample/.
Web published at https://ccrma.stanford.edu/~jos/resample/.
There is one main difference, though. This resampler uses cubic
There is one main difference, though. This resampler uses cubic
interpolation instead of linear interpolation in the above paper. This
makes the table much smaller and makes it possible to compute that table
on a per-stream basis. In turn, being able to tweak the table for each
stream makes it possible to both reduce complexity on simple ratios
(e.g. 2/3), and get rid of the rounding operations in the inner loop.
on a per-stream basis. In turn, being able to tweak the table for each
stream makes it possible to both reduce complexity on simple ratios
(e.g. 2/3), and get rid of the rounding operations in the inner loop.
The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
*/
@ -63,31 +63,28 @@
#ifdef OUTSIDE_SPEEX
#include <stdlib.h>
static void *speex_alloc (int size) {return calloc(size,1);}
static void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);}
static void speex_free (void *ptr) {free(ptr);}
static void *speex_alloc(int size) {return calloc(size,1);}
static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);}
static void speex_free(void *ptr) {free(ptr);}
#ifndef EXPORT
#define EXPORT
#endif
#include "speex_resampler.h"
#include "arch.h"
#else /* OUTSIDE_SPEEX */
#include "speex/speex_resampler.h"
#include "arch.h"
#include "os_support.h"
#endif /* OUTSIDE_SPEEX */
#include "stack_alloc.h"
#include <math.h>
#include <limits.h>
#ifndef M_PI
#define M_PI 3.14159263
#define M_PI 3.14159265358979323846
#endif
#ifdef FIXED_POINT
#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
#else
#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
#endif
#define IMAX(a,b) ((a) > (b) ? (a) : (b))
#define IMIN(a,b) ((a) < (b) ? (a) : (b))
@ -95,10 +92,18 @@ static void speex_free (void *ptr) {free(ptr);}
#define NULL 0
#endif
#ifdef _USE_SSE
#ifndef UINT32_MAX
#define UINT32_MAX 4294967295U
#endif
#ifdef USE_SSE
#include "resample_sse.h"
#endif
#ifdef USE_NEON
#include "resample_neon.h"
#endif
/* Numer of elements to allocate on the stack */
#ifdef VAR_ARRAYS
#define FIXED_STACK_ALLOC 8192
@ -113,7 +118,7 @@ struct SpeexResamplerState_ {
spx_uint32_t out_rate;
spx_uint32_t num_rate;
spx_uint32_t den_rate;
int quality;
spx_uint32_t nb_channels;
spx_uint32_t filt_len;
@ -125,22 +130,22 @@ struct SpeexResamplerState_ {
spx_uint32_t oversample;
int initialised;
int started;
/* These are per-channel */
spx_int32_t *last_sample;
spx_uint32_t *samp_frac_num;
spx_uint32_t *magic_samples;
spx_word16_t *mem;
spx_word16_t *sinc_table;
spx_uint32_t sinc_table_length;
resampler_basic_func resampler_ptr;
int in_stride;
int out_stride;
} ;
static double kaiser12_table[68] = {
static const double kaiser12_table[68] = {
0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076,
0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014,
0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601,
@ -154,7 +159,7 @@ static double kaiser12_table[68] = {
0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734,
0.00001000, 0.00000000};
/*
static double kaiser12_table[36] = {
static const double kaiser12_table[36] = {
0.99440475, 1.00000000, 0.99440475, 0.97779076, 0.95066529, 0.91384741,
0.86843014, 0.81573067, 0.75723148, 0.69451601, 0.62920216, 0.56287762,
0.49704014, 0.43304576, 0.37206735, 0.31506490, 0.26276832, 0.21567274,
@ -162,7 +167,7 @@ static double kaiser12_table[36] = {
0.03111947, 0.02127838, 0.01402878, 0.00886058, 0.00531256, 0.00298291,
0.00153438, 0.00069463, 0.00025272, 0.0000527734, 0.00000500, 0.00000000};
*/
static double kaiser10_table[36] = {
static const double kaiser10_table[36] = {
0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446,
0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347,
0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962,
@ -170,15 +175,15 @@ static double kaiser10_table[36] = {
0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739,
0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000};
static double kaiser8_table[36] = {
static const double kaiser8_table[36] = {
0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200,
0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126,
0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272,
0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000};
static double kaiser6_table[36] = {
static const double kaiser6_table[36] = {
0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565,
0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561,
@ -187,32 +192,30 @@ static double kaiser6_table[36] = {
0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000};
struct FuncDef {
double *table;
const double *table;
int oversample;
};
static struct FuncDef _KAISER12 = {kaiser12_table, 64};
#define KAISER12 (&_KAISER12)
/*static struct FuncDef _KAISER12 = {kaiser12_table, 32};
#define KAISER12 (&_KAISER12)*/
static struct FuncDef _KAISER10 = {kaiser10_table, 32};
#define KAISER10 (&_KAISER10)
static struct FuncDef _KAISER8 = {kaiser8_table, 32};
#define KAISER8 (&_KAISER8)
static struct FuncDef _KAISER6 = {kaiser6_table, 32};
#define KAISER6 (&_KAISER6)
static const struct FuncDef kaiser12_funcdef = {kaiser12_table, 64};
#define KAISER12 (&kaiser12_funcdef)
static const struct FuncDef kaiser10_funcdef = {kaiser10_table, 32};
#define KAISER10 (&kaiser10_funcdef)
static const struct FuncDef kaiser8_funcdef = {kaiser8_table, 32};
#define KAISER8 (&kaiser8_funcdef)
static const struct FuncDef kaiser6_funcdef = {kaiser6_table, 32};
#define KAISER6 (&kaiser6_funcdef)
struct QualityMapping {
int base_length;
int oversample;
float downsample_bandwidth;
float upsample_bandwidth;
struct FuncDef *window_func;
const struct FuncDef *window_func;
};
/* This table maps conversion quality to internal parameters. There are two
reasons that explain why the up-sampling bandwidth is larger than the
reasons that explain why the up-sampling bandwidth is larger than the
down-sampling bandwidth:
1) When up-sampling, we can assume that the spectrum is already attenuated
close to the Nyquist rate (from an A/D or a previous resampling filter)
@ -234,11 +237,11 @@ static const struct QualityMapping quality_map[11] = {
{256, 32, 0.975f, 0.975f, KAISER12}, /* Q10 */ /* 96.6% cutoff (~100 dB stop) 10 */
};
/*8,24,40,56,80,104,128,160,200,256,320*/
static double compute_func(float x, struct FuncDef *func)
static double compute_func(float x, const struct FuncDef *func)
{
float y, frac;
double interp[4];
int ind;
int ind;
y = x*func->oversample;
ind = (int)floor(y);
frac = (y-ind);
@ -249,7 +252,7 @@ static double compute_func(float x, struct FuncDef *func)
interp[0] = -0.3333333333*frac + 0.5*(frac*frac) - 0.1666666667*(frac*frac*frac);
/* Just to make sure we don't have rounding problems */
interp[1] = 1.f-interp[3]-interp[2]-interp[0];
/*sum = frac*accum[1] + (1-frac)*accum[2];*/
return interp[0]*func->table[ind] + interp[1]*func->table[ind+1] + interp[2]*func->table[ind+2] + interp[3]*func->table[ind+3];
}
@ -269,7 +272,7 @@ int main(int argc, char **argv)
#ifdef FIXED_POINT
/* The slow way of computing a sinc for the table. Should improve that some day */
static spx_word16_t sinc(float cutoff, float x, int N, struct FuncDef *window_func)
static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef *window_func)
{
/*fprintf (stderr, "%f ", x);*/
float xx = x * cutoff;
@ -282,7 +285,7 @@ static spx_word16_t sinc(float cutoff, float x, int N, struct FuncDef *window_fu
}
#else
/* The slow way of computing a sinc for the table. Should improve that some day */
static spx_word16_t sinc(float cutoff, float x, int N, struct FuncDef *window_func)
static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef *window_func)
{
/*fprintf (stderr, "%f ", x);*/
float xx = x * cutoff;
@ -337,28 +340,35 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
const int frac_advance = st->frac_advance;
const spx_uint32_t den_rate = st->den_rate;
spx_word32_t sum;
int j;
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
{
const spx_word16_t *sinc = & sinc_table[samp_frac_num*N];
const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
const spx_word16_t *iptr = & in[last_sample];
#ifndef OVERRIDE_INNER_PRODUCT_SINGLE
float accum[4] = {0,0,0,0};
int j;
sum = 0;
for(j=0;j<N;j++) sum += MULT16_16(sinct[j], iptr[j]);
/* This code is slower on most DSPs which have only 2 accumulators.
Plus this this forces truncation to 32 bits and you lose the HW guard bits.
I think we can trust the compiler and let it vectorize and/or unroll itself.
spx_word32_t accum[4] = {0,0,0,0};
for(j=0;j<N;j+=4) {
accum[0] += sinc[j]*iptr[j];
accum[1] += sinc[j+1]*iptr[j+1];
accum[2] += sinc[j+2]*iptr[j+2];
accum[3] += sinc[j+3]*iptr[j+3];
accum[0] += MULT16_16(sinct[j], iptr[j]);
accum[1] += MULT16_16(sinct[j+1], iptr[j+1]);
accum[2] += MULT16_16(sinct[j+2], iptr[j+2]);
accum[3] += MULT16_16(sinct[j+3], iptr[j+3]);
}
sum = accum[0] + accum[1] + accum[2] + accum[3];
*/
sum = SATURATE32PSHR(sum, 15, 32767);
#else
sum = inner_product_single(sinc, iptr, N);
sum = inner_product_single(sinct, iptr, N);
#endif
out[out_stride * out_sample++] = PSHR32(sum, 15);
out[out_stride * out_sample++] = sum;
last_sample += int_advance;
samp_frac_num += frac_advance;
if (samp_frac_num >= den_rate)
@ -388,25 +398,25 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
const int frac_advance = st->frac_advance;
const spx_uint32_t den_rate = st->den_rate;
double sum;
int j;
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
{
const spx_word16_t *sinc = & sinc_table[samp_frac_num*N];
const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
const spx_word16_t *iptr = & in[last_sample];
#ifndef OVERRIDE_INNER_PRODUCT_DOUBLE
int j;
double accum[4] = {0,0,0,0};
for(j=0;j<N;j+=4) {
accum[0] += sinc[j]*iptr[j];
accum[1] += sinc[j+1]*iptr[j+1];
accum[2] += sinc[j+2]*iptr[j+2];
accum[3] += sinc[j+3]*iptr[j+3];
accum[0] += sinct[j]*iptr[j];
accum[1] += sinct[j+1]*iptr[j+1];
accum[2] += sinct[j+2]*iptr[j+2];
accum[3] += sinct[j+3]*iptr[j+3];
}
sum = accum[0] + accum[1] + accum[2] + accum[3];
#else
sum = inner_product_double(sinc, iptr, N);
sum = inner_product_double(sinct, iptr, N);
#endif
out[out_stride * out_sample++] = PSHR32(sum, 15);
@ -435,7 +445,6 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
const int int_advance = st->int_advance;
const int frac_advance = st->frac_advance;
const spx_uint32_t den_rate = st->den_rate;
int j;
spx_word32_t sum;
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
@ -452,6 +461,7 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
int j;
spx_word32_t accum[4] = {0,0,0,0};
for(j=0;j<N;j++) {
@ -463,13 +473,14 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
}
cubic_coef(frac, interp);
sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
sum = SATURATE32PSHR(sum, 15, 32767);
#else
cubic_coef(frac, interp);
sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
#endif
out[out_stride * out_sample++] = PSHR32(sum,15);
out[out_stride * out_sample++] = sum;
last_sample += int_advance;
samp_frac_num += frac_advance;
if (samp_frac_num >= den_rate)
@ -497,7 +508,6 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
const int int_advance = st->int_advance;
const int frac_advance = st->frac_advance;
const spx_uint32_t den_rate = st->den_rate;
int j;
spx_word32_t sum;
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
@ -514,6 +524,7 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
#ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
int j;
double accum[4] = {0,0,0,0};
for(j=0;j<N;j++) {
@ -530,7 +541,7 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
cubic_coef(frac, interp);
sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
#endif
out[out_stride * out_sample++] = PSHR32(sum,15);
last_sample += int_advance;
samp_frac_num += frac_advance;
@ -547,22 +558,71 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
}
#endif
static void update_filter(SpeexResamplerState *st)
/* This resampler is used to produce zero output in situations where memory
for the filter could not be allocated. The expected numbers of input and
output samples are still processed so that callers failing to check error
codes are not surprised, possibly getting into infinite loops. */
static int resampler_basic_zero(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
{
spx_uint32_t old_length;
old_length = st->filt_len;
int out_sample = 0;
int last_sample = st->last_sample[channel_index];
spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
const int out_stride = st->out_stride;
const int int_advance = st->int_advance;
const int frac_advance = st->frac_advance;
const spx_uint32_t den_rate = st->den_rate;
(void)in;
while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
{
out[out_stride * out_sample++] = 0;
last_sample += int_advance;
samp_frac_num += frac_advance;
if (samp_frac_num >= den_rate)
{
samp_frac_num -= den_rate;
last_sample++;
}
}
st->last_sample[channel_index] = last_sample;
st->samp_frac_num[channel_index] = samp_frac_num;
return out_sample;
}
static int multiply_frac(spx_uint32_t *result, spx_uint32_t value, spx_uint32_t num, spx_uint32_t den)
{
spx_uint32_t major = value / den;
spx_uint32_t remain = value % den;
/* TODO: Could use 64 bits operation to check for overflow. But only guaranteed in C99+ */
if (remain > UINT32_MAX / num || major > UINT32_MAX / num
|| major * num > UINT32_MAX - remain * num / den)
return RESAMPLER_ERR_OVERFLOW;
*result = remain * num / den + major * num;
return RESAMPLER_ERR_SUCCESS;
}
static int update_filter(SpeexResamplerState *st)
{
spx_uint32_t old_length = st->filt_len;
spx_uint32_t old_alloc_size = st->mem_alloc_size;
int use_direct;
spx_uint32_t min_sinc_table_length;
spx_uint32_t min_alloc_size;
st->int_advance = st->num_rate/st->den_rate;
st->frac_advance = st->num_rate%st->den_rate;
st->oversample = quality_map[st->quality].oversample;
st->filt_len = quality_map[st->quality].base_length;
if (st->num_rate > st->den_rate)
{
/* down-sampling */
st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate;
/* FIXME: divide the numerator and denominator by a certain amount if they're too large */
st->filt_len = st->filt_len*st->num_rate / st->den_rate;
/* Round down to make sure we have a multiple of 4 */
st->filt_len &= (~0x3);
if (multiply_frac(&st->filt_len,st->filt_len,st->num_rate,st->den_rate) != RESAMPLER_ERR_SUCCESS)
goto fail;
/* Round up to make sure we have a multiple of 8 for SSE */
st->filt_len = ((st->filt_len-1)&(~0x7))+8;
if (2*st->den_rate < st->num_rate)
st->oversample >>= 1;
if (4*st->den_rate < st->num_rate)
@ -577,18 +637,37 @@ static void update_filter(SpeexResamplerState *st)
/* up-sampling */
st->cutoff = quality_map[st->quality].upsample_bandwidth;
}
#ifdef RESAMPLE_FULL_SINC_TABLE
use_direct = 1;
if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len)
goto fail;
#else
/* Choose the resampling type that requires the least amount of memory */
if (st->den_rate <= st->oversample)
use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
&& INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
#endif
if (use_direct)
{
min_sinc_table_length = st->filt_len*st->den_rate;
} else {
if ((INT_MAX/sizeof(spx_word16_t)-8)/st->oversample < st->filt_len)
goto fail;
min_sinc_table_length = st->filt_len*st->oversample+8;
}
if (st->sinc_table_length < min_sinc_table_length)
{
spx_word16_t *sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,min_sinc_table_length*sizeof(spx_word16_t));
if (!sinc_table)
goto fail;
st->sinc_table = sinc_table;
st->sinc_table_length = min_sinc_table_length;
}
if (use_direct)
{
spx_uint32_t i;
if (!st->sinc_table)
st->sinc_table = (spx_word16_t *)speex_alloc(st->filt_len*st->den_rate*sizeof(spx_word16_t));
else if (st->sinc_table_length < st->filt_len*st->den_rate)
{
st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,st->filt_len*st->den_rate*sizeof(spx_word16_t));
st->sinc_table_length = st->filt_len*st->den_rate;
}
for (i=0;i<st->den_rate;i++)
{
spx_int32_t j;
@ -608,13 +687,6 @@ static void update_filter(SpeexResamplerState *st)
/*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff);*/
} else {
spx_int32_t i;
if (!st->sinc_table)
st->sinc_table = (spx_word16_t *)speex_alloc((st->filt_len*st->oversample+8)*sizeof(spx_word16_t));
else if (st->sinc_table_length < st->filt_len*st->oversample+8)
{
st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,(st->filt_len*st->oversample+8)*sizeof(spx_word16_t));
st->sinc_table_length = st->filt_len*st->oversample+8;
}
for (i=-4;i<(spx_int32_t)(st->oversample*st->filt_len+4);i++)
st->sinc_table[i+4] = sinc(st->cutoff,(i/(float)st->oversample - st->filt_len/2), st->filt_len, quality_map[st->quality].window_func);
#ifdef FIXED_POINT
@ -627,51 +699,47 @@ static void update_filter(SpeexResamplerState *st)
#endif
/*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff);*/
}
st->int_advance = st->num_rate/st->den_rate;
st->frac_advance = st->num_rate%st->den_rate;
/* Here's the place where we update the filter memory to take into account
the change in filter length. It's probably the messiest part of the code
due to handling of lots of corner cases. */
if (!st->mem)
/* Adding buffer_size to filt_len won't overflow here because filt_len
could be multiplied by sizeof(spx_word16_t) above. */
min_alloc_size = st->filt_len-1 + st->buffer_size;
if (min_alloc_size > st->mem_alloc_size)
{
spx_word16_t *mem;
if (INT_MAX/sizeof(spx_word16_t)/st->nb_channels < min_alloc_size)
goto fail;
else if (!(mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*min_alloc_size * sizeof(*mem))))
goto fail;
st->mem = mem;
st->mem_alloc_size = min_alloc_size;
}
if (!st->started)
{
spx_uint32_t i;
st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
st->mem = (spx_word16_t*)speex_alloc(st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
st->mem[i] = 0;
/*speex_warning("init filter");*/
} else if (!st->started)
{
spx_uint32_t i;
st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
st->mem[i] = 0;
/*speex_warning("reinit filter");*/
} else if (st->filt_len > old_length)
{
spx_int32_t i;
spx_uint32_t i;
/* Increase the filter length */
/*speex_warning("increase filter size");*/
int old_alloc_size = st->mem_alloc_size;
if ((st->filt_len-1 + st->buffer_size) > st->mem_alloc_size)
for (i=st->nb_channels;i--;)
{
st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
}
for (i=st->nb_channels-1;i>=0;i--)
{
spx_int32_t j;
spx_uint32_t j;
spx_uint32_t olen = old_length;
/*if (st->magic_samples[i])*/
{
/* Try and remove the magic samples as if nothing had happened */
/* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
olen = old_length + 2*st->magic_samples[i];
for (j=old_length-2+st->magic_samples[i];j>=0;j--)
for (j=old_length-1+st->magic_samples[i];j--;)
st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]] = st->mem[i*old_alloc_size+j];
for (j=0;j<st->magic_samples[i];j++)
st->mem[i*st->mem_alloc_size+j] = 0;
@ -712,7 +780,15 @@ static void update_filter(SpeexResamplerState *st)
st->magic_samples[i] += old_magic;
}
}
return RESAMPLER_ERR_SUCCESS;
fail:
st->resampler_ptr = resampler_basic_zero;
/* st->mem may still contain consumed input samples for the filter.
Restore filt_len so that filt_len - 1 still points to the position after
the last of these samples. */
st->filt_len = old_length;
return RESAMPLER_ERR_ALLOC_FAILED;
}
EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
@ -722,15 +798,22 @@ EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_u
EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
{
spx_uint32_t i;
SpeexResamplerState *st;
if (quality > 10 || quality < 0)
int filter_err;
if (nb_channels == 0 || ratio_num == 0 || ratio_den == 0 || quality > 10 || quality < 0)
{
if (err)
*err = RESAMPLER_ERR_INVALID_ARG;
return NULL;
}
st = (SpeexResamplerState *)speex_alloc(sizeof(SpeexResamplerState));
if (!st)
{
if (err)
*err = RESAMPLER_ERR_ALLOC_FAILED;
return NULL;
}
st->initialised = 0;
st->started = 0;
st->in_rate = 0;
@ -743,40 +826,43 @@ EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels,
st->filt_len = 0;
st->mem = 0;
st->resampler_ptr = 0;
st->cutoff = 1.f;
st->nb_channels = nb_channels;
st->in_stride = 1;
st->out_stride = 1;
#ifdef FIXED_POINT
st->buffer_size = 160;
#else
st->buffer_size = 160;
#endif
/* Per channel data */
st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(int));
st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(int));
st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(int));
for (i=0;i<nb_channels;i++)
{
st->last_sample[i] = 0;
st->magic_samples[i] = 0;
st->samp_frac_num[i] = 0;
}
if (!(st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(spx_int32_t))))
goto fail;
if (!(st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t))))
goto fail;
if (!(st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t))))
goto fail;
speex_resampler_set_quality(st, quality);
speex_resampler_set_rate_frac(st, ratio_num, ratio_den, in_rate, out_rate);
update_filter(st);
st->initialised = 1;
filter_err = update_filter(st);
if (filter_err == RESAMPLER_ERR_SUCCESS)
{
st->initialised = 1;
} else {
speex_resampler_destroy(st);
st = NULL;
}
if (err)
*err = RESAMPLER_ERR_SUCCESS;
*err = filter_err;
return st;
fail:
if (err)
*err = RESAMPLER_ERR_ALLOC_FAILED;
speex_resampler_destroy(st);
return NULL;
}
EXPORT void speex_resampler_destroy(SpeexResamplerState *st)
@ -796,17 +882,17 @@ static int speex_resampler_process_native(SpeexResamplerState *st, spx_uint32_t
int out_sample = 0;
spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
spx_uint32_t ilen;
st->started = 1;
/* Call the right resampler through the function ptr */
out_sample = st->resampler_ptr(st, channel_index, mem, in_len, out, out_len);
if (st->last_sample[channel_index] < (spx_int32_t)*in_len)
*in_len = st->last_sample[channel_index];
*out_len = out_sample;
st->last_sample[channel_index] -= *in_len;
ilen = *in_len;
for(j=0;j<N-1;++j)
@ -819,11 +905,11 @@ static int speex_resampler_magic(SpeexResamplerState *st, spx_uint32_t channel_i
spx_uint32_t tmp_in_len = st->magic_samples[channel_index];
spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
const int N = st->filt_len;
speex_resampler_process_native(st, channel_index, &tmp_in_len, *out, &out_len);
st->magic_samples[channel_index] -= tmp_in_len;
/* If we couldn't process all "magic" input samples, save the rest for next time */
if (st->magic_samples[channel_index])
{
@ -849,13 +935,13 @@ EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t c
const spx_uint32_t xlen = st->mem_alloc_size - filt_offs;
const int istride = st->in_stride;
if (st->magic_samples[channel_index])
if (st->magic_samples[channel_index])
olen -= speex_resampler_magic(st, channel_index, &out, olen);
if (! st->magic_samples[channel_index]) {
while (ilen && olen) {
spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
spx_uint32_t ochunk = olen;
if (in) {
for(j=0;j<ichunk;++j)
x[j+filt_offs]=in[j*istride];
@ -873,7 +959,7 @@ EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t c
}
*in_len -= ilen;
*out_len -= olen;
return RESAMPLER_ERR_SUCCESS;
return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
}
#ifdef FIXED_POINT
@ -891,15 +977,14 @@ EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t cha
const spx_uint32_t xlen = st->mem_alloc_size - (st->filt_len - 1);
#ifdef VAR_ARRAYS
const unsigned int ylen = (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC;
VARDECL(spx_word16_t *ystack);
ALLOC(ystack, ylen, spx_word16_t);
spx_word16_t ystack[ylen];
#else
const unsigned int ylen = FIXED_STACK_ALLOC;
spx_word16_t ystack[FIXED_STACK_ALLOC];
#endif
st->out_stride = 1;
while (ilen && olen) {
spx_word16_t *y = ystack;
spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
@ -936,7 +1021,7 @@ EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t cha
#else
out[j*ostride_save] = WORD2INT(ystack[j]);
#endif
ilen -= ichunk;
olen -= ochunk;
out += (ochunk+omagic) * ostride_save;
@ -947,20 +1032,22 @@ EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t cha
*in_len -= ilen;
*out_len -= olen;
return RESAMPLER_ERR_SUCCESS;
return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
}
EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
{
spx_uint32_t i;
int istride_save, ostride_save;
spx_uint32_t bak_len = *out_len;
spx_uint32_t bak_out_len = *out_len;
spx_uint32_t bak_in_len = *in_len;
istride_save = st->in_stride;
ostride_save = st->out_stride;
st->in_stride = st->out_stride = st->nb_channels;
for (i=0;i<st->nb_channels;i++)
{
*out_len = bak_len;
*out_len = bak_out_len;
*in_len = bak_in_len;
if (in != NULL)
speex_resampler_process_float(st, i, in+i, in_len, out+i, out_len);
else
@ -968,20 +1055,22 @@ EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, co
}
st->in_stride = istride_save;
st->out_stride = ostride_save;
return RESAMPLER_ERR_SUCCESS;
return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
}
EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
{
spx_uint32_t i;
int istride_save, ostride_save;
spx_uint32_t bak_len = *out_len;
spx_uint32_t bak_out_len = *out_len;
spx_uint32_t bak_in_len = *in_len;
istride_save = st->in_stride;
ostride_save = st->out_stride;
st->in_stride = st->out_stride = st->nb_channels;
for (i=0;i<st->nb_channels;i++)
{
*out_len = bak_len;
*out_len = bak_out_len;
*in_len = bak_in_len;
if (in != NULL)
speex_resampler_process_int(st, i, in+i, in_len, out+i, out_len);
else
@ -989,7 +1078,7 @@ EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, cons
}
st->in_stride = istride_save;
st->out_stride = ostride_save;
return RESAMPLER_ERR_SUCCESS;
return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
}
EXPORT int speex_resampler_set_rate(SpeexResamplerState *st, spx_uint32_t in_rate, spx_uint32_t out_rate)
@ -1003,42 +1092,55 @@ EXPORT void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_r
*out_rate = st->out_rate;
}
static inline spx_uint32_t compute_gcd(spx_uint32_t a, spx_uint32_t b)
{
while (b != 0)
{
spx_uint32_t temp = a;
a = b;
b = temp % b;
}
return a;
}
EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate)
{
spx_uint32_t fact;
spx_uint32_t old_den;
spx_uint32_t i;
if (ratio_num == 0 || ratio_den == 0)
return RESAMPLER_ERR_INVALID_ARG;
if (st->in_rate == in_rate && st->out_rate == out_rate && st->num_rate == ratio_num && st->den_rate == ratio_den)
return RESAMPLER_ERR_SUCCESS;
old_den = st->den_rate;
st->in_rate = in_rate;
st->out_rate = out_rate;
st->num_rate = ratio_num;
st->den_rate = ratio_den;
/* FIXME: This is terribly inefficient, but who cares (at least for now)? */
for (fact=2;fact<=IMIN(st->num_rate, st->den_rate);fact++)
{
while ((st->num_rate % fact == 0) && (st->den_rate % fact == 0))
{
st->num_rate /= fact;
st->den_rate /= fact;
}
}
fact = compute_gcd(st->num_rate, st->den_rate);
st->num_rate /= fact;
st->den_rate /= fact;
if (old_den > 0)
{
for (i=0;i<st->nb_channels;i++)
{
st->samp_frac_num[i]=st->samp_frac_num[i]*st->den_rate/old_den;
if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS)
return RESAMPLER_ERR_OVERFLOW;
/* Safety net */
if (st->samp_frac_num[i] >= st->den_rate)
st->samp_frac_num[i] = st->den_rate-1;
}
}
if (st->initialised)
update_filter(st);
return update_filter(st);
return RESAMPLER_ERR_SUCCESS;
}
@ -1056,7 +1158,7 @@ EXPORT int speex_resampler_set_quality(SpeexResamplerState *st, int quality)
return RESAMPLER_ERR_SUCCESS;
st->quality = quality;
if (st->initialised)
update_filter(st);
return update_filter(st);
return RESAMPLER_ERR_SUCCESS;
}
@ -1106,6 +1208,12 @@ EXPORT int speex_resampler_skip_zeros(SpeexResamplerState *st)
EXPORT int speex_resampler_reset_mem(SpeexResamplerState *st)
{
spx_uint32_t i;
for (i=0;i<st->nb_channels;i++)
{
st->last_sample[i] = 0;
st->magic_samples[i] = 0;
st->samp_frac_num[i] = 0;
}
for (i=0;i<st->nb_channels*(st->filt_len-1);i++)
st->mem[i] = 0;
return RESAMPLER_ERR_SUCCESS;

View File

@ -0,0 +1,339 @@
/* Copyright (C) 2007-2008 Jean-Marc Valin
* Copyright (C) 2008 Thorvald Natvig
* Copyright (C) 2011 Texas Instruments
* author Jyri Sarha
*/
/**
@file resample_neon.h
@brief Resampler functions (NEON version)
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef FIXED_POINT
#if defined(__aarch64__)
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
int32_t ret;
asm ("fmov s0, %w[a]\n"
"sqxtn h0, s0\n"
"sxtl v0.4s, v0.4h\n"
"fmov %w[ret], s0\n"
: [ret] "=r" (ret)
: [a] "r" (a)
: "v0" );
return ret;
}
#elif defined(__thumb2__)
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
int32_t ret;
asm ("ssat %[ret], #16, %[a]"
: [ret] "=r" (ret)
: [a] "r" (a)
: );
return ret;
}
#else
static inline int32_t saturate_32bit_to_16bit(int32_t a) {
int32_t ret;
asm ("vmov.s32 d0[0], %[a]\n"
"vqmovn.s32 d0, q0\n"
"vmov.s16 %[ret], d0[0]\n"
: [ret] "=r" (ret)
: [a] "r" (a)
: "q0");
return ret;
}
#endif
#undef WORD2INT
#define WORD2INT(x) (saturate_32bit_to_16bit(x))
#define OVERRIDE_INNER_PRODUCT_SINGLE
/* Only works when len % 4 == 0 and len >= 4 */
#if defined(__aarch64__)
static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
{
int32_t ret;
uint32_t remainder = len % 16;
len = len - remainder;
asm volatile (" cmp %w[len], #0\n"
" b.ne 1f\n"
" ld1 {v16.4h}, [%[b]], #8\n"
" ld1 {v20.4h}, [%[a]], #8\n"
" subs %w[remainder], %w[remainder], #4\n"
" smull v0.4s, v16.4h, v20.4h\n"
" b.ne 4f\n"
" b 5f\n"
"1:"
" ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [%[b]], #32\n"
" ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%[a]], #32\n"
" subs %w[len], %w[len], #16\n"
" smull v0.4s, v16.4h, v20.4h\n"
" smlal v0.4s, v17.4h, v21.4h\n"
" smlal v0.4s, v18.4h, v22.4h\n"
" smlal v0.4s, v19.4h, v23.4h\n"
" b.eq 3f\n"
"2:"
" ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [%[b]], #32\n"
" ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%[a]], #32\n"
" subs %w[len], %w[len], #16\n"
" smlal v0.4s, v16.4h, v20.4h\n"
" smlal v0.4s, v17.4h, v21.4h\n"
" smlal v0.4s, v18.4h, v22.4h\n"
" smlal v0.4s, v19.4h, v23.4h\n"
" b.ne 2b\n"
"3:"
" cmp %w[remainder], #0\n"
" b.eq 5f\n"
"4:"
" ld1 {v18.4h}, [%[b]], #8\n"
" ld1 {v22.4h}, [%[a]], #8\n"
" subs %w[remainder], %w[remainder], #4\n"
" smlal v0.4s, v18.4h, v22.4h\n"
" b.ne 4b\n"
"5:"
" saddlv d0, v0.4s\n"
" sqxtn s0, d0\n"
" sqrshrn h0, s0, #15\n"
" sxtl v0.4s, v0.4h\n"
" fmov %w[ret], s0\n"
: [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
[len] "+r" (len), [remainder] "+r" (remainder)
:
: "cc", "v0",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
return ret;
}
#else
static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
{
int32_t ret;
uint32_t remainder = len % 16;
len = len - remainder;
asm volatile (" cmp %[len], #0\n"
" bne 1f\n"
" vld1.16 {d16}, [%[b]]!\n"
" vld1.16 {d20}, [%[a]]!\n"
" subs %[remainder], %[remainder], #4\n"
" vmull.s16 q0, d16, d20\n"
" beq 5f\n"
" b 4f\n"
"1:"
" vld1.16 {d16, d17, d18, d19}, [%[b]]!\n"
" vld1.16 {d20, d21, d22, d23}, [%[a]]!\n"
" subs %[len], %[len], #16\n"
" vmull.s16 q0, d16, d20\n"
" vmlal.s16 q0, d17, d21\n"
" vmlal.s16 q0, d18, d22\n"
" vmlal.s16 q0, d19, d23\n"
" beq 3f\n"
"2:"
" vld1.16 {d16, d17, d18, d19}, [%[b]]!\n"
" vld1.16 {d20, d21, d22, d23}, [%[a]]!\n"
" subs %[len], %[len], #16\n"
" vmlal.s16 q0, d16, d20\n"
" vmlal.s16 q0, d17, d21\n"
" vmlal.s16 q0, d18, d22\n"
" vmlal.s16 q0, d19, d23\n"
" bne 2b\n"
"3:"
" cmp %[remainder], #0\n"
" beq 5f\n"
"4:"
" vld1.16 {d16}, [%[b]]!\n"
" vld1.16 {d20}, [%[a]]!\n"
" subs %[remainder], %[remainder], #4\n"
" vmlal.s16 q0, d16, d20\n"
" bne 4b\n"
"5:"
" vaddl.s32 q0, d0, d1\n"
" vadd.s64 d0, d0, d1\n"
" vqmovn.s64 d0, q0\n"
" vqrshrn.s32 d0, q0, #15\n"
" vmov.s16 %[ret], d0[0]\n"
: [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
[len] "+r" (len), [remainder] "+r" (remainder)
:
: "cc", "q0",
"d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23");
return ret;
}
#endif // !defined(__aarch64__)
#elif defined(FLOATING_POINT)
#if defined(__aarch64__)
static inline int32_t saturate_float_to_16bit(float a) {
int32_t ret;
asm ("fcvtas s1, %s[a]\n"
"sqxtn h1, s1\n"
"sxtl v1.4s, v1.4h\n"
"fmov %w[ret], s1\n"
: [ret] "=r" (ret)
: [a] "w" (a)
: "v1");
return ret;
}
#else
static inline int32_t saturate_float_to_16bit(float a) {
int32_t ret;
asm ("vmov.f32 d0[0], %[a]\n"
"vcvt.s32.f32 d0, d0, #15\n"
"vqrshrn.s32 d0, q0, #15\n"
"vmov.s16 %[ret], d0[0]\n"
: [ret] "=r" (ret)
: [a] "r" (a)
: "q0");
return ret;
}
#endif
#undef WORD2INT
#define WORD2INT(x) (saturate_float_to_16bit(x))
#define OVERRIDE_INNER_PRODUCT_SINGLE
/* Only works when len % 4 == 0 and len >= 4 */
#if defined(__aarch64__)
static inline float inner_product_single(const float *a, const float *b, unsigned int len)
{
float ret;
uint32_t remainder = len % 16;
len = len - remainder;
asm volatile (" cmp %w[len], #0\n"
" b.ne 1f\n"
" ld1 {v16.4s}, [%[b]], #16\n"
" ld1 {v20.4s}, [%[a]], #16\n"
" subs %w[remainder], %w[remainder], #4\n"
" fmul v1.4s, v16.4s, v20.4s\n"
" b.ne 4f\n"
" b 5f\n"
"1:"
" ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[b]], #64\n"
" ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[a]], #64\n"
" subs %w[len], %w[len], #16\n"
" fmul v1.4s, v16.4s, v20.4s\n"
" fmul v2.4s, v17.4s, v21.4s\n"
" fmul v3.4s, v18.4s, v22.4s\n"
" fmul v4.4s, v19.4s, v23.4s\n"
" b.eq 3f\n"
"2:"
" ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[b]], #64\n"
" ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[a]], #64\n"
" subs %w[len], %w[len], #16\n"
" fmla v1.4s, v16.4s, v20.4s\n"
" fmla v2.4s, v17.4s, v21.4s\n"
" fmla v3.4s, v18.4s, v22.4s\n"
" fmla v4.4s, v19.4s, v23.4s\n"
" b.ne 2b\n"
"3:"
" fadd v16.4s, v1.4s, v2.4s\n"
" fadd v17.4s, v3.4s, v4.4s\n"
" cmp %w[remainder], #0\n"
" fadd v1.4s, v16.4s, v17.4s\n"
" b.eq 5f\n"
"4:"
" ld1 {v18.4s}, [%[b]], #16\n"
" ld1 {v22.4s}, [%[a]], #16\n"
" subs %w[remainder], %w[remainder], #4\n"
" fmla v1.4s, v18.4s, v22.4s\n"
" b.ne 4b\n"
"5:"
" faddp v1.4s, v1.4s, v1.4s\n"
" faddp %[ret].4s, v1.4s, v1.4s\n"
: [ret] "=w" (ret), [a] "+r" (a), [b] "+r" (b),
[len] "+r" (len), [remainder] "+r" (remainder)
:
: "cc", "v1", "v2", "v3", "v4",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
return ret;
}
#else
static inline float inner_product_single(const float *a, const float *b, unsigned int len)
{
float ret;
uint32_t remainder = len % 16;
len = len - remainder;
asm volatile (" cmp %[len], #0\n"
" bne 1f\n"
" vld1.32 {q4}, [%[b]]!\n"
" vld1.32 {q8}, [%[a]]!\n"
" subs %[remainder], %[remainder], #4\n"
" vmul.f32 q0, q4, q8\n"
" bne 4f\n"
" b 5f\n"
"1:"
" vld1.32 {q4, q5}, [%[b]]!\n"
" vld1.32 {q8, q9}, [%[a]]!\n"
" vld1.32 {q6, q7}, [%[b]]!\n"
" vld1.32 {q10, q11}, [%[a]]!\n"
" subs %[len], %[len], #16\n"
" vmul.f32 q0, q4, q8\n"
" vmul.f32 q1, q5, q9\n"
" vmul.f32 q2, q6, q10\n"
" vmul.f32 q3, q7, q11\n"
" beq 3f\n"
"2:"
" vld1.32 {q4, q5}, [%[b]]!\n"
" vld1.32 {q8, q9}, [%[a]]!\n"
" vld1.32 {q6, q7}, [%[b]]!\n"
" vld1.32 {q10, q11}, [%[a]]!\n"
" subs %[len], %[len], #16\n"
" vmla.f32 q0, q4, q8\n"
" vmla.f32 q1, q5, q9\n"
" vmla.f32 q2, q6, q10\n"
" vmla.f32 q3, q7, q11\n"
" bne 2b\n"
"3:"
" vadd.f32 q4, q0, q1\n"
" vadd.f32 q5, q2, q3\n"
" cmp %[remainder], #0\n"
" vadd.f32 q0, q4, q5\n"
" beq 5f\n"
"4:"
" vld1.32 {q6}, [%[b]]!\n"
" vld1.32 {q10}, [%[a]]!\n"
" subs %[remainder], %[remainder], #4\n"
" vmla.f32 q0, q6, q10\n"
" bne 4b\n"
"5:"
" vadd.f32 d0, d0, d1\n"
" vpadd.f32 d0, d0, d0\n"
" vmov.f32 %[ret], d0[0]\n"
: [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
[len] "+l" (len), [remainder] "+l" (remainder)
:
: "cc", "q0", "q1", "q2", "q3",
"q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11");
return ret;
}
#endif // defined(__aarch64__)
#endif

View File

@ -9,18 +9,18 @@
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -71,7 +71,7 @@ static inline float interpolate_product_single(const float *a, const float *b, u
return ret;
}
#ifdef _USE_SSE2
#ifdef USE_SSE2
#include <emmintrin.h>
#define OVERRIDE_INNER_PRODUCT_DOUBLE
@ -91,7 +91,7 @@ static inline double inner_product_double(const float *a, const float *b, unsign
sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
}
sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
_mm_store_sd(&ret, sum);
return ret;
}
@ -120,7 +120,7 @@ static inline double interpolate_product_double(const float *a, const float *b,
sum1 = _mm_mul_pd(f1, sum1);
sum2 = _mm_mul_pd(f2, sum2);
sum = _mm_add_pd(sum1, sum2);
sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
_mm_store_sd(&ret, sum);
return ret;
}

View File

@ -52,6 +52,10 @@ The algorithm implemented here is described in:
#include <math.h>
#include <stdlib.h>
#ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */
#endif
#define ALLPASS_ORDER 20
struct SpeexDecorrState_ {
@ -171,7 +175,6 @@ EXPORT void speex_decorrelate(SpeexDecorrState *st, const spx_int16_t *in, spx_i
buff[i+st->frame_size] = in[i*st->channels+ch];
x = buff+st->frame_size;
beta = 1.-.3*amount*amount;
if (amount>1)
beta = 1-sqrt(.4*amount);
else

View File

@ -0,0 +1,93 @@
/* Copyright (C) 2007 Jean-Marc Valin
File: testresample2.c
Testing the resampling code
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "speex/speex_resampler.h"
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#define PERIOD 32
#define INBLOCK 1024
#define RATE 48000
int main()
{
spx_uint32_t i;
float *fin, *fout;
int rate = 1000, off = 0, avail = INBLOCK;
SpeexResamplerState *st = speex_resampler_init(1, RATE, RATE, 4, NULL);
speex_resampler_set_rate(st, RATE, rate);
speex_resampler_skip_zeros(st);
fin = malloc(INBLOCK*2*sizeof(float));
for (i=0; i<INBLOCK*2;i++)
fin[i] = sinf ((float)i/PERIOD * 2 * M_PI) * 0.9;
fout = malloc(INBLOCK*4*sizeof(float));
while (1)
{
spx_uint32_t in_len;
spx_uint32_t out_len;
in_len = avail;
out_len = (in_len * rate + RATE-1) / RATE;
fprintf (stderr, "%d %d %d %d -> ", rate, off, in_len, out_len);
speex_resampler_process_float(st, 0, fin + off, &in_len, fout, &out_len);
fprintf (stderr, "%d %d\n", in_len, out_len);
off += in_len;
avail = avail - in_len + INBLOCK;
if (off >= INBLOCK)
off -= INBLOCK;
fwrite(fout, sizeof(float), out_len, stdout);
rate += 100;
if (rate > 128000)
break;
speex_resampler_set_rate(st, RATE, rate);
}
speex_resampler_destroy(st);
free(fin);
free(fout);
return 0;
}