Closed #589: Update Speex AEC to the latest version to get multichannel EC

git-svn-id: https://svn.pjsip.org/repos/pjproject/trunk@6129 74dad513-b988-da41-8d7b-12977e46ad98
2020-01-09 09:05:50 +00:00 · 2020-01-09 09:05:50 +00:00 · e98fe43314
parent 3147bc0235
commit e98fe43314
25 changed files with 1467 additions and 547 deletions
--- a/pjmedia/src/pjmedia/echo_common.c
+++ b/pjmedia/src/pjmedia/echo_common.c
@ -99,7 +99,7 @@ static struct ec_operations echo_supp_op =
 #if defined(PJMEDIA_HAS_SPEEX_AEC) && PJMEDIA_HAS_SPEEX_AEC!=0
 static struct ec_operations speex_aec_op = 
 {
-    "AEC",
+    "Speex AEC",
    &speex_aec_create,
    &speex_aec_destroy,
    &speex_aec_reset,
--- a/pjmedia/src/pjmedia/echo_speex.c
+++ b/pjmedia/src/pjmedia/echo_speex.c
@ -35,10 +35,12 @@
 typedef struct speex_ec
 {
    SpeexEchoState	 *state;
-    SpeexPreprocessState *preprocess;
+    SpeexDecorrState 	 *decorr;
+    SpeexPreprocessState **preprocess;

    unsigned		  samples_per_frame;
-    unsigned		  prefetch;
+    unsigned		  channel_count;
+    unsigned		  spf_per_channel;
    unsigned		  options;
    pj_int16_t		 *tmp_frame;
 } speex_ec;
@ -57,18 +59,20 @@ PJ_DEF(pj_status_t) speex_aec_create(pj_pool_t *pool,
 				     void **p_echo )
 {
    speex_ec *echo;
-    int sampling_rate;
+    int i, sampling_rate;

    *p_echo = NULL;

    echo = PJ_POOL_ZALLOC_T(pool, speex_ec);
    PJ_ASSERT_RETURN(echo != NULL, PJ_ENOMEM);

+    echo->channel_count = channel_count;
    echo->samples_per_frame = samples_per_frame;
+    echo->spf_per_channel = samples_per_frame / channel_count;
    echo->options = options;

-#if 0
-    echo->state = speex_echo_state_init_mc(echo->samples_per_frame,
+#if 1
+    echo->state = speex_echo_state_init_mc(echo->spf_per_channel,
 					   clock_rate * tail_ms / 1000,
 					   channel_count, channel_count);
 #else
@ -79,57 +83,63 @@ PJ_DEF(pj_status_t) speex_aec_create(pj_pool_t *pool,
    echo->state = speex_echo_state_init(echo->samples_per_frame,
    					clock_rate * tail_ms / 1000);
 #endif
-    if (echo->state == NULL) {
+    if (echo->state == NULL)
+	return PJ_ENOMEM;
+
+    echo->decorr = speex_decorrelate_new(clock_rate, channel_count,
+    					 echo->spf_per_channel);
+    if (echo->decorr == NULL)
 	return PJ_ENOMEM;
-    }

    /* Set sampling rate */
    sampling_rate = clock_rate;
    speex_echo_ctl(echo->state, SPEEX_ECHO_SET_SAMPLING_RATE, 
 		   &sampling_rate);

-    echo->preprocess = speex_preprocess_state_init(echo->samples_per_frame,
-						   clock_rate);
-    if (echo->preprocess == NULL) {
-	speex_echo_state_destroy(echo->state);
-	return PJ_ENOMEM;
-    }
-
-    /* Disable all preprocessing, we only want echo cancellation */
-#if 0
-    disabled = 0;
-    enabled = 1;
-    speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_DENOISE, 
-			 &enabled);
-    speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_AGC, 
-			 &disabled);
-    speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_VAD, 
-			 &disabled);
-    speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_DEREVERB, 
-			 &enabled);
-#endif
-
-    /* Enable/disable AGC & denoise */
-    {
+    /* We need to create one state per channel processed. */
+    echo->preprocess = PJ_POOL_ZALLOC_T(pool, SpeexPreprocessState *);
+    for (i = 0; i < channel_count; i++) {
 	spx_int32_t enabled;

+    	echo->preprocess[i] = speex_preprocess_state_init(
+    				  echo->spf_per_channel, clock_rate);
+    	if (echo->preprocess[i] == NULL) {
+    	    speex_aec_destroy(echo);
+	    return PJ_ENOMEM;
+	}
+
+    	/* Enable/disable AGC & denoise */
 	enabled = PJMEDIA_SPEEX_AEC_USE_AGC;
-	speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_AGC, 
+	speex_preprocess_ctl(echo->preprocess[i], SPEEX_PREPROCESS_SET_AGC,
 			     &enabled);

 	enabled = PJMEDIA_SPEEX_AEC_USE_DENOISE;
-	speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_DENOISE, 
+	speex_preprocess_ctl(echo->preprocess[i],
+			     SPEEX_PREPROCESS_SET_DENOISE, &enabled);
+
+    	/* Currently, VAD and dereverb are set at default setting. */
+    	/*
+    	enabled = 1;
+    	speex_preprocess_ctl(echo->preprocess[i], SPEEX_PREPROCESS_SET_VAD,
 			     &enabled);
+    	speex_preprocess_ctl(echo->preprocess[i],
+    			     SPEEX_PREPROCESS_SET_DEREVERB,
+			     &enabled);
+	*/
+
+    	/* Control echo cancellation in the preprocessor */
+   	speex_preprocess_ctl(echo->preprocess[i],
+   			     SPEEX_PREPROCESS_SET_ECHO_STATE, echo->state);
    }

-    /* Control echo cancellation in the preprocessor */
-   speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, 
-			echo->state);
-
-
    /* Create temporary frame for echo cancellation */
-    echo->tmp_frame = (pj_int16_t*) pj_pool_zalloc(pool, 2*samples_per_frame);
-    PJ_ASSERT_RETURN(echo->tmp_frame != NULL, PJ_ENOMEM);
+    echo->tmp_frame = (pj_int16_t*) pj_pool_zalloc(pool, sizeof(pj_int16_t) *
+    						   channel_count *
+    						   samples_per_frame);
+    if (!echo->tmp_frame) {
+    	speex_aec_destroy(echo);
+	return PJ_ENOMEM;
+    }

    /* Done */
    *p_echo = echo;
@ -144,6 +154,7 @@ PJ_DEF(pj_status_t) speex_aec_create(pj_pool_t *pool,
 PJ_DEF(pj_status_t) speex_aec_destroy(void *state )
 {
    speex_ec *echo = (speex_ec*) state;
+    unsigned i;

    PJ_ASSERT_RETURN(echo && echo->state, PJ_EINVAL);

@ -152,8 +163,18 @@ PJ_DEF(pj_status_t) speex_aec_destroy(void *state )
 	echo->state = NULL;
    }

+    if (echo->decorr) {
+	speex_decorrelate_destroy(echo->decorr);
+	echo->decorr = NULL;
+    }
+
    if (echo->preprocess) {
-	speex_preprocess_state_destroy(echo->preprocess);
+        for (i = 0; i < echo->channel_count; i++) {
+	    if (echo->preprocess[i]) {
+	    	speex_preprocess_state_destroy(echo->preprocess[i]);
+	    	echo->preprocess[i] = NULL;
+	    }
+	}
 	echo->preprocess = NULL;
    }

@ -181,6 +202,7 @@ PJ_DEF(pj_status_t) speex_aec_cancel_echo( void *state,
 					   void *reserved )
 {
    speex_ec *echo = (speex_ec*) state;
+    unsigned i;

    /* Sanity checks */
    PJ_ASSERT_RETURN(echo && rec_frm && play_frm && options==0 &&
@ -192,8 +214,27 @@ PJ_DEF(pj_status_t) speex_aec_cancel_echo( void *state,
 			    (spx_int16_t*)echo->tmp_frame);


-    /* Preprocess output */
-    speex_preprocess_run(echo->preprocess, (spx_int16_t*)echo->tmp_frame);
+    /* Preprocess output per channel */
+    for (i = 0; i < echo->channel_count; i++) {
+    	spx_int16_t *buf = (spx_int16_t*)echo->tmp_frame;
+    	unsigned j;
+
+        /* De-interleave each channel. */
+        if (echo->channel_count > 1) {
+    	    for (j = 0; j < echo->spf_per_channel; j++) {
+    	        rec_frm[j] = echo->tmp_frame[j * echo->channel_count + i];
+    	    }
+    	    buf = (spx_int16_t*)rec_frm;
+    	}
+
+	speex_preprocess_run(echo->preprocess[i], buf);
+
+        if (echo->channel_count > 1) {
+    	    for (j = 0; j < echo->spf_per_channel; j++) {
+    	        echo->tmp_frame[j * echo->channel_count + i] = rec_frm[j];
+    	    }
+    	}
+    }

    /* Copy temporary buffer back to original rec_frm */
    pjmedia_copy_samples(rec_frm, echo->tmp_frame, echo->samples_per_frame);
@ -213,6 +254,15 @@ PJ_DEF(pj_status_t) speex_aec_playback( void *state,
    /* Sanity checks */
    PJ_ASSERT_RETURN(echo && play_frm, PJ_EINVAL);

+    /* Channel decorrelation algorithm is useful for multi-channel echo
+     * cancellation only .
+     */
+    if (echo->channel_count > 1) {
+    	pjmedia_copy_samples(echo->tmp_frame, play_frm, echo->samples_per_frame);
+    	speex_decorrelate(echo->decorr, (spx_int16_t*)echo->tmp_frame,
+    		      	  (spx_int16_t*)play_frm, 100);
+    }
+
    speex_echo_playback(echo->state, (spx_int16_t*)play_frm);

    return PJ_SUCCESS;
@ -227,6 +277,7 @@ PJ_DEF(pj_status_t) speex_aec_capture( void *state,
 				       unsigned options )
 {
    speex_ec *echo = (speex_ec*) state;
+    unsigned i;

    /* Sanity checks */
    PJ_ASSERT_RETURN(echo && rec_frm, PJ_EINVAL);
@ -234,13 +285,33 @@ PJ_DEF(pj_status_t) speex_aec_capture( void *state,
    PJ_UNUSED_ARG(options);

    /* Cancel echo */
-    pjmedia_copy_samples(echo->tmp_frame, rec_frm, echo->samples_per_frame);
    speex_echo_capture(echo->state,
-		       (spx_int16_t*)echo->tmp_frame,
-		       (spx_int16_t*)rec_frm);
+		       (spx_int16_t*)rec_frm,
+		       (spx_int16_t*)echo->tmp_frame);

-    /* Apply preprocessing */
-    speex_preprocess_run(echo->preprocess, (spx_int16_t*)rec_frm);
+    /* Apply preprocessing per channel. */
+    for (i = 0; i < echo->channel_count; i++) {
+    	spx_int16_t *buf = (spx_int16_t*)echo->tmp_frame;
+    	unsigned j;
+
+        /* De-interleave each channel. */
+        if (echo->channel_count > 1) {
+    	    for (j = 0; j < echo->spf_per_channel; j++) {
+    	        rec_frm[j] = echo->tmp_frame[j * echo->channel_count + i];
+    	    }
+    	    buf = (spx_int16_t*)rec_frm;
+    	}
+
+	speex_preprocess_run(echo->preprocess[i], buf);
+
+        if (echo->channel_count > 1) {
+    	    for (j = 0; j < echo->spf_per_channel; j++) {
+    	        echo->tmp_frame[j * echo->channel_count + i] = rec_frm[j];
+    	    }
+    	}
+    }
+
+    pjmedia_copy_samples(rec_frm, echo->tmp_frame, echo->samples_per_frame);

    return PJ_SUCCESS;
 }
--- a/third_party/build/speex/Makefile
+++ b/third_party/build/speex/Makefile
@ -39,7 +39,7 @@ export SPEEX_OBJS = 	bits.o cb_search.o exc_10_16_table.o  \
 			nb_celp.o preprocess.o \
 			quant_lsp.o resample.o sb_celp.o smallft.o \
 			speex.o speex_callbacks.o speex_header.o \
-			stereo.o vbr.o vq.o window.o
+			scal.o stereo.o vbr.o vq.o window.o

 export SPEEX_CFLAGS = -DHAVE_CONFIG_H $(_CFLAGS)
 export SPEEX_LDFLAGS := $(PJLIB_LDLIB) $(_LDFLAGS)
--- a/third_party/speex/include/speex/speex_buffer.h
+++ b/third_party/speex/include/speex/speex_buffer.h
@ -34,7 +34,7 @@
 #ifndef SPEEX_BUFFER_H
 #define SPEEX_BUFFER_H

-#include "speex/speex_types.h"
+#include "speexdsp_types.h"

 #ifdef __cplusplus
 extern "C" {
--- a/third_party/speex/include/speex/speex_echo.h
+++ b/third_party/speex/include/speex/speex_echo.h
@ -37,7 +37,7 @@
 *  This is the acoustic echo canceller module.
 *  @{
 */
-#include "speex/speex_types.h"
+#include "speexdsp_types.h"

 #ifdef __cplusplus
 extern "C" {
--- a/third_party/speex/include/speex/speex_jitter.h
+++ b/third_party/speex/include/speex/speex_jitter.h
@ -41,7 +41,7 @@
 *  @{
 */

-#include "speex/speex_types.h"
+#include "speexdsp_types.h"

 #ifdef __cplusplus
 extern "C" {
--- a/third_party/speex/include/speex/speex_preprocess.h
+++ b/third_party/speex/include/speex/speex_preprocess.h
@ -43,7 +43,7 @@
 *  @{
 */

-#include "speex/speex_types.h"
+#include "speexdsp_types.h"

 #ifdef __cplusplus
 extern "C" {
--- a/third_party/speex/include/speex/speex_resampler.h
+++ b/third_party/speex/include/speex/speex_resampler.h
@ -82,9 +82,11 @@
 #define spx_uint16_t unsigned short
 #define spx_uint32_t unsigned int
      
+#define speex_assert(cond)
+
 #else /* OUTSIDE_SPEEX */

-#include "speex/speex_types.h"
+#include "speexdsp_types.h"

 #endif /* OUTSIDE_SPEEX */

@ -104,6 +106,7 @@ enum {
   RESAMPLER_ERR_BAD_STATE       = 2,
   RESAMPLER_ERR_INVALID_ARG     = 3,
   RESAMPLER_ERR_PTR_OVERLAP     = 4,
+   RESAMPLER_ERR_OVERFLOW        = 5,
   
   RESAMPLER_ERR_MAX_ERROR
 };
@ -302,12 +305,12 @@ void speex_resampler_set_output_stride(SpeexResamplerState *st,
 void speex_resampler_get_output_stride(SpeexResamplerState *st, 
                                      spx_uint32_t *stride);

-/** Get the latency in input samples introduced by the resampler.
+/** Get the latency introduced by the resampler measured in input samples.
 * @param st Resampler state
 */
 int speex_resampler_get_input_latency(SpeexResamplerState *st);

-/** Get the latency in output samples introduced by the resampler.
+/** Get the latency introduced by the resampler measured in output samples.
 * @param st Resampler state
 */
 int speex_resampler_get_output_latency(SpeexResamplerState *st);
--- a/third_party/speex/include/speex/speexdsp_types.h
+++ b/third_party/speex/include/speex/speexdsp_types.h
@ -0,0 +1,126 @@
+/* speexdsp_types.h taken from libogg */
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002             *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+ function: #ifdef jail to whip a few platforms into the UNIX ideal.
+ last mod: $Id$
+
+ ********************************************************************/
+/**
+   @file speexdsp_types.h
+   @brief Speex types
+*/
+#ifndef _SPEEX_TYPES_H
+#define _SPEEX_TYPES_H
+
+#if defined(_WIN32)
+
+#  if defined(__CYGWIN__)
+#    include <_G_config.h>
+     typedef _G_int32_t spx_int32_t;
+     typedef _G_uint32_t spx_uint32_t;
+     typedef _G_int16_t spx_int16_t;
+     typedef _G_uint16_t spx_uint16_t;
+#  elif defined(__MINGW32__)
+     typedef short spx_int16_t;
+     typedef unsigned short spx_uint16_t;
+     typedef int spx_int32_t;
+     typedef unsigned int spx_uint32_t;
+#  elif defined(__MWERKS__)
+     typedef int spx_int32_t;
+     typedef unsigned int spx_uint32_t;
+     typedef short spx_int16_t;
+     typedef unsigned short spx_uint16_t;
+#  else
+     /* MSVC/Borland */
+     typedef __int32 spx_int32_t;
+     typedef unsigned __int32 spx_uint32_t;
+     typedef __int16 spx_int16_t;
+     typedef unsigned __int16 spx_uint16_t;
+#  endif
+
+#elif defined(__MACOS__)
+
+#  include <sys/types.h>
+   typedef SInt16 spx_int16_t;
+   typedef UInt16 spx_uint16_t;
+   typedef SInt32 spx_int32_t;
+   typedef UInt32 spx_uint32_t;
+
+#elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */
+
+#  include <sys/types.h>
+   typedef int16_t spx_int16_t;
+   typedef u_int16_t spx_uint16_t;
+   typedef int32_t spx_int32_t;
+   typedef u_int32_t spx_uint32_t;
+
+#elif defined(__BEOS__)
+
+   /* Be */
+#  include <inttypes.h>
+   typedef int16_t spx_int16_t;
+   typedef u_int16_t spx_uint16_t;
+   typedef int32_t spx_int32_t;
+   typedef u_int32_t spx_uint32_t;
+
+#elif defined (__EMX__)
+
+   /* OS/2 GCC */
+   typedef short spx_int16_t;
+   typedef unsigned short spx_uint16_t;
+   typedef int spx_int32_t;
+   typedef unsigned int spx_uint32_t;
+
+#elif defined (DJGPP)
+
+   /* DJGPP */
+   typedef short spx_int16_t;
+   typedef int spx_int32_t;
+   typedef unsigned int spx_uint32_t;
+
+#elif defined(R5900)
+
+   /* PS2 EE */
+   typedef int spx_int32_t;
+   typedef unsigned spx_uint32_t;
+   typedef short spx_int16_t;
+
+#elif defined(__SYMBIAN32__)
+
+   /* Symbian GCC */
+   typedef signed short spx_int16_t;
+   typedef unsigned short spx_uint16_t;
+   typedef signed int spx_int32_t;
+   typedef unsigned int spx_uint32_t;
+
+#elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)
+
+   typedef short spx_int16_t;
+   typedef unsigned short spx_uint16_t;
+   typedef long spx_int32_t;
+   typedef unsigned long spx_uint32_t;
+
+#elif defined(CONFIG_TI_C6X)
+
+   typedef short spx_int16_t;
+   typedef unsigned short spx_uint16_t;
+   typedef int spx_int32_t;
+   typedef unsigned int spx_uint32_t;
+
+#else
+
+#include "speexdsp_config_types.h"
+
+#endif
+
+#endif  /* _SPEEX_TYPES_H */
--- a/third_party/speex/libspeex/arch.h
+++ b/third_party/speex/libspeex/arch.h
@ -7,18 +7,18 @@
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
-   
+
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
-   
+
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
-   
+
   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.
-   
+
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -49,7 +49,7 @@
 #ifdef FLOATING_POINT
 #error You cannot compile as floating point and fixed point at the same time
 #endif
-#ifdef _USE_SSE
+#ifdef USE_SSE
 #error SSE is only for floating-point
 #endif
 #if ((defined (ARM4_ASM)||defined (ARM4_ASM)) && defined(BFIN_ASM)) || (defined (ARM4_ASM)&&defined(ARM5E_ASM))
@ -75,7 +75,7 @@
 #endif

 #ifndef OUTSIDE_SPEEX
-#include "speex/speex_types.h"
+#include "speex/speexdsp_types.h"
 #endif

 #define ABS(x) ((x) < 0 ? (-(x)) : (x))      /**< Absolute integer value. */
@ -89,7 +89,7 @@
 #ifdef FIXED_POINT

 typedef spx_int16_t spx_word16_t;
-typedef spx_int32_t   spx_word32_t;
+typedef spx_int32_t spx_word32_t;
 typedef spx_word32_t spx_mem_t;
 typedef spx_word16_t spx_coef_t;
 typedef spx_word16_t spx_lsp_t;
@ -109,6 +109,8 @@ typedef spx_word32_t spx_sig_t;
 #define SIG_SHIFT    14
 #define GAIN_SHIFT   6

+#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
+
 #define VERY_SMALL 0
 #define VERY_LARGE32 ((spx_word32_t)2147483647)
 #define VERY_LARGE16 ((spx_word16_t)32767)
@ -171,6 +173,7 @@ typedef float spx_word32_t;
 #define VSHR32(a,shift) (a)
 #define SATURATE16(x,a) (x)
 #define SATURATE32(x,a) (x)
+#define SATURATE32PSHR(x,shift,a) (x)

 #define PSHR(a,shift)       (a)
 #define SHR(a,shift)       (a)
@ -210,18 +213,19 @@ typedef float spx_word32_t;
 #define DIV32(a,b)     (((spx_word32_t)(a))/(spx_word32_t)(b))
 #define PDIV32(a,b)     (((spx_word32_t)(a))/(spx_word32_t)(b))

-
+#define WORD2INT(x) ((x) < -32767.5f ? -32768 : \
+                    ((x) > 32766.5f ? 32767 : (spx_int16_t)floor(.5 + (x))))
 #endif


 #if defined (CONFIG_TI_C54X) || defined (CONFIG_TI_C55X)

 /* 2 on TI C5x DSP */
-#define BYTES_PER_CHAR 2 
+#define BYTES_PER_CHAR 2
 #define BITS_PER_CHAR 16
 #define LOG2_BITS_PER_CHAR 4

-#else 
+#else

 #define BYTES_PER_CHAR 1
 #define BITS_PER_CHAR 8
--- a/third_party/speex/libspeex/bfin.h
+++ b/third_party/speex/libspeex/bfin.h
@ -0,0 +1,15 @@
+/* Common Blackfin assembly defines
+ *
+ * Copyright (C) 2005-2009 Analog Devices
+ */
+
+#if __GNUC__ <= 3
+/* GCC-3.4 and older did not use hardware loops and thus did not have
+ * register constraints for declaring clobbers.
+ */
+# define BFIN_HWLOOP0_REGS
+# define BFIN_HWLOOP1_REGS
+#else
+# define BFIN_HWLOOP0_REGS , "LB0", "LT0", "LC0"
+# define BFIN_HWLOOP1_REGS , "LB1", "LT1", "LC1"
+#endif
--- a/third_party/speex/libspeex/buffer.c
+++ b/third_party/speex/libspeex/buffer.c
@ -1,5 +1,5 @@
 /* Copyright (C) 2007 Jean-Marc Valin
-      
+
   File: buffer.c
   This is a very simple ring buffer implementation. It is not thread-safe
   so you need to do your own locking.
@ -38,7 +38,7 @@

 #include "os_support.h"
 #include "arch.h"
-#include <speex/speex_buffer.h>
+#include "speex/speex_buffer.h"

 struct SpeexBuffer_ {
   char *data;
@ -99,7 +99,7 @@ EXPORT int speex_buffer_write(SpeexBuffer *st, void *_data, int len)

 EXPORT int speex_buffer_writezeros(SpeexBuffer *st, int len)
 {
-   /* This is almost the same as for speex_buffer_write() but using 
+   /* This is almost the same as for speex_buffer_write() but using
   SPEEX_MEMSET() instead of SPEEX_COPY(). Update accordingly. */
   int end;
   int end1;
@ -135,7 +135,7 @@ EXPORT int speex_buffer_read(SpeexBuffer *st, void *_data, int len)
   char *data = _data;
   if (len > st->available)
   {
-      SPEEX_MEMSET(data+st->available, 0, st->size-st->available);
+      SPEEX_MEMSET(data+st->available, 0, len - st->available);
      len = st->available;
   }
   end = st->read_ptr + len;
--- a/third_party/speex/libspeex/fftwrap.c
+++ b/third_party/speex/libspeex/fftwrap.c
@ -1,23 +1,23 @@
-/* Copyright (C) 2005-2006 Jean-Marc Valin 
+/* Copyright (C) 2005-2006 Jean-Marc Valin
   File: fftwrap.c

-   Wrapper for various FFTs 
+   Wrapper for various FFTs

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
-   
+
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
-   
+
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
-   
+
   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.
-   
+
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -62,7 +62,7 @@ static int maximize_range(spx_word16_t *in, spx_word16_t *out, spx_word16_t boun
   for (i=0;i<len;i++)
   {
      out[i] = SHL16(in[i], shift);
-   }   
+   }
   return shift;
 }

@ -165,6 +165,57 @@ void spx_ifft(void *table, spx_word16_t *in, spx_word16_t *out)
  DftiComputeBackward(t->desc, in, out);
 }

+#elif defined(USE_INTEL_IPP)
+
+#include <ipps.h>
+
+struct ipp_fft_config
+{
+  IppsDFTSpec_R_32f *dftSpec;
+  Ipp8u *buffer;
+};
+
+void *spx_fft_init(int size)
+{
+  int bufferSize = 0;
+  int hint;
+  struct ipp_fft_config *table;
+
+  table = (struct ipp_fft_config *)speex_alloc(sizeof(struct ipp_fft_config));
+
+  /* there appears to be no performance difference between ippAlgHintFast and
+     ippAlgHintAccurate when using the with the floating point version
+     of the fft. */
+  hint = ippAlgHintAccurate;
+
+  ippsDFTInitAlloc_R_32f(&table->dftSpec, size, IPP_FFT_DIV_FWD_BY_N, hint);
+
+  ippsDFTGetBufSize_R_32f(table->dftSpec, &bufferSize);
+  table->buffer = ippsMalloc_8u(bufferSize);
+
+  return table;
+}
+
+void spx_fft_destroy(void *table)
+{
+  struct ipp_fft_config *t = (struct ipp_fft_config *)table;
+  ippsFree(t->buffer);
+  ippsDFTFree_R_32f(t->dftSpec);
+  speex_free(t);
+}
+
+void spx_fft(void *table, spx_word16_t *in, spx_word16_t *out)
+{
+  struct ipp_fft_config *t = (struct ipp_fft_config *)table;
+  ippsDFTFwd_RToPack_32f(in, out, t->dftSpec, t->buffer);
+}
+
+void spx_ifft(void *table, spx_word16_t *in, spx_word16_t *out)
+{
+  struct ipp_fft_config *t = (struct ipp_fft_config *)table;
+  ippsDFTInv_PackToR_32f(in, out, t->dftSpec, t->buffer);
+}
+
 #elif defined(USE_GPL_FFTW3)

 #include <fftw3.h>
@ -219,7 +270,7 @@ void spx_fft(void *table, spx_word16_t *in, spx_word16_t *out)
    out[i] = optr[i+1];
 }

-void spx_ifft(void *table, spx_word16_t *in, spx_word16_t *out) 
+void spx_ifft(void *table, spx_word16_t *in, spx_word16_t *out)
 {
  int i;
  struct fftw_config *t = (struct fftw_config *) table;
@ -234,7 +285,7 @@ void spx_ifft(void *table, spx_word16_t *in, spx_word16_t *out)
  iptr[N+1] = 0.0f;

  fftwf_execute(t->ifft);
-  
+
  for(i=0;i<N;++i)
    out[i] = optr[i];
 }
--- a/third_party/speex/libspeex/fixed_bfin.h
+++ b/third_party/speex/libspeex/fixed_bfin.h
@ -8,18 +8,18 @@
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
-   
+
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
-   
+
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
-   
+
   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.
-   
+
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -36,6 +36,8 @@
 #ifndef FIXED_BFIN_H
 #define FIXED_BFIN_H

+#include "bfin.h"
+
 #undef PDIV32_16
 static inline spx_word16_t PDIV32_16(spx_word32_t a, spx_word16_t b)
 {
@ -57,7 +59,7 @@ static inline spx_word16_t PDIV32_16(spx_word32_t a, spx_word16_t b)
         "%0 = R0;\n\t"
   : "=m" (res)
   : "m" (a), "m" (bb)
-   : "P0", "R0", "R1", "cc");
+   : "P0", "R0", "R1", "ASTAT" BFIN_HWLOOP0_REGS);
   return res;
 }

@ -66,9 +68,9 @@ static inline spx_word16_t DIV32_16(spx_word32_t a, spx_word16_t b)
 {
   spx_word32_t res, bb;
   bb = b;
-   /* Make the roundinf consistent with the C version 
+   /* Make the roundinf consistent with the C version
      (do we need to do that?)*/
-   if (a<0) 
+   if (a<0)
      a += (b-1);
   __asm__  (
         "P0 = 15;\n\t"
@ -84,7 +86,7 @@ static inline spx_word16_t DIV32_16(spx_word32_t a, spx_word16_t b)
         "%0 = R0;\n\t"
   : "=m" (res)
   : "m" (a), "m" (bb)
-   : "P0", "R0", "R1", "cc");
+   : "P0", "R0", "R1", "ASTAT" BFIN_HWLOOP0_REGS);
   return res;
 }

@ -98,6 +100,7 @@ static inline spx_word16_t MAX16(spx_word16_t a, spx_word16_t b)
         "%0 = MAX(%1,%2);"
   : "=d" (res)
   : "%d" (a), "d" (b)
+   : "ASTAT"
   );
   return res;
 }
@ -113,7 +116,7 @@ static inline spx_word32_t MULT16_32_Q15(spx_word16_t a, spx_word32_t b)
         "%0 = (A1 += %2.L*%1.H) ;\n\t"
   : "=&W" (res), "=&d" (b)
   : "d" (a), "1" (b)
-   : "A1"
+   : "A1", "ASTAT"
   );
   return res;
 }
@ -130,7 +133,7 @@ static inline spx_word32_t MAC16_32_Q15(spx_word32_t c, spx_word16_t a, spx_word
         "%0 = %0 + %4;\n\t"
   : "=&W" (res), "=&d" (b)
   : "d" (a), "1" (b), "d" (c)
-   : "A1"
+   : "A1", "ASTAT"
         );
   return res;
 }
@ -147,7 +150,7 @@ static inline spx_word32_t MULT16_32_Q14(spx_word16_t a, spx_word32_t b)
         "%0 = (A1 += %1.L*%2.H);\n\t"
   : "=W" (res), "=d" (a), "=d" (b)
   : "1" (a), "2" (b)
-   : "A1"
+   : "A1", "ASTAT"
         );
   return res;
 }
@ -165,7 +168,7 @@ static inline spx_word32_t MAC16_32_Q14(spx_word32_t c, spx_word16_t a, spx_word
         "%0 = %0 + %4;\n\t"
   : "=&W" (res), "=&d" (b)
   : "d" (a), "1" (b), "d" (c)
-   : "A1"
+   : "A1", "ASTAT"
         );
   return res;
 }
--- a/third_party/speex/libspeex/fixed_generic.h
+++ b/third_party/speex/libspeex/fixed_generic.h
@ -7,18 +7,18 @@
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
-   
+
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
-   
+
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
-   
+
   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.
-   
+
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -52,6 +52,10 @@
 #define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
 #define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))

+#define SATURATE32PSHR(x,shift,a) (((x)>=(SHL32(a,shift))) ? (a) : \
+                                   (x)<=-(SHL32(a,shift)) ? -(a) : \
+                                   (PSHR32(x, shift)))
+
 #define SHR(a,shift) ((a) >> (shift))
 #define SHL(a,shift) ((spx_word32_t)(a) << (shift))
 #define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift))
--- a/third_party/speex/libspeex/jitter.c
+++ b/third_party/speex/libspeex/jitter.c
@ -55,8 +55,6 @@ TODO:


 #include "arch.h"
-#include <speex/speex.h>
-#include <speex/speex_bits.h>
 #include <speex/speex_jitter.h>
 #include "os_support.h"

@ -466,7 +464,6 @@ EXPORT int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, s
 {
   int i;
   unsigned int j;
-   int incomplete = 0;
   spx_int16_t opt;
   
   if (start_offset != NULL)
@ -571,7 +568,6 @@ EXPORT int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, s
      if (found)
      {
         i=besti;
-         incomplete = 1;
         /*fprintf (stderr, "incomplete: %d %d %d %d\n", jitter->packets[i].timestamp, jitter->pointer_timestamp, chunk_size, jitter->packets[i].span);*/
      }
   }
--- a/third_party/speex/libspeex/math_approx.h
+++ b/third_party/speex/libspeex/math_approx.h
@ -7,18 +7,18 @@
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
-   
+
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
-   
+
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
-   
+
   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.
-   
+
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -168,11 +168,11 @@ static inline spx_word16_t spx_acos(spx_word16_t x)
      x = NEG16(x);
   }
   x = SUB16(16384,x);
-   
+
   x = x >> 1;
   sq = MULT16_16_Q13(x, ADD16(A1, MULT16_16_Q13(x, ADD16(A2, MULT16_16_Q13(x, (A3))))));
   ret = spx_sqrt(SHL32(EXTEND32(sq),13));
-   
+
   /*ret = spx_sqrt(67108864*(-1.6129e-04 + 2.0104e+00*f + 2.7373e-01*f*f + 1.8136e-01*f*f*f));*/
   if (s)
      ret = SUB16(25736,ret);
@ -208,7 +208,7 @@ static inline spx_word16_t spx_cos(spx_word16_t x)
 static inline spx_word16_t _spx_cos_pi_2(spx_word16_t x)
 {
   spx_word16_t x2;
-   
+
   x2 = MULT16_16_P15(x,x);
   return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2))))))));
 }
--- a/third_party/speex/libspeex/mdf.c
+++ b/third_party/speex/libspeex/mdf.c
@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2006 Jean-Marc Valin
+/* Copyright (C) 2003-2008 Jean-Marc Valin

   File: mdf.c
   Echo canceller based on the MDF algorithm (see below)
@ -33,36 +33,36 @@
 /*
   The echo canceller is based on the MDF algorithm described in:

-   J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter, 
-   IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2, 
+   J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
+   IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
   February 1990.
-   
-   We use the Alternatively Updated MDF (AUMDF) variant. Robustness to 
+
+   We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
   double-talk is achieved using a variable learning rate as described in:
-   
-   Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo 
+
+   Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
   Cancellation With Double-Talk. IEEE Transactions on Audio,
   Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
   http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
-   
+
   There is no explicit double-talk detection, but a continuous variation
   in the learning rate based on residual echo, double-talk and background
   noise.
-   
+
   About the fixed-point version:
-   All the signals are represented with 16-bit words. The filter weights 
+   All the signals are represented with 16-bit words. The filter weights
   are represented with 32-bit words, but only the top 16 bits are used
   in most cases. The lower 16 bits are completely unreliable (due to the
   fact that the update is done only on the top bits), but help in the
   adaptation -- probably by removing a "threshold effect" due to
   quantization (rounding going to zero) when the gradient is small.
-   
+
   Another kludge that seems to work good: when performing the weight
   update, we only move half the way toward the "goal" this seems to
   reduce the effect of quantization noise in the update phase. This
   can be seen as applying a gradient descent on a "soft constraint"
   instead of having a hard constraint.
-   
+
 */

 #ifdef HAVE_CONFIG_H
@ -131,13 +131,15 @@ struct SpeexEchoState_ {
   int adapted;
   int saturated;
   int screwed_up;
+   int C;                    /** Number of input channels (microphones) */
+   int K;                    /** Number of output channels (loudspeakers) */
   spx_int32_t sampling_rate;
   spx_word16_t spec_average;
   spx_word16_t beta0;
   spx_word16_t beta_max;
   spx_word32_t sum_adapt;
   spx_word16_t leak_estimate;
-   
+
   spx_word16_t *e;      /* scratch */
   spx_word16_t *x;      /* Far-end input buffer (2N) */
   spx_word16_t *X;      /* Far-end buffer (M+1 frames) in frequency domain */
@ -171,10 +173,10 @@ struct SpeexEchoState_ {
   spx_word16_t *window;
   spx_word16_t *prop;
   void *fft_table;
-   spx_word16_t memX, memD, memE;
+   spx_word16_t *memX, *memD, *memE;
   spx_word16_t preemph;
   spx_word16_t notch_radius;
-   spx_mem_t notch_mem[2];
+   spx_mem_t *notch_mem;

   /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
   spx_int16_t *play_buf;
@ -182,7 +184,7 @@ struct SpeexEchoState_ {
   int play_buf_started;
 };

-static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem)
+static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
 {
   int i;
   spx_word16_t den2;
@ -190,11 +192,11 @@ static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius,
   den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
 #else
   den2 = radius*radius + .7*(1-radius)*(1-radius);
-#endif   
+#endif
   /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
   for (i=0;i<len;i++)
   {
-      spx_word16_t vin = in[i];
+      spx_word16_t vin = in[i*stride];
      spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
 #ifdef FIXED_POINT
      mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
@ -234,6 +236,18 @@ static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N
   ps[j]=MULT16_16(X[i],X[i]);
 }

+/** Compute power spectrum of a half-complex (packed) vector and accumulate */
+static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
+{
+   int i, j;
+   ps[0]+=MULT16_16(X[0],X[0]);
+   for (i=1,j=1;i<N-1;i+=2,j++)
+   {
+      ps[j] +=  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
+   }
+   ps[j]+=MULT16_16(X[i],X[i]);
+}
+
 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
 #ifdef FIXED_POINT
 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
@ -330,16 +344,17 @@ static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_fl
   prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
 }

-static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop)
+static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
 {
-   int i, j;
+   int i, j, p;
   spx_word16_t max_sum = 1;
   spx_word32_t prop_sum = 1;
   for (i=0;i<M;i++)
   {
      spx_word32_t tmp = 1;
-      for (j=0;j<N;j++)
-         tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18)));
+      for (p=0;p<P;p++)
+         for (j=0;j<N;j++)
+            tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
 #ifdef FIXED_POINT
      /* Just a security in case an overflow were to occur */
      tmp = MIN32(ABS32(tmp), 536870912);
@ -378,11 +393,20 @@ static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const sp
 #endif

 /** Creates a new echo canceller state */
-SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
+EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
 {
-   int i,N,M;
+   return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
+}
+
+EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
+{
+   int i,N,M, C, K;
   SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));

+   st->K = nb_speakers;
+   st->C = nb_mic;
+   C=st->C;
+   K=st->K;
 #ifdef DUMP_ECHO_CANCEL_DATA
   if (rFile || pFile || oFile)
      speex_fatal("Opening dump files twice");
@ -390,7 +414,7 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
   pFile = fopen("aec_play.sw", "wb");
   oFile = fopen("aec_out.sw", "wb");
 #endif
-   
+
   st->frame_size = frame_size;
   st->window_size = 2*frame_size;
   N = st->window_size;
@ -412,24 +436,24 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
   st->leak_estimate = 0;

   st->fft_table = spx_fft_init(N);
-   
-   st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
-   st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
-   st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t));
-   st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
-   st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
+
+   st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
+   st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
+   st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
+   st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
+   st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
   st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
   st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
   st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
   st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
   st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));

-   st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t));
-   st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
-   st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
-   st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t));
+   st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
+   st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
+   st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
+   st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
 #ifdef TWO_PATH
-   st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t));
+   st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
 #endif
   st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
   st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
@ -450,7 +474,7 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
 #endif
   for (i=0;i<=st->frame_size;i++)
      st->power_1[i] = FLOAT_ONE;
-   for (i=0;i<N*M;i++)
+   for (i=0;i<N*M*K*C;i++)
      st->W[i] = 0;
   {
      spx_word32_t sum = 0;
@ -465,11 +489,13 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
      }
      for (i=M-1;i>=0;i--)
      {
-         st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum);
+         st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
      }
   }
-   
-   st->memX=st->memD=st->memE=0;
+
+   st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
+   st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
+   st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
   st->preemph = QCONST16(.9,15);
   if (st->sampling_rate<12000)
      st->notch_radius = QCONST16(.9, 15);
@ -478,30 +504,32 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
   else
      st->notch_radius = QCONST16(.992, 15);

-   st->notch_mem[0] = st->notch_mem[1] = 0;
+   st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
   st->adapted = 0;
   st->Pey = st->Pyy = FLOAT_ONE;
-   
+
 #ifdef TWO_PATH
   st->Davg1 = st->Davg2 = 0;
   st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
 #endif
-   
-   st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
+
+   st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
   st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
   st->play_buf_started = 0;
-   
+
   return st;
 }

 /** Resets echo canceller state */
-void speex_echo_state_reset(SpeexEchoState *st)
+EXPORT void speex_echo_state_reset(SpeexEchoState *st)
 {
-   int i, M, N;
+   int i, M, N, C, K;
   st->cancel_count=0;
   st->screwed_up = 0;
   N = st->window_size;
   M = st->M;
+   C=st->C;
+   K=st->K;
   for (i=0;i<N*M;i++)
      st->W[i] = 0;
 #ifdef TWO_PATH
@ -521,13 +549,20 @@ void speex_echo_state_reset(SpeexEchoState *st)
   {
      st->last_y[i] = 0;
   }
-   for (i=0;i<N;i++)
+   for (i=0;i<N*C;i++)
   {
      st->E[i] = 0;
+   }
+   for (i=0;i<N*K;i++)
+   {
      st->x[i] = 0;
   }
-   st->notch_mem[0] = st->notch_mem[1] = 0;
-   st->memX=st->memD=st->memE=0;
+   for (i=0;i<2*C;i++)
+      st->notch_mem[i] = 0;
+   for (i=0;i<C;i++)
+      st->memD[i]=st->memE[i]=0;
+   for (i=0;i<K;i++)
+      st->memX[i]=0;

   st->saturated = 0;
   st->adapted = 0;
@ -545,7 +580,7 @@ void speex_echo_state_reset(SpeexEchoState *st)
 }

 /** Destroys an echo canceller state */
-void speex_echo_state_destroy(SpeexEchoState *st)
+EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
 {
   spx_fft_destroy(st->fft_table);

@ -576,9 +611,14 @@ void speex_echo_state_destroy(SpeexEchoState *st)
 #ifdef FIXED_POINT
   speex_free(st->wtmp2);
 #endif
+   speex_free(st->memX);
+   speex_free(st->memD);
+   speex_free(st->memE);
+   speex_free(st->notch_mem);
+
   speex_free(st->play_buf);
   speex_free(st);
-   
+
 #ifdef DUMP_ECHO_CANCEL_DATA
   fclose(rFile);
   fclose(pFile);
@ -587,7 +627,7 @@ void speex_echo_state_destroy(SpeexEchoState *st)
 #endif
 }

-void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
+EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
 {
   int i;
   /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
@ -610,7 +650,7 @@ void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t
   }
 }

-void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
+EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
 {
   /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
   if (!st->play_buf_started)
@ -637,16 +677,16 @@ void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
 }

 /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
-void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
+EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
 {
   speex_echo_cancellation(st, in, far_end, out);
 }

 /** Performs echo cancellation on a frame */
-void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
+EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
 {
-   int i,j;
-   int N,M;
+   int i,j, chan, speak;
+   int N,M, C, K;
   spx_word32_t Syy,See,Sxx,Sdd, Sff;
 #ifdef TWO_PATH
   spx_word32_t Dbf;
@ -658,9 +698,12 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
   spx_float_t alpha, alpha_1;
   spx_word16_t RER;
   spx_word32_t tmp32;
-   
+
   N = st->window_size;
   M = st->M;
+   C = st->C;
+   K = st->K;
+
   st->cancel_count++;
 #ifdef FIXED_POINT
   ss=DIV32_16(11469,M);
@ -670,157 +713,198 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
   ss_1 = 1-ss;
 #endif

-   /* Apply a notch filter to make sure DC doesn't end up causing problems */
-   filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem);
-   /* Copy input data to buffer and apply pre-emphasis */
-   for (i=0;i<st->frame_size;i++)
+   for (chan = 0; chan < C; chan++)
   {
-      spx_word32_t tmp32;
-      tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX)));
+      /* Apply a notch filter to make sure DC doesn't end up causing problems */
+      filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
+      /* Copy input data to buffer and apply pre-emphasis */
+      /* Copy input data to buffer */
+      for (i=0;i<st->frame_size;i++)
+      {
+         spx_word32_t tmp32;
+         /* FIXME: This core has changed a bit, need to merge properly */
+         tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
 #ifdef FIXED_POINT
-      /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */
-      if (tmp32 > 32767)
-      {
-         tmp32 = 32767;
-         st->saturated = M+1;
-      }
-      if (tmp32 < -32767)
-      {
-         tmp32 = -32767;
-         st->saturated = M+1;
-      }      
+         if (tmp32 > 32767)
+         {
+            tmp32 = 32767;
+            if (st->saturated == 0)
+               st->saturated = 1;
+         }
+         if (tmp32 < -32767)
+         {
+            tmp32 = -32767;
+            if (st->saturated == 0)
+               st->saturated = 1;
+         }
 #endif
-      st->x[i+st->frame_size] = EXTRACT16(tmp32);
-      st->memX = far_end[i];
-      
-      tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD)));
-#ifdef FIXED_POINT
-      if (tmp32 > 32767)
-      {
-         tmp32 = 32767;
-         if (st->saturated == 0)
-            st->saturated = 1;
-      }      
-      if (tmp32 < -32767)
-      {
-         tmp32 = -32767;
-         if (st->saturated == 0)
-            st->saturated = 1;
+         st->memD[chan] = st->input[chan*st->frame_size+i];
+         st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
      }
-#endif
-      st->memD = st->input[i];
-      st->input[i] = tmp32;
   }

-   /* Shift memory: this could be optimized eventually*/
-   for (j=M-1;j>=0;j--)
+   for (speak = 0; speak < K; speak++)
   {
-      for (i=0;i<N;i++)
-         st->X[(j+1)*N+i] = st->X[j*N+i];
+      for (i=0;i<st->frame_size;i++)
+      {
+         spx_word32_t tmp32;
+         st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
+         tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
+#ifdef FIXED_POINT
+         /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
+         if (tmp32 > 32767)
+         {
+            tmp32 = 32767;
+            st->saturated = M+1;
+         }
+         if (tmp32 < -32767)
+         {
+            tmp32 = -32767;
+            st->saturated = M+1;
+         }
+#endif
+         st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
+         st->memX[speak] = far_end[i*K+speak];
+      }
   }

-   /* Convert x (far end) to frequency domain */
-   spx_fft(st->fft_table, st->x, &st->X[0]);
-   for (i=0;i<N;i++)
-      st->last_y[i] = st->x[i];
-   Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size);
-   for (i=0;i<st->frame_size;i++)
-      st->x[i] = st->x[i+st->frame_size];
-   /* From here on, the top part of x is used as scratch space */
-   
+   for (speak = 0; speak < K; speak++)
+   {
+      /* Shift memory: this could be optimized eventually*/
+      for (j=M-1;j>=0;j--)
+      {
+         for (i=0;i<N;i++)
+            st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
+      }
+      /* Convert x (echo input) to frequency domain */
+      spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
+   }
+
+   Sxx = 0;
+   for (speak = 0; speak < K; speak++)
+   {
+      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
+      power_spectrum_accum(st->X+speak*N, st->Xf, N);
+   }
+
+   Sff = 0;
+   for (chan = 0; chan < C; chan++)
+   {
 #ifdef TWO_PATH
-   /* Compute foreground filter */
-   spectral_mul_accum16(st->X, st->foreground, st->Y, N, M);   
-   spx_ifft(st->fft_table, st->Y, st->e);
-   for (i=0;i<st->frame_size;i++)
-      st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]);
-   Sff = mdf_inner_prod(st->e, st->e, st->frame_size);
+      /* Compute foreground filter */
+      spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
+      spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
+      for (i=0;i<st->frame_size;i++)
+         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
+      Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
 #endif
-   
+   }
+
   /* Adjust proportional adaption rate */
-   mdf_adjust_prop (st->W, N, M, st->prop);
+   /* FIXME: Adjust that for C, K*/
+   if (st->adapted)
+      mdf_adjust_prop (st->W, N, M, C*K, st->prop);
   /* Compute weight gradient */
   if (st->saturated == 0)
   {
-      for (j=M-1;j>=0;j--)
+      for (chan = 0; chan < C; chan++)
      {
-         weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N);
-         for (i=0;i<N;i++)
-            st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]);
-         
+         for (speak = 0; speak < K; speak++)
+         {
+            for (j=M-1;j>=0;j--)
+            {
+               weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
+               for (i=0;i<N;i++)
+                  st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
+            }
+         }
      }
   } else {
      st->saturated--;
   }
-   
+
+   /* FIXME: MC conversion required */
   /* Update weight to prevent circular convolution (MDF / AUMDF) */
-   for (j=0;j<M;j++)
+   for (chan = 0; chan < C; chan++)
   {
-      /* This is a variant of the Alternatively Updated MDF (AUMDF) */
-      /* Remove the "if" to make this an MDF filter */
-      if (j==0 || st->cancel_count%(M-1) == j-1)
+      for (speak = 0; speak < K; speak++)
      {
+         for (j=0;j<M;j++)
+         {
+            /* This is a variant of the Alternatively Updated MDF (AUMDF) */
+            /* Remove the "if" to make this an MDF filter */
+            if (j==0 || st->cancel_count%(M-1) == j-1)
+            {
 #ifdef FIXED_POINT
-         for (i=0;i<N;i++)
-            st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16));
-         spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
-         for (i=0;i<st->frame_size;i++)
-         {
-            st->wtmp[i]=0;
-         }
-         for (i=st->frame_size;i<N;i++)
-         {
-            st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
-         }
-         spx_fft(st->fft_table, st->wtmp, st->wtmp2);
-         /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
-         for (i=0;i<N;i++)
-            st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
+               for (i=0;i<N;i++)
+                  st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
+               spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
+               for (i=0;i<st->frame_size;i++)
+               {
+                  st->wtmp[i]=0;
+               }
+               for (i=st->frame_size;i<N;i++)
+               {
+                  st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
+               }
+               spx_fft(st->fft_table, st->wtmp, st->wtmp2);
+               /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
+               for (i=0;i<N;i++)
+                  st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
 #else
-         spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
-         for (i=st->frame_size;i<N;i++)
-         {
-            st->wtmp[i]=0;
-         }
-         spx_fft(st->fft_table, st->wtmp, &st->W[j*N]);
+               spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
+               for (i=st->frame_size;i<N;i++)
+               {
+                  st->wtmp[i]=0;
+               }
+               spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
 #endif
+            }
+         }
      }
   }

-   /* Compute filter response Y */
-   spectral_mul_accum(st->X, st->W, st->Y, N, M);
-   spx_ifft(st->fft_table, st->Y, st->y);
+   /* So we can use power_spectrum_accum */
+   for (i=0;i<=st->frame_size;i++)
+      st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;

+   Dbf = 0;
+   See = 0;
 #ifdef TWO_PATH
   /* Difference in response, this is used to estimate the variance of our residual power estimate */
-   for (i=0;i<st->frame_size;i++)
-      st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]);
-   Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size);
+   for (chan = 0; chan < C; chan++)
+   {
+      spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
+      spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
+      for (i=0;i<st->frame_size;i++)
+         st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
+      Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
+      for (i=0;i<st->frame_size;i++)
+         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
+      See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
+   }
 #endif

-   for (i=0;i<st->frame_size;i++)
-      st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]);
-   See = mdf_inner_prod(st->e, st->e, st->frame_size);
 #ifndef TWO_PATH
   Sff = See;
 #endif

 #ifdef TWO_PATH
   /* Logic for updating the foreground filter */
-   
+
   /* For two time windows, compute the mean of the energy difference, as well as the variance */
   st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
   st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
   st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
   st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
-   
+
   /* Equivalent float code:
   st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
   st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
   st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
   st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
   */
-   
+
   update_foreground = 0;
   /* Check if we have a statistically significant reduction in the residual echo */
   /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
@ -830,18 +914,19 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
      update_foreground = 1;
   else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
      update_foreground = 1;
-   
+
   /* Do we update? */
   if (update_foreground)
   {
      st->Davg1 = st->Davg2 = 0;
      st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
      /* Copy background filter to foreground filter */
-      for (i=0;i<N*M;i++)
+      for (i=0;i<N*M*C*K;i++)
         st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
      /* Apply a smooth transition so as to not introduce blocking artifacts */
-      for (i=0;i<st->frame_size;i++)
-         st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]);
+      for (chan = 0; chan < C; chan++)
+         for (i=0;i<st->frame_size;i++)
+            st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
   } else {
      int reset_background=0;
      /* Otherwise, check if the background filter is significantly worse */
@ -854,13 +939,16 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
      if (reset_background)
      {
         /* Copy foreground filter to background filter */
-         for (i=0;i<N*M;i++)
+         for (i=0;i<N*M*C*K;i++)
            st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
         /* We also need to copy the output so as to get correct adaptation */
-         for (i=0;i<st->frame_size;i++)
-            st->y[i+st->frame_size] = st->e[i+st->frame_size];
-         for (i=0;i<st->frame_size;i++)
-            st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]);
+         for (chan = 0; chan < C; chan++)
+         {
+            for (i=0;i<st->frame_size;i++)
+               st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
+            for (i=0;i<st->frame_size;i++)
+               st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
+         }
         See = Sff;
         st->Davg1 = st->Davg2 = 0;
         st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
@ -868,50 +956,60 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
   }
 #endif

-   /* Compute error signal (for the output with de-emphasis) */ 
-   for (i=0;i<st->frame_size;i++)
+   Sey = Syy = Sdd = 0;
+   for (chan = 0; chan < C; chan++)
   {
-      spx_word32_t tmp_out;
-#ifdef TWO_PATH
-      tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size]));
-#else
-      tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size]));
-#endif
-      /* Saturation */
-      if (tmp_out>32767)
-         tmp_out = 32767;
-      else if (tmp_out<-32768)
-         tmp_out = -32768;
-      tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE)));
-      /* This is an arbitrary test for saturation in the microphone signal */
-      if (in[i] <= -32000 || in[i] >= 32000)
+      /* Compute error signal (for the output with de-emphasis) */
+      for (i=0;i<st->frame_size;i++)
      {
-         tmp_out = 0;
+         spx_word32_t tmp_out;
+#ifdef TWO_PATH
+         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
+#else
+         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
+#endif
+         tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
+      /* This is an arbitrary test for saturation in the microphone signal */
+         if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
+         {
         if (st->saturated == 0)
            st->saturated = 1;
+         }
+         out[i*C+chan] = WORD2INT(tmp_out);
+         st->memE[chan] = tmp_out;
      }
-      out[i] = (spx_int16_t)tmp_out;
-      st->memE = tmp_out;
-   }
-   
+
 #ifdef DUMP_ECHO_CANCEL_DATA
-   dump_audio(in, far_end, out, st->frame_size);
+      dump_audio(in, far_end, out, st->frame_size);
 #endif
-   
-   /* Compute error signal (filter update version) */ 
-   for (i=0;i<st->frame_size;i++)
-   {
-      st->e[i+st->frame_size] = st->e[i];
-      st->e[i] = 0;
+
+      /* Compute error signal (filter update version) */
+      for (i=0;i<st->frame_size;i++)
+      {
+         st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
+         st->e[chan*N+i] = 0;
+      }
+
+      /* Compute a bunch of correlations */
+      /* FIXME: bad merge */
+      Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
+      Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
+      Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
+
+      /* Convert error to frequency domain */
+      spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
+      for (i=0;i<st->frame_size;i++)
+         st->y[i+chan*N] = 0;
+      spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
+
+      /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
+      power_spectrum_accum(st->E+chan*N, st->Rf, N);
+      power_spectrum_accum(st->Y+chan*N, st->Yf, N);
+
   }

-   /* Compute a bunch of correlations */
-   Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size);
-   Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
-   Sdd = mdf_inner_prod(st->input, st->input, st->frame_size);
-   
   /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
-   
+
   /* Do some sanity check */
   if (!(Syy>=0 && Sxx>=0 && See >= 0)
 #ifndef FIXED_POINT
@ -921,7 +1019,7 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
   {
      /* Things have gone really bad */
      st->screwed_up += 50;
-      for (i=0;i<st->frame_size;i++)
+      for (i=0;i<st->frame_size*C;i++)
         out[i] = 0;
   } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
   {
@ -941,35 +1039,16 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
   /* Add a small noise floor to make sure not to have problems when dividing */
   See = MAX32(See, SHR32(MULT16_16(N, 100),6));

-   /* Convert error to frequency domain */
-   spx_fft(st->fft_table, st->e, st->E);
-   for (i=0;i<st->frame_size;i++)
-      st->y[i] = 0;
-   spx_fft(st->fft_table, st->y, st->Y);
+   for (speak = 0; speak < K; speak++)
+   {
+      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
+      power_spectrum_accum(st->X+speak*N, st->Xf, N);
+   }
+

-   /* Compute power spectrum of far end (X), error (E) and filter response (Y) */
-   power_spectrum(st->E, st->Rf, N);
-   power_spectrum(st->Y, st->Yf, N);
-   power_spectrum(st->X, st->Xf, N);
-   
   /* Smooth far end energy estimate over time */
   for (j=0;j<=st->frame_size;j++)
      st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
-   
-   /* Enable this to compute the power based only on the tail (would need to compute more 
-      efficiently to make this really useful */
-   if (0)
-   {
-      float scale2 = .5f/M;
-      for (j=0;j<=st->frame_size;j++)
-         st->power[j] = 100;
-      for (i=0;i<M;i++)
-      {
-         power_spectrum(&st->X[i*N], st->Xf, N);
-         for (j=0;j<=st->frame_size;j++)
-            st->power[j] += scale2*st->Xf[j];
-      }
-   }

   /* Compute filtered spectra and (cross-)correlations */
   for (j=st->frame_size;j>=0;j--)
@ -987,7 +1066,7 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
      st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
 #endif
   }
-   
+
   Pyy = FLOAT_SQRT(Pyy);
   Pey = FLOAT_DIVU(Pey,Pyy);

@ -1015,7 +1094,7 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
   else
      st->leak_estimate = SHL16(st->leak_estimate,1);
   /*printf ("%f\n", st->leak_estimate);*/
-   
+
   /* Compute Residual to Error Ratio */
 #ifdef FIXED_POINT
   tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
@ -1071,7 +1150,7 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
      /* Temporary adaption rate if filter is not yet adapted enough */
      spx_word16_t adapt_rate=0;

-      if (Sxx > SHR32(MULT16_16(N, 1000),6)) 
+      if (Sxx > SHR32(MULT16_16(N, 1000),6))
      {
         tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
 #ifdef FIXED_POINT
@ -1091,12 +1170,12 @@ void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const sp
      st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
   }

-   /* Save residual echo so it can be used by the nonlinear processor */
+   /* FIXME: MC conversion required */
+      for (i=0;i<st->frame_size;i++)
+         st->last_y[i] = st->last_y[st->frame_size+i];
   if (st->adapted)
   {
      /* If the filter is adapted, take the filtered echo */
-      for (i=0;i<st->frame_size;i++)
-         st->last_y[i] = st->last_y[st->frame_size+i];
      for (i=0;i<st->frame_size;i++)
         st->last_y[st->frame_size+i] = in[i]-out[i];
   } else {
@ -1113,17 +1192,17 @@ void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, in
   int i;
   spx_word16_t leak2;
   int N;
-   
+
   N = st->window_size;

   /* Apply hanning window (should pre-compute it)*/
   for (i=0;i<N;i++)
      st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
-      
+
   /* Compute power spectrum of the echo */
   spx_fft(st->fft_table, st->y, st->Y);
   power_spectrum(st->Y, residual_echo, N);
-      
+
 #ifdef FIXED_POINT
   if (st->leak_estimate > 16383)
      leak2 = 32767;
@ -1138,14 +1217,14 @@ void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, in
   /* Estimate residual echo */
   for (i=0;i<=st->frame_size;i++)
      residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
-   
+
 }

-int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
+EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
 {
   switch(request)
   {
-      
+
      case SPEEX_ECHO_GET_FRAME_SIZE:
         (*(int*)ptr) = st->frame_size;
         break;
@ -1169,6 +1248,29 @@ int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
      case SPEEX_ECHO_GET_SAMPLING_RATE:
         (*(int*)ptr) = st->sampling_rate;
         break;
+      case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
+         /*FIXME: Implement this for multiple channels */
+         *((spx_int32_t *)ptr) = st->M * st->frame_size;
+         break;
+      case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
+      {
+         int M = st->M, N = st->window_size, n = st->frame_size, i, j;
+         spx_int32_t *filt = (spx_int32_t *) ptr;
+         for(j=0;j<M;j++)
+         {
+            /*FIXME: Implement this for multiple channels */
+#ifdef FIXED_POINT
+            for (i=0;i<N;i++)
+               st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
+            spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
+#else
+            spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
+#endif
+            for(i=0;i<n;i++)
+               filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
+         }
+      }
+         break;
      default:
         speex_warning_int("Unknown speex_echo_ctl request: ", request);
         return -1;
--- a/third_party/speex/libspeex/misc_bfin.h
+++ b/third_party/speex/libspeex/misc_bfin.h
@ -1,25 +1,25 @@
 /* Copyright (C) 2005 Analog Devices */
 /**
   @file misc_bfin.h
-   @author Jean-Marc Valin 
+   @author Jean-Marc Valin
   @brief Various compatibility routines for Speex (Blackfin version)
 */
 /*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
-   
+
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
-   
+
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
-   
+
   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.
-   
+
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -33,6 +33,8 @@
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

+#include "bfin.h"
+
 #define OVERRIDE_SPEEX_MOVE
 void *speex_move (void *dest, void *src, int n)
 {
@ -48,7 +50,7 @@ void *speex_move (void *dest, void *src, int n)
         "[%1++] = R0;\n\t"
   : "=a" (src), "=a" (dest)
   : "a" ((n>>2)-1), "0" (src), "1" (dest)
-   : "R0", "I0", "L0", "memory"
+   : "R0", "I0", "L0", "memory" BFIN_HWLOOP0_REGS
         );
   return dest;
 }
--- a/third_party/speex/libspeex/os_support.h
+++ b/third_party/speex/libspeex/os_support.h
@ -1,5 +1,5 @@
 /* Copyright (C) 2007 Jean-Marc Valin
-      
+
   File: os_support.h
   This is the (tiny) OS abstraction layer. Aside from math.h, this is the
   only place where system headers are allowed.
@ -45,12 +45,12 @@
 #include "os_support_custom.h"
 #endif

-/** Speex wrapper for calloc. To do your own dynamic allocation, all you need to do is replace this function, speex_realloc and speex_free 
+/** Speex wrapper for calloc. To do your own dynamic allocation, all you need to do is replace this function, speex_realloc and speex_free
    NOTE: speex_alloc needs to CLEAR THE MEMORY */
 #ifndef OVERRIDE_SPEEX_ALLOC
 static inline void *speex_alloc (int size)
 {
-   /* WARNING: this is not equivalent to malloc(). If you want to use malloc() 
+   /* WARNING: this is not equivalent to malloc(). If you want to use malloc()
      or your own allocator, YOU NEED TO CLEAR THE MEMORY ALLOCATED. Otherwise
      you will experience strange bugs */
   return calloc(size,1);
@ -90,18 +90,18 @@ static inline void speex_free_scratch (void *ptr)
 }
 #endif

-/** Copy n bytes of memory from src to dst. The 0* term provides compile-time type checking  */
+/** Copy n elements from src to dst. The 0* term provides compile-time type checking  */
 #ifndef OVERRIDE_SPEEX_COPY
 #define SPEEX_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
 #endif

-/** Copy n bytes of memory from src to dst, allowing overlapping regions. The 0* term 
+/** Copy n elements from src to dst, allowing overlapping regions. The 0* term
    provides compile-time type checking */
 #ifndef OVERRIDE_SPEEX_MOVE
 #define SPEEX_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
 #endif

-/** Set n bytes of memory to value of c, starting at address s */
+/** For n elements worth of memory, set every byte to the value of c, starting at address dst */
 #ifndef OVERRIDE_SPEEX_MEMSET
 #define SPEEX_MEMSET(dst, c, n) (memset((dst), (c), (n)*sizeof(*(dst))))
 #endif
--- a/third_party/speex/libspeex/resample.c
+++ b/third_party/speex/libspeex/resample.c
@ -1,6 +1,6 @@
 /* Copyright (C) 2007-2008 Jean-Marc Valin
   Copyright (C) 2008      Thorvald Natvig
-      
+
   File: resample.c
   Arbitrary resampling code

@ -38,22 +38,22 @@
      - Low memory requirement
      - Good *perceptual* quality (and not best SNR)

-   Warning: This resampler is relatively new. Although I think I got rid of 
+   Warning: This resampler is relatively new. Although I think I got rid of
   all the major bugs and I don't expect the API to change anymore, there
   may be something I've missed. So use with caution.

   This algorithm is based on this original resampling algorithm:
   Smith, Julius O. Digital Audio Resampling Home Page
-   Center for Computer Research in Music and Acoustics (CCRMA), 
+   Center for Computer Research in Music and Acoustics (CCRMA),
   Stanford University, 2007.
-   Web published at http://www-ccrma.stanford.edu/~jos/resample/.
+   Web published at https://ccrma.stanford.edu/~jos/resample/.

-   There is one main difference, though. This resampler uses cubic 
+   There is one main difference, though. This resampler uses cubic
   interpolation instead of linear interpolation in the above paper. This
   makes the table much smaller and makes it possible to compute that table
-   on a per-stream basis. In turn, being able to tweak the table for each 
-   stream makes it possible to both reduce complexity on simple ratios 
-   (e.g. 2/3), and get rid of the rounding operations in the inner loop. 
+   on a per-stream basis. In turn, being able to tweak the table for each
+   stream makes it possible to both reduce complexity on simple ratios
+   (e.g. 2/3), and get rid of the rounding operations in the inner loop.
   The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
 */

@ -63,31 +63,28 @@

 #ifdef OUTSIDE_SPEEX
 #include <stdlib.h>
-static void *speex_alloc (int size) {return calloc(size,1);}
-static void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);}
-static void speex_free (void *ptr) {free(ptr);}
+static void *speex_alloc(int size) {return calloc(size,1);}
+static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);}
+static void speex_free(void *ptr) {free(ptr);}
+#ifndef EXPORT
+#define EXPORT
+#endif
 #include "speex_resampler.h"
 #include "arch.h"
 #else /* OUTSIDE_SPEEX */
-               
+
 #include "speex/speex_resampler.h"
 #include "arch.h"
 #include "os_support.h"
 #endif /* OUTSIDE_SPEEX */

-#include "stack_alloc.h"
 #include <math.h>
+#include <limits.h>

 #ifndef M_PI
-#define M_PI 3.14159263
+#define M_PI 3.14159265358979323846
 #endif

-#ifdef FIXED_POINT
-#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))  
-#else
-#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))  
-#endif
-               
 #define IMAX(a,b) ((a) > (b) ? (a) : (b))
 #define IMIN(a,b) ((a) < (b) ? (a) : (b))

@ -95,10 +92,18 @@ static void speex_free (void *ptr) {free(ptr);}
 #define NULL 0
 #endif

-#ifdef _USE_SSE
+#ifndef UINT32_MAX
+#define UINT32_MAX 4294967295U
+#endif
+
+#ifdef USE_SSE
 #include "resample_sse.h"
 #endif

+#ifdef USE_NEON
+#include "resample_neon.h"
+#endif
+
 /* Numer of elements to allocate on the stack */
 #ifdef VAR_ARRAYS
 #define FIXED_STACK_ALLOC 8192
@ -113,7 +118,7 @@ struct SpeexResamplerState_ {
   spx_uint32_t out_rate;
   spx_uint32_t num_rate;
   spx_uint32_t den_rate;
-   
+
   int    quality;
   spx_uint32_t nb_channels;
   spx_uint32_t filt_len;
@ -125,22 +130,22 @@ struct SpeexResamplerState_ {
   spx_uint32_t oversample;
   int          initialised;
   int          started;
-   
+
   /* These are per-channel */
   spx_int32_t  *last_sample;
   spx_uint32_t *samp_frac_num;
   spx_uint32_t *magic_samples;
-   
+
   spx_word16_t *mem;
   spx_word16_t *sinc_table;
   spx_uint32_t sinc_table_length;
   resampler_basic_func resampler_ptr;
-         
+
   int    in_stride;
   int    out_stride;
 } ;

-static double kaiser12_table[68] = {
+static const double kaiser12_table[68] = {
   0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076,
   0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014,
   0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601,
@ -154,7 +159,7 @@ static double kaiser12_table[68] = {
   0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734,
   0.00001000, 0.00000000};
 /*
-static double kaiser12_table[36] = {
+static const double kaiser12_table[36] = {
   0.99440475, 1.00000000, 0.99440475, 0.97779076, 0.95066529, 0.91384741,
   0.86843014, 0.81573067, 0.75723148, 0.69451601, 0.62920216, 0.56287762,
   0.49704014, 0.43304576, 0.37206735, 0.31506490, 0.26276832, 0.21567274,
@ -162,7 +167,7 @@ static double kaiser12_table[36] = {
   0.03111947, 0.02127838, 0.01402878, 0.00886058, 0.00531256, 0.00298291,
   0.00153438, 0.00069463, 0.00025272, 0.0000527734, 0.00000500, 0.00000000};
 */
-static double kaiser10_table[36] = {
+static const double kaiser10_table[36] = {
   0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446,
   0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347,
   0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962,
@ -170,15 +175,15 @@ static double kaiser10_table[36] = {
   0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739,
   0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000};

-static double kaiser8_table[36] = {
+static const double kaiser8_table[36] = {
   0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200,
   0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126,
   0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272,
   0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
   0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
   0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000};
-   
-static double kaiser6_table[36] = {
+
+static const double kaiser6_table[36] = {
   0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
   0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565,
   0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561,
@ -187,32 +192,30 @@ static double kaiser6_table[36] = {
   0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000};

 struct FuncDef {
-   double *table;
+   const double *table;
   int oversample;
 };
-      
-static struct FuncDef _KAISER12 = {kaiser12_table, 64};
-#define KAISER12 (&_KAISER12)
-/*static struct FuncDef _KAISER12 = {kaiser12_table, 32};
-#define KAISER12 (&_KAISER12)*/
-static struct FuncDef _KAISER10 = {kaiser10_table, 32};
-#define KAISER10 (&_KAISER10)
-static struct FuncDef _KAISER8 = {kaiser8_table, 32};
-#define KAISER8 (&_KAISER8)
-static struct FuncDef _KAISER6 = {kaiser6_table, 32};
-#define KAISER6 (&_KAISER6)
+
+static const struct FuncDef kaiser12_funcdef = {kaiser12_table, 64};
+#define KAISER12 (&kaiser12_funcdef)
+static const struct FuncDef kaiser10_funcdef = {kaiser10_table, 32};
+#define KAISER10 (&kaiser10_funcdef)
+static const struct FuncDef kaiser8_funcdef = {kaiser8_table, 32};
+#define KAISER8 (&kaiser8_funcdef)
+static const struct FuncDef kaiser6_funcdef = {kaiser6_table, 32};
+#define KAISER6 (&kaiser6_funcdef)

 struct QualityMapping {
   int base_length;
   int oversample;
   float downsample_bandwidth;
   float upsample_bandwidth;
-   struct FuncDef *window_func;
+   const struct FuncDef *window_func;
 };


 /* This table maps conversion quality to internal parameters. There are two
-   reasons that explain why the up-sampling bandwidth is larger than the 
+   reasons that explain why the up-sampling bandwidth is larger than the
   down-sampling bandwidth:
   1) When up-sampling, we can assume that the spectrum is already attenuated
      close to the Nyquist rate (from an A/D or a previous resampling filter)
@ -234,11 +237,11 @@ static const struct QualityMapping quality_map[11] = {
   {256, 32, 0.975f, 0.975f, KAISER12}, /* Q10 */ /* 96.6% cutoff (~100 dB stop) 10 */
 };
 /*8,24,40,56,80,104,128,160,200,256,320*/
-static double compute_func(float x, struct FuncDef *func)
+static double compute_func(float x, const struct FuncDef *func)
 {
   float y, frac;
   double interp[4];
-   int ind; 
+   int ind;
   y = x*func->oversample;
   ind = (int)floor(y);
   frac = (y-ind);
@ -249,7 +252,7 @@ static double compute_func(float x, struct FuncDef *func)
   interp[0] = -0.3333333333*frac + 0.5*(frac*frac) - 0.1666666667*(frac*frac*frac);
   /* Just to make sure we don't have rounding problems */
   interp[1] = 1.f-interp[3]-interp[2]-interp[0];
-   
+
   /*sum = frac*accum[1] + (1-frac)*accum[2];*/
   return interp[0]*func->table[ind] + interp[1]*func->table[ind+1] + interp[2]*func->table[ind+2] + interp[3]*func->table[ind+3];
 }
@ -269,7 +272,7 @@ int main(int argc, char **argv)

 #ifdef FIXED_POINT
 /* The slow way of computing a sinc for the table. Should improve that some day */
-static spx_word16_t sinc(float cutoff, float x, int N, struct FuncDef *window_func)
+static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef *window_func)
 {
   /*fprintf (stderr, "%f ", x);*/
   float xx = x * cutoff;
@ -282,7 +285,7 @@ static spx_word16_t sinc(float cutoff, float x, int N, struct FuncDef *window_fu
 }
 #else
 /* The slow way of computing a sinc for the table. Should improve that some day */
-static spx_word16_t sinc(float cutoff, float x, int N, struct FuncDef *window_func)
+static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef *window_func)
 {
   /*fprintf (stderr, "%f ", x);*/
   float xx = x * cutoff;
@ -337,28 +340,35 @@ static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t c
   const int frac_advance = st->frac_advance;
   const spx_uint32_t den_rate = st->den_rate;
   spx_word32_t sum;
-   int j;

   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
   {
-      const spx_word16_t *sinc = & sinc_table[samp_frac_num*N];
+      const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
      const spx_word16_t *iptr = & in[last_sample];

 #ifndef OVERRIDE_INNER_PRODUCT_SINGLE
-      float accum[4] = {0,0,0,0};
+      int j;
+      sum = 0;
+      for(j=0;j<N;j++) sum += MULT16_16(sinct[j], iptr[j]);

+/*    This code is slower on most DSPs which have only 2 accumulators.
+      Plus this this forces truncation to 32 bits and you lose the HW guard bits.
+      I think we can trust the compiler and let it vectorize and/or unroll itself.
+      spx_word32_t accum[4] = {0,0,0,0};
      for(j=0;j<N;j+=4) {
-        accum[0] += sinc[j]*iptr[j];
-        accum[1] += sinc[j+1]*iptr[j+1];
-        accum[2] += sinc[j+2]*iptr[j+2];
-        accum[3] += sinc[j+3]*iptr[j+3];
+        accum[0] += MULT16_16(sinct[j], iptr[j]);
+        accum[1] += MULT16_16(sinct[j+1], iptr[j+1]);
+        accum[2] += MULT16_16(sinct[j+2], iptr[j+2]);
+        accum[3] += MULT16_16(sinct[j+3], iptr[j+3]);
      }
      sum = accum[0] + accum[1] + accum[2] + accum[3];
+*/
+      sum = SATURATE32PSHR(sum, 15, 32767);
 #else
-      sum = inner_product_single(sinc, iptr, N);
+      sum = inner_product_single(sinct, iptr, N);
 #endif

-      out[out_stride * out_sample++] = PSHR32(sum, 15);
+      out[out_stride * out_sample++] = sum;
      last_sample += int_advance;
      samp_frac_num += frac_advance;
      if (samp_frac_num >= den_rate)
@ -388,25 +398,25 @@ static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t c
   const int frac_advance = st->frac_advance;
   const spx_uint32_t den_rate = st->den_rate;
   double sum;
-   int j;

   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
   {
-      const spx_word16_t *sinc = & sinc_table[samp_frac_num*N];
+      const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
      const spx_word16_t *iptr = & in[last_sample];

 #ifndef OVERRIDE_INNER_PRODUCT_DOUBLE
+      int j;
      double accum[4] = {0,0,0,0};

      for(j=0;j<N;j+=4) {
-        accum[0] += sinc[j]*iptr[j];
-        accum[1] += sinc[j+1]*iptr[j+1];
-        accum[2] += sinc[j+2]*iptr[j+2];
-        accum[3] += sinc[j+3]*iptr[j+3];
+        accum[0] += sinct[j]*iptr[j];
+        accum[1] += sinct[j+1]*iptr[j+1];
+        accum[2] += sinct[j+2]*iptr[j+2];
+        accum[3] += sinct[j+3]*iptr[j+3];
      }
      sum = accum[0] + accum[1] + accum[2] + accum[3];
 #else
-      sum = inner_product_double(sinc, iptr, N);
+      sum = inner_product_double(sinct, iptr, N);
 #endif

      out[out_stride * out_sample++] = PSHR32(sum, 15);
@ -435,7 +445,6 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
   const int int_advance = st->int_advance;
   const int frac_advance = st->frac_advance;
   const spx_uint32_t den_rate = st->den_rate;
-   int j;
   spx_word32_t sum;

   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
@ -452,6 +461,7 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3


 #ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
+      int j;
      spx_word32_t accum[4] = {0,0,0,0};

      for(j=0;j<N;j++) {
@ -463,13 +473,14 @@ static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint3
      }

      cubic_coef(frac, interp);
-      sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
+      sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
+      sum = SATURATE32PSHR(sum, 15, 32767);
 #else
      cubic_coef(frac, interp);
      sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
 #endif
-      
-      out[out_stride * out_sample++] = PSHR32(sum,15);
+
+      out[out_stride * out_sample++] = sum;
      last_sample += int_advance;
      samp_frac_num += frac_advance;
      if (samp_frac_num >= den_rate)
@ -497,7 +508,6 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
   const int int_advance = st->int_advance;
   const int frac_advance = st->frac_advance;
   const spx_uint32_t den_rate = st->den_rate;
-   int j;
   spx_word32_t sum;

   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
@ -514,6 +524,7 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3


 #ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
+      int j;
      double accum[4] = {0,0,0,0};

      for(j=0;j<N;j++) {
@ -530,7 +541,7 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
      cubic_coef(frac, interp);
      sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
 #endif
-      
+
      out[out_stride * out_sample++] = PSHR32(sum,15);
      last_sample += int_advance;
      samp_frac_num += frac_advance;
@ -547,22 +558,71 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
 }
 #endif

-static void update_filter(SpeexResamplerState *st)
+/* This resampler is used to produce zero output in situations where memory
+   for the filter could not be allocated.  The expected numbers of input and
+   output samples are still processed so that callers failing to check error
+   codes are not surprised, possibly getting into infinite loops. */
+static int resampler_basic_zero(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
 {
-   spx_uint32_t old_length;
-   
-   old_length = st->filt_len;
+   int out_sample = 0;
+   int last_sample = st->last_sample[channel_index];
+   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
+   const int out_stride = st->out_stride;
+   const int int_advance = st->int_advance;
+   const int frac_advance = st->frac_advance;
+   const spx_uint32_t den_rate = st->den_rate;
+
+   (void)in;
+   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
+   {
+      out[out_stride * out_sample++] = 0;
+      last_sample += int_advance;
+      samp_frac_num += frac_advance;
+      if (samp_frac_num >= den_rate)
+      {
+         samp_frac_num -= den_rate;
+         last_sample++;
+      }
+   }
+
+   st->last_sample[channel_index] = last_sample;
+   st->samp_frac_num[channel_index] = samp_frac_num;
+   return out_sample;
+}
+
+static int multiply_frac(spx_uint32_t *result, spx_uint32_t value, spx_uint32_t num, spx_uint32_t den)
+{
+   spx_uint32_t major = value / den;
+   spx_uint32_t remain = value % den;
+   /* TODO: Could use 64 bits operation to check for overflow. But only guaranteed in C99+ */
+   if (remain > UINT32_MAX / num || major > UINT32_MAX / num
+       || major * num > UINT32_MAX - remain * num / den)
+      return RESAMPLER_ERR_OVERFLOW;
+   *result = remain * num / den + major * num;
+   return RESAMPLER_ERR_SUCCESS;
+}
+
+static int update_filter(SpeexResamplerState *st)
+{
+   spx_uint32_t old_length = st->filt_len;
+   spx_uint32_t old_alloc_size = st->mem_alloc_size;
+   int use_direct;
+   spx_uint32_t min_sinc_table_length;
+   spx_uint32_t min_alloc_size;
+
+   st->int_advance = st->num_rate/st->den_rate;
+   st->frac_advance = st->num_rate%st->den_rate;
   st->oversample = quality_map[st->quality].oversample;
   st->filt_len = quality_map[st->quality].base_length;
-   
+
   if (st->num_rate > st->den_rate)
   {
      /* down-sampling */
      st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate;
-      /* FIXME: divide the numerator and denominator by a certain amount if they're too large */
-      st->filt_len = st->filt_len*st->num_rate / st->den_rate;
-      /* Round down to make sure we have a multiple of 4 */
-      st->filt_len &= (~0x3);
+      if (multiply_frac(&st->filt_len,st->filt_len,st->num_rate,st->den_rate) != RESAMPLER_ERR_SUCCESS)
+         goto fail;
+      /* Round up to make sure we have a multiple of 8 for SSE */
+      st->filt_len = ((st->filt_len-1)&(~0x7))+8;
      if (2*st->den_rate < st->num_rate)
         st->oversample >>= 1;
      if (4*st->den_rate < st->num_rate)
@ -577,18 +637,37 @@ static void update_filter(SpeexResamplerState *st)
      /* up-sampling */
      st->cutoff = quality_map[st->quality].upsample_bandwidth;
   }
-   
+
+#ifdef RESAMPLE_FULL_SINC_TABLE
+   use_direct = 1;
+   if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len)
+      goto fail;
+#else
   /* Choose the resampling type that requires the least amount of memory */
-   if (st->den_rate <= st->oversample)
+   use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
+                && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
+#endif
+   if (use_direct)
+   {
+      min_sinc_table_length = st->filt_len*st->den_rate;
+   } else {
+      if ((INT_MAX/sizeof(spx_word16_t)-8)/st->oversample < st->filt_len)
+         goto fail;
+
+      min_sinc_table_length = st->filt_len*st->oversample+8;
+   }
+   if (st->sinc_table_length < min_sinc_table_length)
+   {
+      spx_word16_t *sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,min_sinc_table_length*sizeof(spx_word16_t));
+      if (!sinc_table)
+         goto fail;
+
+      st->sinc_table = sinc_table;
+      st->sinc_table_length = min_sinc_table_length;
+   }
+   if (use_direct)
   {
      spx_uint32_t i;
-      if (!st->sinc_table)
-         st->sinc_table = (spx_word16_t *)speex_alloc(st->filt_len*st->den_rate*sizeof(spx_word16_t));
-      else if (st->sinc_table_length < st->filt_len*st->den_rate)
-      {
-         st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,st->filt_len*st->den_rate*sizeof(spx_word16_t));
-         st->sinc_table_length = st->filt_len*st->den_rate;
-      }
      for (i=0;i<st->den_rate;i++)
      {
         spx_int32_t j;
@ -608,13 +687,6 @@ static void update_filter(SpeexResamplerState *st)
      /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff);*/
   } else {
      spx_int32_t i;
-      if (!st->sinc_table)
-         st->sinc_table = (spx_word16_t *)speex_alloc((st->filt_len*st->oversample+8)*sizeof(spx_word16_t));
-      else if (st->sinc_table_length < st->filt_len*st->oversample+8)
-      {
-         st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,(st->filt_len*st->oversample+8)*sizeof(spx_word16_t));
-         st->sinc_table_length = st->filt_len*st->oversample+8;
-      }
      for (i=-4;i<(spx_int32_t)(st->oversample*st->filt_len+4);i++)
         st->sinc_table[i+4] = sinc(st->cutoff,(i/(float)st->oversample - st->filt_len/2), st->filt_len, quality_map[st->quality].window_func);
 #ifdef FIXED_POINT
@ -627,51 +699,47 @@ static void update_filter(SpeexResamplerState *st)
 #endif
      /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff);*/
   }
-   st->int_advance = st->num_rate/st->den_rate;
-   st->frac_advance = st->num_rate%st->den_rate;

-   
   /* Here's the place where we update the filter memory to take into account
      the change in filter length. It's probably the messiest part of the code
      due to handling of lots of corner cases. */
-   if (!st->mem)
+
+   /* Adding buffer_size to filt_len won't overflow here because filt_len
+      could be multiplied by sizeof(spx_word16_t) above. */
+   min_alloc_size = st->filt_len-1 + st->buffer_size;
+   if (min_alloc_size > st->mem_alloc_size)
+   {
+      spx_word16_t *mem;
+      if (INT_MAX/sizeof(spx_word16_t)/st->nb_channels < min_alloc_size)
+          goto fail;
+      else if (!(mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*min_alloc_size * sizeof(*mem))))
+          goto fail;
+
+      st->mem = mem;
+      st->mem_alloc_size = min_alloc_size;
+   }
+   if (!st->started)
   {
      spx_uint32_t i;
-      st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
-      st->mem = (spx_word16_t*)speex_alloc(st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
-      for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
-         st->mem[i] = 0;
-      /*speex_warning("init filter");*/
-   } else if (!st->started)
-   {
-      spx_uint32_t i;
-      st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
-      st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
      for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
         st->mem[i] = 0;
      /*speex_warning("reinit filter");*/
   } else if (st->filt_len > old_length)
   {
-      spx_int32_t i;
+      spx_uint32_t i;
      /* Increase the filter length */
      /*speex_warning("increase filter size");*/
-      int old_alloc_size = st->mem_alloc_size;
-      if ((st->filt_len-1 + st->buffer_size) > st->mem_alloc_size)
+      for (i=st->nb_channels;i--;)
      {
-         st->mem_alloc_size = st->filt_len-1 + st->buffer_size;
-         st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*st->mem_alloc_size * sizeof(spx_word16_t));
-      }
-      for (i=st->nb_channels-1;i>=0;i--)
-      {
-         spx_int32_t j;
+         spx_uint32_t j;
         spx_uint32_t olen = old_length;
         /*if (st->magic_samples[i])*/
         {
            /* Try and remove the magic samples as if nothing had happened */
-            
+
            /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
            olen = old_length + 2*st->magic_samples[i];
-            for (j=old_length-2+st->magic_samples[i];j>=0;j--)
+            for (j=old_length-1+st->magic_samples[i];j--;)
               st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]] = st->mem[i*old_alloc_size+j];
            for (j=0;j<st->magic_samples[i];j++)
               st->mem[i*st->mem_alloc_size+j] = 0;
@ -712,7 +780,15 @@ static void update_filter(SpeexResamplerState *st)
         st->magic_samples[i] += old_magic;
      }
   }
+   return RESAMPLER_ERR_SUCCESS;

+fail:
+   st->resampler_ptr = resampler_basic_zero;
+   /* st->mem may still contain consumed input samples for the filter.
+      Restore filt_len so that filt_len - 1 still points to the position after
+      the last of these samples. */
+   st->filt_len = old_length;
+   return RESAMPLER_ERR_ALLOC_FAILED;
 }

 EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
@ -722,15 +798,22 @@ EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_u

 EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
 {
-   spx_uint32_t i;
   SpeexResamplerState *st;
-   if (quality > 10 || quality < 0)
+   int filter_err;
+
+   if (nb_channels == 0 || ratio_num == 0 || ratio_den == 0 || quality > 10 || quality < 0)
   {
      if (err)
         *err = RESAMPLER_ERR_INVALID_ARG;
      return NULL;
   }
   st = (SpeexResamplerState *)speex_alloc(sizeof(SpeexResamplerState));
+   if (!st)
+   {
+      if (err)
+         *err = RESAMPLER_ERR_ALLOC_FAILED;
+      return NULL;
+   }
   st->initialised = 0;
   st->started = 0;
   st->in_rate = 0;
@ -743,40 +826,43 @@ EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels,
   st->filt_len = 0;
   st->mem = 0;
   st->resampler_ptr = 0;
-         
+
   st->cutoff = 1.f;
   st->nb_channels = nb_channels;
   st->in_stride = 1;
   st->out_stride = 1;
-   
-#ifdef FIXED_POINT
+
   st->buffer_size = 160;
-#else
-   st->buffer_size = 160;
-#endif
-   
+
   /* Per channel data */
-   st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(int));
-   st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(int));
-   st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(int));
-   for (i=0;i<nb_channels;i++)
-   {
-      st->last_sample[i] = 0;
-      st->magic_samples[i] = 0;
-      st->samp_frac_num[i] = 0;
-   }
+   if (!(st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(spx_int32_t))))
+      goto fail;
+   if (!(st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t))))
+      goto fail;
+   if (!(st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t))))
+      goto fail;

   speex_resampler_set_quality(st, quality);
   speex_resampler_set_rate_frac(st, ratio_num, ratio_den, in_rate, out_rate);

-   
-   update_filter(st);
-   
-   st->initialised = 1;
+   filter_err = update_filter(st);
+   if (filter_err == RESAMPLER_ERR_SUCCESS)
+   {
+      st->initialised = 1;
+   } else {
+      speex_resampler_destroy(st);
+      st = NULL;
+   }
   if (err)
-      *err = RESAMPLER_ERR_SUCCESS;
+      *err = filter_err;

   return st;
+
+fail:
+   if (err)
+      *err = RESAMPLER_ERR_ALLOC_FAILED;
+   speex_resampler_destroy(st);
+   return NULL;
 }

 EXPORT void speex_resampler_destroy(SpeexResamplerState *st)
@ -796,17 +882,17 @@ static int speex_resampler_process_native(SpeexResamplerState *st, spx_uint32_t
   int out_sample = 0;
   spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
   spx_uint32_t ilen;
-   
+
   st->started = 1;
-   
+
   /* Call the right resampler through the function ptr */
   out_sample = st->resampler_ptr(st, channel_index, mem, in_len, out, out_len);
-   
+
   if (st->last_sample[channel_index] < (spx_int32_t)*in_len)
      *in_len = st->last_sample[channel_index];
   *out_len = out_sample;
   st->last_sample[channel_index] -= *in_len;
-   
+
   ilen = *in_len;

   for(j=0;j<N-1;++j)
@ -819,11 +905,11 @@ static int speex_resampler_magic(SpeexResamplerState *st, spx_uint32_t channel_i
   spx_uint32_t tmp_in_len = st->magic_samples[channel_index];
   spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
   const int N = st->filt_len;
-   
+
   speex_resampler_process_native(st, channel_index, &tmp_in_len, *out, &out_len);

   st->magic_samples[channel_index] -= tmp_in_len;
-   
+
   /* If we couldn't process all "magic" input samples, save the rest for next time */
   if (st->magic_samples[channel_index])
   {
@ -849,13 +935,13 @@ EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t c
   const spx_uint32_t xlen = st->mem_alloc_size - filt_offs;
   const int istride = st->in_stride;

-   if (st->magic_samples[channel_index]) 
+   if (st->magic_samples[channel_index])
      olen -= speex_resampler_magic(st, channel_index, &out, olen);
   if (! st->magic_samples[channel_index]) {
      while (ilen && olen) {
        spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
        spx_uint32_t ochunk = olen;
- 
+
        if (in) {
           for(j=0;j<ichunk;++j)
              x[j+filt_offs]=in[j*istride];
@ -873,7 +959,7 @@ EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t c
   }
   *in_len -= ilen;
   *out_len -= olen;
-   return RESAMPLER_ERR_SUCCESS;
+   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }

 #ifdef FIXED_POINT
@ -891,15 +977,14 @@ EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t cha
   const spx_uint32_t xlen = st->mem_alloc_size - (st->filt_len - 1);
 #ifdef VAR_ARRAYS
   const unsigned int ylen = (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC;
-   VARDECL(spx_word16_t *ystack);
-   ALLOC(ystack, ylen, spx_word16_t);
+   spx_word16_t ystack[ylen];
 #else
   const unsigned int ylen = FIXED_STACK_ALLOC;
   spx_word16_t ystack[FIXED_STACK_ALLOC];
 #endif

   st->out_stride = 1;
-   
+
   while (ilen && olen) {
     spx_word16_t *y = ystack;
     spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
@ -936,7 +1021,7 @@ EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t cha
 #else
        out[j*ostride_save] = WORD2INT(ystack[j]);
 #endif
-     
+
     ilen -= ichunk;
     olen -= ochunk;
     out += (ochunk+omagic) * ostride_save;
@ -947,20 +1032,22 @@ EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t cha
   *in_len -= ilen;
   *out_len -= olen;

-   return RESAMPLER_ERR_SUCCESS;
+   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }

 EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
 {
   spx_uint32_t i;
   int istride_save, ostride_save;
-   spx_uint32_t bak_len = *out_len;
+   spx_uint32_t bak_out_len = *out_len;
+   spx_uint32_t bak_in_len = *in_len;
   istride_save = st->in_stride;
   ostride_save = st->out_stride;
   st->in_stride = st->out_stride = st->nb_channels;
   for (i=0;i<st->nb_channels;i++)
   {
-      *out_len = bak_len;
+      *out_len = bak_out_len;
+      *in_len = bak_in_len;
      if (in != NULL)
         speex_resampler_process_float(st, i, in+i, in_len, out+i, out_len);
      else
@ -968,20 +1055,22 @@ EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, co
   }
   st->in_stride = istride_save;
   st->out_stride = ostride_save;
-   return RESAMPLER_ERR_SUCCESS;
+   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }
-               
+
 EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
 {
   spx_uint32_t i;
   int istride_save, ostride_save;
-   spx_uint32_t bak_len = *out_len;
+   spx_uint32_t bak_out_len = *out_len;
+   spx_uint32_t bak_in_len = *in_len;
   istride_save = st->in_stride;
   ostride_save = st->out_stride;
   st->in_stride = st->out_stride = st->nb_channels;
   for (i=0;i<st->nb_channels;i++)
   {
-      *out_len = bak_len;
+      *out_len = bak_out_len;
+      *in_len = bak_in_len;
      if (in != NULL)
         speex_resampler_process_int(st, i, in+i, in_len, out+i, out_len);
      else
@ -989,7 +1078,7 @@ EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, cons
   }
   st->in_stride = istride_save;
   st->out_stride = ostride_save;
-   return RESAMPLER_ERR_SUCCESS;
+   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }

 EXPORT int speex_resampler_set_rate(SpeexResamplerState *st, spx_uint32_t in_rate, spx_uint32_t out_rate)
@ -1003,42 +1092,55 @@ EXPORT void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_r
   *out_rate = st->out_rate;
 }

+static inline spx_uint32_t compute_gcd(spx_uint32_t a, spx_uint32_t b)
+{
+   while (b != 0)
+   {
+      spx_uint32_t temp = a;
+
+      a = b;
+      b = temp % b;
+   }
+   return a;
+}
+
 EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate)
 {
   spx_uint32_t fact;
   spx_uint32_t old_den;
   spx_uint32_t i;
+
+   if (ratio_num == 0 || ratio_den == 0)
+      return RESAMPLER_ERR_INVALID_ARG;
+
   if (st->in_rate == in_rate && st->out_rate == out_rate && st->num_rate == ratio_num && st->den_rate == ratio_den)
      return RESAMPLER_ERR_SUCCESS;
-   
+
   old_den = st->den_rate;
   st->in_rate = in_rate;
   st->out_rate = out_rate;
   st->num_rate = ratio_num;
   st->den_rate = ratio_den;
-   /* FIXME: This is terribly inefficient, but who cares (at least for now)? */
-   for (fact=2;fact<=IMIN(st->num_rate, st->den_rate);fact++)
-   {
-      while ((st->num_rate % fact == 0) && (st->den_rate % fact == 0))
-      {
-         st->num_rate /= fact;
-         st->den_rate /= fact;
-      }
-   }
-      
+
+   fact = compute_gcd(st->num_rate, st->den_rate);
+
+   st->num_rate /= fact;
+   st->den_rate /= fact;
+
   if (old_den > 0)
   {
      for (i=0;i<st->nb_channels;i++)
      {
-         st->samp_frac_num[i]=st->samp_frac_num[i]*st->den_rate/old_den;
+         if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS)
+            return RESAMPLER_ERR_OVERFLOW;
         /* Safety net */
         if (st->samp_frac_num[i] >= st->den_rate)
            st->samp_frac_num[i] = st->den_rate-1;
      }
   }
-   
+
   if (st->initialised)
-      update_filter(st);
+      return update_filter(st);
   return RESAMPLER_ERR_SUCCESS;
 }

@ -1056,7 +1158,7 @@ EXPORT int speex_resampler_set_quality(SpeexResamplerState *st, int quality)
      return RESAMPLER_ERR_SUCCESS;
   st->quality = quality;
   if (st->initialised)
-      update_filter(st);
+      return update_filter(st);
   return RESAMPLER_ERR_SUCCESS;
 }

@ -1106,6 +1208,12 @@ EXPORT int speex_resampler_skip_zeros(SpeexResamplerState *st)
 EXPORT int speex_resampler_reset_mem(SpeexResamplerState *st)
 {
   spx_uint32_t i;
+   for (i=0;i<st->nb_channels;i++)
+   {
+      st->last_sample[i] = 0;
+      st->magic_samples[i] = 0;
+      st->samp_frac_num[i] = 0;
+   }
   for (i=0;i<st->nb_channels*(st->filt_len-1);i++)
      st->mem[i] = 0;
   return RESAMPLER_ERR_SUCCESS;
--- a/third_party/speex/libspeex/resample_neon.h
+++ b/third_party/speex/libspeex/resample_neon.h
@ -0,0 +1,339 @@
+/* Copyright (C) 2007-2008 Jean-Marc Valin
+ * Copyright (C) 2008 Thorvald Natvig
+ * Copyright (C) 2011 Texas Instruments
+ *               author Jyri Sarha
+ */
+/**
+   @file resample_neon.h
+   @brief Resampler functions (NEON version)
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   - Neither the name of the Xiph.org Foundation nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef FIXED_POINT
+#if defined(__aarch64__)
+static inline int32_t saturate_32bit_to_16bit(int32_t a) {
+    int32_t ret;
+    asm ("fmov s0, %w[a]\n"
+         "sqxtn h0, s0\n"
+         "sxtl v0.4s, v0.4h\n"
+         "fmov %w[ret], s0\n"
+         : [ret] "=r" (ret)
+         : [a] "r" (a)
+         : "v0" );
+    return ret;
+}
+#elif defined(__thumb2__)
+static inline int32_t saturate_32bit_to_16bit(int32_t a) {
+    int32_t ret;
+    asm ("ssat %[ret], #16, %[a]"
+         : [ret] "=r" (ret)
+         : [a] "r" (a)
+         : );
+    return ret;
+}
+#else
+static inline int32_t saturate_32bit_to_16bit(int32_t a) {
+    int32_t ret;
+    asm ("vmov.s32 d0[0], %[a]\n"
+         "vqmovn.s32 d0, q0\n"
+         "vmov.s16 %[ret], d0[0]\n"
+         : [ret] "=r" (ret)
+         : [a] "r" (a)
+         : "q0");
+    return ret;
+}
+#endif
+#undef WORD2INT
+#define WORD2INT(x) (saturate_32bit_to_16bit(x))
+
+#define OVERRIDE_INNER_PRODUCT_SINGLE
+/* Only works when len % 4 == 0 and len >= 4 */
+#if defined(__aarch64__)
+static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
+{
+    int32_t ret;
+    uint32_t remainder = len % 16;
+    len = len - remainder;
+
+    asm volatile ("	 cmp %w[len], #0\n"
+		  "	 b.ne 1f\n"
+		  "	 ld1 {v16.4h}, [%[b]], #8\n"
+		  "	 ld1 {v20.4h}, [%[a]], #8\n"
+		  "	 subs %w[remainder], %w[remainder], #4\n"
+		  "	 smull v0.4s, v16.4h, v20.4h\n"
+		  "      b.ne 4f\n"
+		  "	 b 5f\n"
+		  "1:"
+		  "	 ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [%[b]], #32\n"
+		  "	 ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%[a]], #32\n"
+		  "	 subs %w[len], %w[len], #16\n"
+		  "	 smull v0.4s, v16.4h, v20.4h\n"
+		  "	 smlal v0.4s, v17.4h, v21.4h\n"
+		  "	 smlal v0.4s, v18.4h, v22.4h\n"
+		  "	 smlal v0.4s, v19.4h, v23.4h\n"
+		  "	 b.eq 3f\n"
+		  "2:"
+		  "	 ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [%[b]], #32\n"
+		  "	 ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%[a]], #32\n"
+		  "	 subs %w[len], %w[len], #16\n"
+		  "	 smlal v0.4s, v16.4h, v20.4h\n"
+		  "	 smlal v0.4s, v17.4h, v21.4h\n"
+		  "	 smlal v0.4s, v18.4h, v22.4h\n"
+		  "	 smlal v0.4s, v19.4h, v23.4h\n"
+		  "	 b.ne 2b\n"
+		  "3:"
+		  "	 cmp %w[remainder], #0\n"
+		  "	 b.eq 5f\n"
+		  "4:"
+		  "	 ld1 {v18.4h}, [%[b]], #8\n"
+		  "	 ld1 {v22.4h}, [%[a]], #8\n"
+		  "	 subs %w[remainder], %w[remainder], #4\n"
+		  "	 smlal v0.4s, v18.4h, v22.4h\n"
+		  "	 b.ne 4b\n"
+		  "5:"
+		  "	 saddlv d0, v0.4s\n"
+		  "	 sqxtn s0, d0\n"
+		  "	 sqrshrn h0, s0, #15\n"
+		  "	 sxtl v0.4s, v0.4h\n"
+		  "	 fmov %w[ret], s0\n"
+		  : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
+		    [len] "+r" (len), [remainder] "+r" (remainder)
+		  :
+		  : "cc", "v0",
+		    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
+    return ret;
+}
+#else
+static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len)
+{
+    int32_t ret;
+    uint32_t remainder = len % 16;
+    len = len - remainder;
+
+    asm volatile ("	 cmp %[len], #0\n"
+		  "	 bne 1f\n"
+		  "	 vld1.16 {d16}, [%[b]]!\n"
+		  "	 vld1.16 {d20}, [%[a]]!\n"
+		  "	 subs %[remainder], %[remainder], #4\n"
+		  "	 vmull.s16 q0, d16, d20\n"
+		  "      beq 5f\n"
+		  "	 b 4f\n"
+		  "1:"
+		  "	 vld1.16 {d16, d17, d18, d19}, [%[b]]!\n"
+		  "	 vld1.16 {d20, d21, d22, d23}, [%[a]]!\n"
+		  "	 subs %[len], %[len], #16\n"
+		  "	 vmull.s16 q0, d16, d20\n"
+		  "	 vmlal.s16 q0, d17, d21\n"
+		  "	 vmlal.s16 q0, d18, d22\n"
+		  "	 vmlal.s16 q0, d19, d23\n"
+		  "	 beq 3f\n"
+		  "2:"
+		  "	 vld1.16 {d16, d17, d18, d19}, [%[b]]!\n"
+		  "	 vld1.16 {d20, d21, d22, d23}, [%[a]]!\n"
+		  "	 subs %[len], %[len], #16\n"
+		  "	 vmlal.s16 q0, d16, d20\n"
+		  "	 vmlal.s16 q0, d17, d21\n"
+		  "	 vmlal.s16 q0, d18, d22\n"
+		  "	 vmlal.s16 q0, d19, d23\n"
+		  "	 bne 2b\n"
+		  "3:"
+		  "	 cmp %[remainder], #0\n"
+		  "	 beq 5f\n"
+		  "4:"
+		  "	 vld1.16 {d16}, [%[b]]!\n"
+		  "	 vld1.16 {d20}, [%[a]]!\n"
+		  "	 subs %[remainder], %[remainder], #4\n"
+		  "	 vmlal.s16 q0, d16, d20\n"
+		  "	 bne 4b\n"
+		  "5:"
+		  "	 vaddl.s32 q0, d0, d1\n"
+		  "	 vadd.s64 d0, d0, d1\n"
+		  "	 vqmovn.s64 d0, q0\n"
+		  "	 vqrshrn.s32 d0, q0, #15\n"
+		  "	 vmov.s16 %[ret], d0[0]\n"
+		  : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
+		    [len] "+r" (len), [remainder] "+r" (remainder)
+		  :
+		  : "cc", "q0",
+		    "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23");
+
+    return ret;
+}
+#endif  // !defined(__aarch64__)
+
+#elif defined(FLOATING_POINT)
+#if defined(__aarch64__)
+static inline int32_t saturate_float_to_16bit(float a) {
+    int32_t ret;
+    asm ("fcvtas s1, %s[a]\n"
+         "sqxtn h1, s1\n"
+         "sxtl v1.4s, v1.4h\n"
+         "fmov %w[ret], s1\n"
+         : [ret] "=r" (ret)
+         : [a] "w" (a)
+         : "v1");
+    return ret;
+}
+#else
+static inline int32_t saturate_float_to_16bit(float a) {
+    int32_t ret;
+    asm ("vmov.f32 d0[0], %[a]\n"
+         "vcvt.s32.f32 d0, d0, #15\n"
+         "vqrshrn.s32 d0, q0, #15\n"
+         "vmov.s16 %[ret], d0[0]\n"
+         : [ret] "=r" (ret)
+         : [a] "r" (a)
+         : "q0");
+    return ret;
+}
+#endif
+
+#undef WORD2INT
+#define WORD2INT(x) (saturate_float_to_16bit(x))
+
+#define OVERRIDE_INNER_PRODUCT_SINGLE
+/* Only works when len % 4 == 0 and len >= 4 */
+#if defined(__aarch64__)
+static inline float inner_product_single(const float *a, const float *b, unsigned int len)
+{
+    float ret;
+    uint32_t remainder = len % 16;
+    len = len - remainder;
+
+    asm volatile ("	 cmp %w[len], #0\n"
+		  "	 b.ne 1f\n"
+		  "	 ld1 {v16.4s}, [%[b]], #16\n"
+		  "	 ld1 {v20.4s}, [%[a]], #16\n"
+		  "	 subs %w[remainder], %w[remainder], #4\n"
+		  "	 fmul v1.4s, v16.4s, v20.4s\n"
+		  "      b.ne 4f\n"
+		  "	 b 5f\n"
+		  "1:"
+		  "	 ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[b]], #64\n"
+		  "	 ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[a]], #64\n"
+		  "	 subs %w[len], %w[len], #16\n"
+		  "	 fmul v1.4s, v16.4s, v20.4s\n"
+		  "	 fmul v2.4s, v17.4s, v21.4s\n"
+		  "	 fmul v3.4s, v18.4s, v22.4s\n"
+		  "	 fmul v4.4s, v19.4s, v23.4s\n"
+		  "	 b.eq 3f\n"
+		  "2:"
+		  "	 ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[b]], #64\n"
+		  "	 ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[a]], #64\n"
+		  "	 subs %w[len], %w[len], #16\n"
+		  "	 fmla v1.4s, v16.4s, v20.4s\n"
+		  "	 fmla v2.4s, v17.4s, v21.4s\n"
+		  "	 fmla v3.4s, v18.4s, v22.4s\n"
+		  "	 fmla v4.4s, v19.4s, v23.4s\n"
+		  "	 b.ne 2b\n"
+		  "3:"
+		  "	 fadd v16.4s, v1.4s, v2.4s\n"
+		  "	 fadd v17.4s, v3.4s, v4.4s\n"
+		  "	 cmp %w[remainder], #0\n"
+		  "	 fadd v1.4s, v16.4s, v17.4s\n"
+		  "	 b.eq 5f\n"
+		  "4:"
+		  "	 ld1 {v18.4s}, [%[b]], #16\n"
+		  "	 ld1 {v22.4s}, [%[a]], #16\n"
+		  "	 subs %w[remainder], %w[remainder], #4\n"
+		  "	 fmla v1.4s, v18.4s, v22.4s\n"
+		  "	 b.ne 4b\n"
+		  "5:"
+		  "	 faddp v1.4s, v1.4s, v1.4s\n"
+		  "	 faddp %[ret].4s, v1.4s, v1.4s\n"
+		  : [ret] "=w" (ret), [a] "+r" (a), [b] "+r" (b),
+		    [len] "+r" (len), [remainder] "+r" (remainder)
+		  :
+		  : "cc", "v1", "v2", "v3", "v4",
+		    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
+    return ret;
+}
+#else
+static inline float inner_product_single(const float *a, const float *b, unsigned int len)
+{
+    float ret;
+    uint32_t remainder = len % 16;
+    len = len - remainder;
+
+    asm volatile ("	 cmp %[len], #0\n"
+		  "	 bne 1f\n"
+		  "	 vld1.32 {q4}, [%[b]]!\n"
+		  "	 vld1.32 {q8}, [%[a]]!\n"
+		  "	 subs %[remainder], %[remainder], #4\n"
+		  "	 vmul.f32 q0, q4, q8\n"
+		  "      bne 4f\n"
+		  "	 b 5f\n"
+		  "1:"
+		  "	 vld1.32 {q4, q5}, [%[b]]!\n"
+		  "	 vld1.32 {q8, q9}, [%[a]]!\n"
+		  "	 vld1.32 {q6, q7}, [%[b]]!\n"
+		  "	 vld1.32 {q10, q11}, [%[a]]!\n"
+		  "	 subs %[len], %[len], #16\n"
+		  "	 vmul.f32 q0, q4, q8\n"
+		  "	 vmul.f32 q1, q5, q9\n"
+		  "	 vmul.f32 q2, q6, q10\n"
+		  "	 vmul.f32 q3, q7, q11\n"
+		  "	 beq 3f\n"
+		  "2:"
+		  "	 vld1.32 {q4, q5}, [%[b]]!\n"
+		  "	 vld1.32 {q8, q9}, [%[a]]!\n"
+		  "	 vld1.32 {q6, q7}, [%[b]]!\n"
+		  "	 vld1.32 {q10, q11}, [%[a]]!\n"
+		  "	 subs %[len], %[len], #16\n"
+		  "	 vmla.f32 q0, q4, q8\n"
+		  "	 vmla.f32 q1, q5, q9\n"
+		  "	 vmla.f32 q2, q6, q10\n"
+		  "	 vmla.f32 q3, q7, q11\n"
+		  "	 bne 2b\n"
+		  "3:"
+		  "	 vadd.f32 q4, q0, q1\n"
+		  "	 vadd.f32 q5, q2, q3\n"
+		  "	 cmp %[remainder], #0\n"
+		  "	 vadd.f32 q0, q4, q5\n"
+		  "	 beq 5f\n"
+		  "4:"
+		  "	 vld1.32 {q6}, [%[b]]!\n"
+		  "	 vld1.32 {q10}, [%[a]]!\n"
+		  "	 subs %[remainder], %[remainder], #4\n"
+		  "	 vmla.f32 q0, q6, q10\n"
+		  "	 bne 4b\n"
+		  "5:"
+		  "	 vadd.f32 d0, d0, d1\n"
+		  "	 vpadd.f32 d0, d0, d0\n"
+		  "	 vmov.f32 %[ret], d0[0]\n"
+		  : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
+		    [len] "+l" (len), [remainder] "+l" (remainder)
+		  :
+		  : "cc", "q0", "q1", "q2", "q3",
+		    "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11");
+    return ret;
+}
+#endif  // defined(__aarch64__)
+#endif
--- a/third_party/speex/libspeex/resample_sse.h
+++ b/third_party/speex/libspeex/resample_sse.h
@ -9,18 +9,18 @@
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
-   
+
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
-   
+
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
-   
+
   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.
-   
+
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ -71,7 +71,7 @@ static inline float interpolate_product_single(const float *a, const float *b, u
   return ret;
 }

-#ifdef _USE_SSE2
+#ifdef USE_SSE2
 #include <emmintrin.h>
 #define OVERRIDE_INNER_PRODUCT_DOUBLE

@ -91,7 +91,7 @@ static inline double inner_product_double(const float *a, const float *b, unsign
      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
   }
-   sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
+   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
   _mm_store_sd(&ret, sum);
   return ret;
 }
@ -120,7 +120,7 @@ static inline double interpolate_product_double(const float *a, const float *b,
  sum1 = _mm_mul_pd(f1, sum1);
  sum2 = _mm_mul_pd(f2, sum2);
  sum = _mm_add_pd(sum1, sum2);
-  sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
+  sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
  _mm_store_sd(&ret, sum);
  return ret;
 }
--- a/third_party/speex/libspeex/scal.c
+++ b/third_party/speex/libspeex/scal.c
@ -52,6 +52,10 @@ The algorithm implemented here is described in:
 #include <math.h>
 #include <stdlib.h>

+#ifndef M_PI
+#define M_PI           3.14159265358979323846  /* pi */
+#endif
+
 #define ALLPASS_ORDER 20

 struct SpeexDecorrState_ {
@ -171,7 +175,6 @@ EXPORT void speex_decorrelate(SpeexDecorrState *st, const spx_int16_t *in, spx_i
         buff[i+st->frame_size] = in[i*st->channels+ch];

      x = buff+st->frame_size;
-      beta = 1.-.3*amount*amount;
      if (amount>1)
         beta = 1-sqrt(.4*amount);
      else
--- a/third_party/speex/libspeex/testresample2.c
+++ b/third_party/speex/libspeex/testresample2.c
@ -0,0 +1,93 @@
+/* Copyright (C) 2007 Jean-Marc Valin
+
+   File: testresample2.c
+   Testing the resampling code
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   3. The name of the author may not be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "speex/speex_resampler.h"
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+
+#define PERIOD 32
+#define INBLOCK 1024
+#define RATE 48000
+
+int main()
+{
+   spx_uint32_t i;
+   float *fin, *fout;
+   int rate = 1000, off = 0, avail = INBLOCK;
+   SpeexResamplerState *st = speex_resampler_init(1, RATE, RATE, 4, NULL);
+   speex_resampler_set_rate(st, RATE, rate);
+   speex_resampler_skip_zeros(st);
+
+   fin = malloc(INBLOCK*2*sizeof(float));
+   for (i=0; i<INBLOCK*2;i++)
+     fin[i] = sinf ((float)i/PERIOD * 2 * M_PI) * 0.9;
+
+   fout = malloc(INBLOCK*4*sizeof(float));
+
+   while (1)
+   {
+      spx_uint32_t in_len;
+      spx_uint32_t out_len;
+
+      in_len = avail;
+      out_len = (in_len * rate + RATE-1) / RATE;
+
+      fprintf (stderr, "%d %d %d %d -> ", rate, off, in_len, out_len);
+
+      speex_resampler_process_float(st, 0, fin + off, &in_len, fout, &out_len);
+
+      fprintf (stderr, "%d %d\n", in_len, out_len);
+      off += in_len;
+      avail = avail - in_len + INBLOCK;
+
+      if (off >= INBLOCK)
+        off -= INBLOCK;
+
+      fwrite(fout, sizeof(float), out_len, stdout);
+
+      rate += 100;
+      if (rate > 128000)
+        break;
+
+      speex_resampler_set_rate(st, RATE, rate);
+   }
+   speex_resampler_destroy(st);
+   free(fin);
+   free(fout);
+   return 0;
+}
+