Re #1954: Add WebRTC to third party component

* Add build config for GNU build systems git-svn-id: https://svn.pjsip.org/repos/pjproject/trunk@5428 74dad513-b988-da41-8d7b-12977e46ad98
2016-08-25 01:36:33 +00:00 · 2016-08-25 01:36:33 +00:00 · 3469abaa11
parent b86d83f88f
commit 3469abaa11
125 changed files with 37073 additions and 205 deletions
--- a/236
+++ b/236
@ -622,6 +622,10 @@ ac_subst_vars='LTLIBOBJS
 LIBOBJS
 ac_main_obj
 ac_host
+ac_webrtc_ldflags
+ac_webrtc_cflags
+ac_webrtc_instset
+ac_no_webrtc
 ac_no_yuv
 opus_present
 opus_h_present
@ -642,8 +646,6 @@ libssl_present
 openssl_h_present
 ac_ssl_has_aes_gcm
 ac_no_ssl
-ac_webrtc_ldflags
-ac_webrtc_cflags
 ac_openh264_ldflags
 ac_openh264_cflags
 ac_v4l2_ldflags
@ -682,6 +684,7 @@ ac_pa_cflags
 ac_external_pa
 ac_pjmedia_snd
 ac_pjmedia_resample
+ac_external_webrtc
 ac_external_yuv
 ac_srtp_shutdown_present
 ac_srtp_deinit_present
@ -781,6 +784,7 @@ with_external_speex
 with_external_gsm
 with_external_srtp
 with_external_yuv
+with_external_webrtc
 enable_resample
 enable_sound
 with_external_pa
@ -806,8 +810,6 @@ enable_ffmpeg
 enable_v4l2
 with_openh264
 enable_openh264
-with_webrtc
-enable_webrtc
 enable_ipp
 with_ipp
 with_ipp_samples
@ -823,6 +825,7 @@ enable_silk
 with_opus
 enable_opus
 enable_libyuv
+enable_libwebrtc
 '
      ac_precious_vars='build_alias
 host_alias
@ -1475,7 +1478,6 @@ Optional Features:
  --disable-ffmpeg        Disable ffmpeg (default: not disabled)
  --disable-v4l2          Disable Video4Linux2 (default: not disabled)
  --disable-openh264      Disable OpenH264 (default: not disabled)
-  --disable-webrtc        Exclude webrtc in the build
  --enable-ipp            Enable Intel IPP support. Specify the Intel IPP
                          package and samples location using IPPROOT and
                          IPPSAMPLES env var or with --with-ipp and
@ -1492,6 +1494,7 @@ Optional Features:
                          autodetect)

  --disable-libyuv        Exclude libyuv in the build
+  --disable-libwebrtc     Exclude libwebrtc in the build

 Optional Packages:
  --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
@ -1516,6 +1519,11 @@ Optional Packages:
                          make sure that libyuv is accessible to use (hint:
                          use CFLAGS and LDFLAGS env var to set the
                          include/lib paths)
+  --with-external-webrtc  Use external webrtc development files, not the one
+                          in "third_party" directory. When this option is set,
+                          make sure that webrtc is accessible to use (hint:
+                          use CFLAGS and LDFLAGS env var to set the
+                          include/lib paths)
  --with-external-pa      Use external PortAudio development files, not the
                          one in "third_party" directory. When this option is
                          set, make sure that PortAudio is accessible to use
@ -1524,7 +1532,6 @@ Optional Packages:
  --with-sdl=DIR          Specify alternate libSDL prefix
  --with-ffmpeg=DIR       Specify alternate FFMPEG prefix
  --with-openh264=DIR     Specify alternate OpenH264 prefix
-  --with-webrtc=DIR       Specify alternate WebRtc prefix
  --with-ipp=DIR          Specify the Intel IPP location
  --with-ipp-samples=DIR  Specify the Intel IPP samples location
  --with-ipp-arch=ARCH    Specify the Intel IPP ARCH suffix, e.g. "64" or
@ -6117,6 +6124,45 @@ fi



+ac_external_webrtc=0
+
+
+# Check whether --with-external-webrtc was given.
+if test "${with_external_webrtc+set}" = set; then :
+  withval=$with_external_webrtc;
+	if test "x$with_external_webrtc" != "xno"; then
+		# Test webrtc installation
+		{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if external webrtc devkit is installed" >&5
+$as_echo_n "checking if external webrtc devkit is installed... " >&6; }
+		cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <webrtc/modules/audio_processing/aec/aec_core.h>
+						     #include <webrtc/modules/audio_processing/aec/include/echo_cancellation.h>
+
+int
+main ()
+{
+WebRtcAec_Create();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes!!" >&5
+$as_echo "yes!!" >&6; }
+				   ac_external_webrtc="1"
+
+else
+  as_fn_error $? "Unable to use external webrtc. If webrtc development files are not available in the default locations, use CFLAGS and LDFLAGS env var to set the include/lib paths" "$LINENO" 5
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+	fi
+
+
+fi
+
+
+
 ac_pjmedia_resample=libresample

 # Check whether --enable-resample was given.
@ -7401,123 +7447,6 @@ fi



-
-# Check whether --with-webrtc was given.
-if test "${with_webrtc+set}" = set; then :
-  withval=$with_webrtc;
-else
-  with_webrtc=no
-
-fi
-
-
-if test "x$ac_cross_compile" != "x" -a "x$with_webrtc" = "xno"; then
-    enable_webrtc=no
-fi
-
-
-
-# Check whether --enable-webrtc was given.
-if test "${enable_webrtc+set}" = set; then :
-  enableval=$enable_webrtc; if test "$enable_webrtc" = "no"; then
-		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Checking if webrtc is disabled...yes" >&5
-$as_echo "Checking if webrtc is disabled...yes" >&6; }
-	       fi
-else
-
-		  if test "x$with_webrtc" != "xno" -a "x$with_webrtc" != "x"; then
-		        WEBRTC_PREFIX=$with_webrtc
-		  	WEBRTC_CFLAGS="-I$WEBRTC_PREFIX/src"
-
-			case $target in
-			    *-apple-darwin_ios*)
-				case $ARCH in
-				    *arm*)
-				    WEBRTC_CFLAGS="-DPJMEDIA_WEBRTC_AEC_USE_MOBILE=1 $WEBRTC_CFLAGS"
-				    WEBRTC_LDFLAGS="-L$WEBRTC_PREFIX/src/out_ios/Release-iphoneos"
-				    WEBRTC_LIBS="-laudio_processing_neon -lcommon_audio_neon"
-				    ;;
-				*)
-				    ;;
-				esac
-				;;
-			    *mingw* | *cygw* | *win32* | *w32* | *darwin* | *linux* | *android*)
-			        WEBRTC_LDFLAGS="-L$WEBRTC_PREFIX/src/out/Release"
-			        WEBRTC_LIBS="-laudio_processing_sse2"
-				;;
-			    *)
-				;;
-			esac
-
-			{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Using webrtc prefix... $with_webrtc" >&5
-$as_echo "Using webrtc prefix... $with_webrtc" >&6; }
-		  else
-		  	WEBRTC_CFLAGS=""
-			WEBRTC_LDFLAGS=""
-		  fi
-
-		  WEBRTC_LIBS="$WEBRTC_LIBS -laudio_processing -lcommon_audio -lsystem_wrappers"
-
-		  SAVED_LIBS="$LIBS"
-		  SAVED_LDFLAGS="$LDFLAGS"
-		  SAVED_CFLAGS="$CFLAGS"
-
-		  LIBS="$WEBRTC_LIBS $LIBS"
-		  LDFLAGS="$WEBRTC_LDFLAGS $LDFLAGS"
-		  CFLAGS="$WEBRTC_CFLAGS $CFLAGS"
-
-		  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for WebRtcAec_Process in -laudio_processing" >&5
-$as_echo_n "checking for WebRtcAec_Process in -laudio_processing... " >&6; }
-if ${ac_cv_lib_audio_processing_WebRtcAec_Process+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  ac_check_lib_save_LIBS=$LIBS
-LIBS="-laudio_processing
-			        $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-/* Override any GCC internal prototype to avoid an error.
-   Use char because int might match the return type of a GCC
-   builtin and then its argument prototype would still apply.  */
-#ifdef __cplusplus
-extern "C"
-#endif
-char WebRtcAec_Process ();
-int
-main ()
-{
-return WebRtcAec_Process ();
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
-  ac_cv_lib_audio_processing_WebRtcAec_Process=yes
-else
-  ac_cv_lib_audio_processing_WebRtcAec_Process=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_audio_processing_WebRtcAec_Process" >&5
-$as_echo "$ac_cv_lib_audio_processing_WebRtcAec_Process" >&6; }
-if test "x$ac_cv_lib_audio_processing_WebRtcAec_Process" = xyes; then :
-   ac_webrtc_cflags="-DPJMEDIA_HAS_WEBRTC_AEC=1 $WEBRTC_CFLAGS"
-		  		 ac_webrtc_ldflags="$WEBRTC_LDFLAGS $WEBRTC_LIBS"
-
-else
-   LIBS="$SAVED_LIBS"
-				 LDFLAGS="$SAVED_LDFLAGS"
-				 CFLAGS="$SAVED_CFLAGS"
-
-fi
-
-
-fi
-
-
 # Check whether --enable-ipp was given.
 if test "${enable_ipp+set}" = set; then :
  enableval=$enable_ipp;
@ -8473,6 +8402,67 @@ fi



+
+
+# Check whether --enable-libwebrtc was given.
+if test "${enable_libwebrtc+set}" = set; then :
+  enableval=$enable_libwebrtc; if test "$enable_libwebrtc" = "no"; then
+		ac_no_webrtc=1
+		$as_echo "#define PJMEDIA_HAS_LIBWEBRTC 0" >>confdefs.h
+
+		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Checking if libwebrtc is disabled...yes" >&5
+$as_echo "Checking if libwebrtc is disabled...yes" >&6; }
+	       fi
+else
+
+		  { $as_echo "$as_me:${as_lineno-$LINENO}: result: Checking if libwebrtc is disabled...no" >&5
+$as_echo "Checking if libwebrtc is disabled...no" >&6; }
+		  case $target in
+		      *-apple-darwin_ios*)
+			case $target in
+			    *arm*)
+				ac_webrtc_instset=neon
+			    	;;
+			    *)
+				ac_webrtc_instset=sse2
+			    	;;
+			esac
+		        ;;
+		      *android*)
+			case $TARGET_ABI in
+			    armeabi-v7a)
+				ac_webrtc_instset=neon
+				ac_webrtc_cflags="-mfloat-abi=softfp -mfpu=neon"
+			    	;;
+			    armeabi)
+				ac_webrtc_instset=neon
+				ac_webrtc_cflags="-mfloat-abi=softfp -mfpu=neon -march=armv7"
+			    	;;
+			    arm64*)
+				ac_webrtc_instset=neon
+				ac_webrtc_cflags="-DWEBRTC_ARCH_ARM64"
+			    	;;
+			    mips*)
+			    	ac_webrtc_instset=mips
+			        ;;
+			    *)
+				ac_webrtc_instset=sse2
+			    	;;
+			esac
+		        ;;
+		     *mingw* | *cygw* | *win32* | *w32* | *darwin* | *linux*)
+			ac_webrtc_instset=sse2
+			;;
+		     *)
+			;;
+		esac
+
+fi
+
+
+
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking if select() needs correct nfds" >&5
 $as_echo_n "checking if select() needs correct nfds... " >&6; }
 case $target in
--- a/aconfigure.ac
+++ b/aconfigure.ac
@ -598,6 +598,28 @@ AC_ARG_WITH(external-yuv,
    )


+dnl # Use external webrtc installation
+AC_SUBST(ac_external_webrtc,0)
+AC_ARG_WITH(external-webrtc,
+    AS_HELP_STRING([--with-external-webrtc],
+		   [Use external webrtc development files, not the one in "third_party" directory. When this option is set, make sure that webrtc is accessible to use (hint: use CFLAGS and LDFLAGS env var to set the include/lib paths)]),
+    [
+	if test "x$with_external_webrtc" != "xno"; then
+		# Test webrtc installation
+		AC_MSG_CHECKING([if external webrtc devkit is installed])
+		AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <webrtc/modules/audio_processing/aec/aec_core.h>
+						     #include <webrtc/modules/audio_processing/aec/include/echo_cancellation.h>
+		]],
+						  [WebRtcAec_Create();])],
+				  [AC_MSG_RESULT(yes!!)
+				   ac_external_webrtc="1"
+				   ],
+				  [AC_MSG_ERROR([Unable to use external webrtc. If webrtc development files are not available in the default locations, use CFLAGS and LDFLAGS env var to set the include/lib paths])])
+	fi
+    ]
+    )
+
+
 dnl # Resample implementation
 AC_SUBST(ac_pjmedia_resample,libresample)
 AC_ARG_ENABLE(resample,
@ -1243,82 +1265,6 @@ AC_ARG_ENABLE(openh264,
 	      ])


-dnl # WebRtc alt prefix
-AC_ARG_WITH(webrtc,
-            AS_HELP_STRING([--with-webrtc=DIR],
-		           [Specify alternate WebRtc prefix]),
-            [],
-            [with_webrtc=no]
-            )
-
-dnl # Do not use default webrtc installation if we are cross-compiling
-if test "x$ac_cross_compile" != "x" -a "x$with_webrtc" = "xno"; then
-    enable_webrtc=no
-fi	      
-	      
-dnl # WebRtc
-AC_SUBST(ac_webrtc_cflags)
-AC_SUBST(ac_webrtc_ldflags)
-AC_ARG_ENABLE(webrtc,
-	      AS_HELP_STRING([--disable-webrtc],
-			     [Exclude webrtc in the build]),
-	      [if test "$enable_webrtc" = "no"; then
-		AC_MSG_RESULT([Checking if webrtc is disabled...yes])
-	       fi],
-	      [
-		  if test "x$with_webrtc" != "xno" -a "x$with_webrtc" != "x"; then
-		        WEBRTC_PREFIX=$with_webrtc
-		  	WEBRTC_CFLAGS="-I$WEBRTC_PREFIX/src"
-		  	
-			case $target in
-			    *-apple-darwin_ios*)
-				case $ARCH in
-				    *arm*)
-				    WEBRTC_CFLAGS="-DPJMEDIA_WEBRTC_AEC_USE_MOBILE=1 $WEBRTC_CFLAGS"
-				    WEBRTC_LDFLAGS="-L$WEBRTC_PREFIX/src/out_ios/Release-iphoneos"
-				    WEBRTC_LIBS="-laudio_processing_neon -lcommon_audio_neon"
-				    ;;
-				*)
-				    ;;
-				esac
-				;;			
-			    *mingw* | *cygw* | *win32* | *w32* | *darwin* | *linux* | *android*)
-			        WEBRTC_LDFLAGS="-L$WEBRTC_PREFIX/src/out/Release"
-			        WEBRTC_LIBS="-laudio_processing_sse2" 
-				;;
-			    *)
-				;;
-			esac		  	
-		  	
-			AC_MSG_RESULT([Using webrtc prefix... $with_webrtc])
-		  else
-		  	WEBRTC_CFLAGS=""
-			WEBRTC_LDFLAGS="" 
-		  fi
-		  		  
-		  WEBRTC_LIBS="$WEBRTC_LIBS -laudio_processing -lcommon_audio -lsystem_wrappers"
-		  
-		  SAVED_LIBS="$LIBS"
-		  SAVED_LDFLAGS="$LDFLAGS"
-		  SAVED_CFLAGS="$CFLAGS"
-		  
-		  LIBS="$WEBRTC_LIBS $LIBS"
-		  LDFLAGS="$WEBRTC_LDFLAGS $LDFLAGS"
-		  CFLAGS="$WEBRTC_CFLAGS $CFLAGS"
-		  
-		  AC_CHECK_LIB(audio_processing,
-			       WebRtcAec_Process,
-			       [ ac_webrtc_cflags="-DPJMEDIA_HAS_WEBRTC_AEC=1 $WEBRTC_CFLAGS"
-		  		 ac_webrtc_ldflags="$WEBRTC_LDFLAGS $WEBRTC_LIBS"
-			       ],
-			       [ LIBS="$SAVED_LIBS"
-				 LDFLAGS="$SAVED_LDFLAGS"
-				 CFLAGS="$SAVED_CFLAGS"
-			       ],
-			       []
-			       )
-	      ])
-
 dnl ########################################################
 dnl # Intel IPP support
 dnl #
@ -1819,6 +1765,63 @@ AC_ARG_ENABLE(libyuv,
 	      AC_MSG_RESULT([Checking if libyuv is disabled...no]))


+dnl # Include webrtc
+AC_SUBST(ac_no_webrtc)
+AC_SUBST(ac_webrtc_instset)
+AC_SUBST(ac_webrtc_cflags)
+AC_SUBST(ac_webrtc_ldflags)
+AC_ARG_ENABLE(libwebrtc,
+	      AS_HELP_STRING([--disable-libwebrtc],
+			     [Exclude libwebrtc in the build]),
+	      [if test "$enable_libwebrtc" = "no"; then
+		[ac_no_webrtc=1]
+		AC_DEFINE(PJMEDIA_HAS_LIBWEBRTC,0)
+		AC_MSG_RESULT([Checking if libwebrtc is disabled...yes])
+	       fi],
+	      [
+		  AC_MSG_RESULT([Checking if libwebrtc is disabled...no])
+		  case $target in
+		      *-apple-darwin_ios*)
+			case $target in
+			    *arm*)
+				ac_webrtc_instset=neon
+			    	;;
+			    *)
+				ac_webrtc_instset=sse2
+			    	;;
+			esac
+		        ;;
+		      *android*)
+			case $TARGET_ABI in
+			    armeabi-v7a)
+				ac_webrtc_instset=neon
+				ac_webrtc_cflags="-mfloat-abi=softfp -mfpu=neon"
+			    	;;
+			    armeabi)
+				ac_webrtc_instset=neon
+				ac_webrtc_cflags="-mfloat-abi=softfp -mfpu=neon -march=armv7"
+			    	;;
+			    arm64*)
+				ac_webrtc_instset=neon
+				ac_webrtc_cflags="-DWEBRTC_ARCH_ARM64"
+			    	;;
+			    mips*)
+			    	ac_webrtc_instset=mips
+			        ;;
+			    *)
+				ac_webrtc_instset=sse2
+			    	;;
+			esac
+		        ;;
+		     *mingw* | *cygw* | *win32* | *w32* | *darwin* | *linux*)
+			ac_webrtc_instset=sse2
+			;;
+		     *)
+			;;
+		esac	
+	      ])
+
+
 dnl ##########################################
 dnl #
 dnl # MANUAL CONFIG
--- a/build.mak.in
+++ b/build.mak.in
@ -135,6 +135,20 @@ endif
 endif
 endif

+ifneq (@ac_no_webrtc@,1)
+ifeq (@ac_external_webrtc@,1)
+APP_THIRD_PARTY_EXT += -lwebrtc
+else
+APP_THIRD_PARTY_LIB_FILES += $(PJ_DIR)/third_party/lib/libwebrtc-$(LIB_SUFFIX)
+ifeq ($(PJ_SHARED_LIBRARIES),)
+APP_THIRD_PARTY_LIBS += -lwebrtc-$(TARGET_NAME)
+else
+APP_THIRD_PARTY_LIBS += -lwebrtc
+APP_THIRD_PARTY_LIB_FILES += $(PJ_DIR)/third_party/lib/libwebrtc.$(SHLIB_SUFFIX).$(PJ_VERSION_MAJOR) $(PJ_DIR)/third_party/lib/libwebrtc.$(SHLIB_SUFFIX)
+endif
+endif
+endif
+

 # Additional flags
@ac_build_mak_vars@
--- a/1
+++ b/1
@ -149,6 +149,7 @@ else
    exit 1
  fi

+  export TARGET_ABI="${TARGET_ABI}"
  export CC="${ANDROID_TC}/bin/${TARGET_HOST}-gcc"
  export CXX="${ANDROID_TC}/bin/${TARGET_HOST}-g++"
  export AR="${ANDROID_TC}/bin/${TARGET_HOST}-ar"
--- a/pjmedia/build/os-auto.mak.in
+++ b/pjmedia/build/os-auto.mak.in
@ -32,18 +32,14 @@ ANDROID_CFLAGS = @ac_android_cflags@
 OPENH264_CFLAGS = @ac_openh264_cflags@
 OPENH264_LDFLAGS = @ac_openh264_ldflags@

-# WebRtc
-WEBRTC_CFLAGS = @ac_webrtc_cflags@
-WEBRTC_LDFLAGS = @ac_webrtc_ldflags@
-

 # PJMEDIA features exclusion
 export CFLAGS += @ac_no_small_filter@ @ac_no_large_filter@ @ac_no_speex_aec@ \
 		 $(SDL_CFLAGS) $(FFMPEG_CFLAGS) $(V4L2_CFLAGS) $(QT_CFLAGS) \
 		 $(DARWIN_CFLAGS) $(ANDROID_CFLAGS) \
-		 $(OPENH264_CFLAGS) $(WEBRTC_CFLAGS)
+		 $(OPENH264_CFLAGS)
 export LDFLAGS += $(SDL_LDFLAGS) $(FFMPEG_LDFLAGS) $(V4L2_LDFLAGS) \
-		  $(OPENH264_LDFLAGS) $(WEBRTC_LDFLAGS)
+		  $(OPENH264_LDFLAGS)

 # Define the desired sound device backend
 # Valid values are:
@ -203,6 +199,22 @@ export CFLAGS += -I$(THIRD_PARTY)/yuv/include
 endif
 endif

+#
+# libwebrtc
+#
+# @ac_no_webrtc@, @ac_webrtc_instset@ and @ac_external_webrtc@ are substituted
+# by configure. PJMEDIA_HAS_WEBRTC_AEC mirrors whether libwebrtc is built in.
+#
+ifeq (@ac_no_webrtc@,1)
+export CFLAGS += -DPJMEDIA_HAS_WEBRTC_AEC=0
+else
+export CFLAGS += -DPJMEDIA_HAS_WEBRTC_AEC=1
+# Select the mobile echo canceller on ARM builds. configure reports ARM
+# targets as the "neon" instruction set (the value is one of neon, sse2 or
+# mips), so test the substituted text directly; wrapping it in $(...) would
+# look up an undefined make variable and always yield an empty string.
+ifneq ($(findstring neon,@ac_webrtc_instset@),)
+export CFLAGS += -DPJMEDIA_WEBRTC_AEC_USE_MOBILE=1
+endif
+
+# The bundled copy needs its source tree on the include path; an external
+# installation is expected to be reachable through the user's own CFLAGS.
+ifeq (@ac_external_webrtc@,0)
+export CFLAGS += -I$(THIRD_PARTY)/webrtc/src
+endif
+endif
+

 #
 # MacOSX specific
--- a/third_party/build/os-auto.mak.in
+++ b/third_party/build/os-auto.mak.in
@ -64,3 +64,43 @@ else
 DIRS += yuv
 endif
 endif
+
+ifneq (@ac_no_webrtc@,1)
+ifeq (@ac_external_webrtc@,1)
+# External webrtc
+else
+DIRS += webrtc
+WEBRTC_OTHER_CFLAGS = -fexceptions -DWEBRTC_POSIX=1 @ac_webrtc_cflags@
+ifneq ($(findstring sse2,@ac_webrtc_instset@),)
+    WEBRTC_SRC = \
+    	      modules/audio_processing/aec/aec_core_sse2.o		 \
+	      modules/audio_processing/aec/aec_rdft_sse2.o	         \
+	      modules/audio_processing/aecm/aecm_core_c.o	         \
+	      modules/audio_processing/ns/nsx_core_c.o	                 \
+	      system_wrappers/source/cpu_features.o
+else ifneq ($(findstring neon,@ac_webrtc_instset@),)
+    WEBRTC_SRC = \
+       	      modules/audio_processing/aec/aec_core_neon.o               \
+	      modules/audio_processing/aec/aec_rdft_neon.o               \
+	      modules/audio_processing/aecm/aecm_core_c.o                \
+	      modules/audio_processing/aecm/aecm_core_neon.o             \
+	      modules/audio_processing/ns/nsx_core_c.o                   \
+	      modules/audio_processing/ns/nsx_core_neon.o                \
+	      common_audio/signal_processing/cross_correlation_neon.o    \
+	      common_audio/signal_processing/downsample_fast_neon.o      \
+	      common_audio/signal_processing/min_max_operations_neon.o
+    WEBRTC_OTHER_CFLAGS += -DWEBRTC_HAS_NEON
+else ifneq ($(findstring mips,@ac_webrtc_instset@),)
+    WEBRTC_SRC = \
+              modules/audio_processing/aec/aec_core_mips.o               \
+	      modules/audio_processing/aec/aec_rdft_mips.o               \
+	      modules/audio_processing/aecm/aecm_core_mips.o             \
+	      modules/audio_processing/ns/nsx_core_mips.o                \
+	      common_audio/signal_processing/cross_correlation_mips.o    \
+	      common_audio/signal_processing/downsample_fast_mips.o      \
+	      common_audio/signal_processing/min_max_operations_mips.o
+
+    WEBRTC_OTHER_CFLAGS += -DMIPS_FPU_LE
+endif
+endif
+endif
--- a/third_party/build/webrtc/Makefile
+++ b/third_party/build/webrtc/Makefile
@ -0,0 +1,100 @@
+include ../../../build.mak
+include ../../../build/common.mak
+include ../os-$(OS_NAME).mak
+
+export LIBDIR := ../../lib
+
+RULES_MAK := $(PJDIR)/build/rules.mak
+
+export WEBRTC_LIB := libwebrtc-$(TARGET_NAME)$(LIBEXT)
+
+ifeq ($(PJ_SHARED_LIBRARIES),)
+else
+export WEBRTC_SONAME := libwebrtc.$(SHLIB_SUFFIX)
+export WEBRTC_SHLIB := $(WEBRTC_SONAME).$(PJ_VERSION_MAJOR)
+endif
+
+###############################################################################
+# Gather all flags.
+#
+export _CFLAGS 	:= $(CC_CFLAGS) $(OS_CFLAGS) $(HOST_CFLAGS) $(M_CFLAGS) \
+		   $(CFLAGS) $(CC_INC). $(CC_INC)../../webrtc/src       \
+		   $(CC_INC)../../../pjlib/include
+export _CXXFLAGS:= $(_CFLAGS) $(CC_CXXFLAGS) $(OS_CXXFLAGS) $(M_CXXFLAGS) \
+		   $(HOST_CXXFLAGS) $(CXXFLAGS)
+export _LDFLAGS := $(CC_LDFLAGS) $(OS_LDFLAGS) $(M_LDFLAGS) $(HOST_LDFLAGS) \
+		   $(LDFLAGS) 
+
+export WEBRTC_SRCDIR = ../../webrtc/src/webrtc/
+export WEBRTC_OBJS = \
+	modules/audio_processing/aec/aec_core.o                    \
+	modules/audio_processing/aec/aec_rdft.o                    \
+	modules/audio_processing/aec/aec_resampler.o               \
+	modules/audio_processing/aec/echo_cancellation.o           \
+	modules/audio_processing/aecm/aecm_core.o                  \
+	modules/audio_processing/aecm/echo_control_mobile.o        \
+	modules/audio_processing/ns/noise_suppression.o            \
+	modules/audio_processing/ns/noise_suppression_x.o          \
+	modules/audio_processing/ns/ns_core.o                      \
+	modules/audio_processing/ns/nsx_core.o                     \
+	modules/audio_processing/utility/delay_estimator_wrapper.o \
+	modules/audio_processing/utility/delay_estimator.o         \
+	common_audio/fft4g.o                                       \
+	common_audio/ring_buffer.o                                 \
+	common_audio/signal_processing/complex_bit_reverse.o       \
+	common_audio/signal_processing/complex_fft.o               \
+	common_audio/signal_processing/copy_set_operations.o       \
+	common_audio/signal_processing/cross_correlation.o         \
+	common_audio/signal_processing/division_operations.o       \
+	common_audio/signal_processing/downsample_fast.o           \
+	common_audio/signal_processing/energy.o                    \
+	common_audio/signal_processing/get_scaling_square.o        \
+	common_audio/signal_processing/min_max_operations.o        \
+	common_audio/signal_processing/randomization_functions.o   \
+	common_audio/signal_processing/real_fft.o                  \
+	common_audio/signal_processing/spl_init.o                  \
+	common_audio/signal_processing/spl_sqrt.o                  \
+	common_audio/signal_processing/spl_sqrt_floor.o            \
+	common_audio/signal_processing/vector_scaling_operations.o \
+	$(WEBRTC_SRC)
+
+
+export WEBRTC_CFLAGS = $(_CFLAGS) $(WEBRTC_OTHER_CFLAGS)
+export WEBRTC_CXXFLAGS = $(WEBRTC_CFLAGS)
+
+
+export CC_OUT CC AR RANLIB HOST_MV HOST_RM HOST_RMDIR HOST_MKDIR OBJEXT LD LDOUT 
+###############################################################################
+# Main entry
+#
+# $(TARGET) is defined in os-$(OS_NAME).mak file in current directory.
+#
+TARGETS := $(WEBRTC_LIB) $(WEBRTC_SONAME)
+
+all: $(TARGETS)
+
+doc:
+	cd .. && doxygen docs/doxygen.cfg
+
+dep: depend
+distclean: realclean
+
+.PHONY: all dep depend clean realclean distclean
+.PHONY: $(TARGETS)
+.PHONY: $(WEBRTC_LIB) $(WEBRTC_SONAME)
+
+libwebrtc: $(WEBRTC_LIB)
+$(WEBRTC_SONAME): $(WEBRTC_LIB)
+$(WEBRTC_LIB) $(WEBRTC_SONAME):
+	$(MAKE) -f $(RULES_MAK) APP=WEBRTC app=libwebrtc $(subst /,$(HOST_PSEP),$(LIBDIR)/$@)
+
+clean print_lib:
+	$(MAKE) -f $(RULES_MAK) APP=WEBRTC app=libwebrtc $@
+
+realclean:
+	$(subst @@,$(subst /,$(HOST_PSEP),.webrtc-$(TARGET_NAME).depend),$(HOST_RMR))
+	
+	$(MAKE) -f $(RULES_MAK) APP=WEBRTC app=libwebrtc $@
+
+depend:
+	$(MAKE) -f $(RULES_MAK) APP=WEBRTC app=libwebrtc $@
--- a/third_party/build/webrtc/notes.txt
+++ b/third_party/build/webrtc/notes.txt
@ -0,0 +1,2 @@
+Notes:
+* Source code for webrtc from https://chromium.googlesource.com/external/webrtc circa Oct 2015.
--- a/third_party/webrtc/src/webrtc/common_audio/fft4g.c
+++ b/third_party/webrtc/src/webrtc/common_audio/fft4g.c
--- a/third_party/webrtc/src/webrtc/common_audio/fft4g.h
+++ b/third_party/webrtc/src/webrtc/common_audio/fft4g.h
@ -0,0 +1,25 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_FFT4G_H_
+#define WEBRTC_COMMON_AUDIO_FFT4G_H_
+
+// size_t appears in the prototype below; include its definition here so this
+// header can be included on its own, regardless of include order.
+#include <stddef.h>  // size_t
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+// Public declaration of WebRtc_rdft(). The implementation and the description
+// of its parameters live in fft4g.c; refer to that file for documentation.
+void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif  // WEBRTC_COMMON_AUDIO_FFT4G_H_
--- a/third_party/webrtc/src/webrtc/common_audio/ring_buffer.c
+++ b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.c
@ -0,0 +1,247 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// A ring buffer to hold arbitrary data. Provides no thread safety. Unless
+// otherwise specified, functions return 0 on success and -1 on error.
+
+#include "webrtc/common_audio/ring_buffer.h"
+
+#include <stddef.h>  // size_t
+#include <stdlib.h>
+#include <string.h>
+
+enum Wrap {  // Records whether the write position has wrapped relative to the read position.
+  SAME_WRAP,  // Set on init and when the read position wraps forward past the end.
+  DIFF_WRAP  // Set when a write wraps past the end, or the read position is moved back below 0.
+};
+
+struct RingBuffer {  // State of one ring buffer; all positions and counts are in elements, not bytes.
+  size_t read_pos;  // Element index where the next read starts.
+  size_t write_pos;  // Element index where the next write lands.
+  size_t element_count;  // Capacity of the buffer, in elements.
+  size_t element_size;  // Size of a single element, in bytes.
+  enum Wrap rw_wrap;  // Selects which formula WebRtc_available_read() uses.
+  char* data;  // malloc'ed storage of element_count * element_size bytes.
+};
+
+// Locates the stored data that a read of up to |element_count| elements would
+// cover, without copying anything or moving the read position.
+//
+// The readable data is reported as one or two byte regions:
+//  - |data_ptr_1| / |data_ptr_bytes_1| always describe the region starting at
+//    the current read position.
+//  - |data_ptr_2| / |data_ptr_bytes_2| describe the part that continues at the
+//    start of the storage when the read runs past the end of it; otherwise
+//    they are set to NULL and 0.
+//
+// Returns the number of elements covered: the smaller of |element_count| and
+// the number of elements currently readable.
+static size_t GetBufferReadRegions(RingBuffer* buf,
+                                   size_t element_count,
+                                   void** data_ptr_1,
+                                   size_t* data_ptr_bytes_1,
+                                   void** data_ptr_2,
+                                   size_t* data_ptr_bytes_2) {
+  const size_t available = WebRtc_available_read(buf);
+  const size_t count = (element_count > available ? available : element_count);
+  // Elements between the read position and the end of the storage.
+  const size_t until_end = buf->element_count - buf->read_pos;
+
+  // The first region starts at the read position in either case.
+  *data_ptr_1 = buf->data + buf->read_pos * buf->element_size;
+
+  if (count <= until_end) {
+    // Everything requested sits in one contiguous stretch.
+    *data_ptr_bytes_1 = count * buf->element_size;
+    *data_ptr_2 = NULL;
+    *data_ptr_bytes_2 = 0;
+  } else {
+    // The read wraps: the tail of the storage first, then its beginning.
+    *data_ptr_bytes_1 = until_end * buf->element_size;
+    *data_ptr_2 = buf->data;
+    *data_ptr_bytes_2 = (count - until_end) * buf->element_size;
+  }
+
+  return count;
+}
+
+// Allocates a ring buffer able to hold |element_count| elements of
+// |element_size| bytes each and initializes it to the empty, zero-filled
+// state via WebRtc_InitBuffer().
+//
+// Returns the new buffer, or NULL when either argument is zero, when the
+// total byte size does not fit in size_t, or when an allocation fails. The
+// result is released with WebRtc_FreeBuffer().
+RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size) {
+  RingBuffer* self = NULL;
+  if (element_count == 0 || element_size == 0) {
+    return NULL;
+  }
+
+  // Refuse sizes whose product would wrap around: the wrapped value would
+  // give an allocation smaller than the positions computed from
+  // |element_count| assume. |element_size| is known to be non-zero here.
+  if (element_count > ((size_t) -1) / element_size) {
+    return NULL;
+  }
+
+  self = malloc(sizeof(RingBuffer));
+  if (!self) {
+    return NULL;
+  }
+
+  self->data = malloc(element_count * element_size);
+  if (!self->data) {
+    // Do not leak the control structure when the storage cannot be obtained.
+    free(self);
+    self = NULL;
+    return NULL;
+  }
+
+  self->element_count = element_count;
+  self->element_size = element_size;
+  WebRtc_InitBuffer(self);
+
+  return self;
+}
+
+// Resets |self| to the empty state: the whole storage is zero-filled and both
+// positions return to the start with the wrap indicator cleared. |self| must
+// not be NULL.
+void WebRtc_InitBuffer(RingBuffer* self) {
+  const size_t total_bytes = self->element_count * self->element_size;
+
+  // Clear out whatever the storage held before.
+  memset(self->data, 0, total_bytes);
+
+  self->rw_wrap = SAME_WRAP;
+  self->write_pos = 0;
+  self->read_pos = 0;
+}
+
+// Releases a buffer obtained from WebRtc_CreateBuffer(): first its storage,
+// then the control structure. A NULL |handle| is ignored.
+void WebRtc_FreeBuffer(void* handle) {
+  RingBuffer* const ring = (RingBuffer*)handle;
+
+  if (ring != NULL) {
+    free(ring->data);
+    free(ring);
+  }
+}
+
+// Reads up to |element_count| elements from |self| and advances the read
+// position by the number of elements read.
+//
+// When the data wraps around the end of the storage, or when |data_ptr| is
+// NULL, the elements are copied into |data|. Otherwise nothing is copied and
+// |*data_ptr| points straight into the buffer's storage. When |data_ptr| is
+// non-NULL it always receives the address of the data that was read.
+//
+// Returns the number of elements read; 0 if |self| or |data| is NULL.
+size_t WebRtc_ReadBuffer(RingBuffer* self,
+                         void** data_ptr,
+                         void* data,
+                         size_t element_count) {
+  void* first_region = NULL;
+  void* second_region = NULL;
+  size_t first_bytes = 0;
+  size_t second_bytes = 0;
+  size_t elements_read;
+
+  if (self == NULL || data == NULL) {
+    return 0;
+  }
+
+  elements_read = GetBufferReadRegions(self,
+                                       element_count,
+                                       &first_region,
+                                       &first_bytes,
+                                       &second_region,
+                                       &second_bytes);
+
+  if (second_bytes != 0) {
+    // The data is split across the end of the storage: stitch both pieces
+    // together in |data| and report that copy to the caller.
+    memcpy(data, first_region, first_bytes);
+    memcpy((char*) data + first_bytes, second_region, second_bytes);
+    first_region = data;
+  } else if (data_ptr == NULL) {
+    // Contiguous data, but the caller asked for a copy.
+    memcpy(data, first_region, first_bytes);
+  }
+
+  if (data_ptr != NULL) {
+    // Equals |data| after a wrap, otherwise an address inside the storage.
+    *data_ptr = first_region;
+  }
+
+  // Consume what was just handed out.
+  WebRtc_MoveReadPtr(self, (int) elements_read);
+
+  return elements_read;
+}
+
+// Copies up to |element_count| elements from |data| into |self|, limited by
+// the free space currently available, and advances the write position.
+//
+// Returns the number of elements written; 0 if |self| or |data| is NULL.
+size_t WebRtc_WriteBuffer(RingBuffer* self,
+                          const void* data,
+                          size_t element_count) {
+  const char* src = (const char*) data;
+  size_t writable;
+  size_t to_write;
+  size_t tail_room;
+  size_t remaining;
+
+  if (self == NULL || data == NULL) {
+    return 0;
+  }
+
+  writable = WebRtc_available_write(self);
+  to_write = (element_count > writable ? writable : element_count);
+  // Elements that still fit between the write position and the storage end.
+  tail_room = self->element_count - self->write_pos;
+  remaining = to_write;
+
+  if (to_write > tail_room) {
+    // Fill the tail of the storage, then continue from its beginning.
+    memcpy(self->data + self->write_pos * self->element_size,
+           src, tail_room * self->element_size);
+    src += tail_room * self->element_size;
+    remaining -= tail_room;
+    self->write_pos = 0;
+    self->rw_wrap = DIFF_WRAP;
+  }
+
+  memcpy(self->data + self->write_pos * self->element_size,
+         src, remaining * self->element_size);
+  self->write_pos += remaining;
+
+  return to_write;
+}
+
+// Moves the read position of |self| by |element_count| elements; a negative
+// value moves it backwards. The request is limited to the number of readable
+// elements when moving forward and to the number of free elements when moving
+// backward.
+//
+// Returns the number of elements actually moved (after limiting); 0 if |self|
+// is NULL.
+int WebRtc_MoveReadPtr(RingBuffer* self, int element_count) {
+  // Signed arithmetic throughout, because the move may be negative.
+  int max_backward;
+  int max_forward;
+  int capacity;
+  int new_pos;
+
+  if (self == NULL) {
+    return 0;
+  }
+
+  max_backward = (int) WebRtc_available_write(self);
+  max_forward = (int) WebRtc_available_read(self);
+  capacity = (int) self->element_count;
+
+  // Limit the request to what the buffer contents allow.
+  if (element_count > max_forward) {
+    element_count = max_forward;
+  }
+  if (element_count < -max_backward) {
+    element_count = -max_backward;
+  }
+
+  new_pos = (int) self->read_pos + element_count;
+
+  if (new_pos > capacity) {
+    // Ran past the end of the storage: wrap and clear the wrap indicator.
+    // The comparison is strict, so a position equal to the capacity is kept.
+    new_pos -= capacity;
+    self->rw_wrap = SAME_WRAP;
+  }
+  if (new_pos < 0) {
+    // Ran past the start of the storage: wrap and set the wrap indicator.
+    new_pos += capacity;
+    self->rw_wrap = DIFF_WRAP;
+  }
+
+  self->read_pos = (size_t) new_pos;
+
+  return element_count;
+}
+
+// Returns the number of elements currently stored in |self| and available
+// for reading; 0 if |self| is NULL.
+size_t WebRtc_available_read(const RingBuffer* self) {
+  size_t readable;
+
+  if (self == NULL) {
+    return 0;
+  }
+
+  // Distance from the read position to the write position. When the wrap
+  // indicator is set, one full capacity is added on top (size_t arithmetic
+  // is modular, so the order of the additions does not matter).
+  readable = self->write_pos - self->read_pos;
+  if (self->rw_wrap != SAME_WRAP) {
+    readable += self->element_count;
+  }
+
+  return readable;
+}
+
+// Returns the number of elements that can still be written to |self|, i.e.
+// its capacity minus the elements available for reading; 0 if |self| is NULL.
+size_t WebRtc_available_write(const RingBuffer* self) {
+  return (self != NULL)
+      ? self->element_count - WebRtc_available_read(self)
+      : 0;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/ring_buffer.h
+++ b/third_party/webrtc/src/webrtc/common_audio/ring_buffer.h
@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// A ring buffer to hold arbitrary data. Provides no thread safety. Unless
+// otherwise specified, functions return 0 on success and -1 on error.
+
+#ifndef WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
+#define WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>  // size_t
+
+// Opaque ring buffer handle; the struct layout is private to ring_buffer.c.
+typedef struct RingBuffer RingBuffer;
+
+// Creates and initializes the buffer. Returns NULL on failure.
+RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size);
+// NOTE(review): presumably resets |handle| to its empty state -- confirm
+// against the implementation in ring_buffer.c.
+void WebRtc_InitBuffer(RingBuffer* handle);
+// NOTE(review): presumably releases a buffer obtained from
+// WebRtc_CreateBuffer() -- confirm against the implementation.
+void WebRtc_FreeBuffer(void* handle);
+
+// Reads data from the buffer. The |data_ptr| will point to the address where
+// it is located. If all |element_count| data are feasible to read without
+// buffer wrap around |data_ptr| will point to the location in the buffer.
+// Otherwise, the data will be copied to |data| (memory allocation done by the
+// user) and |data_ptr| points to the address of |data|. |data_ptr| is only
+// guaranteed to be valid until the next call to WebRtc_WriteBuffer().
+//
+// To force a copying to |data|, pass a NULL |data_ptr|.
+//
+// Returns number of elements read.
+size_t WebRtc_ReadBuffer(RingBuffer* handle,
+                         void** data_ptr,
+                         void* data,
+                         size_t element_count);
+
+// Writes |data| to buffer and returns the number of elements written.
+size_t WebRtc_WriteBuffer(RingBuffer* handle, const void* data,
+                          size_t element_count);
+
+// Moves the buffer read position and returns the number of elements moved.
+// Positive |element_count| moves the read position towards the write position,
+// that is, flushing the buffer. Negative |element_count| moves the read
+// position away from the write position, that is, stuffing the buffer.
+// The request is clamped: a forward move never exceeds the number of readable
+// elements and a backward move never exceeds the number of writable elements.
+// Returns number of elements moved (0 if |handle| is NULL).
+int WebRtc_MoveReadPtr(RingBuffer* handle, int element_count);
+
+// Returns number of available elements to read (0 if |handle| is NULL).
+size_t WebRtc_available_read(const RingBuffer* handle);
+
+// Returns number of available elements for write (0 if |handle| is NULL).
+size_t WebRtc_available_write(const RingBuffer* handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c
@ -0,0 +1,103 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_AutoCorrToReflCoef().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Converts the autocorrelation values R[0..use_order] into |use_order|
+// reflection coefficients, written to K[0..use_order-1].
+//
+// - R:         autocorrelation input; use_order + 1 values are read.
+// - use_order: number of reflection coefficients to produce.
+// - K:         output; use_order values are written.
+//
+// NOTE(review): there is no bounds check on |use_order|. ACF and P are written
+// at indices 0..use_order and W at indices 1..use_order, while each array has
+// WEBRTC_SPL_MAX_LPC_ORDER elements, so callers must keep
+// use_order < WEBRTC_SPL_MAX_LPC_ORDER.
+void WebRtcSpl_AutoCorrToReflCoef(const int32_t *R, int use_order, int16_t *K)
+{
+    int i, n;
+    int16_t tmp;
+    const int32_t *rptr;
+    int32_t L_num, L_den;
+    int16_t *acfptr, *pptr, *wptr, *p1ptr, *w1ptr, ACF[WEBRTC_SPL_MAX_LPC_ORDER],
+            P[WEBRTC_SPL_MAX_LPC_ORDER], W[WEBRTC_SPL_MAX_LPC_ORDER];
+
+    // Initialize loop and pointers.
+    // p1ptr and w1ptr stay fixed at P[1] and W[1] for the whole function;
+    // acfptr, pptr and wptr are the moving cursors.
+    acfptr = ACF;
+    rptr = R;
+    pptr = P;
+    p1ptr = &P[1];
+    w1ptr = &W[1];
+    wptr = w1ptr;
+
+    // First loop; n=0. Determine shifting.
+    // The same shift |tmp| (derived from R[0]) is applied to every R value
+    // before keeping its upper 16 bits.
+    tmp = WebRtcSpl_NormW32(*R);
+    *acfptr = (int16_t)((*rptr++ << tmp) >> 16);
+    *pptr++ = *acfptr++;
+
+    // Initialize ACF, P and W.
+    for (i = 1; i <= use_order; i++)
+    {
+        *acfptr = (int16_t)((*rptr++ << tmp) >> 16);
+        *wptr++ = *acfptr;
+        *pptr++ = *acfptr++;
+    }
+
+    // Compute reflection coefficients.
+    for (n = 1; n <= use_order; n++, K++)
+    {
+        tmp = WEBRTC_SPL_ABS_W16(*p1ptr);
+        if (*P < tmp)
+        {
+            // |P[1]| exceeds P[0]: zero this and all remaining coefficients
+            // and stop.
+            for (i = n; i <= use_order; i++)
+                *K++ = 0;
+
+            return;
+        }
+
+        // Division: WebRtcSpl_div(tmp, *P)
+        // Fifteen shift-and-subtract steps build the 15-bit quotient
+        // |P[1]| / P[0] in *K. Note that |i| is reused as the step counter.
+        *K = 0;
+        if (tmp != 0)
+        {
+            L_num = tmp;
+            L_den = *P;
+            i = 15;
+            while (i--)
+            {
+                (*K) <<= 1;
+                L_num <<= 1;
+                if (L_num >= L_den)
+                {
+                    L_num -= L_den;
+                    (*K)++;
+                }
+            }
+            // The coefficient takes the opposite sign of P[1].
+            if (*p1ptr > 0)
+                *K = -*K;
+        }
+
+        // Last iteration; don't do Schur recursion.
+        if (n == use_order)
+            return;
+
+        // Schur recursion.
+        // Each product with *K is rounded (+16384) and scaled down by 15 bits,
+        // then combined with a saturating 16-bit add. P and W are updated in
+        // place, so the statement order below matters.
+        pptr = P;
+        wptr = w1ptr;
+        tmp = (int16_t)(((int32_t)*p1ptr * (int32_t)*K + 16384) >> 15);
+        *pptr = WebRtcSpl_AddSatW16(*pptr, tmp);
+        pptr++;
+        for (i = 1; i <= use_order - n; i++)
+        {
+            tmp = (int16_t)(((int32_t)*wptr * (int32_t)*K + 16384) >> 15);
+            *pptr = WebRtcSpl_AddSatW16(*(pptr + 1), tmp);
+            pptr++;
+            tmp = (int16_t)(((int32_t)*pptr * (int32_t)*K + 16384) >> 15);
+            *wptr = WebRtcSpl_AddSatW16(*wptr, tmp);
+            wptr++;
+        }
+    }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c
@ -0,0 +1,65 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <assert.h>
+
+// Computes the autocorrelation of |in_vector| for lags 0..|order| and stores
+// the order + 1 sums in |result|. Every product is right-shifted by a common
+// amount, reported through |scale|, chosen so the accumulation should not
+// overflow. Requires order <= in_vector_length. Returns order + 1.
+size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
+                                 size_t in_vector_length,
+                                 size_t order,
+                                 int32_t* result,
+                                 int* scale) {
+  int shift = 0;
+  int16_t peak = 0;
+  size_t lag = 0;
+
+  assert(order <= in_vector_length);
+
+  // Largest absolute sample value; it bounds every product in the sums below.
+  peak = WebRtcSpl_MaxAbsValueW16(in_vector, in_vector_length);
+
+  // Pick the shift so that (in_vector_length * peak * peak) will not overflow.
+  // An all-zero input needs no scaling.
+  if (peak != 0) {
+    // Bits needed to count the terms of one sum.
+    const int nbits = WebRtcSpl_GetSizeInBits((uint32_t)in_vector_length);
+    // Bits of headroom left above peak * peak.
+    const int headroom = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(peak, peak));
+
+    if (headroom <= nbits) {
+      shift = nbits - headroom;
+    }
+  }
+
+  // One accumulated sum per lag.
+  for (lag = 0; lag < order + 1; lag++) {
+    const int16_t* lagged = in_vector + lag;
+    int32_t acc = 0;
+    size_t k = 0;
+
+    /* Main part, four products per pass for speed. */
+    while (lag + k + 3 < in_vector_length) {
+      acc += (in_vector[k + 0] * lagged[k + 0]) >> shift;
+      acc += (in_vector[k + 1] * lagged[k + 1]) >> shift;
+      acc += (in_vector[k + 2] * lagged[k + 2]) >> shift;
+      acc += (in_vector[k + 3] * lagged[k + 3]) >> shift;
+      k += 4;
+    }
+    /* Leftover products that did not fill a group of four. */
+    while (k < in_vector_length - lag) {
+      acc += (in_vector[k] * lagged[k]) >> shift;
+      k++;
+    }
+    result[lag] = acc;
+  }
+
+  *scale = shift;
+  return order + 1;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c
@ -0,0 +1,108 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* Tables for data buffer indexes that are bit reversed and thus need to be
+ * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap
+ * operations, while index_7[{1, 3, 5, ...}] are for the right side of the
+ * operation. Same for index_8.
+ */
+
+/* Indexes for the case of stages == 7.
+ * 112 entries forming 56 (left, right) swap pairs. Each value indexes the data
+ * in units of one complex sample (a 32-bit real/imaginary pair), and the right
+ * entry is the 7-bit bit reversal of the left one (e.g. 1 <-> 64, 3 <-> 96).
+ * Indexes that equal their own bit reversal need no swap and are not listed.
+ */
+static const int16_t index_7[112] = {
+  1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104,
+  12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52,
+  23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98,
+  37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70,
+  51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69,
+  81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125,
+  103, 115, 111, 123
+};
+
+/* Indexes for the case of stages == 8.
+ * 240 entries forming 120 (left, right) swap pairs. Each value indexes the
+ * data in units of one complex sample (a 32-bit real/imaginary pair), and the
+ * right entry is the 8-bit bit reversal of the left one (e.g. 1 <-> 128,
+ * 8 <-> 16). Indexes that equal their own bit reversal are not listed.
+ */
+static const int16_t index_8[240] = {
+  1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80,
+  11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20,
+  40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184,
+  30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41,
+  148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76,
+  51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62,
+  124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82,
+  75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87,
+  234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101,
+  166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142,
+  115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131,
+  193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201,
+  149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171,
+  213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227,
+  203, 211, 207, 243, 215, 235, 223, 251, 239, 247
+};
+
+/* Reorders the 1 << stages complex samples in |complex_data| (interleaved
+ * 16-bit real/imaginary values) into bit-reversed index order, in place.
+ * stages == 7 and stages == 8 use the precomputed swap tables; every other
+ * value computes the bit-reversed partner of each index on the fly.
+ */
+void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
+  /* Each real/imaginary pair is accessed as a single 32-bit word so that one
+   * load/store moves a whole complex sample.
+   */
+  int32_t* const words = (int32_t*)complex_data;
+
+  if (stages != 7 && stages != 8) {
+    /* Generic path: decimation in time, re-ordering the data. */
+    const int n = 1 << stages;
+    const int last = n - 1;
+    int reversed = 0;
+    int idx;
+
+    for (idx = 1; idx <= last; ++idx) {
+      /* Step |reversed| to the bit-reversed counterpart of |idx|. */
+      int bit = n;
+      do {
+        bit >>= 1;
+      } while (bit > last - reversed);
+      reversed = (reversed & (bit - 1)) + bit;
+
+      /* Swap each pair only once, when the partner lies ahead of |idx|. */
+      if (reversed > idx) {
+        const int32_t held = words[idx];  /* Real and imaginary */
+        words[idx] = words[reversed];
+        words[reversed] = held;
+      }
+    }
+    return;
+  }
+
+  {
+    /* Table path. Currently (Feb. 2012) 7 and 8 are the only values of stages
+     * used in WebRTC, so the pairs to swap are read from a table.
+     */
+    const int16_t* pairs = (stages == 8) ? index_8 : index_7;
+    const int entries = (stages == 8) ? 240 : 112;
+    int pos;
+
+    /* Decimation in time. Swap the elements with bit-reversed indexes. */
+    for (pos = 0; pos < entries; pos += 2) {
+      const int32_t held = words[pairs[pos]];  /* Real and imaginary */
+      words[pairs[pos]] = words[pairs[pos + 1]];
+      words[pairs[pos + 1]] = held;
+    }
+  }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S
@ -0,0 +1,119 @@
+@
+@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+@
+@ Use of this source code is governed by a BSD-style license
+@ that can be found in the LICENSE file in the root of the source
+@ tree. An additional intellectual property rights grant can be found
+@ in the file PATENTS.  All contributing project authors may
+@ be found in the AUTHORS file in the root of the source tree.
+@
+
+@ This file contains the function WebRtcSpl_ComplexBitReverse(), optimized
+@ for ARMv5 platforms.
+@ Reference C code is in file complex_bit_reverse.c. Bit-exact.
+
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+
+@ void WebRtcSpl_ComplexBitReverse(int16_t* complex_data, int stages)
+@   r0 = complex_data (base address of the interleaved real/imag data)
+@   r1 = stages
+@ stages == 7 and stages == 8 walk a table of byte-offset pairs and swap the
+@ 32-bit words at those offsets; any other value takes the generic path that
+@ computes the bit-reversed index mr for each m in 1..n-1.
+GLOBAL_FUNCTION WebRtcSpl_ComplexBitReverse
+.align  2
+DEFINE_FUNCTION WebRtcSpl_ComplexBitReverse
+  push {r4-r7}
+
+  cmp r1, #7
+  adr r3, index_7                 @ Table pointer.
+  mov r4, #112                    @ Number of 16-bit table entries.
+  beq PRE_LOOP_STAGES_7_OR_8
+
+  cmp r1, #8
+  adr r3, index_8                 @ Table pointer.
+  mov r4, #240                    @ Number of 16-bit table entries.
+  beq PRE_LOOP_STAGES_7_OR_8
+
+  mov r3, #1                      @ Initialize m.
+  mov r1, r3, asl r1              @ n = 1 << stages;
+  subs r6, r1, #1                 @ nn = n - 1;
+  ble END                         @ Nothing to reorder when nn <= 0.
+
+  mov r5, r0                      @ &complex_data
+  mov r4, #0                      @ mr = 0
+
+LOOP_GENERIC:
+  rsb r12, r4, r6                 @ r12 = nn - mr
+  mov r2, r1                      @ l = n
+
+LOOP_SHIFT:
+  asr r2, #1                      @ l >>= 1;
+  cmp r2, r12
+  bgt LOOP_SHIFT                  @ Repeat while l > nn - mr.
+
+  sub r12, r2, #1
+  and r4, r12, r4
+  add r4, r2                      @ mr = (mr & (l - 1)) + l;
+  cmp r4, r3                      @ mr <= m ?
+  ble UPDATE_REGISTERS            @ Yes: skip the swap.
+
+  mov r12, r4, asl #2             @ Byte offset of element mr (4 bytes each).
+  ldr r7, [r5, #4]                @ complex_data[2 * m, 2 * m + 1].
+                                  @   Offset 4 due to m incrementing from 1.
+  ldr r2, [r0, r12]               @ complex_data[2 * mr, 2 * mr + 1].
+  str r7, [r0, r12]
+  str r2, [r5, #4]
+
+UPDATE_REGISTERS:
+  add r3, r3, #1                  @ ++m
+  add r5, #4                      @ Advance the element pointer with m.
+  cmp r3, r1
+  bne LOOP_GENERIC                @ Loop until m == n.
+
+  b END
+
+PRE_LOOP_STAGES_7_OR_8:
+  add r4, r3, r4, asl #1          @ r4 = table end (start + 2 bytes per entry).
+
+LOOP_STAGES_7_OR_8:
+  ldrsh r2, [r3], #2              @ index[m]
+  ldrsh r5, [r3], #2              @ index[m + 1]
+  ldr r1, [r0, r2]                @ complex_data[index[m], index[m] + 1]
+  ldr r12, [r0, r5]               @ complex_data[index[m + 1], index[m + 1] + 1]
+  cmp r3, r4                      @ Reached the end of the table?
+  str r1, [r0, r5]
+  str r12, [r0, r2]
+  bne LOOP_STAGES_7_OR_8
+
+END:
+  pop {r4-r7}
+  bx lr
+
+@ The index tables. Note the values are doubles of the actual indexes for 16-bit
+@ elements, different from the generic C code. It actually provides byte offsets
+@ for the indexes.
+@ In other words each entry is 4 times the complex-sample index used by the C
+@ tables (C index 1 -> 4, C index 64 -> 256), so it can be added directly to
+@ the data base address. Entries come in (left, right) swap pairs.
+
+.align  2
+index_7:  @ Indexes for stages == 7 (112 entries, 56 swap pairs).
+  .short 4, 256, 8, 128, 12, 384, 16, 64, 20, 320, 24, 192, 28, 448, 36, 288
+  .short 40, 160, 44, 416, 48, 96, 52, 352, 56, 224, 60, 480, 68, 272, 72, 144
+  .short 76, 400, 84, 336, 88, 208, 92, 464, 100, 304, 104, 176, 108, 432, 116
+  .short 368, 120, 240, 124, 496, 132, 264, 140, 392, 148, 328, 152, 200, 156
+  .short 456, 164, 296, 172, 424, 180, 360, 184, 232, 188, 488, 196, 280, 204
+  .short 408, 212, 344, 220, 472, 228, 312, 236, 440, 244, 376, 252, 504, 268
+  .short 388, 276, 324, 284, 452, 300, 420, 308, 356, 316, 484, 332, 404, 348
+  .short 468, 364, 436, 380, 500, 412, 460, 444, 492
+
+index_8:  @ Indexes for stages == 8 (240 entries, 120 swap pairs).
+  .short 4, 512, 8, 256, 12, 768, 16, 128, 20, 640, 24, 384, 28, 896, 32, 64
+  .short 36, 576, 40, 320, 44, 832, 48, 192, 52, 704, 56, 448, 60, 960, 68, 544
+  .short 72, 288, 76, 800, 80, 160, 84, 672, 88, 416, 92, 928, 100, 608, 104
+  .short 352, 108, 864, 112, 224, 116, 736, 120, 480, 124, 992, 132, 528, 136
+  .short 272, 140, 784, 148, 656, 152, 400, 156, 912, 164, 592, 168, 336, 172
+  .short 848, 176, 208, 180, 720, 184, 464, 188, 976, 196, 560, 200, 304, 204
+  .short 816, 212, 688, 216, 432, 220, 944, 228, 624, 232, 368, 236, 880, 244
+  .short 752, 248, 496, 252, 1008, 260, 520, 268, 776, 276, 648, 280, 392, 284
+  .short 904, 292, 584, 296, 328, 300, 840, 308, 712, 312, 456, 316, 968, 324
+  .short 552, 332, 808, 340, 680, 344, 424, 348, 936, 356, 616, 364, 872, 372
+  .short 744, 376, 488, 380, 1000, 388, 536, 396, 792, 404, 664, 412, 920, 420
+  .short 600, 428, 856, 436, 728, 440, 472, 444, 984, 452, 568, 460, 824, 468
+  .short 696, 476, 952, 484, 632, 492, 888, 500, 760, 508, 1016, 524, 772, 532
+  .short 644, 540, 900, 548, 580, 556, 836, 564, 708, 572, 964, 588, 804, 596
+  .short 676, 604, 932, 620, 868, 628, 740, 636, 996, 652, 788, 668, 916, 684
+  .short 852, 692, 724, 700, 980, 716, 820, 732, 948, 748, 884, 764, 1012, 796
+  .short 908, 812, 844, 828, 972, 860, 940, 892, 1004, 956, 988
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c
@ -0,0 +1,176 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* Swap table for stages == 7: 112 entries forming 56 (left, right) pairs.
+ * Each entry is a byte offset from the start of the data (4 bytes per complex
+ * sample), so it can be added directly to the data base address.
+ */
+static int16_t coefTable_7[] = {
+    4, 256,   8, 128,  12, 384,  16,  64,
+   20, 320,  24, 192,  28, 448,  36, 288,
+   40, 160,  44, 416,  48,  96,  52, 352,
+   56, 224,  60, 480,  68, 272,  72, 144,
+   76, 400,  84, 336,  88, 208,  92, 464,
+  100, 304, 104, 176, 108, 432, 116, 368,
+  120, 240, 124, 496, 132, 264, 140, 392,
+  148, 328, 152, 200, 156, 456, 164, 296,
+  172, 424, 180, 360, 184, 232, 188, 488,
+  196, 280, 204, 408, 212, 344, 220, 472,
+  228, 312, 236, 440, 244, 376, 252, 504,
+  268, 388, 276, 324, 284, 452, 300, 420,
+  308, 356, 316, 484, 332, 404, 348, 468,
+  364, 436, 380, 500, 412, 460, 444, 492
+};
+
+/* Swap table for stages == 8: 240 entries forming 120 (left, right) pairs.
+ * Each entry is a byte offset from the start of the data (4 bytes per complex
+ * sample), so it can be added directly to the data base address.
+ */
+static int16_t coefTable_8[] = {
+    4,  512,    8,  256,   12,  768,   16,  128,
+   20,  640,   24,  384,   28,  896,   32,   64,
+   36,  576,   40,  320,   44,  832,   48,  192,
+   52,  704,   56,  448,   60,  960,   68,  544,
+   72,  288,   76,  800,   80,  160,   84,  672,
+   88,  416,   92,  928,  100,  608,  104,  352,
+  108,  864,  112,  224,  116,  736,  120,  480,
+  124,  992,  132,  528,  136,  272,  140,  784,
+  148,  656,  152,  400,  156,  912,  164,  592,
+  168,  336,  172,  848,  176,  208,  180,  720,
+  184,  464,  188,  976,  196,  560,  200,  304,
+  204,  816,  212,  688,  216,  432,  220,  944,
+  228,  624,  232,  368,  236,  880,  244,  752,
+  248,  496,  252, 1008,  260,  520,  268,  776,
+  276,  648,  280,  392,  284,  904,  292,  584,
+  296,  328,  300,  840,  308,  712,  312,  456,
+  316,  968,  324,  552,  332,  808,  340,  680,
+  344,  424,  348,  936,  356,  616,  364,  872,
+  372,  744,  376,  488,  380, 1000,  388,  536,
+  396,  792,  404,  664,  412,  920,  420,  600,
+  428,  856,  436,  728,  440,  472,  444,  984,
+  452,  568,  460,  824,  468,  696,  476,  952,
+  484,  632,  492,  888,  500,  760,  508, 1016,
+  524,  772,  532,  644,  540,  900,  548,  580,
+  556,  836,  564,  708,  572,  964,  588,  804,
+  596,  676,  604,  932,  620,  868,  628,  740,
+  636,  996,  652,  788,  668,  916,  684,  852,
+  692,  724,  700,  980,  716,  820,  732,  948,
+  748,  884,  764, 1012,  796,  908,  812,  844,
+  828,  972,  860,  940,  892, 1004,  956,  988
+};
+
+/* MIPS inline-assembly version of WebRtcSpl_ComplexBitReverse().
+ *
+ * Swaps the 32-bit complex samples of |frfi| pairwise, using the byte-offset
+ * tables coefTable_7 / coefTable_8. Only stages == 7 and stages == 8 are
+ * handled; for any other value the function returns without touching |frfi|.
+ *
+ * Both assembly loops have the same shape: each pass reads 8 table entries
+ * (16 bytes) and performs 4 swaps with unaligned word loads/stores (ulw/usw).
+ * |l| counts the swaps still to do (120 for stages == 8, 56 for stages == 7)
+ * and drops by 4 per pass. Because of ".set noreorder", the table pointer
+ * increment written after "bgtz" sits in the branch delay slot.
+ *
+ * NOTE(review): |tr| and |ti| are declared int16_t but the assembly also uses
+ * their registers to hold addresses (frfi + offset) -- confirm this is safe
+ * for the toolchains in use.
+ */
+void WebRtcSpl_ComplexBitReverse(int16_t frfi[], int stages) {
+  int l;
+  int16_t tr, ti;
+  int32_t tmp1, tmp2, tmp3, tmp4;
+  int32_t* ptr_i;
+  int32_t* ptr_j;
+
+  if (stages == 8) {
+    int16_t* pcoeftable_8 = coefTable_8;
+
+    __asm __volatile (
+      ".set         push                                             \n\t"
+      ".set         noreorder                                        \n\t"
+      "addiu        %[l],            $zero,               120        \n\t"
+     "1:                                                             \n\t"
+      "addiu        %[l],            %[l],                -4         \n\t"
+      "lh           %[tr],           0(%[pcoeftable_8])              \n\t"
+      "lh           %[ti],           2(%[pcoeftable_8])              \n\t"
+      "lh           %[tmp3],         4(%[pcoeftable_8])              \n\t"
+      "lh           %[tmp4],         6(%[pcoeftable_8])              \n\t"
+      "addu         %[ptr_i],        %[frfi],             %[tr]      \n\t"
+      "addu         %[ptr_j],        %[frfi],             %[ti]      \n\t"
+      "addu         %[tr],           %[frfi],             %[tmp3]    \n\t"
+      "addu         %[ti],           %[frfi],             %[tmp4]    \n\t"
+      "ulw          %[tmp1],         0(%[ptr_i])                     \n\t"
+      "ulw          %[tmp2],         0(%[ptr_j])                     \n\t"
+      "ulw          %[tmp3],         0(%[tr])                        \n\t"
+      "ulw          %[tmp4],         0(%[ti])                        \n\t"
+      "usw          %[tmp1],         0(%[ptr_j])                     \n\t"
+      "usw          %[tmp2],         0(%[ptr_i])                     \n\t"
+      "usw          %[tmp4],         0(%[tr])                        \n\t"
+      "usw          %[tmp3],         0(%[ti])                        \n\t"
+      "lh           %[tmp1],         8(%[pcoeftable_8])              \n\t"
+      "lh           %[tmp2],         10(%[pcoeftable_8])             \n\t"
+      "lh           %[tr],           12(%[pcoeftable_8])             \n\t"
+      "lh           %[ti],           14(%[pcoeftable_8])             \n\t"
+      "addu         %[ptr_i],        %[frfi],             %[tmp1]    \n\t"
+      "addu         %[ptr_j],        %[frfi],             %[tmp2]    \n\t"
+      "addu         %[tr],           %[frfi],             %[tr]      \n\t"
+      "addu         %[ti],           %[frfi],             %[ti]      \n\t"
+      "ulw          %[tmp1],         0(%[ptr_i])                     \n\t"
+      "ulw          %[tmp2],         0(%[ptr_j])                     \n\t"
+      "ulw          %[tmp3],         0(%[tr])                        \n\t"
+      "ulw          %[tmp4],         0(%[ti])                        \n\t"
+      "usw          %[tmp1],         0(%[ptr_j])                     \n\t"
+      "usw          %[tmp2],         0(%[ptr_i])                     \n\t"
+      "usw          %[tmp4],         0(%[tr])                        \n\t"
+      "usw          %[tmp3],         0(%[ti])                        \n\t"
+      "bgtz         %[l],            1b                              \n\t"
+      " addiu       %[pcoeftable_8], %[pcoeftable_8],     16         \n\t"
+      ".set         pop                                              \n\t"
+
+      : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i),
+        [ptr_j] "=&r" (ptr_j), [tr] "=&r" (tr), [l] "=&r" (l),
+        [tmp3] "=&r" (tmp3), [pcoeftable_8] "+r" (pcoeftable_8),
+        [ti] "=&r" (ti), [tmp4] "=&r" (tmp4)
+      : [frfi] "r" (frfi)
+      : "memory"
+    );
+  } else if (stages == 7) {
+    int16_t* pcoeftable_7 = coefTable_7;
+
+    __asm __volatile (
+      ".set push                                                     \n\t"
+      ".set noreorder                                                \n\t"
+      "addiu        %[l],            $zero,               56         \n\t"
+     "1:                                                             \n\t"
+      "addiu        %[l],            %[l],                -4         \n\t"
+      "lh           %[tr],           0(%[pcoeftable_7])              \n\t"
+      "lh           %[ti],           2(%[pcoeftable_7])              \n\t"
+      "lh           %[tmp3],         4(%[pcoeftable_7])              \n\t"
+      "lh           %[tmp4],         6(%[pcoeftable_7])              \n\t"
+      "addu         %[ptr_i],        %[frfi],             %[tr]      \n\t"
+      "addu         %[ptr_j],        %[frfi],             %[ti]      \n\t"
+      "addu         %[tr],           %[frfi],             %[tmp3]    \n\t"
+      "addu         %[ti],           %[frfi],             %[tmp4]    \n\t"
+      "ulw          %[tmp1],         0(%[ptr_i])                     \n\t"
+      "ulw          %[tmp2],         0(%[ptr_j])                     \n\t"
+      "ulw          %[tmp3],         0(%[tr])                        \n\t"
+      "ulw          %[tmp4],         0(%[ti])                        \n\t"
+      "usw          %[tmp1],         0(%[ptr_j])                     \n\t"
+      "usw          %[tmp2],         0(%[ptr_i])                     \n\t"
+      "usw          %[tmp4],         0(%[tr])                        \n\t"
+      "usw          %[tmp3],         0(%[ti])                        \n\t"
+      "lh           %[tmp1],         8(%[pcoeftable_7])              \n\t"
+      "lh           %[tmp2],         10(%[pcoeftable_7])             \n\t"
+      "lh           %[tr],           12(%[pcoeftable_7])             \n\t"
+      "lh           %[ti],           14(%[pcoeftable_7])             \n\t"
+      "addu         %[ptr_i],        %[frfi],             %[tmp1]    \n\t"
+      "addu         %[ptr_j],        %[frfi],             %[tmp2]    \n\t"
+      "addu         %[tr],           %[frfi],             %[tr]      \n\t"
+      "addu         %[ti],           %[frfi],             %[ti]      \n\t"
+      "ulw          %[tmp1],         0(%[ptr_i])                     \n\t"
+      "ulw          %[tmp2],         0(%[ptr_j])                     \n\t"
+      "ulw          %[tmp3],         0(%[tr])                        \n\t"
+      "ulw          %[tmp4],         0(%[ti])                        \n\t"
+      "usw          %[tmp1],         0(%[ptr_j])                     \n\t"
+      "usw          %[tmp2],         0(%[ptr_i])                     \n\t"
+      "usw          %[tmp4],         0(%[tr])                        \n\t"
+      "usw          %[tmp3],         0(%[ti])                        \n\t"
+      "bgtz         %[l],            1b                              \n\t"
+      " addiu       %[pcoeftable_7], %[pcoeftable_7],     16         \n\t"
+      ".set pop                                                      \n\t"
+
+      : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i),
+        [ptr_j] "=&r" (ptr_j), [ti] "=&r" (ti), [tr] "=&r" (tr),
+        [l] "=&r" (l), [pcoeftable_7] "+r" (pcoeftable_7),
+        [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
+      : [frfi] "r" (frfi)
+      : "memory"
+    );
+  }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft.c
@ -0,0 +1,298 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_ComplexFFT().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/complex_fft_tables.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define CFFTSFT 14
+#define CFFTRND 1
+#define CFFTRND2 16384
+
+#define CIFFTSFT 14
+#define CIFFTRND 1
+
+
+/*
+ * In-place radix-2 complex FFT over interleaved 16-bit data.
+ *
+ * frfi[]  - 2 * (1 << stages) values. The butterflies use frfi[2 * x] as the
+ *           real part and frfi[2 * x + 1] as the imaginary part of element
+ *           x. The buffer is overwritten with the result.
+ * stages  - log2 of the transform length. Must be in [0, 10], because the
+ *           twiddle factors are read from kSinTable1024[].
+ * mode    - 0 selects the low-complexity path (plain truncating shifts);
+ *           any other value selects the high-accuracy path, which keeps
+ *           CFFTSFT extra fractional bits and adds rounding constants.
+ *
+ * Every stage shifts its outputs right by one bit.
+ *
+ * Returns 0 on success and -1 when 'stages' is out of range.
+ */
+int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode)
+{
+    int i, j, l, k, istep, n, m;
+    int16_t wr, wi;
+    int32_t tr32, ti32, qr32, qi32;
+
+    /* The limit of 1024 points (stages <= 10) is a constant given from the
+     * size of kSinTable1024[], and should not be changed. 'stages' itself is
+     * checked, before shifting, so that negative or oversized values are
+     * rejected instead of making '1 << stages' undefined.
+     */
+    if (stages < 0 || stages > 10)
+        return -1;
+
+    n = 1 << stages;
+
+    l = 1;
+    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
+         depending on the input parameter 'stages' */
+
+    if (mode == 0)
+    {
+        // mode==0: Low-complexity and Low-accuracy mode
+        while (l < n)
+        {
+            istep = l << 1;
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = -kSinTable1024[j];
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+                    /* Complex multiply of element j by the twiddle (wr, wi). */
+                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
+
+                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
+
+                    /* Butterfly: difference goes to j, sum goes to i. */
+                    qr32 = (int32_t)frfi[2 * i];
+                    qi32 = (int32_t)frfi[2 * i + 1];
+                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1);
+                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1);
+                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1);
+                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1);
+                }
+            }
+
+            --k;
+            l = istep;
+
+        }
+
+    } else
+    {
+        // mode==1: High-complexity and High-accuracy mode
+        while (l < n)
+        {
+            istep = l << 1;
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = -kSinTable1024[j];
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                /* Pack wr (low half) and wi (high half) into one register. */
+                int32_t wri = 0;
+                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
+                    "r"((int32_t)wr), "r"((int32_t)wi));
+#endif
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                    register int32_t frfi_r;
+                    __asm __volatile(
+                        "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd],"
+                        " lsl #16\n\t"
+                        "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
+                        "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
+                        :[frfi_r]"=&r"(frfi_r),
+                         [tr32]"=&r"(tr32),
+                         [ti32]"=r"(ti32)
+                        :[frfi_even]"r"((int32_t)frfi[2*j]),
+                         [frfi_odd]"r"((int32_t)frfi[2*j +1]),
+                         [wri]"r"(wri),
+                         [cfftrnd]"r"(CFFTRND));
+#else
+                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND;
+
+                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND;
+#endif
+
+                    tr32 >>= 15 - CFFTSFT;
+                    ti32 >>= 15 - CFFTSFT;
+
+                    /* Scale by multiplication rather than '<<': the samples
+                     * may be negative, and left-shifting a negative value is
+                     * undefined behaviour in C.
+                     */
+                    qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT);
+                    qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT);
+
+                    frfi[2 * j] = (int16_t)(
+                        (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * j + 1] = (int16_t)(
+                        (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * i] = (int16_t)(
+                        (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * i + 1] = (int16_t)(
+                        (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT));
+                }
+            }
+
+            --k;
+            l = istep;
+        }
+    }
+    return 0;
+}
+
+/*
+ * In-place radix-2 complex inverse FFT over interleaved 16-bit data, with
+ * data-dependent scaling.
+ *
+ * frfi[]  - 2 * (1 << stages) values. The butterflies use frfi[2 * x] as the
+ *           real part and frfi[2 * x + 1] as the imaginary part of element
+ *           x. The buffer is overwritten with the result.
+ * stages  - log2 of the transform length. Must be in [0, 10], because the
+ *           twiddle factors are read from kSinTable1024[].
+ * mode    - 0 selects the low-complexity path; any other value selects the
+ *           high-accuracy path, which keeps CIFFTSFT extra fractional bits
+ *           and adds a rounding constant.
+ *
+ * Before each stage the largest absolute value in the buffer is inspected
+ * and the stage output is shifted right by 0, 1 or 2 bits accordingly.
+ *
+ * Returns the total number of right shifts applied over all stages, or -1
+ * when 'stages' is out of range.
+ */
+int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode)
+{
+    size_t i, j, l, istep, n, m;
+    int k, scale, shift;
+    int16_t wr, wi;
+    int32_t tr32, ti32, qr32, qi32;
+    int32_t tmp32, round2;
+
+    /* The limit of 1024 points (stages <= 10) is a constant given from the
+     * size of kSinTable1024[], and should not be changed. 'stages' itself is
+     * checked, before shifting, so that negative or oversized values are
+     * rejected instead of making '1 << stages' undefined.
+     */
+    if (stages < 0 || stages > 10)
+        return -1;
+
+    n = (size_t)1 << stages;
+
+    scale = 0;
+
+    l = 1;
+    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
+         depending on the input parameter 'stages' */
+
+    while (l < n)
+    {
+        // variable scaling, depending upon data
+        shift = 0;
+        round2 = 8192;
+
+        /* One extra shift (and a doubled rounding constant) for each
+         * threshold the current peak magnitude exceeds.
+         */
+        tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n);
+        if (tmp32 > 13573)
+        {
+            shift++;
+            scale++;
+            round2 <<= 1;
+        }
+        if (tmp32 > 27146)
+        {
+            shift++;
+            scale++;
+            round2 <<= 1;
+        }
+
+        istep = l << 1;
+
+        if (mode == 0)
+        {
+            // mode==0: Low-complexity and Low-accuracy mode
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = kSinTable1024[j];
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+                    /* Complex multiply of element j by the twiddle (wr, wi). */
+                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
+
+                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
+
+                    /* Butterfly: difference goes to j, sum goes to i. */
+                    qr32 = (int32_t)frfi[2 * i];
+                    qi32 = (int32_t)frfi[2 * i + 1];
+                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift);
+                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift);
+                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift);
+                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift);
+                }
+            }
+        } else
+        {
+            // mode==1: High-complexity and High-accuracy mode
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = kSinTable1024[j];
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                /* Pack wr (low half) and wi (high half) into one register. */
+                int32_t wri = 0;
+                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
+                    "r"((int32_t)wr), "r"((int32_t)wi));
+#endif
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                    register int32_t frfi_r;
+                    __asm __volatile(
+                      "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
+                      "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
+                      "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
+                      :[frfi_r]"=&r"(frfi_r),
+                       [tr32]"=&r"(tr32),
+                       [ti32]"=r"(ti32)
+                      :[frfi_even]"r"((int32_t)frfi[2*j]),
+                       [frfi_odd]"r"((int32_t)frfi[2*j +1]),
+                       [wri]"r"(wri),
+                       [cifftrnd]"r"(CIFFTRND)
+                    );
+#else
+
+                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND;
+
+                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND;
+#endif
+                    tr32 >>= 15 - CIFFTSFT;
+                    ti32 >>= 15 - CIFFTSFT;
+
+                    /* Scale by multiplication rather than '<<': the samples
+                     * may be negative, and left-shifting a negative value is
+                     * undefined behaviour in C.
+                     */
+                    qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT);
+                    qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT);
+
+                    frfi[2 * j] = (int16_t)(
+                        (qr32 - tr32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * j + 1] = (int16_t)(
+                        (qi32 - ti32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * i] = (int16_t)(
+                        (qr32 + tr32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * i + 1] = (int16_t)(
+                        (qi32 + ti32 + round2) >> (shift + CIFFTSFT));
+                }
+            }
+
+        }
+        --k;
+        l = istep;
+    }
+    return scale;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c
@ -0,0 +1,328 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "webrtc/common_audio/signal_processing/complex_fft_tables.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define CFFTSFT 14
+#define CFFTRND 1
+#define CFFTRND2 16384
+
+#define CIFFTSFT 14
+#define CIFFTRND 1
+
+// MIPS inline-assembly implementation of WebRtcSpl_ComplexFFT().
+//
+// Computes n = 1 << stages and returns -1 when n > 1024. Otherwise a single
+// asm block transforms |frfi| in place and the function returns 0.
+//
+// |frfi| is accessed as pairs of 16-bit values, 4 bytes per pair (offsets 0
+// and 2 from each element pointer). Twiddle factors are loaded from
+// kSinTable1024.
+//
+// NOTE(review): |mode| is not referenced anywhere in this function, so both
+// modes run the same code here.
+// NOTE(review): all three loops are tested at the bottom, so the butterfly
+// body executes once even when n == 1 (stages == 0) and then touches the
+// second pair of |frfi| - confirm callers never pass stages == 0.
+int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) {
+  int i = 0;
+  int l = 0;
+  int k = 0;
+  int istep = 0;
+  int n = 0;
+  int m = 0;
+  int32_t wr = 0, wi = 0;
+  int32_t tmp1 = 0;
+  int32_t tmp2 = 0;
+  int32_t tmp3 = 0;
+  int32_t tmp4 = 0;
+  int32_t tmp5 = 0;
+  int32_t tmp6 = 0;
+  int32_t tmp = 0;
+  int16_t* ptr_j = NULL;
+  int16_t* ptr_i = NULL;
+
+  n = 1 << stages;
+  if (n > 1024) {
+    return -1;
+  }
+
+  // Instructions written with a leading space inside the string directly
+  // follow a branch; with ".set noreorder" in effect they sit in that
+  // branch's delay slot.
+  __asm __volatile (
+    ".set push                                                         \n\t"
+    ".set noreorder                                                    \n\t"
+
+    // k is the shift applied to m to form a byte offset into kSinTable1024;
+    // it starts at 10 and is decremented once per stage. l starts at 1.
+    "addiu      %[k],           $zero,            10                   \n\t"
+    "addiu      %[l],           $zero,            1                    \n\t"
+    // Label 3: one pass per stage, repeated while l < n.
+   "3:                                                                 \n\t"
+    // istep = 2 * l; tmp = 4 * l is the byte distance between the two
+    // elements of a butterfly.
+    "sll        %[istep],       %[l],             1                    \n\t"
+    "move       %[m],           $zero                                  \n\t"
+    "sll        %[tmp],         %[l],             2                    \n\t"
+    "move       %[i],           $zero                                  \n\t"
+    // Label 2: one pass per twiddle index m, repeated while m < l.
+    // wi is loaded from byte offset (m << k) of the table and wr from 512
+    // bytes (256 int16 entries) further on; m is then incremented.
+   "2:                                                                 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "sllv       %[tmp3],        %[m],             %[k]                 \n\t"
+    "addiu      %[tmp2],        %[tmp3],          512                  \n\t"
+    "addiu      %[m],           %[m],             1                    \n\t"
+    "lhx        %[wi],          %[tmp3](%[kSinTable1024])              \n\t"
+    "lhx        %[wr],          %[tmp2](%[kSinTable1024])              \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "sllv       %[tmp3],        %[m],             %[k]                 \n\t"
+    "addu       %[ptr_j],       %[tmp3],          %[kSinTable1024]     \n\t"
+    "addiu      %[ptr_i],       %[ptr_j],         512                  \n\t"
+    "addiu      %[m],           %[m],             1                    \n\t"
+    "lh         %[wi],          0(%[ptr_j])                            \n\t"
+    "lh         %[wr],          0(%[ptr_i])                            \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    // Label 1: one butterfly per pass. ptr_i = frfi + 4 * i bytes and
+    // ptr_j = ptr_i + tmp; i advances by istep and the loop repeats while
+    // i < n.
+   "1:                                                                 \n\t"
+    "sll        %[tmp1],        %[i],             2                    \n\t"
+    "addu       %[ptr_i],       %[frfi],          %[tmp1]              \n\t"
+    "addu       %[ptr_j],       %[ptr_i],         %[tmp]               \n\t"
+    "lh         %[tmp6],        0(%[ptr_i])                            \n\t"
+    "lh         %[tmp5],        2(%[ptr_i])                            \n\t"
+    "lh         %[tmp3],        0(%[ptr_j])                            \n\t"
+    "lh         %[tmp4],        2(%[ptr_j])                            \n\t"
+    "addu       %[i],           %[i],             %[istep]             \n\t"
+#if defined(MIPS_DSP_R2_LE)
+    // DSP R2 variant: products are accumulated in the default accumulator
+    // and in $ac1, then scaled with shra_r.w shifts.
+    "mult       %[wr],          %[tmp3]                                \n\t"
+    "madd       %[wi],          %[tmp4]                                \n\t"
+    "mult       $ac1,           %[wr],            %[tmp4]              \n\t"
+    "msub       $ac1,           %[wi],            %[tmp3]              \n\t"
+    "mflo       %[tmp1]                                                \n\t"
+    "mflo       %[tmp2],        $ac1                                   \n\t"
+    "sll        %[tmp6],        %[tmp6],          14                   \n\t"
+    "sll        %[tmp5],        %[tmp5],          14                   \n\t"
+    "shra_r.w   %[tmp1],        %[tmp1],          1                    \n\t"
+    "shra_r.w   %[tmp2],        %[tmp2],          1                    \n\t"
+    "subu       %[tmp4],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp1],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp6],        %[tmp5],          %[tmp2]              \n\t"
+    "subu       %[tmp5],        %[tmp5],          %[tmp2]              \n\t"
+    "shra_r.w   %[tmp1],        %[tmp1],          15                   \n\t"
+    "shra_r.w   %[tmp6],        %[tmp6],          15                   \n\t"
+    "shra_r.w   %[tmp4],        %[tmp4],          15                   \n\t"
+    "shra_r.w   %[tmp5],        %[tmp5],          15                   \n\t"
+#else  // #if defined(MIPS_DSP_R2_LE)
+    // Generic variant: four multiplies; the element at ptr_i is shifted
+    // left by 14 and offset by 16384, the products are offset by 1 and
+    // shifted right by 1, and the sums/differences are shifted right by 15.
+    "mul        %[tmp2],        %[wr],            %[tmp4]              \n\t"
+    "mul        %[tmp1],        %[wr],            %[tmp3]              \n\t"
+    "mul        %[tmp4],        %[wi],            %[tmp4]              \n\t"
+    "mul        %[tmp3],        %[wi],            %[tmp3]              \n\t"
+    "sll        %[tmp6],        %[tmp6],          14                   \n\t"
+    "sll        %[tmp5],        %[tmp5],          14                   \n\t"
+    "addiu      %[tmp6],        %[tmp6],          16384                \n\t"
+    "addiu      %[tmp5],        %[tmp5],          16384                \n\t"
+    "addu       %[tmp1],        %[tmp1],          %[tmp4]              \n\t"
+    "subu       %[tmp2],        %[tmp2],          %[tmp3]              \n\t"
+    "addiu      %[tmp1],        %[tmp1],          1                    \n\t"
+    "addiu      %[tmp2],        %[tmp2],          1                    \n\t"
+    "sra        %[tmp1],        %[tmp1],          1                    \n\t"
+    "sra        %[tmp2],        %[tmp2],          1                    \n\t"
+    "subu       %[tmp4],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp1],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp6],        %[tmp5],          %[tmp2]              \n\t"
+    "subu       %[tmp5],        %[tmp5],          %[tmp2]              \n\t"
+    "sra        %[tmp4],        %[tmp4],          15                   \n\t"
+    "sra        %[tmp1],        %[tmp1],          15                   \n\t"
+    "sra        %[tmp6],        %[tmp6],          15                   \n\t"
+    "sra        %[tmp5],        %[tmp5],          15                   \n\t"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+    // Store both results; the fourth store rides in the delay slot of the
+    // inner-loop branch. The delay slot of the m-loop branch sets i = m for
+    // the next twiddle index, and the delay slot of the stage branch
+    // decrements k.
+    "sh         %[tmp1],        0(%[ptr_i])                            \n\t"
+    "sh         %[tmp6],        2(%[ptr_i])                            \n\t"
+    "sh         %[tmp4],        0(%[ptr_j])                            \n\t"
+    "blt        %[i],           %[n],             1b                   \n\t"
+    " sh        %[tmp5],        2(%[ptr_j])                            \n\t"
+    "blt        %[m],           %[l],             2b                   \n\t"
+    " addu      %[i],           $zero,            %[m]                 \n\t"
+    "move       %[l],           %[istep]                               \n\t"
+    "blt        %[l],           %[n],             3b                   \n\t"
+    " addiu     %[k],           %[k],             -1                   \n\t"
+
+    ".set pop                                                          \n\t"
+
+    // Every scratch value is an early-clobber output; n, frfi and the table
+    // address are read-only inputs. "memory" is clobbered because the block
+    // stores through pointers derived from frfi.
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+      [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [wi] "=&r" (wi), [wr] "=&r" (wr),
+      [m] "=&r" (m), [istep] "=&r" (istep), [l] "=&r" (l), [k] "=&r" (k),
+      [ptr_j] "=&r" (ptr_j), [tmp] "=&r" (tmp)
+    : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
+    : "hi", "lo", "memory"
+#if defined(MIPS_DSP_R2_LE)
+    , "$ac1hi", "$ac1lo"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+  );
+
+  return 0;
+}
+
+// MIPS inline-assembly implementation of WebRtcSpl_ComplexIFFT().
+//
+// Computes n = 1 << stages and returns -1 when n > 1024. Otherwise a single
+// asm block transforms |frfi| in place and the function returns |scale|,
+// which the block zeroes and then increases at every stage by the number of
+// thresholds (13573 and 27146) that the current peak magnitude exceeds.
+//
+// |frfi| is accessed as pairs of 16-bit values, 4 bytes per pair. Twiddle
+// factors are loaded from kSinTable1024.
+//
+// NOTE(review): |mode| is not referenced anywhere in this function, so both
+// modes run the same code here.
+// NOTE(review): the stage loop is tested at the bottom and the peak scan
+// reads four values per pass, so with n == 1 (stages == 0) the block reads
+// and writes beyond the first pair of |frfi| - confirm callers never pass
+// stages == 0.
+int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) {
+  int i = 0, l = 0, k = 0;
+  int istep = 0, n = 0, m = 0;
+  int scale = 0, shift = 0;
+  int32_t wr = 0, wi = 0;
+  int32_t tmp1 = 0, tmp2 = 0, tmp3 = 0, tmp4 = 0;
+  int32_t tmp5 = 0, tmp6 = 0, tmp = 0, tempMax = 0, round2 = 0;
+  int16_t* ptr_j = NULL;
+  int16_t* ptr_i = NULL;
+
+  n = 1 << stages;
+  if (n > 1024) {
+    return -1;
+  }
+
+  // Instructions written with a leading space inside the string directly
+  // follow a branch; with ".set noreorder" in effect they sit in that
+  // branch's delay slot.
+  __asm __volatile (
+    ".set push                                                         \n\t"
+    ".set noreorder                                                    \n\t"
+
+    // k is the shift applied to m to form a byte offset into kSinTable1024;
+    // it starts at 10 and is decremented once per stage. l starts at 1.
+    "addiu      %[k],           $zero,            10                   \n\t"
+    "addiu      %[l],           $zero,            1                    \n\t"
+    "move       %[scale],       $zero                                  \n\t"
+    // Label 3: one pass per stage, repeated while l < n. Each stage starts
+    // from shift = 14 and round2 = 8192 and rescans the whole buffer.
+   "3:                                                                 \n\t"
+    "addiu      %[shift],       $zero,            14                   \n\t"
+    "addiu      %[round2],      $zero,            8192                 \n\t"
+    "move       %[ptr_i],       %[frfi]                                \n\t"
+    "move       %[tempMax],     $zero                                  \n\t"
+    "addu       %[i],           %[n],             %[n]                 \n\t"
+    // Label 5: peak scan. Loads four 16-bit values per pass, takes their
+    // absolute values and keeps the largest in tempMax; i counts down from
+    // 2 * n in steps of 4 and ptr_i advances by 8 bytes in the delay slot.
+   "5:                                                                 \n\t"
+    "lh         %[tmp1],        0(%[ptr_i])                            \n\t"
+    "lh         %[tmp2],        2(%[ptr_i])                            \n\t"
+    "lh         %[tmp3],        4(%[ptr_i])                            \n\t"
+    "lh         %[tmp4],        6(%[ptr_i])                            \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "absq_s.w   %[tmp1],        %[tmp1]                                \n\t"
+    "absq_s.w   %[tmp2],        %[tmp2]                                \n\t"
+    "absq_s.w   %[tmp3],        %[tmp3]                                \n\t"
+    "absq_s.w   %[tmp4],        %[tmp4]                                \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    // Without DSP R1: negate each value and select the negation when the
+    // value is below zero.
+    "slt        %[tmp5],        %[tmp1],          $zero                \n\t"
+    "subu       %[tmp6],        $zero,            %[tmp1]              \n\t"
+    "movn       %[tmp1],        %[tmp6],          %[tmp5]              \n\t"
+    "slt        %[tmp5],        %[tmp2],          $zero                \n\t"
+    "subu       %[tmp6],        $zero,            %[tmp2]              \n\t"
+    "movn       %[tmp2],        %[tmp6],          %[tmp5]              \n\t"
+    "slt        %[tmp5],        %[tmp3],          $zero                \n\t"
+    "subu       %[tmp6],        $zero,            %[tmp3]              \n\t"
+    "movn       %[tmp3],        %[tmp6],          %[tmp5]              \n\t"
+    "slt        %[tmp5],        %[tmp4],          $zero                \n\t"
+    "subu       %[tmp6],        $zero,            %[tmp4]              \n\t"
+    "movn       %[tmp4],        %[tmp6],          %[tmp5]              \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "slt        %[tmp5],        %[tempMax],       %[tmp1]              \n\t"
+    "movn       %[tempMax],     %[tmp1],          %[tmp5]              \n\t"
+    "addiu      %[i],           %[i],             -4                   \n\t"
+    "slt        %[tmp5],        %[tempMax],       %[tmp2]              \n\t"
+    "movn       %[tempMax],     %[tmp2],          %[tmp5]              \n\t"
+    "slt        %[tmp5],        %[tempMax],       %[tmp3]              \n\t"
+    "movn       %[tempMax],     %[tmp3],          %[tmp5]              \n\t"
+    "slt        %[tmp5],        %[tempMax],       %[tmp4]              \n\t"
+    "movn       %[tempMax],     %[tmp4],          %[tmp5]              \n\t"
+    "bgtz       %[i],                             5b                   \n\t"
+    " addiu     %[ptr_i],       %[ptr_i],         8                    \n\t"
+    // Compare the peak against the two thresholds. tempMax is first reduced
+    // to a sign-extended 16-bit value.
+    "addiu      %[tmp1],        $zero,            13573                \n\t"
+    "addiu      %[tmp2],        $zero,            27146                \n\t"
+#if !defined(MIPS32_R2_LE)
+    "sll        %[tempMax],     %[tempMax],       16                   \n\t"
+    "sra        %[tempMax],     %[tempMax],       16                   \n\t"
+#else  // #if !defined(MIPS32_R2_LE)
+    "seh        %[tempMax]                                             \n\t"
+#endif  // #if !defined(MIPS32_R2_LE)
+    // tmp1 becomes 0, 1 or 2 (thresholds exceeded); it is added to shift
+    // and scale, and round2 is shifted left by the same amount.
+    "slt        %[tmp1],        %[tmp1],          %[tempMax]           \n\t"
+    "slt        %[tmp2],        %[tmp2],          %[tempMax]           \n\t"
+    "addu       %[tmp1],        %[tmp1],          %[tmp2]              \n\t"
+    "addu       %[shift],       %[shift],         %[tmp1]              \n\t"
+    "addu       %[scale],       %[scale],         %[tmp1]              \n\t"
+    "sllv       %[round2],      %[round2],        %[tmp1]              \n\t"
+    // istep = 2 * l; tmp = 4 * l is the byte distance between the two
+    // elements of a butterfly. i is not reset here: for the first twiddle
+    // index it keeps the value the peak scan finished with.
+    "sll        %[istep],       %[l],             1                    \n\t"
+    "move       %[m],           $zero                                  \n\t"
+    "sll        %[tmp],         %[l],             2                    \n\t"
+    // Label 2: one pass per twiddle index m, repeated while m < l.
+    // wi is loaded from byte offset (m << k) of the table and wr from 512
+    // bytes (256 int16 entries) further on; m is then incremented.
+   "2:                                                                 \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "sllv       %[tmp3],        %[m],             %[k]                 \n\t"
+    "addiu      %[tmp2],        %[tmp3],          512                  \n\t"
+    "addiu      %[m],           %[m],             1                    \n\t"
+    "lhx        %[wi],          %[tmp3](%[kSinTable1024])              \n\t"
+    "lhx        %[wr],          %[tmp2](%[kSinTable1024])              \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "sllv       %[tmp3],        %[m],             %[k]                 \n\t"
+    "addu       %[ptr_j],       %[tmp3],          %[kSinTable1024]     \n\t"
+    "addiu      %[ptr_i],       %[ptr_j],         512                  \n\t"
+    "addiu      %[m],           %[m],             1                    \n\t"
+    "lh         %[wi],          0(%[ptr_j])                            \n\t"
+    "lh         %[wr],          0(%[ptr_i])                            \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    // Label 1: one butterfly per pass. ptr_i = frfi + 4 * i bytes and
+    // ptr_j = ptr_i + tmp; i advances by istep and the loop repeats while
+    // i < n.
+   "1:                                                                 \n\t"
+    "sll        %[tmp1],        %[i],             2                    \n\t"
+    "addu       %[ptr_i],       %[frfi],          %[tmp1]              \n\t"
+    "addu       %[ptr_j],       %[ptr_i],         %[tmp]               \n\t"
+    "lh         %[tmp3],        0(%[ptr_j])                            \n\t"
+    "lh         %[tmp4],        2(%[ptr_j])                            \n\t"
+    "lh         %[tmp6],        0(%[ptr_i])                            \n\t"
+    "lh         %[tmp5],        2(%[ptr_i])                            \n\t"
+    "addu       %[i],           %[i],             %[istep]             \n\t"
+#if defined(MIPS_DSP_R2_LE)
+    // DSP R2 variant: products are accumulated in the default accumulator
+    // and in $ac1; round2 is added before the final variable shift.
+    "mult       %[wr],          %[tmp3]                                \n\t"
+    "msub       %[wi],          %[tmp4]                                \n\t"
+    "mult       $ac1,           %[wr],            %[tmp4]              \n\t"
+    "madd       $ac1,           %[wi],            %[tmp3]              \n\t"
+    "mflo       %[tmp1]                                                \n\t"
+    "mflo       %[tmp2],        $ac1                                   \n\t"
+    "sll        %[tmp6],        %[tmp6],          14                   \n\t"
+    "sll        %[tmp5],        %[tmp5],          14                   \n\t"
+    "shra_r.w   %[tmp1],        %[tmp1],          1                    \n\t"
+    "shra_r.w   %[tmp2],        %[tmp2],          1                    \n\t"
+    "addu       %[tmp6],        %[tmp6],          %[round2]            \n\t"
+    "addu       %[tmp5],        %[tmp5],          %[round2]            \n\t"
+    "subu       %[tmp4],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp1],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp6],        %[tmp5],          %[tmp2]              \n\t"
+    "subu       %[tmp5],        %[tmp5],          %[tmp2]              \n\t"
+    "srav       %[tmp4],        %[tmp4],          %[shift]             \n\t"
+    "srav       %[tmp1],        %[tmp1],          %[shift]             \n\t"
+    "srav       %[tmp6],        %[tmp6],          %[shift]             \n\t"
+    "srav       %[tmp5],        %[tmp5],          %[shift]             \n\t"
+#else  // #if defined(MIPS_DSP_R2_LE)
+    // Generic variant: four multiplies; the products are offset by 1 and
+    // shifted right by 1, the element at ptr_i is shifted left by 14 and
+    // offset by round2, and the sums/differences are shifted right by
+    // |shift|.
+    "mul        %[tmp1],        %[wr],            %[tmp3]              \n\t"
+    "mul        %[tmp2],        %[wr],            %[tmp4]              \n\t"
+    "mul        %[tmp4],        %[wi],            %[tmp4]              \n\t"
+    "mul        %[tmp3],        %[wi],            %[tmp3]              \n\t"
+    "sll        %[tmp6],        %[tmp6],          14                   \n\t"
+    "sll        %[tmp5],        %[tmp5],          14                   \n\t"
+    "sub        %[tmp1],        %[tmp1],          %[tmp4]              \n\t"
+    "addu       %[tmp2],        %[tmp2],          %[tmp3]              \n\t"
+    "addiu      %[tmp1],        %[tmp1],          1                    \n\t"
+    "addiu      %[tmp2],        %[tmp2],          1                    \n\t"
+    "sra        %[tmp2],        %[tmp2],          1                    \n\t"
+    "sra        %[tmp1],        %[tmp1],          1                    \n\t"
+    "addu       %[tmp6],        %[tmp6],          %[round2]            \n\t"
+    "addu       %[tmp5],        %[tmp5],          %[round2]            \n\t"
+    "subu       %[tmp4],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp1],        %[tmp6],          %[tmp1]              \n\t"
+    "addu       %[tmp6],        %[tmp5],          %[tmp2]              \n\t"
+    "subu       %[tmp5],        %[tmp5],          %[tmp2]              \n\t"
+    "sra        %[tmp4],        %[tmp4],          %[shift]             \n\t"
+    "sra        %[tmp1],        %[tmp1],          %[shift]             \n\t"
+    "sra        %[tmp6],        %[tmp6],          %[shift]             \n\t"
+    "sra        %[tmp5],        %[tmp5],          %[shift]             \n\t"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+    // Store both results; the fourth store rides in the delay slot of the
+    // inner-loop branch. The delay slot of the m-loop branch sets i = m for
+    // the next twiddle index, and the delay slot of the stage branch
+    // decrements k.
+    "sh         %[tmp1],         0(%[ptr_i])                           \n\t"
+    "sh         %[tmp6],         2(%[ptr_i])                           \n\t"
+    "sh         %[tmp4],         0(%[ptr_j])                           \n\t"
+    "blt        %[i],            %[n],            1b                   \n\t"
+    " sh        %[tmp5],         2(%[ptr_j])                           \n\t"
+    "blt        %[m],            %[l],            2b                   \n\t"
+    " addu      %[i],            $zero,           %[m]                 \n\t"
+    "move       %[l],            %[istep]                              \n\t"
+    "blt        %[l],            %[n],            3b                   \n\t"
+    " addiu     %[k],            %[k],            -1                   \n\t"
+
+    ".set pop                                                          \n\t"
+
+    // Every scratch value is an early-clobber output; n, frfi and the table
+    // address are read-only inputs. "memory" is clobbered because the block
+    // stores through pointers derived from frfi.
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
+      [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [m] "=&r" (m), [tmp] "=&r" (tmp),
+      [istep] "=&r" (istep), [wi] "=&r" (wi), [wr] "=&r" (wr), [l] "=&r" (l),
+      [k] "=&r" (k), [round2] "=&r" (round2), [ptr_j] "=&r" (ptr_j),
+      [shift] "=&r" (shift), [scale] "=&r" (scale), [tempMax] "=&r" (tempMax)
+    : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
+    : "hi", "lo", "memory"
+#if defined(MIPS_DSP_R2_LE)
+    , "$ac1hi", "$ac1lo"
+#endif  // #if defined(MIPS_DSP_R2_LE)
+  );
+
+  return scale;
+
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h
@ -0,0 +1,148 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
+#define WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
+
+#include "webrtc/typedefs.h"
+
+// Sine lookup table: 1024 int16_t entries (128 rows of 8) covering one full
+// period. The values start at 0, peak at 32767 (index 256), return to 0
+// (index 512) and bottom out at -32767 (index 768).
+static const int16_t kSinTable1024[] = {
+       0,    201,    402,    603,    804,   1005,   1206,   1406,
+    1607,   1808,   2009,   2209,   2410,   2610,   2811,   3011,
+    3211,   3411,   3611,   3811,   4011,   4210,   4409,   4608,
+    4807,   5006,   5205,   5403,   5601,   5799,   5997,   6195,
+    6392,   6589,   6786,   6982,   7179,   7375,   7571,   7766,
+    7961,   8156,   8351,   8545,   8739,   8932,   9126,   9319,
+    9511,   9703,   9895,  10087,  10278,  10469,  10659,  10849,
+   11038,  11227,  11416,  11604,  11792,  11980,  12166,  12353,
+   12539,  12724,  12909,  13094,  13278,  13462,  13645,  13827,
+   14009,  14191,  14372,  14552,  14732,  14911,  15090,  15268,
+   15446,  15623,  15799,  15975,  16150,  16325,  16499,  16672,
+   16845,  17017,  17189,  17360,  17530,  17699,  17868,  18036,
+   18204,  18371,  18537,  18702,  18867,  19031,  19194,  19357,
+   19519,  19680,  19840,  20000,  20159,  20317,  20474,  20631,
+   20787,  20942,  21096,  21249,  21402,  21554,  21705,  21855,
+   22004,  22153,  22301,  22448,  22594,  22739,  22883,  23027,
+   23169,  23311,  23452,  23592,  23731,  23869,  24006,  24143,
+   24278,  24413,  24546,  24679,  24811,  24942,  25072,  25201,
+   25329,  25456,  25582,  25707,  25831,  25954,  26077,  26198,
+   26318,  26437,  26556,  26673,  26789,  26905,  27019,  27132,
+   27244,  27355,  27466,  27575,  27683,  27790,  27896,  28001,
+   28105,  28208,  28309,  28410,  28510,  28608,  28706,  28802,
+   28897,  28992,  29085,  29177,  29268,  29358,  29446,  29534,
+   29621,  29706,  29790,  29873,  29955,  30036,  30116,  30195,
+   30272,  30349,  30424,  30498,  30571,  30643,  30713,  30783,
+   30851,  30918,  30984,  31049,  31113,  31175,  31236,  31297,
+   31356,  31413,  31470,  31525,  31580,  31633,  31684,  31735,
+   31785,  31833,  31880,  31926,  31970,  32014,  32056,  32097,
+   32137,  32176,  32213,  32249,  32284,  32318,  32350,  32382,
+   32412,  32441,  32468,  32495,  32520,  32544,  32567,  32588,
+   32609,  32628,  32646,  32662,  32678,  32692,  32705,  32717,
+   32727,  32736,  32744,  32751,  32757,  32761,  32764,  32766,
+   32767,  32766,  32764,  32761,  32757,  32751,  32744,  32736,
+   32727,  32717,  32705,  32692,  32678,  32662,  32646,  32628,
+   32609,  32588,  32567,  32544,  32520,  32495,  32468,  32441,
+   32412,  32382,  32350,  32318,  32284,  32249,  32213,  32176,
+   32137,  32097,  32056,  32014,  31970,  31926,  31880,  31833,
+   31785,  31735,  31684,  31633,  31580,  31525,  31470,  31413,
+   31356,  31297,  31236,  31175,  31113,  31049,  30984,  30918,
+   30851,  30783,  30713,  30643,  30571,  30498,  30424,  30349,
+   30272,  30195,  30116,  30036,  29955,  29873,  29790,  29706,
+   29621,  29534,  29446,  29358,  29268,  29177,  29085,  28992,
+   28897,  28802,  28706,  28608,  28510,  28410,  28309,  28208,
+   28105,  28001,  27896,  27790,  27683,  27575,  27466,  27355,
+   27244,  27132,  27019,  26905,  26789,  26673,  26556,  26437,
+   26318,  26198,  26077,  25954,  25831,  25707,  25582,  25456,
+   25329,  25201,  25072,  24942,  24811,  24679,  24546,  24413,
+   24278,  24143,  24006,  23869,  23731,  23592,  23452,  23311,
+   23169,  23027,  22883,  22739,  22594,  22448,  22301,  22153,
+   22004,  21855,  21705,  21554,  21402,  21249,  21096,  20942,
+   20787,  20631,  20474,  20317,  20159,  20000,  19840,  19680,
+   19519,  19357,  19194,  19031,  18867,  18702,  18537,  18371,
+   18204,  18036,  17868,  17699,  17530,  17360,  17189,  17017,
+   16845,  16672,  16499,  16325,  16150,  15975,  15799,  15623,
+   15446,  15268,  15090,  14911,  14732,  14552,  14372,  14191,
+   14009,  13827,  13645,  13462,  13278,  13094,  12909,  12724,
+   12539,  12353,  12166,  11980,  11792,  11604,  11416,  11227,
+   11038,  10849,  10659,  10469,  10278,  10087,   9895,   9703,
+    9511,   9319,   9126,   8932,   8739,   8545,   8351,   8156,
+    7961,   7766,   7571,   7375,   7179,   6982,   6786,   6589,
+    6392,   6195,   5997,   5799,   5601,   5403,   5205,   5006,
+    4807,   4608,   4409,   4210,   4011,   3811,   3611,   3411,
+    3211,   3011,   2811,   2610,   2410,   2209,   2009,   1808,
+    1607,   1406,   1206,   1005,    804,    603,    402,    201,
+       0,   -201,   -402,   -603,   -804,  -1005,  -1206,  -1406,
+   -1607,  -1808,  -2009,  -2209,  -2410,  -2610,  -2811,  -3011,
+   -3211,  -3411,  -3611,  -3811,  -4011,  -4210,  -4409,  -4608,
+   -4807,  -5006,  -5205,  -5403,  -5601,  -5799,  -5997,  -6195,
+   -6392,  -6589,  -6786,  -6982,  -7179,  -7375,  -7571,  -7766,
+   -7961,  -8156,  -8351,  -8545,  -8739,  -8932,  -9126,  -9319,
+   -9511,  -9703,  -9895, -10087, -10278, -10469, -10659, -10849,
+  -11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353,
+  -12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827,
+  -14009, -14191, -14372, -14552, -14732, -14911, -15090, -15268,
+  -15446, -15623, -15799, -15975, -16150, -16325, -16499, -16672,
+  -16845, -17017, -17189, -17360, -17530, -17699, -17868, -18036,
+  -18204, -18371, -18537, -18702, -18867, -19031, -19194, -19357,
+  -19519, -19680, -19840, -20000, -20159, -20317, -20474, -20631,
+  -20787, -20942, -21096, -21249, -21402, -21554, -21705, -21855,
+  -22004, -22153, -22301, -22448, -22594, -22739, -22883, -23027,
+  -23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143,
+  -24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201,
+  -25329, -25456, -25582, -25707, -25831, -25954, -26077, -26198,
+  -26318, -26437, -26556, -26673, -26789, -26905, -27019, -27132,
+  -27244, -27355, -27466, -27575, -27683, -27790, -27896, -28001,
+  -28105, -28208, -28309, -28410, -28510, -28608, -28706, -28802,
+  -28897, -28992, -29085, -29177, -29268, -29358, -29446, -29534,
+  -29621, -29706, -29790, -29873, -29955, -30036, -30116, -30195,
+  -30272, -30349, -30424, -30498, -30571, -30643, -30713, -30783,
+  -30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297,
+  -31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735,
+  -31785, -31833, -31880, -31926, -31970, -32014, -32056, -32097,
+  -32137, -32176, -32213, -32249, -32284, -32318, -32350, -32382,
+  -32412, -32441, -32468, -32495, -32520, -32544, -32567, -32588,
+  -32609, -32628, -32646, -32662, -32678, -32692, -32705, -32717,
+  -32727, -32736, -32744, -32751, -32757, -32761, -32764, -32766,
+  -32767, -32766, -32764, -32761, -32757, -32751, -32744, -32736,
+  -32727, -32717, -32705, -32692, -32678, -32662, -32646, -32628,
+  -32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441,
+  -32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176,
+  -32137, -32097, -32056, -32014, -31970, -31926, -31880, -31833,
+  -31785, -31735, -31684, -31633, -31580, -31525, -31470, -31413,
+  -31356, -31297, -31236, -31175, -31113, -31049, -30984, -30918,
+  -30851, -30783, -30713, -30643, -30571, -30498, -30424, -30349,
+  -30272, -30195, -30116, -30036, -29955, -29873, -29790, -29706,
+  -29621, -29534, -29446, -29358, -29268, -29177, -29085, -28992,
+  -28897, -28802, -28706, -28608, -28510, -28410, -28309, -28208,
+  -28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355,
+  -27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437,
+  -26318, -26198, -26077, -25954, -25831, -25707, -25582, -25456,
+  -25329, -25201, -25072, -24942, -24811, -24679, -24546, -24413,
+  -24278, -24143, -24006, -23869, -23731, -23592, -23452, -23311,
+  -23169, -23027, -22883, -22739, -22594, -22448, -22301, -22153,
+  -22004, -21855, -21705, -21554, -21402, -21249, -21096, -20942,
+  -20787, -20631, -20474, -20317, -20159, -20000, -19840, -19680,
+  -19519, -19357, -19194, -19031, -18867, -18702, -18537, -18371,
+  -18204, -18036, -17868, -17699, -17530, -17360, -17189, -17017,
+  -16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623,
+  -15446, -15268, -15090, -14911, -14732, -14552, -14372, -14191,
+  -14009, -13827, -13645, -13462, -13278, -13094, -12909, -12724,
+  -12539, -12353, -12166, -11980, -11792, -11604, -11416, -11227,
+  -11038, -10849, -10659, -10469, -10278, -10087,  -9895,  -9703,
+   -9511,  -9319,  -9126,  -8932,  -8739,  -8545,  -8351,  -8156,
+   -7961,  -7766,  -7571,  -7375,  -7179,  -6982,  -6786,  -6589,
+   -6392,  -6195,  -5997,  -5799,  -5601,  -5403,  -5205,  -5006,
+   -4807,  -4608,  -4409,  -4210,  -4011,  -3811,  -3611,  -3411,
+   -3211,  -3011,  -2811,  -2610,  -2410,  -2209,  -2009,  -1808,
+   -1607,  -1406,  -1206,  -1005,   -804,   -603,   -402,   -201
+};
+
+#endif  // WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c
@ -0,0 +1,82 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the implementation of functions
+ * WebRtcSpl_MemSetW16()
+ * WebRtcSpl_MemSetW32()
+ * WebRtcSpl_MemCpyReversedOrder()
+ * WebRtcSpl_CopyFromEndW16()
+ * WebRtcSpl_ZerosArrayW16()
+ * WebRtcSpl_ZerosArrayW32()
+ *
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include <string.h>
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+
+void WebRtcSpl_MemSetW16(int16_t *ptr, int16_t set_value, size_t length)
+{
+    // Store |set_value| into each of the first |length| 16-bit words at |ptr|.
+    size_t idx;
+
+    for (idx = 0; idx < length; idx++)
+    {
+        ptr[idx] = set_value;
+    }
+}
+
+void WebRtcSpl_MemSetW32(int32_t *ptr, int32_t set_value, size_t length)
+{
+    // Store |set_value| into each of the first |length| 32-bit words at |ptr|.
+    size_t idx;
+
+    for (idx = 0; idx < length; idx++)
+    {
+        ptr[idx] = set_value;
+    }
+}
+
+void WebRtcSpl_MemCpyReversedOrder(int16_t* dest,
+                                   int16_t* source,
+                                   size_t length)
+{
+    // |dest| addresses the LAST slot of the output: source[n] is written to
+    // dest[-n], so the output holds the input in reverse order.
+    size_t n;
+
+    for (n = 0; n < length; n++)
+    {
+        *(dest - n) = source[n];
+    }
+}
+
+void WebRtcSpl_CopyFromEndW16(const int16_t *vector_in,
+                              size_t length,
+                              size_t samples,
+                              int16_t *vector_out)
+{
+    // The wanted tail begins |samples| elements before the end of the input.
+    const int16_t *tail = &vector_in[length - samples];
+
+    WEBRTC_SPL_MEMCPY_W16(vector_out, tail, samples);
+}
+
+void WebRtcSpl_ZerosArrayW16(int16_t *vector, size_t length)
+{
+    // Zeroing is the 16-bit fill routine called with a fill value of 0.
+    const int16_t fill_value = 0;
+
+    WebRtcSpl_MemSetW16(vector, fill_value, length);
+}
+
+void WebRtcSpl_ZerosArrayW32(int32_t *vector, size_t length)
+{
+    // Zeroing is the 32-bit fill routine called with a fill value of 0.
+    const int32_t fill_value = 0;
+
+    WebRtcSpl_MemSetW32(vector, fill_value, length);
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c
@ -0,0 +1,30 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */
+void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 size_t dim_seq,
+                                 size_t dim_cross_correlation,
+                                 int right_shifts,
+                                 int step_seq2) {
+  size_t lag;
+
+  /* One output per lag; |seq2| slides by |step_seq2| samples between lags. */
+  for (lag = 0; lag < dim_cross_correlation; lag++) {
+    int32_t acc = 0;
+    size_t n = 0;
+
+    /* Every product is shifted on its own before being accumulated. */
+    while (n < dim_seq) {
+      acc += (seq1[n] * seq2[n]) >> right_shifts;
+      n++;
+    }
+
+    cross_correlation[lag] = acc;
+    seq2 += step_seq2;
+  }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c
@ -0,0 +1,104 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* MIPS inline-assembly implementation of cross-correlation.
+ *
+ * For each of |dim_cross_correlation| outputs, sums the products of |seq1|
+ * and the current window of |seq2| over |dim_seq| samples, shifting every
+ * product right by |right_shifts| (srav) before adding it, and stores the
+ * 32-bit sum through |cross_correlation|. The |seq2| window then advances by
+ * |step_seq2| samples (the value is doubled up front to become a byte
+ * offset for the 16-bit data).
+ *
+ * Layout of the assembly:
+ *   - labels 1/2: path taken when |dim_seq| is even; the inner loop (2)
+ *     consumes two samples per pass.
+ *   - labels 3/4/5: path taken when |dim_seq| is odd; same two-sample inner
+ *     loop (4), followed by one trailing sample at label 5.
+ *   - label 6: common exit.
+ * The block runs under ".set noreorder"; instructions written with one
+ * extra leading space directly follow a branch and appear to be its
+ * delay-slot instruction.
+ *
+ * NOTE(review): the loop counters are tested at the bottom of each loop, so
+ * the outer loop runs once even if |dim_cross_correlation| is 0, and on the
+ * even path the inner loop runs once even if |dim_seq| is 0 -- confirm that
+ * callers never pass 0 for either.
+ */
+void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
+                                     const int16_t* seq1,
+                                     const int16_t* seq2,
+                                     size_t dim_seq,
+                                     size_t dim_cross_correlation,
+                                     int right_shifts,
+                                     int step_seq2) {
+
+  int32_t t0 = 0, t1 = 0, t2 = 0, t3 = 0, sum = 0;
+  int16_t *pseq2 = NULL;
+  int16_t *pseq1 = NULL;
+  int16_t *pseq1_0 = (int16_t*)&seq1[0];
+  int16_t *pseq2_0 = (int16_t*)&seq2[0];
+  int k = 0;
+
+  __asm __volatile (
+    ".set        push                                           \n\t"
+    ".set        noreorder                                      \n\t"
+    "sll         %[step_seq2], %[step_seq2],   1                \n\t"
+    "andi        %[t0],        %[dim_seq],     1                \n\t"
+    "bgtz        %[t0],        3f                               \n\t"
+    " nop                                                       \n\t"
+   "1:                                                          \n\t"
+    "move        %[pseq1],     %[pseq1_0]                       \n\t"
+    "move        %[pseq2],     %[pseq2_0]                       \n\t"
+    "sra         %[k],         %[dim_seq],     1                \n\t"
+    "addiu       %[dim_cc],    %[dim_cc],      -1               \n\t"
+    "xor         %[sum],       %[sum],         %[sum]           \n\t"
+   "2:                                                          \n\t"
+    "lh          %[t0],        0(%[pseq1])                      \n\t"
+    "lh          %[t1],        0(%[pseq2])                      \n\t"
+    "lh          %[t2],        2(%[pseq1])                      \n\t"
+    "lh          %[t3],        2(%[pseq2])                      \n\t"
+    "mul         %[t0],        %[t0],          %[t1]            \n\t"
+    "addiu       %[k],         %[k],           -1               \n\t"
+    "mul         %[t2],        %[t2],          %[t3]            \n\t"
+    "addiu       %[pseq1],     %[pseq1],       4                \n\t"
+    "addiu       %[pseq2],     %[pseq2],       4                \n\t"
+    "srav        %[t0],        %[t0],          %[right_shifts]  \n\t"
+    "addu        %[sum],       %[sum],         %[t0]            \n\t"
+    "srav        %[t2],        %[t2],          %[right_shifts]  \n\t"
+    "bgtz        %[k],         2b                               \n\t"
+    " addu       %[sum],       %[sum],         %[t2]            \n\t"
+    "addu        %[pseq2_0],   %[pseq2_0],     %[step_seq2]     \n\t"
+    "sw          %[sum],       0(%[cc])                         \n\t"
+    "bgtz        %[dim_cc],    1b                               \n\t"
+    " addiu      %[cc],        %[cc],          4                \n\t"
+    "b           6f                                             \n\t"
+    " nop                                                       \n\t"
+   "3:                                                          \n\t"
+    "move        %[pseq1],     %[pseq1_0]                       \n\t"
+    "move        %[pseq2],     %[pseq2_0]                       \n\t"
+    "sra         %[k],         %[dim_seq],     1                \n\t"
+    "addiu       %[dim_cc],    %[dim_cc],      -1               \n\t"
+    "beqz        %[k],         5f                               \n\t"
+    " xor        %[sum],       %[sum],         %[sum]           \n\t"
+   "4:                                                          \n\t"
+    "lh          %[t0],        0(%[pseq1])                      \n\t"
+    "lh          %[t1],        0(%[pseq2])                      \n\t"
+    "lh          %[t2],        2(%[pseq1])                      \n\t"
+    "lh          %[t3],        2(%[pseq2])                      \n\t"
+    "mul         %[t0],        %[t0],          %[t1]            \n\t"
+    "addiu       %[k],         %[k],           -1               \n\t"
+    "mul         %[t2],        %[t2],          %[t3]            \n\t"
+    "addiu       %[pseq1],     %[pseq1],       4                \n\t"
+    "addiu       %[pseq2],     %[pseq2],       4                \n\t"
+    "srav        %[t0],        %[t0],          %[right_shifts]  \n\t"
+    "addu        %[sum],       %[sum],         %[t0]            \n\t"
+    "srav        %[t2],        %[t2],          %[right_shifts]  \n\t"
+    "bgtz        %[k],         4b                               \n\t"
+    " addu       %[sum],       %[sum],         %[t2]            \n\t"
+   "5:                                                          \n\t"
+    "lh          %[t0],        0(%[pseq1])                      \n\t"
+    "lh          %[t1],        0(%[pseq2])                      \n\t"
+    "mul         %[t0],        %[t0],          %[t1]            \n\t"
+    "srav        %[t0],        %[t0],          %[right_shifts]  \n\t"
+    "addu        %[sum],       %[sum],         %[t0]            \n\t"
+    "addu        %[pseq2_0],   %[pseq2_0],     %[step_seq2]     \n\t"
+    "sw          %[sum],       0(%[cc])                         \n\t"
+    "bgtz        %[dim_cc],    3b                               \n\t"
+    " addiu      %[cc],        %[cc],          4                \n\t"
+   "6:                                                          \n\t"
+    ".set        pop                                            \n\t"
+    : [step_seq2] "+r" (step_seq2), [t0] "=&r" (t0), [t1] "=&r" (t1),
+      [t2] "=&r" (t2), [t3] "=&r" (t3), [pseq1] "=&r" (pseq1),
+      [pseq2] "=&r" (pseq2), [pseq1_0] "+r" (pseq1_0), [pseq2_0] "+r" (pseq2_0),
+      [k] "=&r" (k), [dim_cc] "+r" (dim_cross_correlation), [sum] "=&r" (sum),
+      [cc] "+r" (cross_correlation)
+    : [dim_seq] "r" (dim_seq), [right_shifts] "r" (right_shifts)
+    : "hi", "lo", "memory"
+  );
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
@ -0,0 +1,87 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <arm_neon.h>
+
+/* Computes the dot product of |vector1| and |vector2| over |length| samples
+ * and writes it, shifted right by |scaling|, to |*cross_correlation|.
+ *
+ * Samples are processed eight at a time with NEON widening multiplies whose
+ * 32-bit products are pairwise-added into two 64-bit accumulators; the
+ * remaining (length & 7) samples are summed in a scalar loop. Unlike a
+ * per-product shift, |scaling| is applied once, to the complete 64-bit sum.
+ */
+static inline void DotProductWithScaleNeon(int32_t* cross_correlation,
+                                           const int16_t* vector1,
+                                           const int16_t* vector2,
+                                           size_t length,
+                                           int scaling) {
+  size_t i = 0;
+  size_t len1 = length >> 3;  // Number of full 8-sample groups.
+  size_t len2 = length & 7;   // Samples left over after the full groups.
+  int64x2_t sum0 = vdupq_n_s64(0);
+  int64x2_t sum1 = vdupq_n_s64(0);
+
+  for (i = len1; i > 0; i -= 1) {
+    int16x8_t seq1_16x8 = vld1q_s16(vector1);
+    int16x8_t seq2_16x8 = vld1q_s16(vector2);
+    // Widen-multiply the low and high halves; ARM64 has a dedicated
+    // intrinsic for the high half.
+#if defined(WEBRTC_ARCH_ARM64)
+    int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8),
+                               vget_low_s16(seq2_16x8));
+    int32x4_t tmp1 = vmull_high_s16(seq1_16x8, seq2_16x8);
+#else
+    int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8),
+                               vget_low_s16(seq2_16x8));
+    int32x4_t tmp1 = vmull_s16(vget_high_s16(seq1_16x8),
+                               vget_high_s16(seq2_16x8));
+#endif
+    // Pairwise add the 32-bit products into the 64-bit accumulators.
+    sum0 = vpadalq_s32(sum0, tmp0);
+    sum1 = vpadalq_s32(sum1, tmp1);
+    vector1 += 8;
+    vector2 += 8;
+  }
+
+  // Calculate the rest of the samples.
+  int64_t sum_res = 0;
+  for (i = len2; i > 0; i -= 1) {
+    sum_res += WEBRTC_SPL_MUL_16_16(*vector1, *vector2);
+    vector1++;
+    vector2++;
+  }
+
+  sum0 = vaddq_s64(sum0, sum1);
+#if defined(WEBRTC_ARCH_ARM64)
+  // Reduce across lanes, add the scalar tail, shift, and truncate to 32 bits.
+  int64_t sum2 = vaddvq_s64(sum0);
+  *cross_correlation = (int32_t)((sum2 + sum_res) >> scaling);
+#else
+  // vshl_s64 with a negative count shifts right, hence the negated |scaling|.
+  int64x1_t shift = vdup_n_s64(-scaling);
+  int64x1_t sum2 = vadd_s64(vget_low_s64(sum0), vget_high_s64(sum0));
+  sum2 = vadd_s64(sum2, vdup_n_s64(sum_res));
+  sum2 = vshl_s64(sum2, shift);
+  // Store lane 0 of the result viewed as two 32-bit lanes.
+  // NOTE(review): presumably the low 32 bits on little-endian targets -- confirm.
+  vst1_lane_s32(cross_correlation, vreinterpret_s32_s64(sum2), 0);
+#endif
+}
+
+/* NEON version of WebRtcSpl_CrossCorrelation() for ARM32/64 platforms. */
+void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
+                                    const int16_t* seq1,
+                                    const int16_t* seq2,
+                                    size_t dim_seq,
+                                    size_t dim_cross_correlation,
+                                    int right_shifts,
+                                    int step_seq2) {
+  size_t lag;
+
+  /* One scaled dot product per lag; the |seq2| window starts
+   * |step_seq2| * lag samples into the sequence. */
+  for (lag = 0; lag < dim_cross_correlation; lag++) {
+    DotProductWithScaleNeon(&cross_correlation[lag],
+                            seq1,
+                            seq2 + (step_seq2 * lag),
+                            dim_seq,
+                            right_shifts);
+  }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c
@ -0,0 +1,138 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the divisions
+ * WebRtcSpl_DivU32U16()
+ * WebRtcSpl_DivW32W16()
+ * WebRtcSpl_DivW32W16ResW16()
+ * WebRtcSpl_DivResultInQ31()
+ * WebRtcSpl_DivW32HiLow()
+ *
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den)
+{
+    // A zero divisor yields the largest uint32_t value instead of dividing.
+    if (den == 0)
+    {
+        return (uint32_t)0xFFFFFFFF;
+    }
+
+    return (uint32_t)(num / den);
+}
+
+int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den)
+{
+    // Returns num / den (truncated toward zero) as a 32-bit value.
+    // Cases with no representable quotient saturate to the largest int32_t.
+
+    // Guard against division with 0
+    if (den == 0)
+    {
+        return (int32_t)0x7FFFFFFF;
+    }
+
+    // The most negative int32_t divided by -1 does not fit in int32_t;
+    // evaluating it is undefined behavior (and traps on x86), so saturate.
+    if (den == -1 && num == (-0x7FFFFFFF - 1))
+    {
+        return (int32_t)0x7FFFFFFF;
+    }
+
+    return (int32_t)(num / den);
+}
+
+int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den)
+{
+    // Returns num / den truncated to 16 bits; a zero divisor yields the
+    // largest int16_t.
+
+    // Guard against division with 0
+    if (den == 0)
+    {
+        return (int16_t)0x7FFF;
+    }
+
+    // Divide in 64 bits: the most negative int32_t divided by -1 overflows
+    // a 32-bit division (undefined behavior, traps on x86). The quotient is
+    // narrowed to 16 bits exactly as for every other input.
+    return (int16_t)((int64_t)num / den);
+}
+
+int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
+{
+    int32_t remainder;
+    int32_t divisor;
+    int32_t quotient = 0;
+    int negate;
+    int bit;
+
+    if (num == 0)
+    {
+        return 0;
+    }
+
+    // Work on magnitudes; the result is negated only when exactly one of
+    // the operands is negative.
+    remainder = (num < 0) ? -num : num;
+    divisor = (den < 0) ? -den : den;
+    negate = ((num < 0) != (den < 0));
+
+    // Shift-and-subtract division producing one quotient bit per pass,
+    // 31 passes in total.
+    for (bit = 0; bit < 31; bit++)
+    {
+        quotient <<= 1;
+        remainder <<= 1;
+        if (remainder >= divisor)
+        {
+            remainder -= divisor;
+            quotient++;
+        }
+    }
+
+    return negate ? -quotient : quotient;
+}
+
+// Divides |num| by a denominator supplied in split form (|den_hi|, |den_low|).
+// The reciprocal of the denominator is estimated first: a 16-bit starting
+// value from WebRtcSpl_DivW32W16() is refined with one
+// approx * (2.0 - den * approx) step. |num| is then multiplied by that
+// reciprocal using 16-bit hi/low partial products. Per the Q-format comments
+// in the body, the product is Q28 and is shifted left by 3 so that the
+// returned value is Q31.
+// NOTE(review): the hi/low splits done here drop the lowest bit (the ">> 1"
+// after removing the upper 16 bits); presumably |den_hi|/|den_low| are
+// expected in that same layout -- confirm against the callers.
+int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
+{
+    int16_t approx, tmp_hi, tmp_low, num_hi, num_low;
+    int32_t tmpW32;
+
+    // First estimate of 1/den, using only the high part of the denominator.
+    approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);
+    // result in Q14 (Note: 0x1FFFFFFF = 0.5 in Q30)
+
+    // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30)
+    tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1);
+    // tmpW32 = den * approx
+
+    tmpW32 = (int32_t)0x7fffffffL - tmpW32; // result in Q30 (tmpW32 = 2.0-(den*approx))
+
+    // Store tmpW32 in hi and low format
+    tmp_hi = (int16_t)(tmpW32 >> 16);
+    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+    // tmpW32 = 1/den in Q29
+    tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1;
+
+    // 1/den in hi and low format
+    tmp_hi = (int16_t)(tmpW32 >> 16);
+    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+    // Store num in hi and low format
+    num_hi = (int16_t)(num >> 16);
+    num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1);
+
+    // num * (1/den) by 32 bit multiplication (result in Q28)
+    // (the low*low partial product is omitted)
+
+    tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) +
+        (num_low * tmp_hi >> 15);
+
+    // Put result in Q31 (convert from Q28)
+    tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3);
+
+    return tmpW32;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/dot_product_with_scale.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/dot_product_with_scale.c
@ -0,0 +1,32 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* Returns the sum over i in [0, length) of
+ * (vector1[i] * vector2[i]) >> scaling, i.e. every product is shifted
+ * before it is accumulated.
+ *
+ * The running sum is kept in 64 bits and the result is saturated to the
+ * int32_t range, so large inputs can no longer overflow the accumulator
+ * (signed overflow is undefined behavior in C).
+ */
+int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
+                                      const int16_t* vector2,
+                                      size_t length,
+                                      int scaling) {
+  int64_t sum = 0;
+  size_t i = 0;
+
+  /* Unroll the loop to improve performance. */
+  for (i = 0; i + 3 < length; i += 4) {
+    sum += (vector1[i + 0] * vector2[i + 0]) >> scaling;
+    sum += (vector1[i + 1] * vector2[i + 1]) >> scaling;
+    sum += (vector1[i + 2] * vector2[i + 2]) >> scaling;
+    sum += (vector1[i + 3] * vector2[i + 3]) >> scaling;
+  }
+  /* Handle the up-to-three samples left over by the unrolled loop. */
+  for (; i < length; i++) {
+    sum += (vector1[i] * vector2[i]) >> scaling;
+  }
+
+  /* Saturate to the 32-bit return type. */
+  if (sum > 0x7FFFFFFF) {
+    return (int32_t)0x7FFFFFFF;
+  }
+  if (sum < (-0x7FFFFFFF - 1)) {
+    return (int32_t)(-0x7FFFFFFF - 1);
+  }
+  return (int32_t)sum;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast.c
@ -0,0 +1,48 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// TODO(Bjornv): Change the function parameter order to WebRTC code style.
+// C version of WebRtcSpl_DownsampleFast() for generic platforms.
+int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
+                              size_t data_in_length,
+                              int16_t* data_out,
+                              size_t data_out_length,
+                              const int16_t* __restrict coefficients,
+                              size_t coefficients_length,
+                              int factor,
+                              size_t delay) {
+  size_t in_pos;
+  size_t tap;
+  size_t endpos;
+
+  // Nothing to produce, or no filter taps to apply: report an error.
+  if (data_out_length == 0 || coefficients_length == 0) {
+    return -1;
+  }
+
+  // One past the highest input index read for the final output sample; the
+  // input must be at least that long.
+  endpos = delay + factor * (data_out_length - 1) + 1;
+  if (data_in_length < endpos) {
+    return -1;
+  }
+
+  // Each output is a FIR sum ending at |in_pos|; |in_pos| advances by
+  // |factor| input samples per output.
+  for (in_pos = delay; in_pos < endpos; in_pos += factor) {
+    int32_t acc = 2048;  // Rounding offset: 0.5 in Q12.
+
+    for (tap = 0; tap < coefficients_length; tap++) {
+      acc += coefficients[tap] * data_in[in_pos - tap];  // Q12.
+    }
+
+    acc >>= 12;  // Back to Q0.
+
+    // Clamp to 16 bits and emit.
+    *data_out++ = WebRtcSpl_SatW32ToW16(acc);
+  }
+
+  return 0;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_mips.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_mips.c
@ -0,0 +1,169 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Version of WebRtcSpl_DownsampleFast() for MIPS platforms.
+//
+// Filters |data_in| with the Q12 FIR |coefficients| and emits one 16-bit
+// output for every |factor| input samples, starting at input index |delay|.
+// Two inline-assembly variants are provided:
+//  - MIPS_DSP_R2_LE: accumulates in $ac0 with dpa.w.ph, four taps per inner
+//    iteration plus a one-tap-at-a-time tail, and extracts the result with a
+//    rounding right shift by 12 (extr_r.w).
+//  - otherwise: accumulates in a general register that starts at 2048
+//    (0.5 in Q12), two taps per inner iteration plus an optional single
+//    leftover tap, followed by an arithmetic right shift by 12. The result is
+//    saturated with shll_s.w/sra when MIPS_DSP_R1_LE is defined and with
+//    explicit compares against |max_16| / |min_16| when it is not.
+//
+// Returns 0 on success, or -1 when |data_out_length| or
+// |coefficients_length| is zero, |factor| is not positive, or |data_in| is
+// too short to produce |data_out_length| outputs.
+int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
+                                  size_t data_in_length,
+                                  int16_t* data_out,
+                                  size_t data_out_length,
+                                  const int16_t* __restrict coefficients,
+                                  size_t coefficients_length,
+                                  int factor,
+                                  size_t delay) {
+  int i;  // Remaining input distance (endpos - delay), stepped by |factor|.
+  int j;  // Counter for the unrolled part of the tap loop.
+  int k;  // Number of leftover taps after the unrolled part.
+  int32_t out_s32 = 0;
+  size_t endpos = delay + factor * (data_out_length - 1) + 1;
+
+  int32_t  tmp1, tmp2, tmp3, tmp4, factor_2;
+  int16_t* p_coefficients;
+  int16_t* p_data_in;
+  int16_t* p_data_in_0 = (int16_t*)&data_in[delay];
+  int16_t* p_coefficients_0 = (int16_t*)&coefficients[0];
+#if !defined(MIPS_DSP_R1_LE)
+  int32_t max_16 = 0x7FFF;
+  int32_t min_16 = 0xFFFF8000;
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+
+  // Return error if any of the running conditions doesn't meet. A
+  // non-positive |factor| is rejected as well: the assembly below advances
+  // its loop counter by |factor| and would not terminate sensibly otherwise.
+  if (data_out_length == 0 || coefficients_length == 0
+                           || factor <= 0
+                           || data_in_length < endpos) {
+    return -1;
+  }
+  // NOTE: |data_out| is incremented inside both asm statements, so it is
+  // declared as a read-write ("+r") operand rather than as an input.
+#if defined(MIPS_DSP_R2_LE)
+  __asm __volatile (
+    ".set        push                                                \n\t"
+    ".set        noreorder                                           \n\t"
+    "subu        %[i],            %[endpos],       %[delay]          \n\t"
+    "sll         %[factor_2],     %[factor],       1                 \n\t"
+   "1:                                                               \n\t"
+    "move        %[p_data_in],    %[p_data_in_0]                     \n\t"
+    "mult        $zero,           $zero                              \n\t"
+    "move        %[p_coefs],      %[p_coefs_0]                       \n\t"
+    "sra         %[j],            %[coef_length],  2                 \n\t"
+    "beq         %[j],            $zero,           3f                \n\t"
+    " andi       %[k],            %[coef_length],  3                 \n\t"
+   "2:                                                               \n\t"
+    "lwl         %[tmp1],         1(%[p_data_in])                    \n\t"
+    "lwl         %[tmp2],         3(%[p_coefs])                      \n\t"
+    "lwl         %[tmp3],         -3(%[p_data_in])                   \n\t"
+    "lwl         %[tmp4],         7(%[p_coefs])                      \n\t"
+    "lwr         %[tmp1],         -2(%[p_data_in])                   \n\t"
+    "lwr         %[tmp2],         0(%[p_coefs])                      \n\t"
+    "lwr         %[tmp3],         -6(%[p_data_in])                   \n\t"
+    "lwr         %[tmp4],         4(%[p_coefs])                      \n\t"
+    "packrl.ph   %[tmp1],         %[tmp1],         %[tmp1]           \n\t"
+    "packrl.ph   %[tmp3],         %[tmp3],         %[tmp3]           \n\t"
+    "dpa.w.ph    $ac0,            %[tmp1],         %[tmp2]           \n\t"
+    "dpa.w.ph    $ac0,            %[tmp3],         %[tmp4]           \n\t"
+    "addiu       %[j],            %[j],            -1                \n\t"
+    "addiu       %[p_data_in],    %[p_data_in],    -8                \n\t"
+    "bgtz        %[j],            2b                                 \n\t"
+    " addiu      %[p_coefs],      %[p_coefs],      8                 \n\t"
+   "3:                                                               \n\t"
+    "beq         %[k],            $zero,           5f                \n\t"
+    " nop                                                            \n\t"
+   "4:                                                               \n\t"
+    "lhu         %[tmp1],         0(%[p_data_in])                    \n\t"
+    "lhu         %[tmp2],         0(%[p_coefs])                      \n\t"
+    "addiu       %[p_data_in],    %[p_data_in],    -2                \n\t"
+    "addiu       %[k],            %[k],            -1                \n\t"
+    "dpa.w.ph    $ac0,            %[tmp1],         %[tmp2]           \n\t"
+    "bgtz        %[k],            4b                                 \n\t"
+    " addiu      %[p_coefs],      %[p_coefs],      2                 \n\t"
+   "5:                                                               \n\t"
+    "extr_r.w    %[out_s32],      $ac0,            12                \n\t"
+    "addu        %[p_data_in_0],  %[p_data_in_0],  %[factor_2]       \n\t"
+    "subu        %[i],            %[i],            %[factor]         \n\t"
+    "shll_s.w    %[out_s32],      %[out_s32],      16                \n\t"
+    "sra         %[out_s32],      %[out_s32],      16                \n\t"
+    "sh          %[out_s32],      0(%[data_out])                     \n\t"
+    "bgtz        %[i],            1b                                 \n\t"
+    " addiu      %[data_out],     %[data_out],     2                 \n\t"
+    ".set        pop                                                 \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in),
+      [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
+      [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
+      [i] "=&r" (i), [k] "=&r" (k), [data_out] "+r" (data_out)
+    : [coef_length] "r" (coefficients_length),
+      [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
+      [delay] "r" (delay), [factor] "r" (factor)
+    : "memory", "hi", "lo"
+ );
+#else  // #if defined(MIPS_DSP_R2_LE)
+  __asm __volatile (
+    ".set        push                                                \n\t"
+    ".set        noreorder                                           \n\t"
+    "sll         %[factor_2],     %[factor],       1                 \n\t"
+    "subu        %[i],            %[endpos],       %[delay]          \n\t"
+   "1:                                                               \n\t"
+    "move        %[p_data_in],    %[p_data_in_0]                     \n\t"
+    "addiu       %[out_s32],      $zero,           2048              \n\t"
+    "move        %[p_coefs],      %[p_coefs_0]                       \n\t"
+    "sra         %[j],            %[coef_length],  1                 \n\t"
+    "beq         %[j],            $zero,           3f                \n\t"
+    " andi       %[k],            %[coef_length],  1                 \n\t"
+   "2:                                                               \n\t"
+    "lh          %[tmp1],         0(%[p_data_in])                    \n\t"
+    "lh          %[tmp2],         0(%[p_coefs])                      \n\t"
+    "lh          %[tmp3],         -2(%[p_data_in])                   \n\t"
+    "lh          %[tmp4],         2(%[p_coefs])                      \n\t"
+    "mul         %[tmp1],         %[tmp1],         %[tmp2]           \n\t"
+    "addiu       %[p_coefs],      %[p_coefs],      4                 \n\t"
+    "mul         %[tmp3],         %[tmp3],         %[tmp4]           \n\t"
+    "addiu       %[j],            %[j],            -1                \n\t"
+    "addiu       %[p_data_in],    %[p_data_in],    -4                \n\t"
+    "addu        %[tmp1],         %[tmp1],         %[tmp3]           \n\t"
+    "bgtz        %[j],            2b                                 \n\t"
+    " addu       %[out_s32],      %[out_s32],      %[tmp1]           \n\t"
+   "3:                                                               \n\t"
+    "beq         %[k],            $zero,           4f                \n\t"
+    " nop                                                            \n\t"
+    "lh          %[tmp1],         0(%[p_data_in])                    \n\t"
+    "lh          %[tmp2],         0(%[p_coefs])                      \n\t"
+    "mul         %[tmp1],         %[tmp1],         %[tmp2]           \n\t"
+    "addu        %[out_s32],      %[out_s32],      %[tmp1]           \n\t"
+   "4:                                                               \n\t"
+    "sra         %[out_s32],      %[out_s32],      12                \n\t"
+    "addu        %[p_data_in_0],  %[p_data_in_0],  %[factor_2]       \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shll_s.w    %[out_s32],      %[out_s32],      16                \n\t"
+    "sra         %[out_s32],      %[out_s32],      16                \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "slt         %[tmp1],         %[max_16],       %[out_s32]        \n\t"
+    "movn        %[out_s32],      %[max_16],       %[tmp1]           \n\t"
+    "slt         %[tmp1],         %[out_s32],      %[min_16]         \n\t"
+    "movn        %[out_s32],      %[min_16],       %[tmp1]           \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "subu        %[i],            %[i],            %[factor]         \n\t"
+    "sh          %[out_s32],      0(%[data_out])                     \n\t"
+    "bgtz        %[i],            1b                                 \n\t"
+    " addiu      %[data_out],     %[data_out],     2                 \n\t"
+    ".set        pop                                                 \n\t"
+    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
+      [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), [k] "=&r" (k),
+      [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
+      [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
+      [i] "=&r" (i), [data_out] "+r" (data_out)
+    : [coef_length] "r" (coefficients_length),
+      [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
+#if !defined(MIPS_DSP_R1_LE)
+      [max_16] "r" (max_16), [min_16] "r" (min_16),
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+      [delay] "r" (delay), [factor] "r" (factor)
+    : "memory", "hi", "lo"
+  );
+#endif  // #if defined(MIPS_DSP_R2_LE)
+  return 0;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c
@ -0,0 +1,217 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <arm_neon.h>
+
+// NEON intrinsics version of WebRtcSpl_DownsampleFast()
+// for ARM 32-bit/64-bit platforms.
+//
+// Filters |data_in| with the Q12 FIR |coefficients| and emits one 16-bit
+// output for every |factor| input samples, starting at input index |delay|.
+// Outputs are produced eight at a time with NEON (with dedicated paths for
+// factor == 2 and factor == 4); the remaining (data_out_length & 7) outputs
+// are computed by a scalar loop.
+//
+// Returns 0 on success, or -1 when |data_out_length| or
+// |coefficients_length| is zero, |factor| is not positive, or |data_in| is
+// too short to produce |data_out_length| outputs.
+int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
+                                 size_t data_in_length,
+                                 int16_t* data_out,
+                                 size_t data_out_length,
+                                 const int16_t* __restrict coefficients,
+                                 size_t coefficients_length,
+                                 int factor,
+                                 size_t delay) {
+  size_t i = 0;
+  size_t j = 0;
+  int32_t out_s32 = 0;
+  size_t endpos = delay + factor * (data_out_length - 1) + 1;
+  size_t res = data_out_length & 0x7;
+  // Input index at which the 8-way unrolled part stops. With fewer than 8
+  // outputs there is no unrolled part; using |delay| then keeps the unsigned
+  // subtraction from wrapping around when delay + 1 < factor, which would
+  // otherwise send the vector loops far out of bounds.
+  size_t endpos1 = (data_out_length > res) ? endpos - factor * res : delay;
+
+  // Return error if any of the running conditions doesn't meet. A
+  // non-positive |factor| is rejected too, since the loops below step by it.
+  if (data_out_length == 0 || coefficients_length == 0
+                           || factor <= 0
+                           || data_in_length < endpos) {
+    return -1;
+  }
+
+  // First part, unroll the loop 8 times, with 3 subcases
+  // (factor == 2, 4, others).
+  switch (factor) {
+    case 2: {
+      for (i = delay; i < endpos1; i += 16) {
+        // Round value, 0.5 in Q12.
+        int32x4_t out32x4_0 = vdupq_n_s32(2048);
+        int32x4_t out32x4_1 = vdupq_n_s32(2048);
+
+#if defined(WEBRTC_ARCH_ARM64)
+        // Unroll the loop 2 times. The bound is written as j + 1 < length so
+        // that it cannot underflow for short filters.
+        for (j = 0; j + 1 < coefficients_length; j += 2) {
+          int32x2_t coeff32 = vld1_dup_s32((int32_t*)&coefficients[j]);
+          int16x4_t coeff16x4 = vreinterpret_s16_s32(coeff32);
+          int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j - 1]);
+
+          // Mul and accumulate low 64-bit data.
+          int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
+          int16x4_t in16x4_1 = vget_low_s16(in16x8x2.val[1]);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 1);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_1, coeff16x4, 0);
+
+          // Mul and accumulate high 64-bit data.
+          // TODO: vget_high_s16 need extra cost on ARM64. This could be
+          // replaced by vmlal_high_lane_s16. But for the interface of
+          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
+          // This issue need to be tracked in the future.
+          int16x4_t in16x4_2 = vget_high_s16(in16x8x2.val[0]);
+          int16x4_t in16x4_3 = vget_high_s16(in16x8x2.val[1]);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_2, coeff16x4, 1);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 0);
+        }
+
+        // Leftover tap when coefficients_length is odd.
+        for (; j < coefficients_length; j++) {
+          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
+          int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);
+
+          // Mul and accumulate low 64-bit data.
+          int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
+
+          // Mul and accumulate high 64-bit data.
+          // TODO: vget_high_s16 need extra cost on ARM64. This could be
+          // replaced by vmlal_high_lane_s16. But for the interface of
+          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
+          // This issue need to be tracked in the future.
+          int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
+        }
+#else
+        // On ARMv7, the loop unrolling 2 times results in performance
+        // regression.
+        for (j = 0; j < coefficients_length; j++) {
+          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
+          int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);
+
+          // Mul and accumulate.
+          int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
+          int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
+        }
+#endif
+
+        // Saturate and store the output.
+        int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
+        int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
+        vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
+        data_out += 8;
+      }
+      break;
+    }
+    case 4: {
+      for (i = delay; i < endpos1; i += 32) {
+        // Round value, 0.5 in Q12.
+        int32x4_t out32x4_0 = vdupq_n_s32(2048);
+        int32x4_t out32x4_1 = vdupq_n_s32(2048);
+
+        // Unroll the loop 4 times. The bound is written as j + 3 < length:
+        // "length - 3" would wrap around for filters with fewer than 3 taps
+        // because the length is unsigned.
+        for (j = 0; j + 3 < coefficients_length; j += 4) {
+          int16x4_t coeff16x4 = vld1_s16(&coefficients[j]);
+          int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j - 3]);
+
+          // Mul and accumulate low 64-bit data.
+          int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]);
+          int16x4_t in16x4_2 = vget_low_s16(in16x8x4.val[1]);
+          int16x4_t in16x4_4 = vget_low_s16(in16x8x4.val[2]);
+          int16x4_t in16x4_6 = vget_low_s16(in16x8x4.val[3]);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 3);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_2, coeff16x4, 2);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_4, coeff16x4, 1);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_6, coeff16x4, 0);
+
+          // Mul and accumulate high 64-bit data.
+          // TODO: vget_high_s16 need extra cost on ARM64. This could be
+          // replaced by vmlal_high_lane_s16. But for the interface of
+          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
+          // This issue need to be tracked in the future.
+          int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]);
+          int16x4_t in16x4_3 = vget_high_s16(in16x8x4.val[1]);
+          int16x4_t in16x4_5 = vget_high_s16(in16x8x4.val[2]);
+          int16x4_t in16x4_7 = vget_high_s16(in16x8x4.val[3]);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 3);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 2);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_5, coeff16x4, 1);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_7, coeff16x4, 0);
+        }
+
+        // Leftover taps (coefficients_length not a multiple of 4).
+        for (; j < coefficients_length; j++) {
+          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
+          int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j]);
+
+          // Mul and accumulate low 64-bit data.
+          int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]);
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
+
+          // Mul and accumulate high 64-bit data.
+          // TODO: vget_high_s16 need extra cost on ARM64. This could be
+          // replaced by vmlal_high_lane_s16. But for the interface of
+          // vmlal_high_lane_s16, there is a bug in gcc 4.9.
+          // This issue need to be tracked in the future.
+          int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
+        }
+
+        // Saturate and store the output.
+        int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
+        int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
+        vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
+        data_out += 8;
+      }
+      break;
+    }
+    default: {
+      for (i = delay; i < endpos1; i += factor * 8) {
+        // Round value, 0.5 in Q12.
+        int32x4_t out32x4_0 = vdupq_n_s32(2048);
+        int32x4_t out32x4_1 = vdupq_n_s32(2048);
+
+        for (j = 0; j < coefficients_length; j++) {
+          // Gather the eight input samples (one per output, |factor| apart)
+          // that this tap contributes to.
+          int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
+          int16x4_t in16x4_0 = vld1_dup_s16(&data_in[i - j]);
+          in16x4_0 = vld1_lane_s16(&data_in[i + factor - j], in16x4_0, 1);
+          in16x4_0 = vld1_lane_s16(&data_in[i + factor * 2 - j], in16x4_0, 2);
+          in16x4_0 = vld1_lane_s16(&data_in[i + factor * 3 - j], in16x4_0, 3);
+          int16x4_t in16x4_1 = vld1_dup_s16(&data_in[i + factor * 4 - j]);
+          in16x4_1 = vld1_lane_s16(&data_in[i + factor * 5 - j], in16x4_1, 1);
+          in16x4_1 = vld1_lane_s16(&data_in[i + factor * 6 - j], in16x4_1, 2);
+          in16x4_1 = vld1_lane_s16(&data_in[i + factor * 7 - j], in16x4_1, 3);
+
+          // Mul and accumulate.
+          out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
+          out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
+        }
+
+        // Saturate and store the output.
+        int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
+        int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
+        vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
+        data_out += 8;
+      }
+      break;
+    }
+  }
+
+  // Second part, do the rest iterations (if any). |i| continues from where
+  // the unrolled part stopped (or from |delay| if it did not run).
+  for (; i < endpos; i += factor) {
+    out_s32 = 2048;  // Round value, 0.5 in Q12.
+
+    for (j = 0; j < coefficients_length; j++) {
+      out_s32 = WebRtc_MulAccumW16(coefficients[j], data_in[i - j], out_s32);
+    }
+
+    // Saturate and store the output.
+    out_s32 >>= 12;
+    *data_out++ = WebRtcSpl_SatW32ToW16(out_s32);
+  }
+
+  return 0;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/energy.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/energy.c
@ -0,0 +1,39 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_Energy().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/*
+ * Sums the squared samples of |vector|. Each square is shifted right by the
+ * amount returned from WebRtcSpl_GetScalingSquare() before it is added, and
+ * that same shift is reported through |scale_factor| (presumably so the sum
+ * fits in 32 bits - see the helper's description in the header).
+ */
+int32_t WebRtcSpl_Energy(int16_t* vector,
+                         size_t vector_length,
+                         int* scale_factor)
+{
+    const int shift =
+        WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
+    int32_t energy = 0;
+    size_t n;
+
+    for (n = 0; n < vector_length; n++)
+    {
+        const int sample = vector[n];
+
+        energy += (sample * sample) >> shift;
+    }
+
+    *scale_factor = shift;
+    return energy;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar.c
@ -0,0 +1,89 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_FilterAR().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/*
+ * Auto-regressive filter that keeps its history in two 16-bit parts.
+ *
+ * For every input sample x[i] the function starts from x[i] << 12 and
+ * subtracts a[j] * (previous output j steps back) for j = 1 .. a_length - 1
+ * (a[0] is not read). Previous outputs come from |filtered| /
+ * |filtered_low| when they were produced in this call, and from the tail of
+ * |state| / |state_low| otherwise. The rounded result (>> 12) is written to
+ * |filtered|, and the remainder that was lost by that rounding is written to
+ * |filtered_low|.
+ *
+ * On return |state| and |state_low| hold the most recent outputs so that a
+ * following call can continue the filter. |state_low_length| and
+ * |filtered_low_length| are not used; |state_length| is applied to both
+ * state buffers.
+ *
+ * Returns |x_length|.
+ */
+size_t WebRtcSpl_FilterAR(const int16_t* a,
+                          size_t a_length,
+                          const int16_t* x,
+                          size_t x_length,
+                          int16_t* state,
+                          size_t state_length,
+                          int16_t* state_low,
+                          size_t state_low_length,
+                          int16_t* filtered,
+                          int16_t* filtered_low,
+                          size_t filtered_low_length)
+{
+    int32_t o;
+    int32_t oLOW;
+    size_t i, j, stop;
+
+    for (i = 0; i < x_length; i++)
+    {
+        // Calculate filtered[i] and filtered_low[i]
+        o = (int32_t)x[i] << 12;
+        oLOW = (int32_t)0;
+
+        // Taps whose history was already produced during this call.
+        stop = (i < a_length) ? i + 1 : a_length;
+        for (j = 1; j < stop; j++)
+        {
+          o -= a[j] * filtered[i - j];
+          oLOW -= a[j] * filtered_low[i - j];
+        }
+        // Remaining taps reach back into the saved state, newest entry last
+        // (tap j = i + 1 reads state[state_length - 1]).
+        for (j = i + 1; j < a_length; j++)
+        {
+          o -= a[j] * state[state_length + i - j];
+          oLOW -= a[j] * state_low[state_length + i - j];
+        }
+
+        o += (oLOW >> 12);
+        filtered[i] = (int16_t)((o + (int32_t)2048) >> 12);
+        // Keep what the 16-bit output could not represent.
+        filtered_low[i] = (int16_t)(o - ((int32_t)filtered[i] << 12));
+    }
+
+    // Save the filter state
+    if (x_length >= state_length)
+    {
+        WebRtcSpl_CopyFromEndW16(filtered, x_length, a_length - 1, state);
+        WebRtcSpl_CopyFromEndW16(filtered_low, x_length, a_length - 1, state_low);
+    } else
+    {
+        // Fewer new samples than state entries: drop the oldest x_length
+        // entries and append the new outputs at the end.
+        for (i = 0; i < state_length - x_length; i++)
+        {
+            state[i] = state[i + x_length];
+            state_low[i] = state_low[i + x_length];
+        }
+        for (i = 0; i < x_length; i++)
+        {
+            state[state_length - x_length + i] = filtered[i];
+            // The low part belongs in state_low; writing it to state would
+            // overwrite the value stored on the line above.
+            state_low[state_length - x_length + i] = filtered_low[i];
+        }
+    }
+
+    return x_length;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12.c
@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// TODO(bjornv): Change the return type to report errors.
+
+// Generic C implementation of the Q12 auto-regressive filter.
+//
+// For each of the |data_length| samples:
+//   data_out[i] = (coefficients[0] * data_in[i]
+//                  - sum(coefficients[j] * data_out[i - j], j >= 1)
+//                  + 2048) >> 12
+// with the intermediate value clamped first so that the result fits in
+// 16 bits. data_out[i - j] is read for i < j as well, so the caller must
+// provide coefficients_length - 1 valid samples in front of |data_out|.
+void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
+                               int16_t* data_out,
+                               const int16_t* __restrict coefficients,
+                               size_t coefficients_length,
+                               size_t data_length) {
+  size_t n = 0;
+
+  assert(data_length > 0);
+  assert(coefficients_length > 1);
+
+  while (n < data_length) {
+    int32_t feedback = 0;
+    int32_t acc = 0;
+    size_t tap = coefficients_length;
+
+    // Feedback taps, from coefficients_length - 1 down to 1.
+    while (--tap > 0) {
+      feedback += coefficients[tap] * data_out[n - tap];
+    }
+
+    acc = coefficients[0] * data_in[n] - feedback;
+
+    // Saturate and store the output. The bounds are chosen so that
+    // (acc + 2048) >> 12 stays within [-32768, 32767].
+    acc = WEBRTC_SPL_SAT(134215679, acc, -134217728);
+    data_out[n] = (int16_t)((acc + 2048) >> 12);
+
+    n++;
+  }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S
@ -0,0 +1,218 @@
+@
+@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+@
+@ Use of this source code is governed by a BSD-style license
+@ that can be found in the LICENSE file in the root of the source
+@ tree. An additional intellectual property rights grant can be found
+@ in the file PATENTS.  All contributing project authors may
+@ be found in the AUTHORS file in the root of the source tree.
+@
+
+@ This file contains the function WebRtcSpl_FilterARFastQ12(), optimized for
+@ ARMv7  platform. The description header can be found in
+@ signal_processing_library.h
+@
+@ Output is bit-exact with the generic C code as in filter_ar_fast_q12.c, and
+@ the reference C code at end of this file.
+
+@ Assumptions:
+@ (1) data_length > 0
+@ (2) coefficients_length > 1
+
+@ Register usage:
+@
+@ r0:  &data_in[i]
+@ r1:  &data_out[i], for result output
+@ r2:  &coefficients[0]
+@ r3:  coefficients_length
+@ r4:  Iteration counter for the outer loop.
+@ r5:  data_out[j] as multiplication inputs
+@ r6:  Calculated value for output data_out[]; iteration counter for inner loop
+@ r7:  Partial sum of a filtering multiplication results
+@ r8:  Partial sum of a filtering multiplication results
+@ r9:  &data_out[], for filtering input; data_in[i]
+@ r10: coefficients[j]
+@ r11: Scratch
+@ r12: &coefficients[j]
+
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+
+GLOBAL_FUNCTION WebRtcSpl_FilterARFastQ12
+.align  2
+DEFINE_FUNCTION WebRtcSpl_FilterARFastQ12
+  push {r4-r11}
+
+  @ data_length is the fifth argument and therefore lives on the stack, just
+  @ above the eight registers pushed here. It is declared as size_t in the C
+  @ prototype, so load the whole word rather than a signed halfword.
+  ldr r12, [sp, #32]           @ data_length
+  subs r4, r12, #1
+  beq ODD_LENGTH               @ jump if data_length == 1
+
+@ Main loop: two output samples (output1, output2) per iteration.
+LOOP_LENGTH:
+  add r12, r2, r3, lsl #1
+  sub r12, #4                  @ &coefficients[coefficients_length - 2]
+  sub r9, r1, r3, lsl #1
+  add r9, #2                   @ &data_out[i - coefficients_length + 1]
+  ldr r5, [r9], #4             @ data_out[i - coefficients_length + {1,2}]
+
+  mov r7, #0                   @ sum1
+  mov r8, #0                   @ sum2
+  subs r6, r3, #3              @ Iteration counter for inner loop.
+  beq ODD_A_LENGTH             @ branch if coefficients_length == 3
+  blt POST_LOOP_A_LENGTH       @ branch if coefficients_length == 2
+
+LOOP_A_LENGTH:
+  ldr r10, [r12], #-4          @ coefficients[j - 1], coefficients[j]
+  subs r6, #2
+  smlatt r8, r10, r5, r8       @ sum2 += coefficients[j] * data_out[i - j + 1];
+  smlatb r7, r10, r5, r7       @ sum1 += coefficients[j] * data_out[i - j];
+  smlabt r7, r10, r5, r7       @ coefficients[j - 1] * data_out[i - j + 1];
+  ldr r5, [r9], #4             @ data_out[i - j + 2],  data_out[i - j + 3]
+  smlabb r8, r10, r5, r8       @ coefficients[j - 1] * data_out[i - j + 2];
+  bgt LOOP_A_LENGTH
+  blt POST_LOOP_A_LENGTH
+
+ODD_A_LENGTH:
+  ldrsh r10, [r12, #2]         @ Filter coefficients coefficients[2]
+  sub r12, #2                  @ &coefficients[0]
+  smlabb r7, r10, r5, r7       @ sum1 += coefficients[2] * data_out[i - 2];
+  smlabt r8, r10, r5, r8       @ sum2 += coefficients[2] * data_out[i - 1];
+  ldr r5, [r9, #-2]            @ data_out[i - 1],  data_out[i]
+
+POST_LOOP_A_LENGTH:
+  ldr r10, [r12]               @ coefficients[0], coefficients[1]
+  smlatb r7, r10, r5, r7       @ sum1 += coefficients[1] * data_out[i - 1];
+
+  ldr r9, [r0], #4             @ data_in[i], data_in[i + 1]
+  smulbb r6, r10, r9           @ output1 = coefficients[0] * data_in[i];
+  sub r6, r7                   @ output1 -= sum1;
+
+  @ Round and saturate: add the rounding constant only when (output >> 12)
+  @ already fits in 16 bits, then saturate the shifted value.
+  sbfx r11, r6, #12, #16
+  ssat r7, #16, r6, asr #12
+  cmp r7, r11
+  addeq r6, r6, #2048
+  ssat r6, #16, r6, asr #12
+  strh r6, [r1], #2            @ Store data_out[i]
+
+  smlatb r8, r10, r6, r8       @ sum2 += coefficients[1] * data_out[i];
+  smulbt r6, r10, r9           @ output2 = coefficients[0] * data_in[i + 1];
+  sub r6, r8                   @ output2 -= sum2;
+
+  @ Same round-and-saturate sequence for the second sample.
+  sbfx r11, r6, #12, #16
+  ssat r7, #16, r6, asr #12
+  cmp r7, r11
+  addeq r6, r6, #2048
+  ssat r6, #16, r6, asr #12
+  strh r6, [r1], #2            @ Store data_out[i + 1]
+
+  subs r4, #2
+  bgt LOOP_LENGTH
+  blt END                      @ For even data_length, it's done. Jump to END.
+
+@ Process i = data_length -1, for the case of an odd length.
+ODD_LENGTH:
+  add r12, r2, r3, lsl #1
+  sub r12, #4                  @ &coefficients[coefficients_length - 2]
+  sub r9, r1, r3, lsl #1
+  add r9, #2                   @ &data_out[i - coefficients_length + 1]
+  mov r7, #0                   @ sum1
+  mov r8, #0                   @ sum2 (second partial sum for this sample)
+  subs r6, r3, #2              @ inner loop counter
+  beq EVEN_A_LENGTH            @ branch if coefficients_length == 2
+
+LOOP2_A_LENGTH:
+  ldr r10, [r12], #-4          @ coefficients[j - 1], coefficients[j]
+  ldr r5, [r9], #4             @ data_out[i - j],  data_out[i - j + 1]
+  subs r6, #2
+  smlatb r7, r10, r5, r7       @ sum1 += coefficients[j] * data_out[i - j];
+  smlabt r8, r10, r5, r8       @ sum2 += coefficients[j - 1] * data_out[i - j + 1];
+  bgt LOOP2_A_LENGTH
+  addlt r12, #2
+  blt POST_LOOP2_A_LENGTH
+
+EVEN_A_LENGTH:
+  ldrsh r10, [r12, #2]         @ Filter coefficients coefficients[1]
+  ldrsh r5, [r9]               @ data_out[i - 1]
+  smlabb r7, r10, r5, r7       @ sum1 += coefficients[1] * data_out[i - 1];
+
+POST_LOOP2_A_LENGTH:
+  ldrsh r10, [r12]             @ Filter coefficients coefficients[0]
+  ldrsh r9, [r0]               @ data_in[i]
+  smulbb r6, r10, r9           @ output1 = coefficients[0] * data_in[i];
+  sub r6, r7                   @ output1 -= sum1;
+  sub r6, r8                   @ output1 -= sum2;
+  sbfx r8, r6, #12, #16
+  ssat r7, #16, r6, asr #12
+  cmp r7, r8
+  addeq r6, r6, #2048
+  ssat r6, #16, r6, asr #12
+  strh r6, [r1]                @ Store the data_out[i]
+
+END:
+  pop {r4-r11}
+  bx  lr
+
+@Reference C code:
+@
+@void WebRtcSpl_FilterARFastQ12(int16_t* data_in,
+@                               int16_t* data_out,
+@                               int16_t* __restrict coefficients,
+@                               size_t coefficients_length,
+@                               size_t data_length) {
+@  size_t i = 0;
+@  size_t j = 0;
+@
+@  assert(data_length > 0);
+@  assert(coefficients_length > 1);
+@
+@  for (i = 0; i < data_length - 1; i += 2) {
+@    int32_t output1 = 0;
+@    int32_t sum1 = 0;
+@    int32_t output2 = 0;
+@    int32_t sum2 = 0;
+@
+@    for (j = coefficients_length - 1; j > 2; j -= 2) {
+@      sum1 += coefficients[j]      * data_out[i - j];
+@      sum1 += coefficients[j - 1]  * data_out[i - j + 1];
+@      sum2 += coefficients[j]     * data_out[i - j + 1];
+@      sum2 += coefficients[j - 1] * data_out[i - j + 2];
+@    }
+@
+@    if (j == 2) {
+@      sum1 += coefficients[2] * data_out[i - 2];
+@      sum2 += coefficients[2] * data_out[i - 1];
+@    }
+@
+@    sum1 += coefficients[1] * data_out[i - 1];
+@    output1 = coefficients[0] * data_in[i];
+@    output1 -= sum1;
+@    // Saturate and store the output.
+@    output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728);
+@    data_out[i] = (int16_t)((output1 + 2048) >> 12);
+@
+@    sum2 += coefficients[1] * data_out[i];
+@    output2 = coefficients[0] * data_in[i + 1];
+@    output2 -= sum2;
+@    // Saturate and store the output.
+@    output2 = WEBRTC_SPL_SAT(134215679, output2, -134217728);
+@    data_out[i + 1] = (int16_t)((output2 + 2048) >> 12);
+@  }
+@
+@  if (i == data_length - 1) {
+@    int32_t output1 = 0;
+@    int32_t sum1 = 0;
+@
+@    for (j = coefficients_length - 1; j > 1; j -= 2) {
+@      sum1 += coefficients[j]      * data_out[i - j];
+@      sum1 += coefficients[j - 1]  * data_out[i - j + 1];
+@    }
+@
+@    if (j == 1) {
+@      sum1 += coefficients[1] * data_out[i - 1];
+@    }
+@
+@    output1 = coefficients[0] * data_in[i];
+@    output1 -= sum1;
+@    // Saturate and store the output.
+@    output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728);
+@    data_out[i] = (int16_t)((output1 + 2048) >> 12);
+@  }
+@}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c
@ -0,0 +1,140 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
+                               int16_t* data_out,
+                               const int16_t* __restrict coefficients,
+                               size_t coefficients_length,
+                               size_t data_length) {
+  int r0, r1, r2, r3;
+  int coef0, offset;
+  int i, j, k;
+  int coefptr, outptr, tmpout, inptr;
+#if !defined(MIPS_DSP_R1_LE)
+  int max16 = 0x7FFF;
+  int min16 = 0xFFFF8000;
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+  /* MIPS32 inline-assembly version of the Q12 AR filter. For each of the
+   * |data_length| samples the hi/lo accumulator is seeded with
+   * coefficients[0] * data_in[n] (mult) and the products of the remaining
+   * coefficients with earlier data_out samples are subtracted (msub); the
+   * history is read starting at data_out - (coefficients_length - 1), so that
+   * many samples must be valid in front of data_out. The result is rounded,
+   * shifted right by 12 and saturated to 16 bits (extr_r.w/shll_s.w with the
+   * DSP ASE, otherwise +2048, sra 12 and a clamp to [min16, max16]).
+   * Addresses are held in plain int variables (coefptr, outptr, tmpout,
+   * inptr), which presumes 32-bit pointers. */
+  assert(data_length > 0);
+  assert(coefficients_length > 1);
+  /* Labels: 1/2 are used when (coefficients_length - 1) is even, 3/4 when it
+   * is odd (selected through |k|); 5 is the common exit. Because of
+   * ".set noreorder" the instruction after each branch (written with a leading
+   * space) is its delay slot.
+   * NOTE(review): loop 4 is instruction-for-instruction the same as loop 2 and
+   * only exits once |j| <= 0, so for an odd |j| it appears to consume one
+   * coefficient pair too many before the single-tap tail and the store at
+   * 2(tmpout) - confirm against the C reference with an even
+   * coefficients_length. */
+  __asm __volatile (
+    ".set       push                                             \n\t"
+    ".set       noreorder                                        \n\t"
+    "addiu      %[i],       %[data_length],          0           \n\t"
+    "lh         %[coef0],   0(%[coefficients])                   \n\t"
+    "addiu      %[j],       %[coefficients_length],  -1          \n\t"
+    "andi       %[k],       %[j],                    1           \n\t"
+    "sll        %[offset],  %[j],                    1           \n\t"
+    "subu       %[outptr],  %[data_out],             %[offset]   \n\t"
+    "addiu      %[inptr],   %[data_in],              0           \n\t"
+    "bgtz       %[k],       3f                                   \n\t"
+    " addu      %[coefptr], %[coefficients],         %[offset]   \n\t"
+   "1:                                                           \n\t"
+    "lh         %[r0],      0(%[inptr])                          \n\t"
+    "addiu      %[i],       %[i],                    -1          \n\t"
+    "addiu      %[tmpout],  %[outptr],               0           \n\t"
+    "mult       %[r0],      %[coef0]                             \n\t"
+   "2:                                                           \n\t"
+    "lh         %[r0],      0(%[tmpout])                         \n\t"
+    "lh         %[r1],      0(%[coefptr])                        \n\t"
+    "lh         %[r2],      2(%[tmpout])                         \n\t"
+    "lh         %[r3],      -2(%[coefptr])                       \n\t"
+    "addiu      %[tmpout],  %[tmpout],               4           \n\t"
+    "msub       %[r0],      %[r1]                                \n\t"
+    "msub       %[r2],      %[r3]                                \n\t"
+    "addiu      %[j],       %[j],                    -2          \n\t"
+    "bgtz       %[j],       2b                                   \n\t"
+    " addiu     %[coefptr], %[coefptr],              -4          \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "extr_r.w   %[r0],      $ac0,                    12          \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "mflo       %[r0]                                            \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu       %[coefptr], %[coefficients],         %[offset]   \n\t"
+    "addiu      %[inptr],   %[inptr],                2           \n\t"
+    "addiu      %[j],       %[coefficients_length],  -1          \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shll_s.w   %[r0],      %[r0],                   16          \n\t"
+    "sra        %[r0],      %[r0],                   16          \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu      %[r0],      %[r0],                   2048        \n\t"
+    "sra        %[r0],      %[r0],                   12          \n\t"
+    "slt        %[r1],      %[max16],                %[r0]       \n\t"
+    "movn       %[r0],      %[max16],                %[r1]       \n\t"
+    "slt        %[r1],      %[r0],                   %[min16]    \n\t"
+    "movn       %[r0],      %[min16],                %[r1]       \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sh         %[r0],      0(%[tmpout])                         \n\t"
+    "bgtz       %[i],       1b                                   \n\t"
+    " addiu     %[outptr],  %[outptr],               2           \n\t"
+    "b          5f                                               \n\t"
+    " nop                                                        \n\t"
+   "3:                                                           \n\t"
+    "lh         %[r0],      0(%[inptr])                          \n\t"
+    "addiu      %[i],       %[i],                    -1          \n\t"
+    "addiu      %[tmpout],  %[outptr],               0           \n\t"
+    "mult       %[r0],      %[coef0]                             \n\t"
+   "4:                                                           \n\t"
+    "lh         %[r0],      0(%[tmpout])                         \n\t"
+    "lh         %[r1],      0(%[coefptr])                        \n\t"
+    "lh         %[r2],      2(%[tmpout])                         \n\t"
+    "lh         %[r3],      -2(%[coefptr])                       \n\t"
+    "addiu      %[tmpout],  %[tmpout],               4           \n\t"
+    "msub       %[r0],      %[r1]                                \n\t"
+    "msub       %[r2],      %[r3]                                \n\t"
+    "addiu      %[j],       %[j],                    -2          \n\t"
+    "bgtz       %[j],       4b                                   \n\t"
+    " addiu     %[coefptr], %[coefptr],              -4          \n\t"
+    "lh         %[r0],      0(%[tmpout])                         \n\t"
+    "lh         %[r1],      0(%[coefptr])                        \n\t"
+    "msub       %[r0],      %[r1]                                \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "extr_r.w   %[r0],      $ac0,                    12          \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "mflo       %[r0]                                            \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "addu       %[coefptr], %[coefficients],         %[offset]   \n\t"
+    "addiu      %[inptr],   %[inptr],                2           \n\t"
+    "addiu      %[j],       %[coefficients_length],  -1          \n\t"
+#if defined(MIPS_DSP_R1_LE)
+    "shll_s.w   %[r0],      %[r0],                   16          \n\t"
+    "sra        %[r0],      %[r0],                   16          \n\t"
+#else  // #if defined(MIPS_DSP_R1_LE)
+    "addiu      %[r0],      %[r0],                   2048        \n\t"
+    "sra        %[r0],      %[r0],                   12          \n\t"
+    "slt        %[r1],      %[max16],                %[r0]       \n\t"
+    "movn       %[r0],      %[max16],                %[r1]       \n\t"
+    "slt        %[r1],      %[r0],                   %[min16]    \n\t"
+    "movn       %[r0],      %[min16],                %[r1]       \n\t"
+#endif  // #if defined(MIPS_DSP_R1_LE)
+    "sh         %[r0],      2(%[tmpout])                         \n\t"
+    "bgtz       %[i],       3b                                   \n\t"
+    " addiu     %[outptr],  %[outptr],               2           \n\t"
+   "5:                                                           \n\t"
+    ".set       pop                                              \n\t"
+    : [i] "=&r" (i), [j] "=&r" (j), [k] "=&r" (k), [r0] "=&r" (r0),
+      [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
+      [coef0] "=&r" (coef0), [offset] "=&r" (offset),
+      [outptr] "=&r" (outptr), [inptr] "=&r" (inptr),
+      [coefptr] "=&r" (coefptr), [tmpout] "=&r" (tmpout)
+    : [coefficients] "r" (coefficients), [data_length] "r" (data_length),
+      [coefficients_length] "r" (coefficients_length),
+#if !defined(MIPS_DSP_R1_LE)
+      [max16] "r" (max16), [min16] "r" (min16),
+#endif
+      [data_out] "r" (data_out), [data_in] "r" (data_in)
+    : "hi", "lo", "memory"
+  );
+}
+
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ma_fast_q12.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ma_fast_q12.c
@ -0,0 +1,45 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_FilterMAFastQ12().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_FilterMAFastQ12(const int16_t* in_ptr,
+                               int16_t* out_ptr,
+                               const int16_t* B,
+                               size_t B_length,
+                               size_t length)
+{
+    // Moving-average (FIR) filter with Q12 coefficients |B|. Output sample n
+    // is the sum of B[k] * in_ptr[n - k] for k = 0..B_length-1, so the caller
+    // must provide B_length - 1 valid samples in front of in_ptr.
+    size_t n;
+
+    for (n = 0; n < length; ++n)
+    {
+        int32_t acc = 0;
+        size_t tap = 0;
+
+        while (tap < B_length)
+        {
+            acc += B[tap] * in_ptr[n - tap];
+            ++tap;
+        }
+
+        // Clamp in the Q12 domain before rounding: 2^27 = 134217728 is 32768
+        // in Q12, and the upper bound 134215679 = 2^27 - 2049 leaves room for
+        // the +2048 rounding term so the shifted value stays within int16_t.
+        acc = WEBRTC_SPL_SAT((int32_t)134215679, acc, (int32_t)-134217728);
+
+        out_ptr[n] = (int16_t)((acc + (int32_t)2048) >> 12);
+    }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_hanning_window.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_hanning_window.c
@ -0,0 +1,77 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_GetHanningWindow().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Hanning table with 256 entries; the values increase from 1 to 16384 (2^14).
+static const int16_t kHanningTable[] = {
+    1,      2,      6,     10,     15,     22,     30,     39,
+   50,     62,     75,     89,    104,    121,    138,    157,
+  178,    199,    222,    246,    271,    297,    324,    353,
+  383,    413,    446,    479,    513,    549,    586,    624,
+  663,    703,    744,    787,    830,    875,    920,    967,
+ 1015,   1064,   1114,   1165,   1218,   1271,   1325,   1381,
+ 1437,   1494,   1553,   1612,   1673,   1734,   1796,   1859,
+ 1924,   1989,   2055,   2122,   2190,   2259,   2329,   2399,
+ 2471,   2543,   2617,   2691,   2765,   2841,   2918,   2995,
+ 3073,   3152,   3232,   3312,   3393,   3475,   3558,   3641,
+ 3725,   3809,   3895,   3980,   4067,   4154,   4242,   4330,
+ 4419,   4509,   4599,   4689,   4781,   4872,   4964,   5057,
+ 5150,   5244,   5338,   5432,   5527,   5622,   5718,   5814,
+ 5910,   6007,   6104,   6202,   6299,   6397,   6495,   6594,
+ 6693,   6791,   6891,   6990,   7090,   7189,   7289,   7389,
+ 7489,   7589,   7690,   7790,   7890,   7991,   8091,   8192,
+ 8293,   8393,   8494,   8594,   8694,   8795,   8895,   8995,
+ 9095,   9195,   9294,   9394,   9493,   9593,   9691,   9790,
+ 9889,   9987,  10085,  10182,  10280,  10377,  10474,  10570,
+10666,  10762,  10857,  10952,  11046,  11140,  11234,  11327,
+11420,  11512,  11603,  11695,  11785,  11875,  11965,  12054,
+12142,  12230,  12317,  12404,  12489,  12575,  12659,  12743,
+12826,  12909,  12991,  13072,  13152,  13232,  13311,  13389,
+13466,  13543,  13619,  13693,  13767,  13841,  13913,  13985,
+14055,  14125,  14194,  14262,  14329,  14395,  14460,  14525,
+14588,  14650,  14711,  14772,  14831,  14890,  14947,  15003,
+15059,  15113,  15166,  15219,  15270,  15320,  15369,  15417,
+15464,  15509,  15554,  15597,  15640,  15681,  15721,  15760,
+15798,  15835,  15871,  15905,  15938,  15971,  16001,  16031,
+16060,  16087,  16113,  16138,  16162,  16185,  16206,  16227,
+16246,  16263,  16280,  16295,  16309,  16322,  16334,  16345,
+16354,  16362,  16369,  16374,  16378,  16382,  16383,  16384
+};
+
+void WebRtcSpl_GetHanningWindow(int16_t *v, size_t size)
+{
+    // Table step per output sample in Q22: 0x40000000 >> 22 == 256, the
+    // number of table entries, so |size| steps walk across the whole table.
+    const int32_t step = WebRtcSpl_DivW32W16((int32_t)0x40000000,
+                                             (int16_t)size);
+    // Start half a table entry below zero (a quarter entry for sizes above
+    // 512); the position is advanced before every lookup.
+    int32_t position = (size < 513) ? (int32_t)-0x200000 : (int32_t)-0x100000;
+    size_t k;
+
+    for (k = 0; k < size; k++)
+    {
+        position += step;
+        v[k] = kHanningTable[position >> 22];
+    }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_scaling_square.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/get_scaling_square.c
@ -0,0 +1,46 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_GetScalingSquare().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
+                                   size_t in_vector_length,
+                                   size_t times)
+{
+    // Returns the number of right shifts to apply to each squared sample so
+    // that a sum of |times| squares of values from |in_vector| fits in a
+    // signed 32-bit accumulator (0 when no scaling is needed).
+    int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times);
+    size_t i;
+    // The magnitude is tracked in 32 bits: |-32768| = 32768 does not fit in
+    // int16_t, and with a 16-bit variable that sample was ignored, yielding
+    // a scaling that is too small for vectors containing -32768.
+    int32_t smax = -1;
+    int32_t sabs;
+    int16_t t;
+
+    for (i = 0; i < in_vector_length; i++)
+    {
+        sabs = (in_vector[i] > 0) ? (int32_t)in_vector[i]
+                                  : -(int32_t)in_vector[i];
+        if (sabs > smax)
+        {
+            smax = sabs;
+        }
+    }
+
+    if (smax == 0)
+    {
+        return 0; // Since norm(0) returns 0
+    }
+
+    // smax <= 32768, so smax * smax <= 2^30 still fits in int32_t.
+    t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));
+    return (t > nbits) ? 0 : nbits - t;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/ilbc_specific_functions.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/ilbc_specific_functions.c
@ -0,0 +1,90 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the iLBC specific functions
+ * WebRtcSpl_ReverseOrderMultArrayElements()
+ * WebRtcSpl_ElementwiseVectorMult()
+ * WebRtcSpl_AddVectorsAndShift()
+ * WebRtcSpl_AddAffineVectorToVector()
+ * WebRtcSpl_AffineTransformVector()
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_ReverseOrderMultArrayElements(int16_t *out, const int16_t *in,
+                                             const int16_t *win,
+                                             size_t vector_length,
+                                             int16_t right_shifts)
+{
+    // Multiplies |in| (read forwards) by a window that is read backwards
+    // starting at |win|, i.e. out[k] = (in[k] * win[-k]) >> right_shifts.
+    size_t k;
+
+    for (k = 0; k < vector_length; k++)
+    {
+        const int32_t product = in[k] * *(win - k);
+        out[k] = (int16_t)(product >> right_shifts);
+    }
+}
+
+void WebRtcSpl_ElementwiseVectorMult(int16_t *out, const int16_t *in,
+                                     const int16_t *win, size_t vector_length,
+                                     int16_t right_shifts)
+{
+    // out[k] = (in[k] * win[k]) >> right_shifts for every element.
+    size_t k;
+
+    for (k = 0; k < vector_length; k++)
+    {
+        const int32_t product = in[k] * win[k];
+        out[k] = (int16_t)(product >> right_shifts);
+    }
+}
+
+void WebRtcSpl_AddVectorsAndShift(int16_t *out, const int16_t *in1,
+                                  const int16_t *in2, size_t vector_length,
+                                  int16_t right_shifts)
+{
+    // out[k] = (in1[k] + in2[k]) >> right_shifts for every element.
+    size_t k;
+
+    for (k = 0; k < vector_length; k++)
+    {
+        const int32_t sum = in1[k] + in2[k];
+        out[k] = (int16_t)(sum >> right_shifts);
+    }
+}
+
+void WebRtcSpl_AddAffineVectorToVector(int16_t *out, int16_t *in,
+                                       int16_t gain, int32_t add_constant,
+                                       int16_t right_shifts,
+                                       size_t vector_length)
+{
+    // Accumulates the affine transform of |in| into |out|:
+    // out[k] += (in[k] * gain + add_constant) >> right_shifts.
+    int16_t *dst = out;
+    const int16_t *src = in;
+    size_t remaining = vector_length;
+
+    while (remaining > 0)
+    {
+        *dst += (int16_t)((*src * gain + add_constant) >> right_shifts);
+        dst++;
+        src++;
+        remaining--;
+    }
+}
+
+void WebRtcSpl_AffineTransformVector(int16_t *out, int16_t *in,
+                                     int16_t gain, int32_t add_constant,
+                                     int16_t right_shifts, size_t vector_length)
+{
+    // out[k] = (in[k] * gain + add_constant) >> right_shifts.
+    int16_t *dst = out;
+    const int16_t *src = in;
+    size_t remaining = vector_length;
+
+    while (remaining > 0)
+    {
+        *dst = (int16_t)((*src * gain + add_constant) >> right_shifts);
+        dst++;
+        src++;
+        remaining--;
+    }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/real_fft.h
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/real_fft.h
@ -0,0 +1,97 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
+#define WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
+
+#include "webrtc/typedefs.h"
+
+// For ComplexFFT(), the maximum fft order is 10;
+// for OpenMax FFT in ARM, it is 12;
+// WebRTC APM uses orders of only 7 and 8.
+enum {kMaxFFTOrder = 10};
+
+struct RealFFT;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct RealFFT* WebRtcSpl_CreateRealFFT(int order);
+void WebRtcSpl_FreeRealFFT(struct RealFFT* self);
+
+// Compute an FFT for a real-valued signal of length 2^order,
+// where 1 < order <= kMaxFFTOrder. Transform length is determined by the
+// specification structure, which must be initialized with
+// WebRtcSpl_CreateRealFFT() prior to calling the FFT function.
+// The relationship between the input and output sequences can
+// be expressed in terms of the DFT, i.e.:
+//     x[n] = (2^(-scalefactor)/N)  . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
+//     n=0,1,2,...N-1
+//     N=2^order.
+// The conjugate-symmetric output sequence is represented using a CCS vector,
+// which is of length N+2, and is organized as follows:
+//     Index:      0  1  2  3  4  5   . . .   N-2       N-1       N       N+1
+//     Component:  R0 0  R1 I1 R2 I2  . . .   R[N/2-1]  I[N/2-1]  R[N/2]  0
+// where R[n] and I[n], respectively, denote the real and imaginary components
+// for FFT bin 'n'. Bins  are numbered from 0 to N/2, where N is the FFT length.
+// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
+// the foldover frequency.
+//
+// Input Arguments:
+//   self - pointer to preallocated and initialized FFT specification structure.
+//   real_data_in - the input signal. For an ARM Neon platform, it must be
+//                  aligned on a 32-byte boundary.
+//
+// Output Arguments:
+//   complex_data_out - the output complex signal with (2^order + 2) 16-bit
+//                      elements. For an ARM Neon platform, it must be different
+//                      from real_data_in, and aligned on a 32-byte boundary.
+//
+// Return Value:
+//   0  - FFT calculation is successful.
+//   -1 - Error with bad arguments (NULL pointers).
+int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
+                             const int16_t* real_data_in,
+                             int16_t* complex_data_out);
+
+// Compute the inverse FFT for a conjugate-symmetric input sequence of length
+// 2^order, where 1 < order <= kMaxFFTOrder. Transform length is determined by
+// the specification structure, which must be initialized with
+// WebRtcSpl_CreateRealFFT() prior to calling the FFT function.
+// For a transform of length M, the input sequence is represented using a packed
+// CCS vector of length M+2, which is explained in the comments for
+// WebRtcSpl_RealForwardFFT above.
+//
+// Input Arguments:
+//   self - pointer to preallocated and initialized FFT specification structure.
+//   complex_data_in - the input complex signal with (2^order + 2) 16-bit
+//                     elements. For an ARM Neon platform, it must be aligned on
+//                     a 32-byte boundary.
+//
+// Output Arguments:
+//   real_data_out - the output real signal. For an ARM Neon platform, it must
+//                   be different to complex_data_in, and aligned on a 32-byte
+//                   boundary.
+//
+// Return Value:
+//   0 or a positive number - a value that the elements in the |real_data_out|
+//                            should be shifted left with in order to get
+//                            correct physical values.
+//   -1 - Error with bad arguments (NULL pointers).
+int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
+                             const int16_t* complex_data_in,
+                             int16_t* real_data_out);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h
@ -0,0 +1,173 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+// This header file includes the inline functions in
+// the fix point signal processing library.
+
+#ifndef WEBRTC_SPL_SPL_INL_H_
+#define WEBRTC_SPL_SPL_INL_H_
+
+#ifdef WEBRTC_ARCH_ARM_V7
+#include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h"
+#else
+
+#if defined(MIPS32_LE)
+#include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h"
+#endif
+
+#if !defined(MIPS_DSP_R1_LE)
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
+  // Clamp to the int16_t range; in-range values pass through unchanged.
+  if (value32 > 32767) {
+    return 32767;
+  }
+  if (value32 < -32768) {
+    return -32768;
+  }
+  return (int16_t) value32;
+}
+
+static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
+  // Returns l_var1 + l_var2, saturated to the int32_t range.
+  //
+  // The addition is carried out on unsigned operands: signed overflow is
+  // undefined behavior in C, whereas unsigned arithmetic wraps modulo 2^32,
+  // which is exactly what the sign checks below rely on.
+  int32_t l_sum = (int32_t)((uint32_t)l_var1 + (uint32_t)l_var2);
+
+  if (l_var1 < 0) {  // Check for underflow.
+    if ((l_var2 < 0) && (l_sum >= 0)) {
+        l_sum = (int32_t)0x80000000;
+    }
+  } else {  // Check for overflow.
+    if ((l_var2 > 0) && (l_sum < 0)) {
+        l_sum = (int32_t)0x7FFFFFFF;
+    }
+  }
+
+  return l_sum;
+}
+
+static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
+  // Returns l_var1 - l_var2, saturated to the int32_t range.
+  //
+  // The subtraction is carried out on unsigned operands: signed overflow is
+  // undefined behavior in C, whereas unsigned arithmetic wraps modulo 2^32,
+  // which is exactly what the sign checks below rely on.
+  int32_t l_diff = (int32_t)((uint32_t)l_var1 - (uint32_t)l_var2);
+
+  if (l_var1 < 0) {  // Check for underflow.
+    if ((l_var2 > 0) && (l_diff > 0)) {
+      l_diff = (int32_t)0x80000000;
+    }
+  } else {  // Check for overflow.
+    if ((l_var2 < 0) && (l_diff < 0)) {
+      l_diff = (int32_t)0x7FFFFFFF;
+    }
+  }
+
+  return l_diff;
+}
+
+static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
+  // The exact sum always fits in 32 bits; saturate it back to 16 bits.
+  const int32_t wide_sum = (int32_t) a + (int32_t) b;
+  return WebRtcSpl_SatW32ToW16(wide_sum);
+}
+
+static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
+  // The exact difference always fits in 32 bits; saturate it back to 16 bits.
+  const int32_t wide_diff = (int32_t) var1 - (int32_t) var2;
+  return WebRtcSpl_SatW32ToW16(wide_diff);
+}
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+
+#if !defined(MIPS32_LE)
+static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
+  // Number of bits needed to represent |n| (0 for n == 0), found by a binary
+  // search for the highest set bit.
+  int16_t width = (n & 0xFFFF0000) ? 16 : 0;
+
+  if ((n >> width) & 0x0000FF00) width += 8;
+  if ((n >> width) & 0x000000F0) width += 4;
+  if ((n >> width) & 0x0000000C) width += 2;
+  if ((n >> width) & 0x00000002) width += 1;
+  if ((n >> width) & 0x00000001) width += 1;
+
+  return width;
+}
+
+static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
+  // Number of left shifts that normalize |a| without changing its sign bit;
+  // 0 is returned for a == 0.
+  int16_t shift = 0;
+
+  if (a == 0) {
+    return 0;
+  }
+  if (a < 0) {
+    a = ~a;  // Same count of redundant sign bits, but now non-negative.
+  }
+
+  // Binary search: skip k bits whenever the top k + 1 bits are all clear.
+  if ((a & 0xFFFF8000) == 0) shift = 16;
+  if (((a << shift) & 0xFF800000) == 0) shift += 8;
+  if (((a << shift) & 0xF8000000) == 0) shift += 4;
+  if (((a << shift) & 0xE0000000) == 0) shift += 2;
+  if (((a << shift) & 0xC0000000) == 0) shift += 1;
+
+  return shift;
+}
+
+static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
+  // Number of left shifts that move the highest set bit of |a| to bit 31;
+  // 0 is returned for a == 0.
+  int16_t shift = 0;
+
+  if (a == 0) {
+    return 0;
+  }
+
+  // Binary search: skip k bits whenever the top k bits are all clear.
+  if ((a & 0xFFFF0000) == 0) shift = 16;
+  if (((a << shift) & 0xFF000000) == 0) shift += 8;
+  if (((a << shift) & 0xF0000000) == 0) shift += 4;
+  if (((a << shift) & 0xC0000000) == 0) shift += 2;
+  if (((a << shift) & 0x80000000) == 0) shift += 1;
+
+  return shift;
+}
+
+static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
+  // Number of left shifts that normalize the 16-bit |a| without changing its
+  // sign bit; 0 is returned for a == 0.
+  int16_t shift = 0;
+
+  if (a == 0) {
+    return 0;
+  }
+  if (a < 0) {
+    a = ~a;  // Same count of redundant sign bits, but now non-negative.
+  }
+
+  // Binary search: skip k bits whenever the top k + 1 bits are all clear.
+  if ((a & 0xFF80) == 0) shift = 8;
+  if (((a << shift) & 0xF800) == 0) shift += 4;
+  if (((a << shift) & 0xE000) == 0) shift += 2;
+  if (((a << shift) & 0xC000) == 0) shift += 1;
+
+  return shift;
+}
+
+static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
+  // Multiply-accumulate: a * b + c.
+  const int32_t product = a * b;
+  return product + c;
+}
+#endif  // #if !defined(MIPS32_LE)
+
+#endif  // WEBRTC_ARCH_ARM_V7
+
+#endif  // WEBRTC_SPL_SPL_INL_H_
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h
@ -0,0 +1,136 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/* This header file includes the inline functions for ARM processors in
+ * the fix point signal processing library.
+ */
+
+#ifndef WEBRTC_SPL_SPL_INL_ARMV7_H_
+#define WEBRTC_SPL_SPL_INL_ARMV7_H_
+
+/* TODO(kma): Replace some assembly code with GCC intrinsics
+ * (e.g. __builtin_clz).
+ */
+
+/* This function produces a result that is not bit-exact with that of the
+ * generic C version in some cases, although the former is at least as accurate
+ * as the latter.
+ */
+static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a, int32_t b) {
+  int32_t tmp = 0;  /* SMULWB multiplies the 32-bit |b| by the bottom halfword
+                     * of the register holding |a| and keeps the top 32 bits
+                     * of the 48-bit product, i.e. (a * b) >> 16. */
+  __asm __volatile ("smulwb %0, %1, %2":"=r"(tmp):"r"(b), "r"(a));
+  return tmp;
+}
+
+static __inline int32_t WEBRTC_SPL_MUL_16_16(int16_t a, int16_t b) {
+  int32_t tmp = 0;  /* SMULBB multiplies the signed bottom halfwords of both
+                     * operands and yields the full 32-bit product a * b. */
+  __asm __volatile ("smulbb %0, %1, %2":"=r"(tmp):"r"(a), "r"(b));
+  return tmp;
+}
+
+// TODO(kma): add unit test.
+static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
+  int32_t tmp = 0;  /* SMLABB multiplies the signed bottom halfwords of |a| and
+                     * |b| and adds the 32-bit accumulator |c|: a * b + c. */
+  __asm __volatile ("smlabb %0, %1, %2, %3":"=r"(tmp):"r"(a), "r"(b), "r"(c));
+  return tmp;
+}
+
+static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
+  int32_t s_sum = 0;
+  /* QADD16 performs signed saturating additions on the 16-bit halves of its
+   * operands; the cast below keeps the low-halfword result, sat16(a + b). */
+  __asm __volatile ("qadd16 %0, %1, %2":"=r"(s_sum):"r"(a), "r"(b));
+
+  return (int16_t) s_sum;
+}
+
+static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
+  int32_t l_sum = 0;
+  /* QADD: signed 32-bit addition that saturates to the int32_t range instead
+   * of wrapping. */
+  __asm __volatile ("qadd %0, %1, %2":"=r"(l_sum):"r"(l_var1), "r"(l_var2));
+
+  return l_sum;
+}
+
+static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
+  int32_t l_sub = 0;
+  /* QSUB: signed 32-bit subtraction l_var1 - l_var2 that saturates to the
+   * int32_t range instead of wrapping. */
+  __asm __volatile ("qsub %0, %1, %2":"=r"(l_sub):"r"(l_var1), "r"(l_var2));
+
+  return l_sub;
+}
+
+static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
+  int32_t s_sub = 0;
+  /* QSUB16 performs signed saturating subtractions on the 16-bit halves of its
+   * operands; the cast below keeps the low-halfword result,
+   * sat16(var1 - var2). */
+  __asm __volatile ("qsub16 %0, %1, %2":"=r"(s_sub):"r"(var1), "r"(var2));
+
+  return (int16_t)s_sub;
+}
+
+static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
+  int32_t tmp = 0;
+  /* CLZ counts the leading zero bits of |n| (32 for n == 0), so 32 - tmp is
+   * the number of bits needed to represent |n|. */
+  __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(n));
+
+  return (int16_t)(32 - tmp);
+}
+
+static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
+  int32_t tmp = 0;
+  /* Returns 0 for a == 0. Negative values are bit-complemented first, so the
+   * CLZ result minus one (for the sign bit) is the number of redundant sign
+   * bits, i.e. how far |a| can be shifted left without changing its sign. */
+  if (a == 0) {
+    return 0;
+  }
+  else if (a < 0) {
+    a ^= 0xFFFFFFFF;
+  }
+
+  __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a));
+
+  return (int16_t)(tmp - 1);
+}
+
+static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
+  int tmp = 0;
+  /* Returns 0 for a == 0; otherwise the CLZ result, i.e. the number of left
+   * shifts that move the highest set bit of |a| to bit 31. */
+  if (a == 0) return 0;
+
+  __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a));
+
+  return (int16_t)tmp;
+}
+
+static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
+  int32_t tmp = 0;
+  int32_t a_32 = a;
+  /* Returns 0 for a == 0. The value is widened to 32 bits and negative values
+   * are bit-complemented; subtracting 17 from the CLZ result removes the 16
+   * extension bits and the sign bit, leaving the 16-bit normalization shift. */
+  if (a_32 == 0) {
+    return 0;
+  }
+  else if (a_32 < 0) {
+    a_32 ^= 0xFFFFFFFF;
+  }
+
+  __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a_32));
+
+  return (int16_t)(tmp - 17);
+}
+
+// TODO(kma): add unit test.
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
+  int32_t out = 0;
+  /* SSAT #16 saturates |value32| to the signed 16-bit range
+   * [-32768, 32767]. */
+  __asm __volatile ("ssat %0, #16, %1" : "=r"(out) : "r"(value32));
+
+  return (int16_t)out;
+}
+
+#endif  // WEBRTC_SPL_SPL_INL_ARMV7_H_
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h
@ -0,0 +1,225 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+// This header file includes the inline functions in
+// the fix point signal processing library.
+
+#ifndef WEBRTC_SPL_SPL_INL_MIPS_H_
+#define WEBRTC_SPL_SPL_INL_MIPS_H_
+
+static __inline int32_t WEBRTC_SPL_MUL_16_16(int32_t a,
+                                             int32_t b) {
+  int32_t value32 = 0;  // Product returned to the caller.
+  int32_t a1 = 0, b1 = 0;  // Low 16 bits of a and b, sign-extended.
+  // Multiplies the sign-extended low halfwords of a and b.
+  __asm __volatile(
+#if defined(MIPS32_R2_LE)
+    "seh    %[a1],          %[a]                \n\t"
+    "seh    %[b1],          %[b]                \n\t"
+#else
+    "sll    %[a1],          %[a],         16    \n\t"  // Without seh: shift up,
+    "sll    %[b1],          %[b],         16    \n\t"
+    "sra    %[a1],          %[a1],        16    \n\t"  // then arithmetic shift down.
+    "sra    %[b1],          %[b1],        16    \n\t"
+#endif
+    "mul    %[value32],     %[a1],  %[b1]       \n\t"
+    : [value32] "=r" (value32), [a1] "=&r" (a1), [b1] "=&r" (b1)
+    : [a] "r" (a), [b] "r" (b)
+    : "hi", "lo"
+  );
+  return value32;
+}
+
+static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a,
+                                                    int32_t b) {
+  int32_t value32 = 0, b1 = 0, b2 = 0;  // b1 = b >> 16, b2 = (b & 0xFFFF) >> 1.
+  int32_t a1 = 0;  // a sign-extended to 32 bits.
+  // Computes a1 * b1 + ((a1 * b2 + 0x4000) >> 15); presumably (a * b) >> 16.
+  __asm __volatile(
+#if defined(MIPS32_R2_LE)
+    "seh    %[a1],          %[a]                        \n\t"
+#else
+    "sll    %[a1],          %[a],           16          \n\t"
+    "sra    %[a1],          %[a1],          16          \n\t"
+#endif
+    "andi   %[b2],          %[b],           0xFFFF      \n\t"
+    "sra    %[b1],          %[b],           16          \n\t"
+    "sra    %[b2],          %[b2],          1           \n\t"
+    "mul    %[value32],     %[a1],          %[b1]       \n\t"
+    "mul    %[b2],          %[a1],          %[b2]       \n\t"
+    "addiu  %[b2],          %[b2],          0x4000      \n\t"  // Rounding offset.
+    "sra    %[b2],          %[b2],          15          \n\t"
+    "addu   %[value32],     %[value32],     %[b2]       \n\t"
+    : [value32] "=&r" (value32), [b1] "=&r" (b1), [b2] "=&r" (b2),
+      [a1] "=&r" (a1)
+    : [a] "r" (a), [b] "r" (b)
+    : "hi", "lo"
+  );
+  return value32;
+}
+
+#if defined(MIPS_DSP_R1_LE)
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
+  __asm __volatile(
+    "shll_s.w   %[value32], %[value32], 16      \n\t"  // Saturating shift left by 16.
+    "sra        %[value32], %[value32], 16      \n\t"  // Arithmetic shift back down.
+    : [value32] "+r" (value32)  // Read-write operand: value32 is modified in place.
+    :
+  );
+  int16_t out16 = (int16_t)value32;
+  return out16;
+}
+
+static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
+  int32_t value32 = 0;
+  // a + b via the MIPS DSP saturating halfword add; the low 16 bits are returned.
+  __asm __volatile(
+    "addq_s.ph      %[value32],     %[a],   %[b]    \n\t"
+    : [value32] "=r" (value32)
+    : [a] "r" (a), [b] "r" (b)
+  );
+  return (int16_t)value32;
+}
+
+static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
+  int32_t l_sum;
+  // l_var1 + l_var2 via the MIPS DSP saturating word add (addq_s.w).
+  __asm __volatile(
+    "addq_s.w   %[l_sum],       %[l_var1],      %[l_var2]    \n\t"
+    : [l_sum] "=r" (l_sum)
+    : [l_var1] "r" (l_var1), [l_var2] "r" (l_var2)
+  );
+
+  return l_sum;
+}
+
+static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
+  int32_t value32;
+  // var1 - var2 via the MIPS DSP saturating halfword subtract (subq_s.ph).
+  __asm __volatile(
+    "subq_s.ph  %[value32], %[var1],    %[var2]     \n\t"
+    : [value32] "=r" (value32)
+    : [var1] "r" (var1), [var2] "r" (var2)
+  );
+  // Only the low 16 bits of the result register are returned.
+  return (int16_t)value32;
+}
+
+static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
+  int32_t l_diff;
+  // l_var1 - l_var2 via the MIPS DSP saturating word subtract (subq_s.w).
+  __asm __volatile(
+    "subq_s.w   %[l_diff],      %[l_var1],      %[l_var2]    \n\t"
+    : [l_diff] "=r" (l_diff)
+    : [l_var1] "r" (l_var1), [l_var2] "r" (l_var2)
+  );
+
+  return l_diff;
+}
+#endif
+
+static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
+  int bits = 0;
+  int i32 = 32;  // Register width, passed in as an operand for the subtraction.
+  // bits = 32 - (leading zero bits of n), i.e. the bits needed to represent n.
+  __asm __volatile(
+    "clz    %[bits],    %[n]                    \n\t"
+    "subu   %[bits],    %[i32],     %[bits]     \n\t"
+    : [bits] "=&r" (bits)
+    : [n] "r" (n), [i32] "r" (i32)
+  );
+
+  return (int16_t)bits;
+}
+
+static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
+  int zeros = 0;
+  // Result: 0 for a == 0, otherwise (leading bits equal to the sign bit) - 1.
+  __asm __volatile(
+    ".set       push                                \n\t"
+    ".set       noreorder                           \n\t"  // Delay slots are filled by hand below.
+    "bnez       %[a],       1f                      \n\t"
+    " sra       %[zeros],   %[a],       31          \n\t"  // Delay slot: zeros = sign mask of a.
+    "b          2f                                  \n\t"
+    " move      %[zeros],   $zero                   \n\t"  // Delay slot: a == 0 gives result 0.
+   "1:                                              \n\t"
+    "xor        %[zeros],   %[a],       %[zeros]    \n\t"  // Inverts a when it is negative.
+    "clz        %[zeros],   %[zeros]                \n\t"
+    "addiu      %[zeros],   %[zeros],   -1          \n\t"  // Exclude the sign bit itself.
+   "2:                                              \n\t"
+    ".set       pop                                 \n\t"
+    : [zeros]"=&r"(zeros)
+    : [a] "r" (a)
+  );
+
+  return (int16_t)zeros;
+}
+
+static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
+  int zeros = 0;
+  // zeros = number of leading zero bits of a.
+  __asm __volatile(
+    "clz    %[zeros],   %[a]    \n\t"
+    : [zeros] "=r" (zeros)
+    : [a] "r" (a)
+  );
+  // Masking with 0x1f turns a count of 32 (presumably the a == 0 case) into 0.
+  return (int16_t)(zeros & 0x1f);
+}
+
+static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
+  int zeros = 0;
+  int a0 = a << 16;  // Place the 16-bit value in the upper half of the word.
+  // Same instruction sequence as the 32-bit norm, applied to the shifted value a0.
+  __asm __volatile(
+    ".set       push                                \n\t"
+    ".set       noreorder                           \n\t"  // Delay slots are filled by hand below.
+    "bnez       %[a0],      1f                      \n\t"
+    " sra       %[zeros],   %[a0],      31          \n\t"  // Delay slot: zeros = sign mask of a0.
+    "b          2f                                  \n\t"
+    " move      %[zeros],   $zero                   \n\t"  // Delay slot: a == 0 gives result 0.
+   "1:                                              \n\t"
+    "xor        %[zeros],   %[a0],      %[zeros]    \n\t"  // Inverts a0 when it is negative.
+    "clz        %[zeros],   %[zeros]                \n\t"
+    "addiu      %[zeros],   %[zeros],   -1          \n\t"  // Exclude the sign bit itself.
+   "2:                                              \n\t"
+    ".set       pop                                 \n\t"
+    : [zeros]"=&r"(zeros)
+    : [a0] "r" (a0)
+  );
+
+  return (int16_t)zeros;
+}
+
+static __inline int32_t WebRtc_MulAccumW16(int16_t a,
+                                           int16_t b,
+                                           int32_t c) {
+  int32_t res = 0, c1 = 0;  // res = a * b, c1 = c + res (the returned value).
+  __asm __volatile(
+#if defined(MIPS32_R2_LE)
+    "seh    %[a],       %[a]            \n\t"  // Sign-extend the low halfword in place.
+    "seh    %[b],       %[b]            \n\t"
+#else
+    "sll    %[a],       %[a],   16      \n\t"  // Without seh: shift up,
+    "sll    %[b],       %[b],   16      \n\t"
+    "sra    %[a],       %[a],   16      \n\t"  // then arithmetic shift down.
+    "sra    %[b],       %[b],   16      \n\t"
+#endif
+    "mul    %[res],     %[a],   %[b]    \n\t"
+    "addu   %[c1],      %[c],   %[res]  \n\t"
+    : [c1] "=r" (c1), [res] "=&r" (res)
+    : [a] "r" (a), [b] "r" (b), [c] "r" (c)  // NOTE(review): a and b are declared input-only but are written above - confirm.
+    : "hi", "lo"
+  );
+  return (c1);
+}
+
+#endif  // WEBRTC_SPL_SPL_INL_MIPS_H_
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c
@ -0,0 +1,246 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_LevinsonDurbin().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define SPL_LEVINSON_MAXORDER 20
+
+int16_t WebRtcSpl_LevinsonDurbin(const int32_t* R, int16_t* A, int16_t* K,
+                                 size_t order)
+{
+    size_t i, j;
+    // Auto-correlation coefficients in high precision
+    int16_t R_hi[SPL_LEVINSON_MAXORDER + 1], R_low[SPL_LEVINSON_MAXORDER + 1];
+    // LPC coefficients in high precision
+    int16_t A_hi[SPL_LEVINSON_MAXORDER + 1], A_low[SPL_LEVINSON_MAXORDER + 1];
+    // LPC coefficients for next iteration
+    int16_t A_upd_hi[SPL_LEVINSON_MAXORDER + 1], A_upd_low[SPL_LEVINSON_MAXORDER + 1];
+    // Reflection coefficient in high precision
+    int16_t K_hi, K_low;
+    // Prediction gain Alpha in high precision and with scale factor
+    int16_t Alpha_hi, Alpha_low, Alpha_exp;
+    int16_t tmp_hi, tmp_low;
+    int32_t temp1W32, temp2W32, temp3W32;
+    int16_t norm;
+    // Outputs: A[0..order] in Q12, K[0..order-1] in Q15; returns 1 (stable) or 0 (unstable).
+    // Normalize the autocorrelation R[0]...R[order]
+    // NOTE(review): the local arrays are indexed up to |order| with no check against SPL_LEVINSON_MAXORDER.
+    norm = WebRtcSpl_NormW32(R[0]);
+
+    for (i = 0; i <= order; ++i)
+    {
+        temp1W32 = WEBRTC_SPL_LSHIFT_W32(R[i], norm);
+        // Put R in hi and low format
+        R_hi[i] = (int16_t)(temp1W32 >> 16);
+        R_low[i] = (int16_t)((temp1W32 - ((int32_t)R_hi[i] << 16)) >> 1);
+    }
+
+    // K = A[1] = -R[1] / R[0]
+
+    temp2W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)R_hi[1],16)
+            + WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[1],1); // R[1] in Q31
+    temp3W32 = WEBRTC_SPL_ABS_W32(temp2W32); // abs R[1]
+    temp1W32 = WebRtcSpl_DivW32HiLow(temp3W32, R_hi[0], R_low[0]); // abs(R[1])/R[0] in Q31
+    // Put back the sign on R[1]
+    if (temp2W32 > 0)
+    {
+        temp1W32 = -temp1W32;
+    }
+
+    // Put K in hi and low format
+    K_hi = (int16_t)(temp1W32 >> 16);
+    K_low = (int16_t)((temp1W32 - ((int32_t)K_hi << 16)) >> 1);
+
+    // Store first reflection coefficient
+    K[0] = K_hi;
+
+    temp1W32 >>= 4;  // A[1] in Q27.
+
+    // Put A[1] in hi and low format
+    A_hi[1] = (int16_t)(temp1W32 >> 16);
+    A_low[1] = (int16_t)((temp1W32 - ((int32_t)A_hi[1] << 16)) >> 1);
+
+    // Alpha = R[0] * (1-K^2)
+
+    temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) << 1;  // = k^2 in Q31
+
+    temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
+    temp1W32 = (int32_t)0x7fffffffL - temp1W32; // temp1W32 = (1 - K[0]*K[0]) in Q31
+
+    // Store temp1W32 = 1 - K[0]*K[0] on hi and low format
+    tmp_hi = (int16_t)(temp1W32 >> 16);
+    tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+    // Calculate Alpha in Q31
+    temp1W32 = (R_hi[0] * tmp_hi + (R_hi[0] * tmp_low >> 15) +
+        (R_low[0] * tmp_hi >> 15)) << 1;
+
+    // Normalize Alpha and put it in hi and low format
+
+    Alpha_exp = WebRtcSpl_NormW32(temp1W32);
+    temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, Alpha_exp);
+    Alpha_hi = (int16_t)(temp1W32 >> 16);
+    Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
+
+    // Perform the iterative calculations in the Levinson-Durbin algorithm
+
+    for (i = 2; i <= order; i++)
+    {
+        /*
+         *                     ----
+         *  temp1W32 = R[i] +  >    R[j]*A[i-j]
+         *                     ----
+         *                    j=1..i-1
+         */
+
+        temp1W32 = 0;
+
+        for (j = 1; j < i; j++)
+        {
+          // temp1W32 is in Q31
+          temp1W32 += (R_hi[j] * A_hi[i - j] << 1) +
+              (((R_hi[j] * A_low[i - j] >> 15) +
+              (R_low[j] * A_hi[i - j] >> 15)) << 1);
+        }
+
+        temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, 4);
+        temp1W32 += (WEBRTC_SPL_LSHIFT_W32((int32_t)R_hi[i], 16)
+                + WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[i], 1));
+
+        // K = -temp1W32 / Alpha
+        temp2W32 = WEBRTC_SPL_ABS_W32(temp1W32); // abs(temp1W32)
+        temp3W32 = WebRtcSpl_DivW32HiLow(temp2W32, Alpha_hi, Alpha_low); // abs(temp1W32)/Alpha
+
+        // Put the sign of temp1W32 back again
+        if (temp1W32 > 0)
+        {
+            temp3W32 = -temp3W32;
+        }
+
+        // Use the Alpha shifts from earlier to de-normalize
+        norm = WebRtcSpl_NormW32(temp3W32);
+        if ((Alpha_exp <= norm) || (temp3W32 == 0))
+        {
+            temp3W32 = WEBRTC_SPL_LSHIFT_W32(temp3W32, Alpha_exp);
+        } else
+        {
+            if (temp3W32 > 0)
+            {
+                temp3W32 = (int32_t)0x7fffffffL;
+            } else
+            {
+                temp3W32 = (int32_t)0x80000000L;
+            }
+        }
+
+        // Put K on hi and low format
+        K_hi = (int16_t)(temp3W32 >> 16);
+        K_low = (int16_t)((temp3W32 - ((int32_t)K_hi << 16)) >> 1);
+
+        // Store Reflection coefficient in Q15
+        K[i - 1] = K_hi;
+
+        // Test for unstable filter.
+        // If unstable return 0 and let the user decide what to do in that case
+
+        if ((int32_t)WEBRTC_SPL_ABS_W16(K_hi) > (int32_t)32750)
+        {
+            return 0; // Unstable filter
+        }
+
+        /*
+         Compute updated LPC coefficient: Anew[i]
+         Anew[j]= A[j] + K*A[i-j]   for j=1..i-1
+         Anew[i]= K
+         */
+
+        for (j = 1; j < i; j++)
+        {
+            // temp1W32 = A[j] in Q27
+            temp1W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)A_hi[j],16)
+                    + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[j],1);
+
+            // temp1W32 += K*A[i-j] in Q27
+            temp1W32 += (K_hi * A_hi[i - j] + (K_hi * A_low[i - j] >> 15) +
+                (K_low * A_hi[i - j] >> 15)) << 1;
+
+            // Put Anew in hi and low format
+            A_upd_hi[j] = (int16_t)(temp1W32 >> 16);
+            A_upd_low[j] = (int16_t)(
+                (temp1W32 - ((int32_t)A_upd_hi[j] << 16)) >> 1);
+        }
+
+        // temp3W32 = K in Q27 (Convert from Q31 to Q27)
+        temp3W32 >>= 4;
+
+        // Store Anew in hi and low format
+        A_upd_hi[i] = (int16_t)(temp3W32 >> 16);
+        A_upd_low[i] = (int16_t)(
+            (temp3W32 - ((int32_t)A_upd_hi[i] << 16)) >> 1);
+
+        // Alpha = Alpha * (1-K^2)
+
+        temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) << 1;  // K*K in Q31
+
+        temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
+        temp1W32 = (int32_t)0x7fffffffL - temp1W32; // 1 - K*K  in Q31
+
+        // Convert 1- K^2 in hi and low format
+        tmp_hi = (int16_t)(temp1W32 >> 16);
+        tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+        // Calculate Alpha = Alpha * (1-K^2) in Q31
+        temp1W32 = (Alpha_hi * tmp_hi + (Alpha_hi * tmp_low >> 15) +
+            (Alpha_low * tmp_hi >> 15)) << 1;
+
+        // Normalize Alpha and store it on hi and low format
+
+        norm = WebRtcSpl_NormW32(temp1W32);
+        temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, norm);
+
+        Alpha_hi = (int16_t)(temp1W32 >> 16);
+        Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
+
+        // Update the total normalization of Alpha
+        Alpha_exp = Alpha_exp + norm;
+
+        // Update A[]
+
+        for (j = 1; j <= i; j++)
+        {
+            A_hi[j] = A_upd_hi[j];
+            A_low[j] = A_upd_low[j];
+        }
+    }
+
+    /*
+     Set A[0] to 1.0 and store the A[i] i=1...order in Q12
+     (Convert from Q27 and use rounding)
+     */
+
+    A[0] = 4096;
+
+    for (i = 1; i <= order; i++)
+    {
+        // temp1W32 in Q27
+        temp1W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)A_hi[i], 16)
+                + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[i], 1);
+        // Round and store upper word
+        A[i] = (int16_t)(((temp1W32 << 1) + 32768) >> 16);
+    }
+    return 1; // Stable filters
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c
@ -0,0 +1,56 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_LpcToReflCoef().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#define SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER 50
+
+// Converts the LPC coefficients a16[1..use_order] (Q12) into reflection
+// coefficients k16[0..use_order - 1] (Q15). a16 doubles as working storage
+// and is overwritten while the order is stepped down.
+void WebRtcSpl_LpcToReflCoef(int16_t* a16, int use_order, int16_t* k16)
+{
+    int32_t quotient[SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER];
+    int stage = use_order - 1;
+
+    // The highest-order LPC coefficient is the last reflection coefficient.
+    k16[stage] = a16[use_order] << 3;  // Q12<<3 => Q15
+
+    while (stage > 0)
+    {
+        const int16_t refl = k16[stage];
+        // (1 - k^2), first in Q30 and then reduced to Q15.
+        const int32_t denom_q30 = 1073741823 - refl * refl;
+        const int16_t denom_q15 = (int16_t)(denom_q30 >> 15);
+        int32_t limited;
+        int idx;
+
+        // quotient[idx] = (a[idx] - RC * a[stage-idx+1]) / (1.0 - RC*RC)
+        for (idx = 1; idx <= stage; ++idx)
+        {
+            // [Q12<<16 - (Q15*Q12)<<1] = [Q28 - Q28] = Q28
+            const int32_t numerator =
+                (a16[idx] << 16) - (refl * a16[stage - idx + 1] << 1);
+
+            quotient[idx] = WebRtcSpl_DivW32W16(numerator, denom_q15);  // Q28/Q15 = Q13
+        }
+
+        // Coefficients of the reduced-order model: Q13>>1 => Q12
+        for (idx = 1; idx < stage; ++idx)
+        {
+            a16[idx] = (int16_t)(quotient[idx] >> 1);
+        }
+
+        // Clamp before scaling so the Q15 result fits in 16 bits.
+        limited = WEBRTC_SPL_SAT(8191, quotient[stage], -8191);
+        k16[stage - 1] = (int16_t)WEBRTC_SPL_LSHIFT_W32(limited, 2);  // Q13<<2 => Q15
+
+        --stage;
+    }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c
@ -0,0 +1,224 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file contains the implementation of functions
+ * WebRtcSpl_MaxAbsValueW16C()
+ * WebRtcSpl_MaxAbsValueW32C()
+ * WebRtcSpl_MaxValueW16C()
+ * WebRtcSpl_MaxValueW32C()
+ * WebRtcSpl_MinValueW16C()
+ * WebRtcSpl_MinValueW32C()
+ * WebRtcSpl_MaxAbsIndexW16()
+ * WebRtcSpl_MaxIndexW16()
+ * WebRtcSpl_MaxIndexW32()
+ * WebRtcSpl_MinIndexW16()
+ * WebRtcSpl_MinIndexW32()
+ *
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
+//   WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
+// TODO(kma): Move the next six functions into min_max_operations_c.c.
+
+// Largest magnitude in a word16 vector, limited to WEBRTC_SPL_WORD16_MAX.
+// C version for generic platforms. |length| must be greater than zero.
+int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) {
+  int largest = 0;
+  size_t pos;
+
+  assert(length > 0);
+
+  for (pos = 0; pos != length; ++pos) {
+    // Widen to int first so that the magnitude of -32768 is representable.
+    const int magnitude = abs((int)vector[pos]);
+    largest = (magnitude > largest) ? magnitude : largest;
+  }
+
+  // 32768 (from -32768) does not fit in int16_t; report the positive limit.
+  return (int16_t)((largest > WEBRTC_SPL_WORD16_MAX) ? WEBRTC_SPL_WORD16_MAX
+                                                      : largest);
+}
+
+// Maximum absolute value of word32 vector. C version for generic platforms.
+//
+// Returns the largest magnitude among the first |length| elements of
+// |vector|, limited to WEBRTC_SPL_WORD32_MAX so that it fits the int32_t
+// return type. |length| must be greater than zero.
+int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) {
+  // Magnitudes are held in uint32_t because the magnitude of the most
+  // negative input, 0x80000000, does not fit in int32_t.
+  uint32_t absolute = 0, maximum = 0;
+  size_t i = 0;
+
+  assert(length > 0);
+
+  for (i = 0; i < length; i++) {
+    // Negate in unsigned arithmetic: abs() has undefined behavior for the
+    // most negative value, whereas unsigned negation is well defined and
+    // yields 0x80000000 for that input.
+    absolute = (vector[i] < 0) ? ((uint32_t)0 - (uint32_t)vector[i])
+                               : (uint32_t)vector[i];
+    if (absolute > maximum) {
+      maximum = absolute;
+    }
+  }
+
+  // Clamp 0x80000000 down to the largest value the return type can hold.
+  maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
+
+  return (int32_t)maximum;
+}
+
+// Largest element of a word16 vector. C version for generic platforms.
+// |length| must be greater than zero.
+int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) {
+  int16_t best = WEBRTC_SPL_WORD16_MIN;
+  size_t remaining = length;
+
+  assert(length > 0);
+
+  // Walk the vector front to back, keeping the largest sample seen so far.
+  while (remaining-- > 0) {
+    const int16_t sample = *vector++;
+    best = (sample > best) ? sample : best;
+  }
+
+  return best;
+}
+
+// Largest element of a word32 vector. C version for generic platforms.
+// |length| must be greater than zero.
+int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) {
+  int32_t best = WEBRTC_SPL_WORD32_MIN;
+  size_t pos;
+
+  assert(length > 0);
+
+  for (pos = 0; pos != length; ++pos) {
+    if (vector[pos] <= best) {
+      continue;  // Not a new maximum.
+    }
+    best = vector[pos];
+  }
+
+  return best;
+}
+
+// Smallest element of a word16 vector. C version for generic platforms.
+// |length| must be greater than zero.
+int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) {
+  int16_t lowest = WEBRTC_SPL_WORD16_MAX;
+  size_t remaining = length;
+
+  assert(length > 0);
+
+  // Walk the vector front to back, keeping the smallest sample seen so far.
+  while (remaining-- > 0) {
+    const int16_t sample = *vector++;
+    lowest = (sample < lowest) ? sample : lowest;
+  }
+
+  return lowest;
+}
+
+// Smallest element of a word32 vector. C version for generic platforms.
+// |length| must be greater than zero.
+int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) {
+  int32_t lowest = WEBRTC_SPL_WORD32_MAX;
+  size_t pos;
+
+  assert(length > 0);
+
+  for (pos = 0; pos != length; ++pos) {
+    if (vector[pos] >= lowest) {
+      continue;  // Not a new minimum.
+    }
+    lowest = vector[pos];
+  }
+
+  return lowest;
+}
+
+// Position of the element with the largest magnitude in a word16 vector.
+// When several elements share that magnitude the first one wins.
+// |length| must be greater than zero.
+size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) {
+  // Magnitudes are kept as int to accommodate the value of abs(-32768).
+  int largest = 0;
+  size_t winner = 0;
+  size_t pos;
+
+  assert(length > 0);
+
+  for (pos = 0; pos != length; ++pos) {
+    const int magnitude = abs((int)vector[pos]);
+
+    if (magnitude <= largest) {
+      continue;  // Not strictly larger, so the earlier position is kept.
+    }
+    largest = magnitude;
+    winner = pos;
+  }
+
+  return winner;
+}
+
+// Position of the largest element in a word16 vector (first one on ties).
+// |length| must be greater than zero.
+size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) {
+  int16_t best = WEBRTC_SPL_WORD16_MIN;
+  size_t winner = 0;
+  size_t pos;
+
+  assert(length > 0);
+
+  for (pos = 0; pos != length; ++pos) {
+    if (vector[pos] <= best) {
+      continue;  // Not strictly larger, so the earlier position is kept.
+    }
+    best = vector[pos];
+    winner = pos;
+  }
+
+  return winner;
+}
+
+// Position of the largest element in a word32 vector (first one on ties).
+// |length| must be greater than zero.
+size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) {
+  int32_t best = WEBRTC_SPL_WORD32_MIN;
+  size_t winner = 0;
+  size_t pos = 0;
+
+  assert(length > 0);
+
+  while (pos < length) {
+    const int32_t sample = vector[pos];
+
+    if (sample > best) {
+      best = sample;
+      winner = pos;
+    }
+    ++pos;
+  }
+
+  return winner;
+}
+
+// Position of the smallest element in a word16 vector (first one on ties).
+// |length| must be greater than zero.
+size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) {
+  int16_t lowest = WEBRTC_SPL_WORD16_MAX;
+  size_t winner = 0;
+  size_t pos;
+
+  assert(length > 0);
+
+  for (pos = 0; pos != length; ++pos) {
+    if (vector[pos] >= lowest) {
+      continue;  // Not strictly smaller, so the earlier position is kept.
+    }
+    lowest = vector[pos];
+    winner = pos;
+  }
+
+  return winner;
+}
+
+// Position of the smallest element in a word32 vector (first one on ties).
+// |length| must be greater than zero.
+size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) {
+  int32_t lowest = WEBRTC_SPL_WORD32_MAX;
+  size_t winner = 0;
+  size_t pos = 0;
+
+  assert(length > 0);
+
+  while (pos < length) {
+    const int32_t sample = vector[pos];
+
+    if (sample < lowest) {
+      lowest = sample;
+      winner = pos;
+    }
+    ++pos;
+  }
+
+  return winner;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c
@ -0,0 +1,376 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file contains the implementation of function
+ * WebRtcSpl_MaxAbsValueW16()
+ *
+ * The description header can be found in signal_processing_library.h.
+ *
+ */
+
+#include <assert.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Maximum absolute value of word16 vector.
+int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) {
+  int32_t totMax = 0;
+  int32_t tmp32_0, tmp32_1, tmp32_2, tmp32_3;
+  size_t i, loop_size;
+
+  assert(length > 0);
+
+#if defined(MIPS_DSP_R1)
+  const int32_t* tmpvec32 = (int32_t*)vector;
+  loop_size = length >> 4;
+
+  for (i = 0; i < loop_size; i++) {
+    __asm__ volatile (
+      "lw         %[tmp32_0],     0(%[tmpvec32])              \n\t"
+      "lw         %[tmp32_1],     4(%[tmpvec32])              \n\t"
+      "lw         %[tmp32_2],     8(%[tmpvec32])              \n\t"
+      "lw         %[tmp32_3],     12(%[tmpvec32])             \n\t"
+
+      "absq_s.ph  %[tmp32_0],     %[tmp32_0]                  \n\t"
+      "absq_s.ph  %[tmp32_1],     %[tmp32_1]                  \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t"
+
+      "lw         %[tmp32_0],     16(%[tmpvec32])             \n\t"
+      "absq_s.ph  %[tmp32_2],     %[tmp32_2]                  \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_1]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_1],     %[totMax]   \n\t"
+
+      "lw         %[tmp32_1],     20(%[tmpvec32])             \n\t"
+      "absq_s.ph  %[tmp32_3],     %[tmp32_3]                  \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_2]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_2],     %[totMax]   \n\t"
+
+      "lw         %[tmp32_2],     24(%[tmpvec32])             \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_3]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_3],     %[totMax]   \n\t"
+
+      "lw         %[tmp32_3],     28(%[tmpvec32])             \n\t"
+      "absq_s.ph  %[tmp32_0],     %[tmp32_0]                  \n\t"
+      "absq_s.ph  %[tmp32_1],     %[tmp32_1]                  \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t"
+
+      "absq_s.ph  %[tmp32_2],     %[tmp32_2]                  \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_1]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_1],     %[totMax]   \n\t"
+      "absq_s.ph  %[tmp32_3],     %[tmp32_3]                  \n\t"
+      "cmp.lt.ph  %[totMax],      %[tmp32_2]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_2],     %[totMax]   \n\t"
+
+      "cmp.lt.ph  %[totMax],      %[tmp32_3]                  \n\t"
+      "pick.ph    %[totMax],      %[tmp32_3],     %[totMax]   \n\t"
+
+      "addiu      %[tmpvec32],    %[tmpvec32],    32          \n\t"
+      : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
+        [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
+        [totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32)
+      :
+      : "memory"
+    );
+  }
+  __asm__ volatile (
+    "rotr       %[tmp32_0],     %[totMax],      16          \n\t"
+    "cmp.lt.ph  %[totMax],      %[tmp32_0]                  \n\t"
+    "pick.ph    %[totMax],      %[tmp32_0],     %[totMax]   \n\t"
+    "packrl.ph  %[totMax],      $0,             %[totMax]   \n\t"
+    : [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax)
+    :
+  );
+  loop_size = length & 0xf;
+  for (i = 0; i < loop_size; i++) {
+    __asm__ volatile (
+      "lh         %[tmp32_0],     0(%[tmpvec32])              \n\t"
+      "addiu      %[tmpvec32],    %[tmpvec32],     2          \n\t"
+      "absq_s.w   %[tmp32_0],     %[tmp32_0]                  \n\t"
+      "slt        %[tmp32_1],     %[totMax],       %[tmp32_0] \n\t"
+      "movn       %[totMax],      %[tmp32_0],      %[tmp32_1] \n\t"
+      : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
+        [tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax)
+      :
+      : "memory"
+    );
+  }
+#else  // #if defined(MIPS_DSP_R1)
+  int32_t v16MaxMax = WEBRTC_SPL_WORD16_MAX;
+  int32_t r, r1, r2, r3;
+  const int16_t* tmpvector = vector;
+  loop_size = length >> 4;
+  for (i = 0; i < loop_size; i++) {
+    __asm__ volatile (
+      "lh     %[tmp32_0],     0(%[tmpvector])                 \n\t"
+      "lh     %[tmp32_1],     2(%[tmpvector])                 \n\t"
+      "lh     %[tmp32_2],     4(%[tmpvector])                 \n\t"
+      "lh     %[tmp32_3],     6(%[tmpvector])                 \n\t"
+
+      "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
+      "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
+      "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
+      "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
+
+      "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
+      "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
+      "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
+      "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
+      "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
+      "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
+      "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
+      "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
+
+      "lh     %[tmp32_0],     8(%[tmpvector])                 \n\t"
+      "lh     %[tmp32_1],     10(%[tmpvector])                \n\t"
+      "lh     %[tmp32_2],     12(%[tmpvector])                \n\t"
+      "lh     %[tmp32_3],     14(%[tmpvector])                \n\t"
+
+      "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
+      "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
+      "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
+      "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
+
+      "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
+      "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
+      "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
+      "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
+      "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
+      "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
+      "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
+      "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
+
+      "lh     %[tmp32_0],     16(%[tmpvector])                \n\t"
+      "lh     %[tmp32_1],     18(%[tmpvector])                \n\t"
+      "lh     %[tmp32_2],     20(%[tmpvector])                \n\t"
+      "lh     %[tmp32_3],     22(%[tmpvector])                \n\t"
+
+      "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
+      "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
+      "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
+      "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
+
+      "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
+      "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
+      "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
+      "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
+      "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
+      "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
+      "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
+      "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
+
+      "lh     %[tmp32_0],     24(%[tmpvector])                \n\t"
+      "lh     %[tmp32_1],     26(%[tmpvector])                \n\t"
+      "lh     %[tmp32_2],     28(%[tmpvector])                \n\t"
+      "lh     %[tmp32_3],     30(%[tmpvector])                \n\t"
+
+      "abs    %[tmp32_0],     %[tmp32_0]                      \n\t"
+      "abs    %[tmp32_1],     %[tmp32_1]                      \n\t"
+      "abs    %[tmp32_2],     %[tmp32_2]                      \n\t"
+      "abs    %[tmp32_3],     %[tmp32_3]                      \n\t"
+
+      "slt    %[r],           %[totMax],      %[tmp32_0]      \n\t"
+      "movn   %[totMax],      %[tmp32_0],     %[r]            \n\t"
+      "slt    %[r1],          %[totMax],      %[tmp32_1]      \n\t"
+      "movn   %[totMax],      %[tmp32_1],     %[r1]           \n\t"
+      "slt    %[r2],          %[totMax],      %[tmp32_2]      \n\t"
+      "movn   %[totMax],      %[tmp32_2],     %[r2]           \n\t"
+      "slt    %[r3],          %[totMax],      %[tmp32_3]      \n\t"
+      "movn   %[totMax],      %[tmp32_3],     %[r3]           \n\t"
+
+      "addiu  %[tmpvector],   %[tmpvector],   32              \n\t"
+      : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
+        [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
+        [totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector),
+        [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3)
+      :
+      : "memory"
+    );
+  }
+  loop_size = length & 0xf;
+  for (i = 0; i < loop_size; i++) {
+    __asm__ volatile (
+      "lh         %[tmp32_0],     0(%[tmpvector])             \n\t"
+      "addiu      %[tmpvector],   %[tmpvector],    2          \n\t"
+      "abs        %[tmp32_0],     %[tmp32_0]                  \n\t"
+      "slt        %[tmp32_1],     %[totMax],       %[tmp32_0] \n\t"
+      "movn       %[totMax],      %[tmp32_0],      %[tmp32_1] \n\t"
+      : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
+        [tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax)
+      :
+      : "memory"
+    );
+  }
+
+  __asm__ volatile (
+    "slt    %[r],       %[v16MaxMax],   %[totMax]   \n\t"
+    "movn   %[totMax],  %[v16MaxMax],   %[r]        \n\t"
+    : [totMax] "+r" (totMax), [r] "=&r" (r)
+    : [v16MaxMax] "r" (v16MaxMax)
+  );
+#endif  // #if defined(MIPS_DSP_R1)
+  return (int16_t)totMax;
+}
+
+#if defined(MIPS_DSP_R1_LE)
+// Maximum absolute value of word32 vector. Version for MIPS platform.
+//
+// Walks |length| 32-bit words starting at |vector| (|length| must be > 0) and
+// returns the largest absolute value found, limited to 0x7fffffff so that it
+// fits the int32_t return type.
+int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length) {
+  // Use uint32_t for the local variables, to accommodate the return value
+  // of abs(0x80000000), which is 0x80000000.
+
+  uint32_t absolute = 0, maximum = 0;
+  int tmp1 = 0, max_value = 0x7fffffff;
+
+  assert(length > 0);
+
+  // |vector| and |length| are advanced/decremented by the asm itself, so they
+  // are declared as read-write ("+r") operands. Declaring them as input-only
+  // operands would allow the compiler to assume their registers still hold
+  // the original values after the statement.
+  __asm__ volatile (
+    ".set push                                                        \n\t"
+    ".set noreorder                                                   \n\t"
+
+   "1:                                                                \n\t"
+    "lw         %[absolute],      0(%[vector])                        \n\t"
+    "absq_s.w   %[absolute],      %[absolute]                         \n\t"
+    "addiu      %[length],        %[length],          -1              \n\t"
+    "slt        %[tmp1],          %[maximum],         %[absolute]     \n\t"
+    "movn       %[maximum],       %[absolute],        %[tmp1]         \n\t"
+    "bgtz       %[length],        1b                                  \n\t"
+    // With .set noreorder the next instruction sits in the branch delay slot,
+    // so the pointer is advanced on every iteration.
+    " addiu     %[vector],        %[vector],          4               \n\t"
+    // Clamp the result to max_value (0x7fffffff).
+    "slt        %[tmp1],          %[max_value],       %[maximum]      \n\t"
+    "movn       %[maximum],       %[max_value],       %[tmp1]         \n\t"
+
+    ".set pop                                                         \n\t"
+
+    : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute),
+      [vector] "+r" (vector), [length] "+r" (length)
+    : [max_value] "r" (max_value)
+    : "memory"
+  );
+
+  return (int32_t)maximum;
+}
+#endif  // #if defined(MIPS_DSP_R1_LE)
+
+// Maximum value of word16 vector. Version for MIPS platform.
+//
+// Returns the largest element among the |length| 16-bit values starting at
+// |vector|. |length| must be greater than zero.
+int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length) {
+  int16_t maximum = WEBRTC_SPL_WORD16_MIN;
+  int tmp1;
+  int16_t value;
+
+  assert(length > 0);
+
+  // |vector| and |length| are modified by the asm, so they are read-write
+  // ("+r") operands rather than input-only operands.
+  __asm__ volatile (
+    ".set push                                                        \n\t"
+    ".set noreorder                                                   \n\t"
+
+   "1:                                                                \n\t"
+    "lh         %[value],         0(%[vector])                        \n\t"
+    "addiu      %[length],        %[length],          -1              \n\t"
+    "slt        %[tmp1],          %[maximum],         %[value]        \n\t"
+    "movn       %[maximum],       %[value],           %[tmp1]         \n\t"
+    "bgtz       %[length],        1b                                  \n\t"
+    // Branch delay slot (noreorder): advance to the next element.
+    " addiu     %[vector],        %[vector],          2               \n\t"
+    ".set pop                                                         \n\t"
+
+    : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value),
+      [vector] "+r" (vector), [length] "+r" (length)
+    :
+    : "memory"
+  );
+
+  return maximum;
+}
+
+// Maximum value of word32 vector. Version for MIPS platform.
+//
+// Returns the largest element among the |length| 32-bit values starting at
+// |vector|. |length| must be greater than zero.
+int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length) {
+  int32_t maximum = WEBRTC_SPL_WORD32_MIN;
+  int tmp1, value;
+
+  assert(length > 0);
+
+  // |vector| and |length| are modified by the asm, so they are read-write
+  // ("+r") operands rather than input-only operands.
+  __asm__ volatile (
+    ".set push                                                        \n\t"
+    ".set noreorder                                                   \n\t"
+
+   "1:                                                                \n\t"
+    "lw         %[value],         0(%[vector])                        \n\t"
+    "addiu      %[length],        %[length],          -1              \n\t"
+    "slt        %[tmp1],          %[maximum],         %[value]        \n\t"
+    "movn       %[maximum],       %[value],           %[tmp1]         \n\t"
+    "bgtz       %[length],        1b                                  \n\t"
+    // Branch delay slot (noreorder): advance to the next element.
+    " addiu     %[vector],        %[vector],          4               \n\t"
+
+    ".set pop                                                         \n\t"
+
+    : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value),
+      [vector] "+r" (vector), [length] "+r" (length)
+    :
+    : "memory"
+  );
+
+  return maximum;
+}
+
+// Minimum value of word16 vector. Version for MIPS platform.
+//
+// Returns the smallest element among the |length| 16-bit values starting at
+// |vector|. |length| must be greater than zero.
+int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length) {
+  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
+  int tmp1;
+  int16_t value;
+
+  assert(length > 0);
+
+  // |vector| and |length| are modified by the asm, so they are read-write
+  // ("+r") operands rather than input-only operands.
+  __asm__ volatile (
+    ".set push                                                        \n\t"
+    ".set noreorder                                                   \n\t"
+
+   "1:                                                                \n\t"
+    "lh         %[value],         0(%[vector])                        \n\t"
+    "addiu      %[length],        %[length],          -1              \n\t"
+    "slt        %[tmp1],          %[value],           %[minimum]      \n\t"
+    "movn       %[minimum],       %[value],           %[tmp1]         \n\t"
+    "bgtz       %[length],        1b                                  \n\t"
+    // Branch delay slot (noreorder): advance to the next element.
+    " addiu     %[vector],        %[vector],          2               \n\t"
+
+    ".set pop                                                         \n\t"
+
+    : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value),
+      [vector] "+r" (vector), [length] "+r" (length)
+    :
+    : "memory"
+  );
+
+  return minimum;
+}
+
+// Minimum value of word32 vector. Version for MIPS platform.
+//
+// Returns the smallest element among the |length| 32-bit values starting at
+// |vector|. |length| must be greater than zero.
+int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length) {
+  int32_t minimum = WEBRTC_SPL_WORD32_MAX;
+  int tmp1, value;
+
+  assert(length > 0);
+
+  // |vector| and |length| are modified by the asm, so they are read-write
+  // ("+r") operands rather than input-only operands.
+  __asm__ volatile (
+    ".set push                                                        \n\t"
+    ".set noreorder                                                   \n\t"
+
+   "1:                                                                \n\t"
+    "lw         %[value],         0(%[vector])                        \n\t"
+    "addiu      %[length],        %[length],          -1              \n\t"
+    "slt        %[tmp1],          %[value],           %[minimum]      \n\t"
+    "movn       %[minimum],       %[value],           %[tmp1]         \n\t"
+    "bgtz       %[length],        1b                                  \n\t"
+    // Branch delay slot (noreorder): advance to the next element.
+    " addiu     %[vector],        %[vector],          4               \n\t"
+
+    ".set pop                                                         \n\t"
+
+    : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value),
+      [vector] "+r" (vector), [length] "+r" (length)
+    :
+    : "memory"
+  );
+
+  return minimum;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c
@ -0,0 +1,283 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Maximum absolute value of word16 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+//
+// Returns the largest |x| over the |length| samples at |vector| (|length|
+// must be > 0), saturated to WEBRTC_SPL_WORD16_MAX.
+int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) {
+  assert(length > 0);
+
+  const int16_t* cursor = vector;
+  size_t blocks = length >> 3;  // Number of complete groups of 8 samples.
+  size_t tail = length & 7;     // Samples left over for the scalar loop.
+  uint16x8_t lane_max = vdupq_n_u16(0);
+  int result = 0;
+
+  for (; blocks > 0; --blocks) {
+    // vabs leaves -32768 unchanged; reinterpreting the lanes as unsigned
+    // keeps that value as 32768 instead of losing it.
+    const int16x8_t magnitudes = vabsq_s16(vld1q_s16(cursor));
+    lane_max = vmaxq_u16(lane_max, vreinterpretq_u16_s16(magnitudes));
+    cursor += 8;
+  }
+
+  // Reduce the eight lanes to one scalar.
+#ifdef WEBRTC_ARCH_ARM64
+  result = (int)vmaxvq_u16(lane_max);
+#else
+  uint16x4_t half_max =
+      vmax_u16(vget_low_u16(lane_max), vget_high_u16(lane_max));
+  half_max = vpmax_u16(half_max, half_max);
+  half_max = vpmax_u16(half_max, half_max);
+
+  result = (int)vget_lane_u16(half_max, 0);
+#endif
+
+  // Scalar pass over the remaining (length % 8) samples.
+  for (; tail > 0; --tail) {
+    const int magnitude = abs((int)(*cursor));
+
+    if (magnitude > result) {
+      result = magnitude;
+    }
+    ++cursor;
+  }
+
+  // abs(-32768) does not fit in int16_t; clamp it.
+  return (int16_t)((result > WEBRTC_SPL_WORD16_MAX) ? WEBRTC_SPL_WORD16_MAX
+                                                    : result);
+}
+
+// Maximum absolute value of word32 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+//
+// Returns the largest |x| over the |length| samples at |vector| (|length|
+// must be > 0), saturated to WEBRTC_SPL_WORD32_MAX.
+int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) {
+  // Use uint32_t for the local variables, to accommodate the magnitude of
+  // 0x80000000, which is 0x80000000 when held in an unsigned variable.
+
+  uint32_t absolute = 0, maximum = 0;
+  size_t i = 0;
+  size_t residual = length & 0x7;
+
+  assert(length > 0);
+
+  const int32_t* p_start = vector;
+  uint32x4_t max32x4_0 = vdupq_n_u32(0);
+  uint32x4_t max32x4_1 = vdupq_n_u32(0);
+
+  // First part, unroll the loop 8 times.
+  for (i = 0; i < length - residual; i += 8) {
+    int32x4_t in32x4_0 = vld1q_s32(p_start);
+    p_start += 4;
+    int32x4_t in32x4_1 = vld1q_s32(p_start);
+    p_start += 4;
+    in32x4_0 = vabsq_s32(in32x4_0);
+    in32x4_1 = vabsq_s32(in32x4_1);
+    // vabs doesn't change the value of 0x80000000.
+    // Use u32 so we don't lose the value 0x80000000.
+    max32x4_0 = vmaxq_u32(max32x4_0, vreinterpretq_u32_s32(in32x4_0));
+    max32x4_1 = vmaxq_u32(max32x4_1, vreinterpretq_u32_s32(in32x4_1));
+  }
+
+  // Reduce the two accumulators to a single scalar.
+  uint32x4_t max32x4 = vmaxq_u32(max32x4_0, max32x4_1);
+#if defined(WEBRTC_ARCH_ARM64)
+  maximum = vmaxvq_u32(max32x4);
+#else
+  uint32x2_t max32x2 = vmax_u32(vget_low_u32(max32x4), vget_high_u32(max32x4));
+  max32x2 = vpmax_u32(max32x2, max32x2);
+
+  maximum = vget_lane_u32(max32x2, 0);
+#endif
+
+  // Second part, do the remaining iterations (if any).
+  for (i = residual; i > 0; i--) {
+    const int32_t sample = *p_start;
+    // Negate in unsigned arithmetic: abs(INT32_MIN) is undefined behavior,
+    // whereas 0u - (uint32_t)sample is well defined and yields 0x80000000.
+    absolute = (sample < 0) ? (0u - (uint32_t)sample) : (uint32_t)sample;
+    if (absolute > maximum) {
+      maximum = absolute;
+    }
+    p_start++;
+  }
+
+  // Guard against the case for 0x80000000.
+  maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
+
+  return (int32_t)maximum;
+}
+
+// Maximum value of word16 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+//
+// Returns the largest of the |length| samples at |vector|; |length| must be
+// greater than zero.
+int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) {
+  assert(length > 0);
+
+  const size_t tail_count = length & 0x7;
+  const int16_t* src = vector;
+  const int16_t* const simd_end = vector + (length - tail_count);
+  const int16_t* const data_end = vector + length;
+  int16x8_t lane_max = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN);
+  int16_t result = WEBRTC_SPL_WORD16_MIN;
+
+  // Vector pass: fold eight samples per step into the per-lane maxima.
+  while (src < simd_end) {
+    lane_max = vmaxq_s16(lane_max, vld1q_s16(src));
+    src += 8;
+  }
+
+  // Reduce the eight lanes to one scalar.
+#if defined(WEBRTC_ARCH_ARM64)
+  result = vmaxvq_s16(lane_max);
+#else
+  int16x4_t half_max =
+      vmax_s16(vget_low_s16(lane_max), vget_high_s16(lane_max));
+  half_max = vpmax_s16(half_max, half_max);
+  half_max = vpmax_s16(half_max, half_max);
+
+  result = vget_lane_s16(half_max, 0);
+#endif
+
+  // Scalar pass over the samples that did not fill a whole vector.
+  for (; src < data_end; ++src) {
+    if (*src > result) {
+      result = *src;
+    }
+  }
+  return result;
+}
+
+// Maximum value of word32 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+//
+// Returns the largest of the |length| samples at |vector|; |length| must be
+// greater than zero.
+int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) {
+  assert(length > 0);
+
+  const size_t tail_count = length & 0x7;
+  const int32_t* src = vector;
+  const int32_t* const simd_end = vector + (length - tail_count);
+  const int32_t* const data_end = vector + length;
+  int32x4_t acc_a = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN);
+  int32x4_t acc_b = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN);
+  int32_t result = WEBRTC_SPL_WORD32_MIN;
+
+  // Vector pass: eight samples per step, four in each accumulator.
+  while (src < simd_end) {
+    acc_a = vmaxq_s32(acc_a, vld1q_s32(src));
+    acc_b = vmaxq_s32(acc_b, vld1q_s32(src + 4));
+    src += 8;
+  }
+
+  // Merge the accumulators and reduce the lanes to one scalar.
+  int32x4_t lane_max = vmaxq_s32(acc_a, acc_b);
+#if defined(WEBRTC_ARCH_ARM64)
+  result = vmaxvq_s32(lane_max);
+#else
+  int32x2_t half_max =
+      vmax_s32(vget_low_s32(lane_max), vget_high_s32(lane_max));
+  half_max = vpmax_s32(half_max, half_max);
+
+  result = vget_lane_s32(half_max, 0);
+#endif
+
+  // Scalar pass over the samples that did not fill a whole block of eight.
+  for (; src < data_end; ++src) {
+    if (*src > result) {
+      result = *src;
+    }
+  }
+  return result;
+}
+
+// Minimum value of word16 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+//
+// Returns the smallest of the |length| samples at |vector|; |length| must be
+// greater than zero.
+int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) {
+  assert(length > 0);
+
+  const size_t tail_count = length & 0x7;
+  const int16_t* src = vector;
+  const int16_t* const simd_end = vector + (length - tail_count);
+  const int16_t* const data_end = vector + length;
+  int16x8_t lane_min = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX);
+  int16_t result = WEBRTC_SPL_WORD16_MAX;
+
+  // Vector pass: fold eight samples per step into the per-lane minima.
+  while (src < simd_end) {
+    lane_min = vminq_s16(lane_min, vld1q_s16(src));
+    src += 8;
+  }
+
+  // Reduce the eight lanes to one scalar.
+#if defined(WEBRTC_ARCH_ARM64)
+  result = vminvq_s16(lane_min);
+#else
+  int16x4_t half_min =
+      vmin_s16(vget_low_s16(lane_min), vget_high_s16(lane_min));
+  half_min = vpmin_s16(half_min, half_min);
+  half_min = vpmin_s16(half_min, half_min);
+
+  result = vget_lane_s16(half_min, 0);
+#endif
+
+  // Scalar pass over the samples that did not fill a whole vector.
+  for (; src < data_end; ++src) {
+    if (*src < result) {
+      result = *src;
+    }
+  }
+  return result;
+}
+
+// Minimum value of word32 vector. NEON intrinsics version for
+// ARM 32-bit/64-bit platforms.
+//
+// Returns the smallest of the |length| samples at |vector|; |length| must be
+// greater than zero.
+int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) {
+  assert(length > 0);
+
+  const size_t tail_count = length & 0x7;
+  const int32_t* src = vector;
+  const int32_t* const simd_end = vector + (length - tail_count);
+  const int32_t* const data_end = vector + length;
+  int32x4_t acc_a = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX);
+  int32x4_t acc_b = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX);
+  int32_t result = WEBRTC_SPL_WORD32_MAX;
+
+  // Vector pass: eight samples per step, four in each accumulator.
+  while (src < simd_end) {
+    acc_a = vminq_s32(acc_a, vld1q_s32(src));
+    acc_b = vminq_s32(acc_b, vld1q_s32(src + 4));
+    src += 8;
+  }
+
+  // Merge the accumulators and reduce the lanes to one scalar.
+  int32x4_t lane_min = vminq_s32(acc_a, acc_b);
+#if defined(WEBRTC_ARCH_ARM64)
+  result = vminvq_s32(lane_min);
+#else
+  int32x2_t half_min =
+      vmin_s32(vget_low_s32(lane_min), vget_high_s32(lane_min));
+  half_min = vpmin_s32(half_min, half_min);
+
+  result = vget_lane_s32(half_min, 0);
+#endif
+
+  // Scalar pass over the samples that did not fill a whole block of eight.
+  for (; src < data_end; ++src) {
+    if (*src < result) {
+      result = *src;
+    }
+  }
+  return result;
+}
+
+
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c
@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the randomization functions
+ * WebRtcSpl_RandU()
+ * WebRtcSpl_RandN()
+ * WebRtcSpl_RandUArray()
+ *
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// IncreaseSeed() masks the generator state with (kMaxSeedUsed - 1), so the
+// state always stays below this value.
+static const uint32_t kMaxSeedUsed = 0x80000000;
+
+// Lookup table read by WebRtcSpl_RandN(), indexed by the generator state
+// shifted right by 23 bits.
+// NOTE(review): presumably samples of a normal distribution, going by the
+// RandN name -- confirm against signal_processing_library.h.
+static const int16_t kRandNTable[] = {
+    9178,    -7260,       40,    10189,     4894,    -3531,   -13779,    14764,
+   -4008,    -8884,    -8990,     1008,     7368,     5184,     3251,    -5817,
+   -9786,     5963,     1770,     8066,    -7135,    10772,    -2298,     1361,
+    6484,     2241,    -8633,      792,      199,    -3344,     6553,   -10079,
+  -15040,       95,    11608,   -12469,    14161,    -4176,     2476,     6403,
+   13685,   -16005,     6646,     2239,    10916,    -3004,     -602,    -3141,
+    2142,    14144,    -5829,     5305,     8209,     4713,     2697,    -5112,
+   16092,    -1210,    -2891,    -6631,    -5360,   -11878,    -6781,    -2739,
+   -6392,      536,    10923,    10872,     5059,    -4748,    -7770,     5477,
+      38,    -1025,    -2892,     1638,     6304,    14375,   -11028,     1553,
+   -1565,    10762,     -393,     4040,     5257,    12310,     6554,    -4799,
+    4899,    -6354,     1603,    -1048,    -2220,     8247,     -186,    -8944,
+  -12004,     2332,     4801,    -4933,     6371,      131,     8614,    -5927,
+   -8287,   -22760,     4033,   -15162,     3385,     3246,     3153,    -5250,
+    3766,      784,     6494,      -62,     3531,    -1582,    15572,      662,
+   -3952,     -330,    -3196,      669,     7236,    -2678,    -6569,    23319,
+   -8645,     -741,    14830,   -15976,     4903,      315,   -11342,    10311,
+    1858,    -7777,     2145,     5436,     5677,     -113,   -10033,      826,
+   -1353,    17210,     7768,      986,    -1471,     8291,    -4982,     8207,
+  -14911,    -6255,    -2449,   -11881,    -7059,   -11703,    -4338,     8025,
+    7538,    -2823,   -12490,     9470,    -1613,    -2529,   -10092,    -7807,
+    9480,     6970,   -12844,     5123,     3532,     4816,     4803,    -8455,
+   -5045,    14032,    -4378,    -1643,     5756,   -11041,    -2732,   -16618,
+   -6430,   -18375,    -3320,     6098,     5131,    -4269,    -8840,     2482,
+   -7048,     1547,   -21890,    -6505,    -7414,     -424,   -11722,     7955,
+    1653,   -17299,     1823,      473,    -9232,     3337,     1111,      873,
+    4018,    -8982,     9889,     3531,   -11763,    -3799,     7373,    -4539,
+    3231,     7054,    -8537,     7616,     6244,    16635,      447,    -2915,
+   13967,      705,    -2669,    -1520,    -1771,   -16188,     5956,     5117,
+    6371,    -9936,    -1448,     2480,     5128,     7550,    -8130,     5236,
+    8213,    -6443,     7707,    -1950,   -13811,     7218,     7031,    -3883,
+      67,     5731,    -2874,    13480,    -3743,     9298,    -3280,     3552,
+   -4425,      -18,    -3785,    -9988,    -5357,     5477,   -11794,     2117,
+    1416,    -9935,     3376,      802,    -5079,    -8243,    12652,       66,
+    3653,    -2368,     6781,   -21895,    -7227,     2487,     7839,     -385,
+    6646,    -7016,    -4658,     5531,    -1705,      834,      129,     3694,
+   -1343,     2238,   -22640,    -6417,   -11139,    11301,    -2945,    -3494,
+   -5626,      185,    -3615,    -2041,    -7972,    -3106,      -60,   -23497,
+   -1566,    17064,     3519,     2518,      304,    -6805,   -10269,     2105,
+    1936,     -426,     -736,    -8122,    -1467,     4238,    -6939,   -13309,
+     360,     7402,    -7970,    12576,     3287,    12194,    -6289,   -16006,
+    9171,     4042,    -9193,     9123,    -2512,     6388,    -4734,    -8739,
+    1028,    -5406,    -1696,     5889,     -666,    -4736,     4971,     3565,
+    9362,    -6292,     3876,    -3652,   -19666,     7523,    -4061,      391,
+  -11773,     7502,    -3763,     4929,    -9478,    13278,     2805,     4496,
+    7814,    16419,    12455,   -14773,     2127,    -2746,     3763,     4847,
+    3698,     6978,     4751,    -6957,    -3581,      -45,     6252,     1513,
+   -4797,    -7925,    11270,    16188,    -2359,    -5269,     9376,   -10777,
+    7262,    20031,    -6515,    -2208,    -5353,     8085,    -1341,    -1303,
+    7333,     5576,     3625,     5763,    -7931,     9833,    -3371,   -10305,
+    6534,   -13539,    -9971,      997,     8464,    -4064,    -1495,     1857,
+   13624,     5458,     9490,   -11086,    -4524,    12022,     -550,     -198,
+     408,    -8455,    -7068,    10289,     9712,    -3366,     9028,    -7621,
+   -5243,     2362,     6909,     4672,    -4933,    -1799,     4709,    -4563,
+     -62,     -566,     1624,    -7010,    14730,   -17791,    -3697,    -2344,
+   -1741,     7099,    -9509,    -6855,    -1989,     3495,    -2289,     2031,
+   12784,      891,    14189,    -3963,    -5683,      421,   -12575,     1724,
+  -12682,    -5970,    -8169,     3143,    -1824,    -5488,    -5130,     8536,
+   12799,      794,     5738,     3459,   -11689,     -258,    -3738,    -3775,
+   -8742,     2333,     8312,    -9383,    10331,    13119,     8398,    10644,
+  -19433,    -6446,   -16277,   -11793,    16284,     9345,    15222,    15834,
+    2009,    -7349,      130,   -14547,      338,    -5998,     3337,    21492,
+    2406,     7703,     -951,    11196,     -564,     3406,     2217,     4806,
+    2374,    -5797,    11839,     8940,   -11874,    18213,     2855,    10492
+};
+
+// Advances the generator state stored in |*seed| by one step and returns the
+// new state: state * 69069 + 1, masked with (kMaxSeedUsed - 1).
+static uint32_t IncreaseSeed(uint32_t* seed) {
+  const uint32_t stepped = *seed * ((int32_t)69069) + 1;
+
+  *seed = stepped & (kMaxSeedUsed - 1);
+  return *seed;
+}
+
+// Advances |*seed| and returns the new state shifted right by 16 bits.
+int16_t WebRtcSpl_RandU(uint32_t* seed) {
+  const uint32_t state = IncreaseSeed(seed);
+
+  return (int16_t)(state >> 16);
+}
+
+// Advances |*seed| and returns the kRandNTable entry selected by the new
+// state shifted right by 23 bits.
+int16_t WebRtcSpl_RandN(uint32_t* seed) {
+  const uint32_t table_index = IncreaseSeed(seed) >> 23;
+
+  return kRandNTable[table_index];
+}
+
+// Fills |vector| with |vector_length| values drawn from WebRtcSpl_RandU(),
+// advancing |*seed| once per element. Returns |vector_length|.
+int16_t WebRtcSpl_RandUArray(int16_t* vector,
+                             int16_t vector_length,
+                             uint32_t* seed) {
+  int16_t* out = vector;
+  int remaining = vector_length;
+
+  while (remaining > 0) {
+    *out++ = WebRtcSpl_RandU(seed);
+    --remaining;
+  }
+  return vector_length;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft.c
@ -0,0 +1,102 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// State shared by the real-valued FFT wrappers in this file.
+struct RealFFT {
+  int order;  // FFT order; the wrappers transform 1 << order points.
+};
+
+// Allocates a RealFFT for the given |order|. Returns NULL when |order| is
+// outside [0, kMaxFFTOrder] or when the allocation fails. The caller releases
+// the result with WebRtcSpl_FreeRealFFT().
+struct RealFFT* WebRtcSpl_CreateRealFFT(int order) {
+  struct RealFFT* fft;
+
+  if (order < 0 || order > kMaxFFTOrder) {
+    return NULL;
+  }
+
+  fft = malloc(sizeof(*fft));
+  if (fft != NULL) {
+    fft->order = order;
+  }
+
+  return fft;
+}
+
+// Releases a RealFFT created by WebRtcSpl_CreateRealFFT(). Passing NULL is
+// allowed and does nothing.
+void WebRtcSpl_FreeRealFFT(struct RealFFT* self) {
+  // free(NULL) is a no-op, so no explicit NULL check is required.
+  free(self);
+}
+
+// The C version FFT functions (i.e. WebRtcSpl_RealForwardFFT and
+// WebRtcSpl_RealInverseFFT) are real-valued FFT wrappers for complex-valued
+// FFT implementation in SPL.
+
+// Forward transform of the 2^order real samples in |real_data_in|. Writes
+// the first 2^order + 2 int16_t values of the complex spectrum to
+// |complex_data_out| and returns the value of WebRtcSpl_ComplexFFT().
+int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
+                             const int16_t* real_data_in,
+                             int16_t* complex_data_out) {
+  const int num_points = 1 << self->order;
+  // Scratch buffer holding 2^order interleaved (re, im) 16-bit pairs; the
+  // complex FFT works in place on it.
+  int16_t work[2 << kMaxFFTOrder];
+  int16_t* dst = work;
+  int status = 0;
+  int idx;
+
+  // Interleave the real input with zero imaginary parts.
+  for (idx = 0; idx < num_points; ++idx) {
+    *dst++ = real_data_in[idx];
+    *dst++ = 0;
+  }
+
+  WebRtcSpl_ComplexBitReverse(work, self->order);
+  status = WebRtcSpl_ComplexFFT(work, self->order, 1);
+
+  // Only the first N + 2 elements of the complex output are handed back.
+  memcpy(complex_data_out, work, sizeof(int16_t) * (num_points + 2));
+
+  return status;
+}
+
+// Inverse transform of the 2^order + 2 int16_t spectrum values in
+// |complex_data_in|. Writes 2^order real samples to |real_data_out| and
+// returns the value of WebRtcSpl_ComplexIFFT().
+int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
+                             const int16_t* complex_data_in,
+                             int16_t* real_data_out) {
+  const int num_points = 1 << self->order;
+  // Scratch buffer for the in-place complex inverse FFT.
+  int16_t work[2 << kMaxFFTOrder];
+  int status = 0;
+  int mirror;
+  int idx;
+
+  // The first n + 2 elements are taken as given.
+  memcpy(work, complex_data_in, sizeof(int16_t) * (num_points + 2));
+
+  // The remaining n - 2 elements are the complex conjugates of the mirrored
+  // input bins; |mirror| is the source index, walking down from n - 2 to 2.
+  for (mirror = num_points - 2; mirror > 0; mirror -= 2) {
+    const int target = 2 * num_points - mirror;
+
+    work[target] = complex_data_in[mirror];
+    work[target + 1] = -complex_data_in[mirror + 1];
+  }
+
+  WebRtcSpl_ComplexBitReverse(work, self->order);
+  status = WebRtcSpl_ComplexIFFT(work, self->order, 1);
+
+  // Keep only the real parts (even positions) of the complex output.
+  for (idx = 0; idx < num_points; ++idx) {
+    real_data_out[idx] = work[2 * idx];
+  }
+
+  return status;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc
@ -0,0 +1,108 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/test/testsupport/gtest_disable.h"
+#include "webrtc/typedefs.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace webrtc {
+namespace {
+
+// FFT order.
+const int kOrder = 5;
+// Lengths for real FFT's time and frequency buffers.
+// For N-point FFT, the length requirements from API are N and N+2 respectively.
+const int kTimeDataLength = 1 << kOrder;
+const int kFreqDataLength = (1 << kOrder) + 2;
+// For complex FFT's time and freq buffer. The implementation requires
+// 2*N 16-bit words.
+const int kComplexFftDataLength = 2 << kOrder;
+// Reference data for time signal.
+const int16_t kRefData[kTimeDataLength] = {
+  11739, 6848, -8688, 31980, -30295, 25242, 27085, 19410,
+  -26299, 15607, -10791, 11778, -23819, 14498, -25772, 10076,
+  1173, 6848, -8688, 31980, -30295, 2522, 27085, 19410,
+  -2629, 5607, -3, 1178, -23819, 1498, -25772, 10076
+};
+
+// Test fixture; its constructor calls WebRtcSpl_Init() before each test body
+// runs.
+class RealFFTTest : public ::testing::Test {
+ protected:
+  RealFFTTest() { WebRtcSpl_Init(); }
+};
+
+// Creation is expected to fail (return NULL) for the orders 11 and -1.
+TEST_F(RealFFTTest, CreateFailsOnBadInput) {
+  RealFFT* const too_large = WebRtcSpl_CreateRealFFT(11);
+  EXPECT_TRUE(too_large == NULL);
+
+  RealFFT* const negative = WebRtcSpl_CreateRealFFT(-1);
+  EXPECT_TRUE(negative == NULL);
+}
+
+// Runs the real-valued FFT wrappers and the complex FFT on the same reference
+// signal and checks that the forward results match exactly and the inverse
+// results differ by at most 1 per sample.
+TEST_F(RealFFTTest, RealAndComplexMatch) {
+  int i = 0;
+  int j = 0;
+  int16_t real_fft_time[kTimeDataLength] = {0};
+  int16_t real_fft_freq[kFreqDataLength] = {0};
+  // One common buffer for complex FFT's time and frequency data.
+  int16_t complex_fft_buff[kComplexFftDataLength] = {0};
+
+  // Prepare the inputs to forward FFT's.
+  memcpy(real_fft_time, kRefData, sizeof(kRefData));
+  for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
+    complex_fft_buff[j] = kRefData[i];
+    complex_fft_buff[j + 1] = 0;  // Insert zero's to imaginary parts.
+  }
+
+  // Create and run real forward FFT.
+  RealFFT* fft = WebRtcSpl_CreateRealFFT(kOrder);
+  // Fatal assertion: every call below dereferences |fft|, so the test must
+  // stop here rather than continue with a null handle.
+  ASSERT_TRUE(fft != NULL);
+  EXPECT_EQ(0, WebRtcSpl_RealForwardFFT(fft, real_fft_time, real_fft_freq));
+
+  // Run complex forward FFT.
+  WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
+  EXPECT_EQ(0, WebRtcSpl_ComplexFFT(complex_fft_buff, kOrder, 1));
+
+  // Verify the results between complex and real forward FFT.
+  for (i = 0; i < kFreqDataLength; i++) {
+    EXPECT_EQ(real_fft_freq[i], complex_fft_buff[i]);
+  }
+
+  // Prepare the inputs to inverse real FFT.
+  // We use whatever data in complex_fft_buff[] since we don't care
+  // about data contents. Only kFreqDataLength 16-bit words are copied
+  // from complex_fft_buff to real_fft_freq since remaining words (2nd half)
+  // are conjugate-symmetric to the first half in theory.
+  memcpy(real_fft_freq, complex_fft_buff, sizeof(real_fft_freq));
+
+  // Run real inverse FFT.
+  int real_scale = WebRtcSpl_RealInverseFFT(fft, real_fft_freq, real_fft_time);
+  EXPECT_GE(real_scale, 0);
+
+  // Run complex inverse FFT.
+  WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
+  int complex_scale = WebRtcSpl_ComplexIFFT(complex_fft_buff, kOrder, 1);
+
+  // Verify the results between complex and real inverse FFT.
+  // They are not bit-exact, since complex IFFT doesn't produce
+  // exactly conjugate-symmetric data (between first and second half).
+  EXPECT_EQ(real_scale, complex_scale);
+  for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
+    EXPECT_LE(abs(real_fft_time[i] - complex_fft_buff[j]), 1);
+  }
+
+  WebRtcSpl_FreeRealFFT(fft);
+}
+
+}  // namespace
+}  // namespace webrtc
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c
@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_ReflCoefToLpc().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Builds the LPC polynomial a[0..use_order] from the reflection coefficients
+// k[0..use_order-1]. a[0] is always 4096 and every k value is shifted down by
+// three bits when it enters the polynomial.
+// NOTE(review): presumably k is Q15 and a is Q12 -- confirm against the
+// description in signal_processing_library.h.
+void WebRtcSpl_ReflCoefToLpc(const int16_t *k, int use_order, int16_t *a)
+{
+    // Work area holding the polynomial for the order currently being built.
+    int16_t scratch[WEBRTC_SPL_MAX_LPC_ORDER + 1];
+    int order, idx;
+
+    // Leading coefficient: 4096, i.e. (Word16_MAX >> 3) + 1.
+    a[0] = 4096;
+    scratch[0] = a[0];
+    // First-order polynomial comes directly from the first coefficient.
+    a[1] = k[0] >> 3;
+
+    // Grow the polynomial by one order per pass.
+    for (order = 1; order < use_order; order++)
+    {
+        // The newly added highest coefficient.
+        scratch[order + 1] = k[order] >> 3;
+
+        // Each existing coefficient is combined with its mirror image,
+        // weighted by the current reflection coefficient.
+        for (idx = 1; idx <= order; idx++)
+        {
+            scratch[idx] = a[idx] + (int16_t)((a[order + 1 - idx] * k[order]) >> 15);
+        }
+
+        // Copy the order + 2 finished values back into the output array.
+        for (idx = 0; idx <= order + 1; idx++)
+        {
+            a[idx] = scratch[idx];
+        }
+    }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample.c
@ -0,0 +1,505 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling functions for 22 kHz.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
+
+// Declaration of internally used functions
+// Both routines are defined further down in this file; each turns K blocks of
+// 16 input samples into K blocks of 11 output samples. The IntToShort variant
+// has internal linkage, the IntToInt variant does not.
+static void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, int16_t *Out,
+                                             int32_t K);
+
+void WebRtcSpl_32khzTo22khzIntToInt(const int32_t *In, int32_t *Out,
+                                    int32_t K);
+
+// interpolation coefficients
+// Five 9-tap filters used by the 32 kHz -> 22 kHz (ratio 11/16) resamplers.
+// Each row is applied twice per 16-sample block: once walking forwards through
+// the input and once walking backwards, producing a mirrored pair of outputs.
+// The taps of every row add up to roughly 32768, which matches the >> 15
+// scaling applied in WebRtcSpl_DotProdIntToShort().
+static const int16_t kCoefficients32To22[5][9] = {
+        {127, -712,  2359, -6333, 23456, 16775, -3695,  945, -154},
+        {-39,  230,  -830,  2785, 32366, -2324,   760, -218,   38},
+        {117, -663,  2222, -6133, 26634, 13070, -3174,  831, -137},
+        {-77,  457, -1677,  5958, 31175, -4136,  1405, -408,   71},
+        { 98, -560,  1900, -5406, 29240,  9423, -2480,  663, -110}
+};
+
+//////////////////////
+// 22 kHz -> 16 kHz //
+//////////////////////
+
+// number of subblocks; options: 1, 2, 4, 5, 10
+#define SUB_BLOCKS_22_16    5
+
+// 22 -> 16 resampler
+//
+// Converts one frame of 220 input samples into 160 output samples. The frame
+// is handled in SUB_BLOCKS_22_16 equal sub-blocks to keep tmpmem small; each
+// sub-block runs the chain 22 -> 44 -> 32 -> 16 kHz. Filter history is kept
+// in |state| between sub-blocks and between calls.
+void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State22khzTo16khz* state, int32_t* tmpmem)
+{
+    int block, j;
+
+    for (block = 0; block < SUB_BLOCKS_22_16; block++)
+    {
+        // 22 --> 44: int16_t in[220/SUB_BLOCKS_22_16] is expanded into
+        // int32_t values stored from tmpmem[16] onwards.
+        WebRtcSpl_UpBy2ShortToInt(in, 220 / SUB_BLOCKS_22_16, tmpmem + 16, state->S_22_44);
+
+        // 44 --> 32: the 8 samples saved last time are placed directly in
+        // front of the new data (tmpmem[8..15]) ...
+        for (j = 0; j < 8; j++)
+        {
+            tmpmem[8 + j] = state->S_44_32[j];
+        }
+        // ... and the last 8 samples of the combined region are saved for
+        // the next sub-block before the buffer is overwritten.
+        for (j = 0; j < 8; j++)
+        {
+            state->S_44_32[j] = tmpmem[440 / SUB_BLOCKS_22_16 + 8 + j];
+        }
+
+        // Output is written from tmpmem[0], behind the data being read.
+        WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 40 / SUB_BLOCKS_22_16);
+
+        // 32 --> 16: int32_t in[320/SUB_BLOCKS_22_16] becomes
+        // int16_t out[160/SUB_BLOCKS_22_16].
+        WebRtcSpl_DownBy2IntToShort(tmpmem, 320 / SUB_BLOCKS_22_16, out, state->S_32_16);
+
+        // Advance both streams by 10/SUB_BLOCKS_22_16 ms.
+        in += 220 / SUB_BLOCKS_22_16;
+        out += 160 / SUB_BLOCKS_22_16;
+    }
+}
+
+// initialize state of 22 -> 16 resampler
+// Zeroes entries 0..7 of each of the three per-stage history arrays.
+void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state)
+{
+    int idx;
+
+    for (idx = 0; idx < 8; idx++)
+        state->S_22_44[idx] = 0;
+    for (idx = 0; idx < 8; idx++)
+        state->S_44_32[idx] = 0;
+    for (idx = 0; idx < 8; idx++)
+        state->S_32_16[idx] = 0;
+}
+
+//////////////////////
+// 16 kHz -> 22 kHz //
+//////////////////////
+
+// number of subblocks; options: 1, 2, 4, 5, 10
+#define SUB_BLOCKS_16_22    4
+
+// 16 -> 22 resampler
+//
+// Converts one frame of 160 input samples into 220 output samples, handled in
+// SUB_BLOCKS_16_22 equal sub-blocks to keep tmpmem small. Each sub-block runs
+// the chain 16 -> 32 -> 22 kHz; filter history lives in |state|.
+void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State16khzTo22khz* state, int32_t* tmpmem)
+{
+    int block, j;
+
+    for (block = 0; block < SUB_BLOCKS_16_22; block++)
+    {
+        // 16 --> 32: int16_t in[160/SUB_BLOCKS_16_22] is expanded into
+        // int32_t values stored from tmpmem[8] onwards.
+        WebRtcSpl_UpBy2ShortToInt(in, 160 / SUB_BLOCKS_16_22, tmpmem + 8, state->S_16_32);
+
+        // 32 --> 22: put the 8 samples saved last time in front of the new
+        // data (tmpmem[0..7]) ...
+        for (j = 0; j < 8; j++)
+        {
+            tmpmem[j] = state->S_32_22[j];
+        }
+        // ... and keep the last 8 samples of the combined region for the
+        // next sub-block.
+        for (j = 0; j < 8; j++)
+        {
+            state->S_32_22[j] = tmpmem[320 / SUB_BLOCKS_16_22 + j];
+        }
+
+        // Produces int16_t out[220/SUB_BLOCKS_16_22] directly.
+        WebRtcSpl_32khzTo22khzIntToShort(tmpmem, out, 20 / SUB_BLOCKS_16_22);
+
+        // Advance both streams by 10/SUB_BLOCKS_16_22 ms.
+        in += 160 / SUB_BLOCKS_16_22;
+        out += 220 / SUB_BLOCKS_16_22;
+    }
+}
+
+// initialize state of 16 -> 22 resampler
+// Zeroes entries 0..7 of both per-stage history arrays.
+void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state)
+{
+    int idx;
+
+    for (idx = 0; idx < 8; idx++)
+        state->S_16_32[idx] = 0;
+    for (idx = 0; idx < 8; idx++)
+        state->S_32_22[idx] = 0;
+}
+
+//////////////////////
+// 22 kHz ->  8 kHz //
+//////////////////////
+
+// number of subblocks; options: 1, 2, 5, 10
+#define SUB_BLOCKS_22_8     2
+
+// 22 -> 8 resampler
+//
+// Converts one frame of 220 input samples into 80 output samples, handled in
+// SUB_BLOCKS_22_8 equal sub-blocks to keep tmpmem small. Each sub-block runs
+// the chain 22 (lowpass) -> 16 -> 8 kHz; filter history lives in |state|.
+void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State22khzTo8khz* state, int32_t* tmpmem)
+{
+    int block, j;
+
+    for (block = 0; block < SUB_BLOCKS_22_8; block++)
+    {
+        // 22 --> 22 lowpass: int16_t in[220/SUB_BLOCKS_22_8] is filtered into
+        // int32_t values stored from tmpmem[16] onwards.
+        WebRtcSpl_LPBy2ShortToInt(in, 220 / SUB_BLOCKS_22_8, tmpmem + 16, state->S_22_22);
+
+        // 22 --> 16: put the 8 samples saved last time in front of the new
+        // data (tmpmem[8..15]) ...
+        for (j = 0; j < 8; j++)
+        {
+            tmpmem[8 + j] = state->S_22_16[j];
+        }
+        // ... and keep the last 8 samples of the combined region for the
+        // next sub-block.
+        for (j = 0; j < 8; j++)
+        {
+            state->S_22_16[j] = tmpmem[220 / SUB_BLOCKS_22_8 + 8 + j];
+        }
+
+        // Same 8/11 ratio as 44 -> 32, hence the shared routine.
+        WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 20 / SUB_BLOCKS_22_8);
+
+        // 16 --> 8: int32_t in[160/SUB_BLOCKS_22_8] becomes
+        // int16_t out[80/SUB_BLOCKS_22_8].
+        WebRtcSpl_DownBy2IntToShort(tmpmem, 160 / SUB_BLOCKS_22_8, out, state->S_16_8);
+
+        // Advance both streams by 10/SUB_BLOCKS_22_8 ms.
+        in += 220 / SUB_BLOCKS_22_8;
+        out += 80 / SUB_BLOCKS_22_8;
+    }
+}
+
+// initialize state of 22 -> 8 resampler
+// Zeroes entries 0..15 of the lowpass history and entries 0..7 of the two
+// remaining per-stage history arrays.
+void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state)
+{
+    int idx;
+
+    for (idx = 0; idx < 16; idx++)
+        state->S_22_22[idx] = 0;
+    for (idx = 0; idx < 8; idx++)
+        state->S_22_16[idx] = 0;
+    for (idx = 0; idx < 8; idx++)
+        state->S_16_8[idx] = 0;
+}
+
+//////////////////////
+//  8 kHz -> 22 kHz //
+//////////////////////
+
+// number of subblocks; options: 1, 2, 5, 10
+#define SUB_BLOCKS_8_22     2
+
+// 8 -> 22 resampler
+//
+// Converts one frame of 80 input samples into 220 output samples, handled in
+// SUB_BLOCKS_8_22 equal sub-blocks to keep tmpmem small. Each sub-block runs
+// the chain 8 -> 16 -> 11 -> 22 kHz; filter history lives in |state|.
+void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State8khzTo22khz* state, int32_t* tmpmem)
+{
+    int block, j;
+
+    for (block = 0; block < SUB_BLOCKS_8_22; block++)
+    {
+        // 8 --> 16: int16_t in[80/SUB_BLOCKS_8_22] is expanded into
+        // int32_t values stored from tmpmem[18] onwards.
+        WebRtcSpl_UpBy2ShortToInt(in, 80 / SUB_BLOCKS_8_22, tmpmem + 18, state->S_8_16);
+
+        // 16 --> 11: put the 8 samples saved last time in front of the new
+        // data (tmpmem[10..17]) ...
+        for (j = 0; j < 8; j++)
+        {
+            tmpmem[10 + j] = state->S_16_11[j];
+        }
+        // ... and keep the last 8 samples of the combined region for the
+        // next sub-block.
+        for (j = 0; j < 8; j++)
+        {
+            state->S_16_11[j] = tmpmem[160 / SUB_BLOCKS_8_22 + 10 + j];
+        }
+
+        // Same 11/16 ratio as 32 -> 22; int32_t result is written from tmpmem[0].
+        WebRtcSpl_32khzTo22khzIntToInt(tmpmem + 10, tmpmem, 10 / SUB_BLOCKS_8_22);
+
+        // 11 --> 22: int32_t in[110/SUB_BLOCKS_8_22] becomes
+        // int16_t out[220/SUB_BLOCKS_8_22].
+        WebRtcSpl_UpBy2IntToShort(tmpmem, 110 / SUB_BLOCKS_8_22, out, state->S_11_22);
+
+        // Advance both streams by 10/SUB_BLOCKS_8_22 ms.
+        in += 80 / SUB_BLOCKS_8_22;
+        out += 220 / SUB_BLOCKS_8_22;
+    }
+}
+
+// initialize state of 8 -> 22 resampler
+// Zeroes entries 0..7 of each of the three per-stage history arrays.
+void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state)
+{
+    int idx;
+
+    for (idx = 0; idx < 8; idx++)
+        state->S_8_16[idx] = 0;
+    for (idx = 0; idx < 8; idx++)
+        state->S_16_11[idx] = 0;
+    for (idx = 0; idx < 8; idx++)
+        state->S_11_22[idx] = 0;
+}
+
+// compute two inner-products and store them to output array
+//
+// Applies the same nine coefficients to two input windows: in1[0..8] read
+// forwards and in2[0..-8] read backwards. Both sums start from 16384 and are
+// stored unscaled in *out1 and *out2.
+static void WebRtcSpl_DotProdIntToInt(const int32_t* in1, const int32_t* in2,
+                                      const int16_t* coef_ptr, int32_t* out1,
+                                      int32_t* out2)
+{
+    int32_t acc_fwd = 16384;
+    int32_t acc_rev = 16384;
+    int16_t last_coef;
+    int tap;
+
+    // Taps 0..7 are accumulated for both windows in lock-step.
+    for (tap = 0; tap < 8; tap++)
+    {
+        acc_fwd += coef_ptr[tap] * in1[tap];
+        acc_rev += coef_ptr[tap] * in2[-tap];
+    }
+
+    // The ninth tap is folded into the stores; the forward result is written
+    // before the backward window is read for the last time.
+    last_coef = coef_ptr[8];
+    *out1 = acc_fwd + last_coef * in1[8];
+    *out2 = acc_rev + last_coef * in2[-8];
+}
+
+// compute two inner-products and store them to output array
+//
+// Applies the same nine coefficients to two input windows: in1[0..8] read
+// forwards and in2[0..-8] read backwards. Each sum starts from 16384 (the
+// rounding offset for the following >> 15), is shifted down by 15 bits and
+// clamped to the int16_t range before being stored.
+static void WebRtcSpl_DotProdIntToShort(const int32_t* in1, const int32_t* in2,
+                                        const int16_t* coef_ptr, int16_t* out1,
+                                        int16_t* out2)
+{
+    int32_t acc_fwd = 16384;
+    int32_t acc_rev = 16384;
+    int tap;
+
+    for (tap = 0; tap < 9; tap++)
+    {
+        acc_fwd += coef_ptr[tap] * in1[tap];
+        acc_rev += coef_ptr[tap] * in2[-tap];
+    }
+
+    // scale down and saturate the forward result
+    acc_fwd >>= 15;
+    if (acc_fwd > 32767)
+        acc_fwd = 32767;
+    if (acc_fwd < -32768)
+        acc_fwd = -32768;
+
+    // scale down and saturate the backward result
+    acc_rev >>= 15;
+    if (acc_rev > 32767)
+        acc_rev = 32767;
+    if (acc_rev < -32768)
+        acc_rev = -32768;
+
+    *out1 = (int16_t)acc_fwd;
+    *out2 = (int16_t)acc_rev;
+}
+
+//   Resampling ratio: 11/16
+// input:  int32_t (normalized, not saturated) :: 16 samples consumed per block;
+//         each block reads In[0..22], so 7 samples beyond the last block
+//         must be readable
+// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 11 * K
+//      K: Number of blocks
+
+void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In,
+                                    int32_t* Out,
+                                    int32_t K)
+{
+    // Start index of the forward and of the backward input window used with
+    // each row of kCoefficients32To22.
+    static const int kFwdStart[5] = {0, 2, 3, 5, 6};
+    static const int kRevStart[5] = {22, 20, 19, 17, 16};
+    int32_t block;
+    int row;
+
+    // Each pass turns 16 input samples into 11 output samples.
+    for (block = 0; block < K; block++)
+    {
+        // The first output coincides with an input sample; only rescale it.
+        Out[0] = ((int32_t)In[3] << 15) + (1 << 14);
+
+        // Every coefficient row yields a mirrored output pair:
+        // Out[1 + row] from the forward window, Out[10 - row] from the
+        // backward one.
+        for (row = 0; row < 5; row++)
+        {
+            WebRtcSpl_DotProdIntToInt(&In[kFwdStart[row]], &In[kRevStart[row]],
+                                      kCoefficients32To22[row],
+                                      &Out[1 + row], &Out[10 - row]);
+        }
+
+        // Move on to the next block.
+        In += 16;
+        Out += 11;
+    }
+}
+
+//   Resampling ratio: 11/16
+// input:  int32_t (normalized, not saturated) :: 16 samples consumed per block;
+//         each block reads In[0..22], so 7 samples beyond the last block
+//         must be readable
+// output: int16_t (saturated) :: size 11 * K
+//      K: Number of blocks
+
+void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In,
+                                      int16_t *Out,
+                                      int32_t K)
+{
+    // Start index of the forward and of the backward input window used with
+    // each row of kCoefficients32To22.
+    static const int kFwdStart[5] = {0, 2, 3, 5, 6};
+    static const int kRevStart[5] = {22, 20, 19, 17, 16};
+    int32_t first;
+    int32_t block;
+    int row;
+
+    // Each pass turns 16 input samples into 11 output samples.
+    for (block = 0; block < K; block++)
+    {
+        // The first output coincides with an input sample; only saturate it.
+        first = In[3];
+        if (first > 32767)
+            first = 32767;
+        if (first < -32768)
+            first = -32768;
+        Out[0] = (int16_t)first;
+
+        // Every coefficient row yields a mirrored output pair:
+        // Out[1 + row] from the forward window, Out[10 - row] from the
+        // backward one.
+        for (row = 0; row < 5; row++)
+        {
+            WebRtcSpl_DotProdIntToShort(&In[kFwdStart[row]], &In[kRevStart[row]],
+                                        kCoefficients32To22[row],
+                                        &Out[1 + row], &Out[10 - row]);
+        }
+
+        // Move on to the next block.
+        In += 16;
+        Out += 11;
+    }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c
@ -0,0 +1,186 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains resampling functions between 48 kHz and nb/wb.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include <string.h>
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
+
+////////////////////////////
+///// 48 kHz -> 16 kHz /////
+////////////////////////////
+
+// 48 -> 16 resampler
+//
+// Converts 480 input samples into 160 output samples through the chain
+// 48 (lowpass) -> 32 -> 16 kHz. Filter history is carried in |state|;
+// |tmpmem| is scratch space that is written up to index 495.
+void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem)
+{
+    // Layout: 8 carried-over samples at |history|, directly followed by the
+    // 480 lowpassed samples at |fresh|.
+    int32_t* const history = tmpmem + 8;
+    int32_t* const fresh = tmpmem + 16;
+
+    // 48 --> 48(LP): int16_t in[480] -> int32_t fresh[480]
+    WebRtcSpl_LPBy2ShortToInt(in, 480, fresh, state->S_48_48);
+
+    // 48 --> 32: restore the saved history in front of the new data, then
+    // save the last 8 samples of the combined region for the next call.
+    memcpy(history, state->S_48_32, 8 * sizeof(int32_t));
+    memcpy(state->S_48_32, history + 480, 8 * sizeof(int32_t));
+    // int32_t in[480] -> int32_t out[320], written from tmpmem[0]
+    WebRtcSpl_Resample48khzTo32khz(history, tmpmem, 160);
+
+    // 32 --> 16: int32_t in[320] -> int16_t out[160]
+    WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
+}
+
+// initialize state of 48 -> 16 resampler
+// Clears the history of every stage: 16 int32_t words for the lowpass stage,
+// 8 for each of the others.
+void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state)
+{
+    const size_t eight_words = 8 * sizeof(int32_t);
+
+    memset(state->S_48_48, 0, 2 * eight_words);
+    memset(state->S_48_32, 0, eight_words);
+    memset(state->S_32_16, 0, eight_words);
+}
+
+////////////////////////////
+///// 16 kHz -> 48 kHz /////
+////////////////////////////
+
+// 16 -> 48 resampler
+//
+// Converts 160 input samples into 480 output samples through the chain
+// 16 -> 32 -> 24 -> 48 kHz. Filter history is carried in |state|;
+// |tmpmem| is scratch space that is written up to index 335.
+void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem)
+{
+    // Layout: 8 carried-over samples at |history|, directly followed by the
+    // 320 upsampled samples at |fresh|.
+    int32_t* const history = tmpmem + 8;
+    int32_t* const fresh = tmpmem + 16;
+
+    // 16 --> 32: int16_t in[160] -> int32_t fresh[320]
+    WebRtcSpl_UpBy2ShortToInt(in, 160, fresh, state->S_16_32);
+
+    // 32 --> 24: restore the saved history in front of the new data, then
+    // save the last 8 samples of the combined region for the next call.
+    memcpy(history, state->S_32_24, 8 * sizeof(int32_t));
+    memcpy(state->S_32_24, history + 320, 8 * sizeof(int32_t));
+    // int32_t in[320] -> int32_t out[240], written from tmpmem[0]
+    WebRtcSpl_Resample32khzTo24khz(history, tmpmem, 80);
+
+    // 24 --> 48: int32_t in[240] -> int16_t out[480]
+    WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
+}
+
+// initialize state of 16 -> 48 resampler
+// Clears 8 int32_t words of history for each of the three stages.
+void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state)
+{
+    const size_t eight_words = 8 * sizeof(int32_t);
+
+    memset(state->S_16_32, 0, eight_words);
+    memset(state->S_32_24, 0, eight_words);
+    memset(state->S_24_48, 0, eight_words);
+}
+
+////////////////////////////
+///// 48 kHz ->  8 kHz /////
+////////////////////////////
+
+// 48 -> 8 resampler
+//
+// Converts 480 input samples into 80 output samples through the chain
+// 48 -> 24 -> 24 (lowpass) -> 16 -> 8 kHz. Filter history is carried in
+// |state|; |tmpmem| is scratch space that is written up to index 495.
+void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem)
+{
+    // Layout: the decimated signal is parked at |decimated|; its lowpassed
+    // version goes to |fresh|, directly behind the 8 carried-over samples at
+    // |history|.
+    int32_t* const history = tmpmem + 8;
+    int32_t* const fresh = tmpmem + 16;
+    int32_t* const decimated = tmpmem + 256;
+
+    // 48 --> 24: int16_t in[480] -> int32_t decimated[240]
+    WebRtcSpl_DownBy2ShortToInt(in, 480, decimated, state->S_48_24);
+
+    // 24 --> 24(LP): int32_t decimated[240] -> int32_t fresh[240]
+    WebRtcSpl_LPBy2IntToInt(decimated, 240, fresh, state->S_24_24);
+
+    // 24 --> 16: restore the saved history in front of the new data, then
+    // save the last 8 samples of the combined region for the next call.
+    memcpy(history, state->S_24_16, 8 * sizeof(int32_t));
+    memcpy(state->S_24_16, history + 240, 8 * sizeof(int32_t));
+    // int32_t in[240] -> int32_t out[160], written from tmpmem[0]
+    WebRtcSpl_Resample48khzTo32khz(history, tmpmem, 80);
+
+    // 16 --> 8: int32_t in[160] -> int16_t out[80]
+    WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
+}
+
+// initialize state of 48 -> 8 resampler
+// Clears the history of every stage: 16 int32_t words for the lowpass stage,
+// 8 for each of the others.
+void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state)
+{
+    const size_t eight_words = 8 * sizeof(int32_t);
+
+    memset(state->S_48_24, 0, eight_words);
+    memset(state->S_24_24, 0, 2 * eight_words);
+    memset(state->S_24_16, 0, eight_words);
+    memset(state->S_16_8, 0, eight_words);
+}
+
+////////////////////////////
+/////  8 kHz -> 48 kHz /////
+////////////////////////////
+
+// 8 -> 48 resampler
+//
+// Converts 80 input samples into 480 output samples through the chain
+// 8 -> 16 -> 12 -> 24 -> 48 kHz. Filter history is carried in |state|;
+// |tmpmem| is scratch space that is written up to index 423.
+void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem)
+{
+    // Layout: 8 carried-over samples at |history|, directly followed by the
+    // 160 upsampled samples at |fresh|; the 12 kHz signal is written to
+    // |stage12| so that the 24 kHz signal can start at tmpmem[0].
+    int32_t* const stage12 = tmpmem + 240;
+    int32_t* const history = tmpmem + 256;
+    int32_t* const fresh = tmpmem + 264;
+
+    // 8 --> 16: int16_t in[80] -> int32_t fresh[160]
+    WebRtcSpl_UpBy2ShortToInt(in, 80, fresh, state->S_8_16);
+
+    // 16 --> 12: restore the saved history in front of the new data, then
+    // save the last 8 samples of the combined region for the next call.
+    memcpy(history, state->S_16_12, 8 * sizeof(int32_t));
+    memcpy(state->S_16_12, history + 160, 8 * sizeof(int32_t));
+    // int32_t in[160] -> int32_t stage12[120]
+    WebRtcSpl_Resample32khzTo24khz(history, stage12, 40);
+
+    // 12 --> 24: int32_t stage12[120] -> int32_t tmpmem[240]
+    WebRtcSpl_UpBy2IntToInt(stage12, 120, tmpmem, state->S_12_24);
+
+    // 24 --> 48: int32_t in[240] -> int16_t out[480]
+    WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
+}
+
+// initialize state of 8 -> 48 resampler
+// Clears 8 int32_t words of history for each of the four stages.
+void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state)
+{
+    const size_t eight_words = 8 * sizeof(int32_t);
+
+    memset(state->S_8_16, 0, eight_words);
+    memset(state->S_16_12, 0, eight_words);
+    memset(state->S_12_24, 0, eight_words);
+    memset(state->S_24_48, 0, eight_words);
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c
@ -0,0 +1,183 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling by two functions.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#ifdef WEBRTC_ARCH_ARM_V7
+
+// allpass filter coefficients.
+// ARMv7 variant: entries written with "<< 15" are pre-shifted so that they can
+// be fed to MUL_ACCUM_2 (smmla); the unshifted entries are used with
+// MUL_ACCUM_1 (smlawb). The resamplers below pick the macro per entry
+// accordingly.
+static const uint32_t kResampleAllpass1[3] = {3284, 24441, 49528 << 15};
+static const uint32_t kResampleAllpass2[3] =
+  {12199, 37471 << 15, 60255 << 15};
+
+// Multiply two 32-bit values and accumulate to another input value.
+// Return: state + ((diff * tbl_value) >> 16)
+//
+// Implemented with a single smlawb instruction whose operands are, in order,
+// diff, tbl_value and state.
+// NOTE(review): smlawb uses only the bottom halfword of its second operand as
+// a signed 16-bit value, so this presumably only suits coefficients below
+// 32768 -- confirm against the ARM instruction reference.
+
+static __inline int32_t MUL_ACCUM_1(int32_t tbl_value,
+                                    int32_t diff,
+                                    int32_t state) {
+  int32_t result;
+  __asm __volatile ("smlawb %0, %1, %2, %3": "=r"(result): "r"(diff),
+                                   "r"(tbl_value), "r"(state));
+  return result;
+}
+
+// Multiply two 32-bit values and accumulate to another input value.
+// Return: state + (((diff << 1) * tbl_value) >> 32)
+//
+// The reason to introduce this function is that, in case we can't use smlawb
+// instruction (in MUL_ACCUM_1) due to input value range, we can still use
+// smmla to save some cycles.
+//
+// The table entries passed here are stored pre-shifted by 15 bits; together
+// with the extra "diff << 1" the 32-bit right shift is then equivalent to the
+// 16-bit shift of MUL_ACCUM_1 on the unshifted coefficient.
+
+static __inline int32_t MUL_ACCUM_2(int32_t tbl_value,
+                                    int32_t diff,
+                                    int32_t state) {
+  int32_t result;
+  __asm __volatile ("smmla %0, %1, %2, %3": "=r"(result): "r"(diff << 1),
+                                  "r"(tbl_value), "r"(state));
+  return result;
+}
+
+#else
+
+// allpass filter coefficients.
+// Generic (non-ARMv7) variant: plain 16-bit values, no pre-shifting.
+static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528};
+static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};
+
+// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
+// Both names expand to the same macro here; the distinction between them only
+// matters in the ARMv7 branch above.
+#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
+#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
+
+#endif  // WEBRTC_ARCH_ARM_V7
+
+
+// decimator
+//
+// Halves the sample rate: consumes input samples in pairs (len >> 1 pairs, so
+// a trailing odd sample is ignored) and writes one saturated int16_t output
+// per pair. |filtState| must hold 8 values; it is read on entry and updated
+// on exit so that consecutive calls continue the same filter.
+// Only compiled when MIPS32_LE is not defined.
+#if !defined(MIPS32_LE)
+void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len,
+                             int16_t* out, int32_t* filtState) {
+  int32_t tmp1, tmp2, diff, in32, out32;
+  size_t i;
+
+  // Work on local copies of the filter state; written back after the loop.
+  register int32_t state0 = filtState[0];
+  register int32_t state1 = filtState[1];
+  register int32_t state2 = filtState[2];
+  register int32_t state3 = filtState[3];
+  register int32_t state4 = filtState[4];
+  register int32_t state5 = filtState[5];
+  register int32_t state6 = filtState[6];
+  register int32_t state7 = filtState[7];
+
+  for (i = (len >> 1); i > 0; i--) {
+    // lower allpass filter
+    // (first sample of the pair, scaled up by 10 bits; three cascaded
+    // sections using kResampleAllpass2 and state0..state3)
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+    state2 = tmp2;
+
+    // upper allpass filter
+    // (second sample of the pair; three cascaded sections using
+    // kResampleAllpass1 and state4..state7)
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+    state6 = tmp2;
+
+    // add two allpass outputs, divide by two and round
+    // (>> 11 removes the 10-bit input scaling plus the factor of two)
+    out32 = (state3 + state7 + 1024) >> 11;
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+  }
+
+  // Persist the filter state for the next call.
+  filtState[0] = state0;
+  filtState[1] = state1;
+  filtState[2] = state2;
+  filtState[3] = state3;
+  filtState[4] = state4;
+  filtState[5] = state5;
+  filtState[6] = state6;
+  filtState[7] = state7;
+}
+#endif  // #if !defined(MIPS32_LE)
+
+
+// interpolator
+//
+// Doubles the sample rate: reads |len| input samples and writes two saturated
+// int16_t outputs for each of them (2 * len in total). |filtState| must hold
+// 8 values; it is read on entry and updated on exit so that consecutive calls
+// continue the same filter.
+void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len,
+                           int16_t* out, int32_t* filtState) {
+  int32_t tmp1, tmp2, diff, in32, out32;
+  size_t i;
+
+  // Work on local copies of the filter state; written back after the loop.
+  register int32_t state0 = filtState[0];
+  register int32_t state1 = filtState[1];
+  register int32_t state2 = filtState[2];
+  register int32_t state3 = filtState[3];
+  register int32_t state4 = filtState[4];
+  register int32_t state5 = filtState[5];
+  register int32_t state6 = filtState[6];
+  register int32_t state7 = filtState[7];
+
+  for (i = len; i > 0; i--) {
+    // lower allpass filter
+    // (input scaled up by 10 bits; three cascaded sections using
+    // kResampleAllpass1 and state0..state3)
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state2);
+    state2 = tmp2;
+
+    // round; limit amplitude to prevent wrap-around; write to output array
+    out32 = (state3 + 512) >> 10;
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+
+    // upper allpass filter
+    // (same input sample again; three cascaded sections using
+    // kResampleAllpass2 and state4..state7)
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state6);
+    state6 = tmp2;
+
+    // round; limit amplitude to prevent wrap-around; write to output array
+    out32 = (state7 + 512) >> 10;
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+  }
+
+  // Persist the filter state for the next call.
+  filtState[0] = state0;
+  filtState[1] = state1;
+  filtState[2] = state2;
+  filtState[3] = state3;
+  filtState[4] = state4;
+  filtState[5] = state5;
+  filtState[6] = state6;
+  filtState[7] = state7;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c
@ -0,0 +1,679 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the implementations of some internal resampling functions.
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
+
+// allpass filter coefficients: row [0] is used by the "upper" branches, row [1] by the "lower" branches below.
+static const int16_t kResampleAllpass[2][3] = {
+        {821, 6110, 12382},  // multiplied with differences that were already shifted right by 14 bits
+        {3050, 9368, 15063}
+};
+
+//
+//   decimator
+// input:  int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN!
+// output: int16_t (saturated) (of length len/2)
+// state:  filter state array; length = 8
+//
+// in    - len input samples; the buffer doubles as scratch space, so its
+//         contents are destroyed
+// len   - number of input samples; it is halved internally, so an odd
+//         trailing sample is ignored
+// out   - receives len/2 saturated samples
+// state - state[0..3] are used by the lower branch, state[4..7] by the upper
+//         branch; updated in place
+
+void WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out,
+                                 int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    // from here on len is the number of output samples
+    len >>= 1;
+
+    // lower allpass filter (operates on even input samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // divide by two and store temporarily (overwrites the even input sample)
+        in[i << 1] = (state[3] >> 1);
+    }
+
+    in++;
+
+    // upper allpass filter (operates on odd input samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // divide by two and store temporarily (overwrites the odd input sample)
+        in[i << 1] = (state[7] >> 1);
+    }
+
+    in--;
+
+    // combine allpass outputs
+    // One output sample per iteration: stepping by two would touch in[] and
+    // out[] beyond the processed range whenever the output count is odd.
+    for (i = 0; i < len; i++)
+    {
+        // add both (already halved) allpass outputs and scale down
+        tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15;
+        // saturate to the int16_t range
+        if (tmp0 > (int32_t)0x00007FFF)
+            tmp0 = 0x00007FFF;
+        if (tmp0 < (int32_t)0xFFFF8000)
+            tmp0 = 0xFFFF8000;
+        out[i] = (int16_t)tmp0;
+    }
+}
+
+//
+//   decimator
+// input:  int16_t
+// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2)
+// state:  filter state array; length = 8 (state[0..3]: lower branch, state[4..7]: upper branch; updated in place)
+
+void WebRtcSpl_DownBy2ShortToInt(const int16_t *in,
+                                  int32_t len,
+                                  int32_t *out,
+                                  int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    len >>= 1;  // from here on len is the number of output samples
+
+    // lower allpass filter (operates on even input samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);  // shift left by 15 and add the 16384 offset
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // divide by two and store temporarily
+        out[i] = (state[3] >> 1);
+    }
+
+    in++;
+
+    // upper allpass filter (operates on odd input samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);  // shift left by 15 and add the 16384 offset
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // divide by two and add to the lower-branch value already stored in out[i]
+        out[i] += (state[7] >> 1);
+    }
+
+    in--;  // only restores the local pointer copy; nothing reads it afterwards
+}
+
+//
+//   interpolator
+// input:  int16_t
+// output: int32_t (normalized, not saturated) (of length len*2)
+// state:  filter state array; length = 8 (state[0..3]: lower branch, state[4..7]: upper branch; updated in place)
+void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out,
+                               int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    // upper allpass filter (generates odd output samples, counting from 1: out[0], out[2], ...)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i] << 15) + (1 << 14);  // shift left by 15 and add the 16384 offset
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // scale down, round and store
+        out[i << 1] = state[7] >> 15;
+    }
+
+    out++;  // shift the write position by one so the second pass fills out[1], out[3], ...
+
+    // lower allpass filter (generates even output samples, counting from 1: out[1], out[3], ...)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i] << 15) + (1 << 14);  // same input samples as the first pass
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // scale down, round and store
+        out[i << 1] = state[3] >> 15;
+    }
+}
+
+//
+//   interpolator
+// input:  int32_t (shifted 15 positions to the left, + offset 16384)
+// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2)
+// state:  filter state array; length = 8 (state[0..3]: lower branch, state[4..7]: upper branch; updated in place)
+void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out,
+                             int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    // upper allpass filter (generates odd output samples, counting from 1: out[0], out[2], ...)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i];
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // store the filter output as is (no scaling or rounding is applied here)
+        out[i << 1] = state[7];
+    }
+
+    out++;  // shift the write position by one so the second pass fills out[1], out[3], ...
+
+    // lower allpass filter (generates even output samples, counting from 1: out[1], out[3], ...)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i];
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // store the filter output as is (no scaling or rounding is applied here)
+        out[i << 1] = state[3];
+    }
+}
+
+//
+//   interpolator
+// input:  int32_t (shifted 15 positions to the left, + offset 16384)
+// output: int16_t (saturated) (of length len*2)
+// state:  filter state array; length = 8 (state[0..3]: lower branch, state[4..7]: upper branch; updated in place)
+void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out,
+                               int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    // upper allpass filter (generates odd output samples, counting from 1: out[0], out[2], ...)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i];
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // scale down, saturate and store
+        tmp1 = state[7] >> 15;
+        if (tmp1 > (int32_t)0x00007FFF)
+            tmp1 = 0x00007FFF;
+        if (tmp1 < (int32_t)0xFFFF8000)
+            tmp1 = 0xFFFF8000;
+        out[i << 1] = (int16_t)tmp1;
+    }
+
+    out++;  // shift the write position by one so the second pass fills out[1], out[3], ...
+
+    // lower allpass filter (generates even output samples, counting from 1: out[1], out[3], ...)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i];
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // scale down, saturate and store
+        tmp1 = state[3] >> 15;
+        if (tmp1 > (int32_t)0x00007FFF)
+            tmp1 = 0x00007FFF;
+        if (tmp1 < (int32_t)0xFFFF8000)
+            tmp1 = 0xFFFF8000;
+        out[i << 1] = (int16_t)tmp1;
+    }
+}
+
+//   lowpass filter
+// input:  int16_t
+// output: int32_t (normalized, not saturated) (2 * (len / 2) samples, i.e. len samples for even len)
+// state:  filter state array; length = 16 (state[0] through state[15] are accessed below; updated in place)
+void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
+                               int32_t* state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    len >>= 1;  // from here on len is the number of even/odd sample pairs
+
+    // lower allpass filter: odd input -> even output samples
+    in++;
+    // initial state of polyphase delay element
+    tmp0 = state[12];  // state[12] is assigned the last scaled odd input sample in the final loop below
+    for (i = 0; i < len; i++)
+    {
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // divide by two and store temporarily; the odd sample loaded next is consumed by the following iteration
+        out[i << 1] = state[3] >> 1;
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+    }
+    in--;
+
+    // upper allpass filter: even input -> even output samples
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // average the two allpass outputs, scale down and store
+        out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
+    }
+
+    // switch to odd output samples
+    out++;
+
+    // lower allpass filter: even input -> odd output samples
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[9];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[8] + diff * kResampleAllpass[1][0];
+        state[8] = tmp0;
+        diff = tmp1 - state[10];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[9] + diff * kResampleAllpass[1][1];
+        state[9] = tmp1;
+        diff = tmp0 - state[11];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[11] = state[10] + diff * kResampleAllpass[1][2];
+        state[10] = tmp0;
+
+        // divide by two and store temporarily
+        out[i << 1] = state[11] >> 1;
+    }
+
+    // upper allpass filter: odd input -> odd output samples
+    in++;
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[13];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[12] + diff * kResampleAllpass[0][0];
+        state[12] = tmp0;
+        diff = tmp1 - state[14];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[13] + diff * kResampleAllpass[0][1];
+        state[13] = tmp1;
+        diff = tmp0 - state[15];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[15] = state[14] + diff * kResampleAllpass[0][2];
+        state[14] = tmp0;
+
+        // average the two allpass outputs, scale down and store
+        out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
+    }
+}
+
+//   lowpass filter
+// input:  int32_t (shifted 15 positions to the left, + offset 16384)
+// output: int32_t (normalized, not saturated) (2 * (len / 2) samples, i.e. len samples for even len)
+// state:  filter state array; length = 16 (state[0] through state[15] are accessed below; updated in place)
+void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
+                             int32_t* state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    len >>= 1;  // from here on len is the number of even/odd sample pairs
+
+    // lower allpass filter: odd input -> even output samples
+    in++;
+    // initial state of polyphase delay element
+    tmp0 = state[12];  // state[12] is assigned the last odd input sample in the final loop below
+    for (i = 0; i < len; i++)
+    {
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // divide by two and store temporarily; the odd sample loaded next is consumed by the following iteration
+        out[i << 1] = state[3] >> 1;
+        tmp0 = in[i << 1];
+    }
+    in--;
+
+    // upper allpass filter: even input -> even output samples
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // average the two allpass outputs, scale down and store
+        out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
+    }
+
+    // switch to odd output samples
+    out++;
+
+    // lower allpass filter: even input -> odd output samples
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[9];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[8] + diff * kResampleAllpass[1][0];
+        state[8] = tmp0;
+        diff = tmp1 - state[10];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[9] + diff * kResampleAllpass[1][1];
+        state[9] = tmp1;
+        diff = tmp0 - state[11];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[11] = state[10] + diff * kResampleAllpass[1][2];
+        state[10] = tmp0;
+
+        // divide by two and store temporarily
+        out[i << 1] = state[11] >> 1;
+    }
+
+    // upper allpass filter: odd input -> odd output samples
+    in++;
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[13];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[12] + diff * kResampleAllpass[0][0];
+        state[12] = tmp0;
+        diff = tmp1 - state[14];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[13] + diff * kResampleAllpass[0][1];
+        state[13] = tmp1;
+        diff = tmp0 - state[15];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[15] = state[14] + diff * kResampleAllpass[0][2];
+        state[14] = tmp0;
+
+        // average the two allpass outputs, scale down and store
+        out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
+    }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h
@ -0,0 +1,47 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This header file contains some internal resampling functions.
+ *
+ */
+
+#ifndef WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_
+#define WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_
+
+#include "webrtc/typedefs.h"
+
+/*******************************************************************
+ * resample_by_2_internal.c
+ * Functions for internal use in the other resample functions
+ ******************************************************************/
+void WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out,
+                                 int32_t *state);  // decimator; in is overwritten; state holds 8 values
+// Decimator from int16_t to int32_t; writes len/2 samples; state holds 8 values.
+void WebRtcSpl_DownBy2ShortToInt(const int16_t *in, int32_t len,
+                                 int32_t *out, int32_t *state);
+// Interpolator from int16_t to int32_t; writes len*2 samples; state holds 8 values.
+void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len,
+                               int32_t *out, int32_t *state);
+// Interpolator from int32_t to int32_t; writes len*2 samples; state holds 8 values.
+void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out,
+                             int32_t *state);
+// Interpolator from int32_t to saturated int16_t; writes len*2 samples; state holds 8 values.
+void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len,
+                               int16_t *out, int32_t *state);
+// Lowpass filter from int16_t to int32_t; the implementation accesses state[0..15].
+void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len,
+                               int32_t* out, int32_t* state);
+// Lowpass filter from int32_t to int32_t; the implementation accesses state[0..15].
+void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
+                             int32_t* state);
+
+#endif // WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c
@ -0,0 +1,290 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling by two functions.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#if defined(MIPS32_LE)
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// allpass filter coefficients (the MIPS_DSP_R2_LE path below hard-codes the same values as k1Res0..2 and k2Res0..2).
+static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528};
+static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};
+
+// Multiply a 32-bit value with a 16-bit value and accumulate to another input (both macros expand identically in this file):
+#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
+#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
+
+// decimator
+//
+// Consumes input samples in pairs and writes one saturated output sample per
+// pair, i.e. len >> 1 samples in total.
+//
+// in        - input samples; 2 * (len >> 1) of them are read
+// len       - number of input samples
+// out       - receives len >> 1 samples
+// filtState - eight filter state values; read on entry and written back on
+//             exit
+void WebRtcSpl_DownsampleBy2(const int16_t* in,
+                             size_t len,
+                             int16_t* out,
+                             int32_t* filtState) {
+  int32_t out32;
+  size_t i, len1;
+
+  // Work on local copies of the filter states; they are stored back at the
+  // end of the function.
+  register int32_t state0 = filtState[0];
+  register int32_t state1 = filtState[1];
+  register int32_t state2 = filtState[2];
+  register int32_t state3 = filtState[3];
+  register int32_t state4 = filtState[4];
+  register int32_t state5 = filtState[5];
+  register int32_t state6 = filtState[6];
+  register int32_t state7 = filtState[7];
+
+#if defined(MIPS_DSP_R2_LE)
+  int32_t k1Res0, k1Res1, k1Res2, k2Res0, k2Res1, k2Res2;
+
+  // Same values as kResampleAllpass1[] / kResampleAllpass2[].
+  k1Res0= 3284;
+  k1Res1= 24441;
+  k1Res2= 49528;
+  k2Res0= 12199;
+  k2Res1= 37471;
+  k2Res2= 60255;
+  len1 = (len >> 1);
+
+  const int32_t* inw = (int32_t*)in;
+  int32_t tmp11, tmp12, tmp21, tmp22;
+  int32_t in322, in321;
+  int32_t diff1, diff2;
+  for (i = len1; i > 0; i--) {
+    // Load one pair of input samples, scale both up by 2^10 and form the
+    // first-stage differences of the two branches.
+    __asm__ volatile (
+      "lh         %[in321],    0(%[inw])                  \n\t"
+      "lh         %[in322],    2(%[inw])                  \n\t"
+
+      "sll        %[in321],    %[in321],      10          \n\t"
+      "sll        %[in322],    %[in322],      10          \n\t"
+
+      "addiu      %[inw],      %[inw],        4           \n\t"
+
+      "subu       %[diff1],    %[in321],      %[state1]   \n\t"
+      "subu       %[diff2],    %[in322],      %[state5]   \n\t"
+
+      : [in322] "=&r" (in322), [in321] "=&r" (in321),
+        [diff1] "=&r" (diff1), [diff2] "=r" (diff2), [inw] "+r" (inw)
+      : [state1] "r" (state1), [state5] "r" (state5)
+      : "memory"
+    );
+
+    // First and second stage of both branches, computed side by side on the
+    // $ac0 and $ac1 accumulators.
+    __asm__ volatile (
+      "mult       $ac0,       %[diff1],       %[k2Res0]   \n\t"
+      "mult       $ac1,       %[diff2],       %[k1Res0]   \n\t"
+
+      "extr.w     %[tmp11],   $ac0,           16          \n\t"
+      "extr.w     %[tmp12],   $ac1,           16          \n\t"
+
+      "addu       %[tmp11],   %[state0],      %[tmp11]    \n\t"
+      "addu       %[tmp12],   %[state4],      %[tmp12]    \n\t"
+
+      "addiu      %[state0],  %[in321],       0           \n\t"
+      "addiu      %[state4],  %[in322],       0           \n\t"
+
+      "subu       %[diff1],   %[tmp11],       %[state2]   \n\t"
+      "subu       %[diff2],   %[tmp12],       %[state6]   \n\t"
+
+      "mult       $ac0,       %[diff1],       %[k2Res1]   \n\t"
+      "mult       $ac1,       %[diff2],       %[k1Res1]   \n\t"
+
+      "extr.w     %[tmp21],   $ac0,           16          \n\t"
+      "extr.w     %[tmp22],   $ac1,           16          \n\t"
+
+      "addu       %[tmp21],   %[state1],      %[tmp21]    \n\t"
+      "addu       %[tmp22],   %[state5],      %[tmp22]    \n\t"
+
+      "addiu      %[state1],  %[tmp11],       0           \n\t"
+      "addiu      %[state5],  %[tmp12],       0           \n\t"
+      : [tmp22] "=r" (tmp22), [tmp21] "=&r" (tmp21),
+        [tmp11] "=&r" (tmp11), [state0] "+r" (state0),
+        [state1] "+r" (state1),
+        [state2] "+r" (state2),
+        [state4] "+r" (state4), [tmp12] "=&r" (tmp12),
+        [state6] "+r" (state6), [state5] "+r" (state5)
+      : [k1Res1] "r" (k1Res1), [k2Res1] "r" (k2Res1), [k2Res0] "r" (k2Res0),
+        [diff2] "r" (diff2), [diff1] "r" (diff1), [in322] "r" (in322),
+        [in321] "r" (in321), [k1Res0] "r" (k1Res0)
+      : "hi", "lo", "$ac1hi", "$ac1lo"
+    );
+
+    // Third stage of both branches, then combine the two branch outputs.
+    __asm__ volatile (
+      "subu       %[diff1],   %[tmp21],       %[state3]   \n\t"
+      "subu       %[diff2],   %[tmp22],       %[state7]   \n\t"
+
+      "mult       $ac0,       %[diff1],       %[k2Res2]   \n\t"
+      "mult       $ac1,       %[diff2],       %[k1Res2]   \n\t"
+      "extr.w     %[state3],  $ac0,           16          \n\t"
+      "extr.w     %[state7],  $ac1,           16          \n\t"
+      "addu       %[state3],  %[state2],      %[state3]   \n\t"
+      "addu       %[state7],  %[state6],      %[state7]   \n\t"
+
+      "addiu      %[state2],  %[tmp21],       0           \n\t"
+      "addiu      %[state6],  %[tmp22],       0           \n\t"
+
+      // add two allpass outputs, divide by two and round
+      "addu       %[out32],   %[state3],      %[state7]   \n\t"
+      "addiu      %[out32],   %[out32],       1024        \n\t"
+      "sra        %[out32],   %[out32],       11          \n\t"
+      : [state3] "+r" (state3), [state6] "+r" (state6),
+        [state2] "+r" (state2), [diff2] "=&r" (diff2),
+        [out32] "=r" (out32), [diff1] "=&r" (diff1), [state7] "+r" (state7)
+      : [tmp22] "r" (tmp22), [tmp21] "r" (tmp21),
+        [k1Res2] "r" (k1Res2), [k2Res2] "r" (k2Res2)
+      : "hi", "lo", "$ac1hi", "$ac1lo"
+    );
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+  }
+#else  // #if defined(MIPS_DSP_R2_LE)
+  int32_t tmp1, tmp2, diff;
+  int32_t in32;
+  // The main loop is unrolled four times: each pass consumes eight input
+  // samples and produces four output samples.
+  len1 = (len >> 1)/4;
+  for (i = len1; i > 0; i--) {
+    // lower allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+    state2 = tmp2;
+
+    // upper allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+    state6 = tmp2;
+
+    // add two allpass outputs, divide by two and round
+    out32 = (state3 + state7 + 1024) >> 11;
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+    // lower allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+    state2 = tmp2;
+
+    // upper allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+    state6 = tmp2;
+
+    // add two allpass outputs, divide by two and round
+    out32 = (state3 + state7 + 1024) >> 11;
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+    // lower allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+    state2 = tmp2;
+
+    // upper allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+    state6 = tmp2;
+
+    // add two allpass outputs, divide by two and round
+    out32 = (state3 + state7 + 1024) >> 11;
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+    // lower allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+    state2 = tmp2;
+
+    // upper allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+    state6 = tmp2;
+
+    // add two allpass outputs, divide by two and round
+    out32 = (state3 + state7 + 1024) >> 11;
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+  }
+
+  // Tail: the unrolled loop only covers multiples of four output samples.
+  // Produce the remaining (len >> 1) % 4 samples one at a time so that this
+  // path emits len >> 1 samples, like the MIPS_DSP_R2_LE path above.
+  for (i = (len >> 1) & 3; i > 0; i--) {
+    // lower allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state1;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
+    state0 = in32;
+    diff = tmp1 - state2;
+    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
+    state1 = tmp1;
+    diff = tmp2 - state3;
+    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
+    state2 = tmp2;
+
+    // upper allpass filter
+    in32 = (int32_t)(*in++) << 10;
+    diff = in32 - state5;
+    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
+    state4 = in32;
+    diff = tmp1 - state6;
+    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
+    state5 = tmp1;
+    diff = tmp2 - state7;
+    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
+    state6 = tmp2;
+
+    // add two allpass outputs, divide by two and round
+    out32 = (state3 + state7 + 1024) >> 11;
+
+    // limit amplitude to prevent wrap-around, and write to output array
+    *out++ = WebRtcSpl_SatW32ToW16(out32);
+  }
+#endif  // #if defined(MIPS_DSP_R2_LE)
+  // Store the eight updated filter states back into filtState[0..7]
+  // (byte offsets 0..28).
+  __asm__ volatile (
+    "sw       %[state0],      0(%[filtState])     \n\t"
+    "sw       %[state1],      4(%[filtState])     \n\t"
+    "sw       %[state2],      8(%[filtState])     \n\t"
+    "sw       %[state3],      12(%[filtState])    \n\t"
+    "sw       %[state4],      16(%[filtState])    \n\t"
+    "sw       %[state5],      20(%[filtState])    \n\t"
+    "sw       %[state6],      24(%[filtState])    \n\t"
+    "sw       %[state7],      28(%[filtState])    \n\t"
+    :
+    : [state0] "r" (state0), [state1] "r" (state1), [state2] "r" (state2),
+      [state3] "r" (state3), [state4] "r" (state4), [state5] "r" (state5),
+      [state6] "r" (state6), [state7] "r" (state7), [filtState] "r" (filtState)
+    : "memory"
+  );
+}
+
+#endif  // #if defined(MIPS32_LE)
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c
@ -0,0 +1,239 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling functions between 48, 44, 32 and 24 kHz.
+ * The description headers can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Interpolation filter taps for 48 -> 32 kHz: one row of 8 taps per output sample in a block; row 1 is row 0 reversed.
+static const int16_t kCoefficients48To32[2][8] = {
+        {778, -2050, 1087, 23285, 12903, -3783, 441, 222},
+        {222, 441, -3783, 12903, 23285, 1087, -2050, 778}
+};
+// Taps for 32 -> 24 kHz: 3 rows of 8 taps; row 2 is row 0 reversed and row 1 is symmetric.
+static const int16_t kCoefficients32To24[3][8] = {
+        {767, -2362, 2434, 24406, 10620, -3838, 721, 90},
+        {386, -381, -2646, 19062, 19062, -2646, -381, 386},
+        {90, 721, -3838, 10620, 24406, 2434, -2362, 767}
+};
+// Taps for 44 -> 32 kHz: 4 rows of 9 taps; WebRtcSpl_Resample44khzTo32khz uses row 3 once and rows 0..2 twice each.
+static const int16_t kCoefficients44To32[4][9] = {
+        {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138},
+        {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91},
+        {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53},
+        {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126}
+};
+
+//   Resampling ratio: 2/3 (48 kHz -> 32 kHz)
+// input:  int32_t (normalized, not saturated) :: 3 * K samples consumed, 3 * K + 6 samples read
+// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K
+//      K: number of blocks (each block: 3 input samples -> 2 output samples)
+
+void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Each block computes Out[0] from In[0..7] and Out[1] from In[1..8]
+    // (8 taps each), so the last block reads 6 samples past 3 * K.
+    int32_t tmp;
+    size_t m;
+
+    for (m = 0; m < K; m++)
+    {
+        tmp = 1 << 14;  // each sum starts at the 16384 offset
+        tmp += kCoefficients48To32[0][0] * In[0];
+        tmp += kCoefficients48To32[0][1] * In[1];
+        tmp += kCoefficients48To32[0][2] * In[2];
+        tmp += kCoefficients48To32[0][3] * In[3];
+        tmp += kCoefficients48To32[0][4] * In[4];
+        tmp += kCoefficients48To32[0][5] * In[5];
+        tmp += kCoefficients48To32[0][6] * In[6];
+        tmp += kCoefficients48To32[0][7] * In[7];
+        Out[0] = tmp;
+
+        tmp = 1 << 14;
+        tmp += kCoefficients48To32[1][0] * In[1];
+        tmp += kCoefficients48To32[1][1] * In[2];
+        tmp += kCoefficients48To32[1][2] * In[3];
+        tmp += kCoefficients48To32[1][3] * In[4];
+        tmp += kCoefficients48To32[1][4] * In[5];
+        tmp += kCoefficients48To32[1][5] * In[6];
+        tmp += kCoefficients48To32[1][6] * In[7];
+        tmp += kCoefficients48To32[1][7] * In[8];
+        Out[1] = tmp;
+
+        // advance to the next block: 3 input samples, 2 output samples
+        In += 3;
+        Out += 2;
+    }
+}
+
+//   Resampling ratio: 3/4 (32 kHz -> 24 kHz)
+// input:  int32_t (normalized, not saturated) :: 4 * K samples consumed, 4 * K + 6 samples read
+// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K
+//      K: number of blocks (each block: 4 input samples -> 3 output samples)
+
+void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Each block computes Out[0], Out[1], Out[2] from In[0..7], In[1..8]
+    // and In[2..9] (8 taps each), so the last block reads 6 samples past 4 * K.
+    size_t m;
+    int32_t tmp;
+
+    for (m = 0; m < K; m++)
+    {
+        tmp = 1 << 14;  // each sum starts at the 16384 offset
+        tmp += kCoefficients32To24[0][0] * In[0];
+        tmp += kCoefficients32To24[0][1] * In[1];
+        tmp += kCoefficients32To24[0][2] * In[2];
+        tmp += kCoefficients32To24[0][3] * In[3];
+        tmp += kCoefficients32To24[0][4] * In[4];
+        tmp += kCoefficients32To24[0][5] * In[5];
+        tmp += kCoefficients32To24[0][6] * In[6];
+        tmp += kCoefficients32To24[0][7] * In[7];
+        Out[0] = tmp;
+
+        tmp = 1 << 14;
+        tmp += kCoefficients32To24[1][0] * In[1];
+        tmp += kCoefficients32To24[1][1] * In[2];
+        tmp += kCoefficients32To24[1][2] * In[3];
+        tmp += kCoefficients32To24[1][3] * In[4];
+        tmp += kCoefficients32To24[1][4] * In[5];
+        tmp += kCoefficients32To24[1][5] * In[6];
+        tmp += kCoefficients32To24[1][6] * In[7];
+        tmp += kCoefficients32To24[1][7] * In[8];
+        Out[1] = tmp;
+
+        tmp = 1 << 14;
+        tmp += kCoefficients32To24[2][0] * In[2];
+        tmp += kCoefficients32To24[2][1] * In[3];
+        tmp += kCoefficients32To24[2][2] * In[4];
+        tmp += kCoefficients32To24[2][3] * In[5];
+        tmp += kCoefficients32To24[2][4] * In[6];
+        tmp += kCoefficients32To24[2][5] * In[7];
+        tmp += kCoefficients32To24[2][6] * In[8];
+        tmp += kCoefficients32To24[2][7] * In[9];
+        Out[2] = tmp;
+
+        // advance to the next block: 4 input samples, 3 output samples
+        In += 4;
+        Out += 3;
+    }
+}
+
+//
+// fractional resampling filters
+//   Fout = 11/16 * Fin
+//   Fout =  8/11 * Fin
+//
+
+// Compute two 9-tap inner products with shared coefficients: *out1 reads in1[0..8], *out2 reads in2[0] down to in2[-8].
+static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2,
+                                       const int16_t *coef_ptr, int32_t *out1,
+                                       int32_t *out2)
+{
+    int32_t tmp1 = 16384;  // both sums start at the 16384 offset
+    int32_t tmp2 = 16384;
+    int16_t coef;
+
+    coef = coef_ptr[0];
+    tmp1 += coef * in1[0];
+    tmp2 += coef * in2[-0];
+
+    coef = coef_ptr[1];
+    tmp1 += coef * in1[1];
+    tmp2 += coef * in2[-1];
+
+    coef = coef_ptr[2];
+    tmp1 += coef * in1[2];
+    tmp2 += coef * in2[-2];
+
+    coef = coef_ptr[3];
+    tmp1 += coef * in1[3];
+    tmp2 += coef * in2[-3];
+
+    coef = coef_ptr[4];
+    tmp1 += coef * in1[4];
+    tmp2 += coef * in2[-4];
+
+    coef = coef_ptr[5];
+    tmp1 += coef * in1[5];
+    tmp2 += coef * in2[-5];
+
+    coef = coef_ptr[6];
+    tmp1 += coef * in1[6];
+    tmp2 += coef * in2[-6];
+
+    coef = coef_ptr[7];
+    tmp1 += coef * in1[7];
+    tmp2 += coef * in2[-7];
+
+    coef = coef_ptr[8];  // last tap is added while storing the results
+    *out1 = tmp1 + coef * in1[8];
+    *out2 = tmp2 + coef * in2[-8];
+}
+
+//   Resampling ratio: 8/11 (44 kHz -> 32 kHz)
+// input:  int32_t (normalized, not saturated) :: 11 * K samples consumed, 11 * K + 7 samples read
+// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size  8 * K
+//      K: number of blocks (each block: 11 input samples -> 8 output samples)
+
+void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Each block produces Out[0..7] from In[0..17]; since only 11 samples
+    // are consumed per block, the last block reads 7 samples past 11 * K.
+    int32_t tmp;
+    size_t m;
+
+    for (m = 0; m < K; m++)
+    {
+        tmp = 1 << 14;
+
+        // first output: In[3] scaled by 2^15, unfiltered (NOTE(review): << on a negative value is undefined in C)
+        Out[0] = ((int32_t)In[3] << 15) + tmp;
+
+        // Out[4]: coefficient row 3 applied to In[5..13]
+        tmp += kCoefficients44To32[3][0] * In[5];
+        tmp += kCoefficients44To32[3][1] * In[6];
+        tmp += kCoefficients44To32[3][2] * In[7];
+        tmp += kCoefficients44To32[3][3] * In[8];
+        tmp += kCoefficients44To32[3][4] * In[9];
+        tmp += kCoefficients44To32[3][5] * In[10];
+        tmp += kCoefficients44To32[3][6] * In[11];
+        tmp += kCoefficients44To32[3][7] * In[12];
+        tmp += kCoefficients44To32[3][8] * In[13];
+        Out[4] = tmp;
+
+        // row 0: Out[1] from In[0..8], Out[7] from In[17] down to In[9]
+        WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]);
+
+        // row 1: Out[2] from In[2..10], Out[6] from In[15] down to In[7]
+        WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]);
+
+        // row 2: Out[3] from In[3..11], Out[5] from In[14] down to In[6]
+        WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]);
+
+        // advance to the next block: 11 input samples, 8 output samples
+        In += 11;
+        Out += 8;
+    }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc
@ -0,0 +1,579 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+static const size_t kVector16Size = 9;  // element count of the shared |vector16| test input below
+static const int16_t vector16[kVector16Size] = {1, -15511, 4323, 1963,
+  WEBRTC_SPL_WORD16_MAX, 0, WEBRTC_SPL_WORD16_MIN + 5, -3333, 345};
+
+class SplTest : public testing::Test {  // fixture used by the TEST_F cases in this file
+ protected:
+   SplTest() {
+     WebRtcSpl_Init();  // called whenever the fixture is constructed
+   }
+   virtual ~SplTest() {
+   }
+};
+
+TEST_F(SplTest, MacroTest) {
+    // Checks the WEBRTC_SPL_* macros against fixed reference values.
+    int A = 10;
+    int B = 21;
+    int a = -3;
+    int b = WEBRTC_SPL_WORD32_MAX;
+
+    EXPECT_EQ(10, WEBRTC_SPL_MIN(A, B));
+    EXPECT_EQ(21, WEBRTC_SPL_MAX(A, B));
+
+    EXPECT_EQ(3, WEBRTC_SPL_ABS_W16(a));
+    EXPECT_EQ(3, WEBRTC_SPL_ABS_W32(a));
+
+    EXPECT_EQ(-63, WEBRTC_SPL_MUL(a, B));
+    EXPECT_EQ(-2147483645, WEBRTC_SPL_MUL(a, b));
+    EXPECT_EQ(2147483651u, WEBRTC_SPL_UMUL(a, b));
+    b = WEBRTC_SPL_WORD16_MAX >> 1;
+    EXPECT_EQ(4294918147u, WEBRTC_SPL_UMUL_32_16(a, b));
+    EXPECT_EQ(-49149, WEBRTC_SPL_MUL_16_U16(a, b));
+
+    a = b;
+    b = -3;
+
+    EXPECT_EQ(-1, WEBRTC_SPL_MUL_16_32_RSFT16(a, b));
+    EXPECT_EQ(-1, WEBRTC_SPL_MUL_16_32_RSFT15(a, b));
+    EXPECT_EQ(-3, WEBRTC_SPL_MUL_16_32_RSFT14(a, b));
+    EXPECT_EQ(-24, WEBRTC_SPL_MUL_16_32_RSFT11(a, b));
+
+    EXPECT_EQ(-12288, WEBRTC_SPL_MUL_16_16_RSFT(a, b, 2));
+    EXPECT_EQ(-12287, WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, 2));
+
+    EXPECT_EQ(21, WEBRTC_SPL_SAT(a, A, B));
+    EXPECT_EQ(21, WEBRTC_SPL_SAT(a, B, A));
+
+    // Shifting with negative numbers allowed
+    int shift_amount = 1;  // Workaround compiler warning using variable here.
+    // Positive means left shift
+    EXPECT_EQ(32766, WEBRTC_SPL_SHIFT_W32(a, shift_amount));
+
+    // Shifting with negative numbers not allowed
+    // We cannot do casting here due to signed/unsigned problem
+    EXPECT_EQ(32766, WEBRTC_SPL_LSHIFT_W32(a, 1));
+
+    EXPECT_EQ(8191u, WEBRTC_SPL_RSHIFT_U32(a, 1));
+
+    EXPECT_EQ(1470, WEBRTC_SPL_RAND(A));
+
+    EXPECT_EQ(-49149, WEBRTC_SPL_MUL_16_16(a, b));
+    EXPECT_EQ(1073676289, WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX,
+                                               WEBRTC_SPL_WORD16_MAX));
+    EXPECT_EQ(1073709055, WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MAX,
+                                                      WEBRTC_SPL_WORD32_MAX));
+    EXPECT_EQ(1073741824, WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN,
+                                                      WEBRTC_SPL_WORD32_MIN));
+#ifdef WEBRTC_ARCH_ARM_V7
+    EXPECT_EQ(-1073741824,
+              WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN,
+                                          WEBRTC_SPL_WORD32_MAX));
+#else  // builds without WEBRTC_ARCH_ARM_V7 expect a result that differs by one
+    EXPECT_EQ(-1073741823,
+              WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN,
+                                          WEBRTC_SPL_WORD32_MAX));
+#endif
+}
+
+TEST_F(SplTest, InlineTest) {  // GetSizeInBits, NormW32/NormW16/NormU32 and saturating 16/32-bit add/sub
+    int16_t a16 = 121;
+    int16_t b16 = -17;
+    int32_t a32 = 111121;
+    int32_t b32 = -1711;
+
+    EXPECT_EQ(17, WebRtcSpl_GetSizeInBits(a32));
+
+    EXPECT_EQ(0, WebRtcSpl_NormW32(0));
+    EXPECT_EQ(31, WebRtcSpl_NormW32(-1));
+    EXPECT_EQ(0, WebRtcSpl_NormW32(WEBRTC_SPL_WORD32_MIN));
+    EXPECT_EQ(14, WebRtcSpl_NormW32(a32));
+
+    EXPECT_EQ(0, WebRtcSpl_NormW16(0));
+    EXPECT_EQ(15, WebRtcSpl_NormW16(-1));
+    EXPECT_EQ(0, WebRtcSpl_NormW16(WEBRTC_SPL_WORD16_MIN));
+    EXPECT_EQ(4, WebRtcSpl_NormW16(b32));
+    for (int ii = 0; ii < 15; ++ii) {  // every power of two that fits in int16_t, and its negation
+      int16_t value = 1 << ii;
+      EXPECT_EQ(14 - ii, WebRtcSpl_NormW16(value));
+      EXPECT_EQ(15 - ii, WebRtcSpl_NormW16(-value));
+    }
+
+    EXPECT_EQ(0, WebRtcSpl_NormU32(0u));
+    EXPECT_EQ(0, WebRtcSpl_NormU32(0xffffffff));
+    EXPECT_EQ(15, WebRtcSpl_NormU32(static_cast<uint32_t>(a32)));
+
+    EXPECT_EQ(104, WebRtcSpl_AddSatW16(a16, b16));
+    EXPECT_EQ(138, WebRtcSpl_SubSatW16(a16, b16));
+
+    EXPECT_EQ(109410, WebRtcSpl_AddSatW32(a32, b32));
+    EXPECT_EQ(112832, WebRtcSpl_SubSatW32(a32, b32));
+
+    a32 = 0x80000000;
+    b32 = 0x80000000;
+    // Cast to signed int to avoid compiler complaint on gtest.h.
+    EXPECT_EQ(static_cast<int>(0x80000000), WebRtcSpl_AddSatW32(a32, b32));
+    a32 = 0x7fffffff;
+    b32 = 0x7fffffff;
+    EXPECT_EQ(0x7fffffff, WebRtcSpl_AddSatW32(a32, b32));
+    a32 = 0;
+    b32 = 0x80000000;
+    EXPECT_EQ(0x7fffffff, WebRtcSpl_SubSatW32(a32, b32));
+    a32 = 0x7fffffff;
+    b32 = 0x80000000;
+    EXPECT_EQ(0x7fffffff, WebRtcSpl_SubSatW32(a32, b32));
+    a32 = 0x80000000;
+    b32 = 0x7fffffff;
+    EXPECT_EQ(static_cast<int>(0x80000000), WebRtcSpl_SubSatW32(a32, b32));
+}
+
+TEST_F(SplTest, MathOperationsTest) {  // square-root and division helpers against reference values
+    int A = 1134567892;
+    int32_t num = 117;
+    int32_t den = -5;
+    uint16_t denU = 5;
+    EXPECT_EQ(33700, WebRtcSpl_Sqrt(A));
+    EXPECT_EQ(33683, WebRtcSpl_SqrtFloor(A));
+
+    // Note the argument order: WebRtcSpl_DivResultInQ31 is called with (den, num).
+    EXPECT_EQ(-91772805, WebRtcSpl_DivResultInQ31(den, num));
+    EXPECT_EQ(-23, WebRtcSpl_DivW32W16ResW16(num, (int16_t)den));
+    EXPECT_EQ(-23, WebRtcSpl_DivW32W16(num, (int16_t)den));
+    EXPECT_EQ(23u, WebRtcSpl_DivU32U16(num, denU));
+    EXPECT_EQ(0, WebRtcSpl_DivW32HiLow(128, 0, 256));
+}
+
+TEST_F(SplTest, BasicArrayOperationsTest) {  // set/zero/copy/shift/reverse helpers on 4-element vectors
+    const size_t kVectorSize = 4;
+    int B[] = {4, 12, 133, 1100};
+    int16_t b16[kVectorSize];
+    int32_t b32[kVectorSize];
+
+    int16_t bTmp16[kVectorSize];
+    int32_t bTmp32[kVectorSize];
+
+    WebRtcSpl_MemSetW16(b16, 3, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(3, b16[kk]);
+    }
+    WebRtcSpl_ZerosArrayW16(b16, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(0, b16[kk]);
+    }
+    WebRtcSpl_MemSetW32(b32, 3, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(3, b32[kk]);
+    }
+    WebRtcSpl_ZerosArrayW32(b32, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(0, b32[kk]);
+    }
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        bTmp16[kk] = (int16_t)kk;
+        bTmp32[kk] = (int32_t)kk;
+    }
+    WEBRTC_SPL_MEMCPY_W16(b16, bTmp16, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(b16[kk], bTmp16[kk]);
+    }
+//    WEBRTC_SPL_MEMCPY_W32(b32, bTmp32, kVectorSize);
+//    for (int kk = 0; kk < kVectorSize; ++kk) {
+//        EXPECT_EQ(b32[kk], bTmp32[kk]);
+//    }
+    WebRtcSpl_CopyFromEndW16(b16, kVectorSize, 2, bTmp16);
+    for (size_t kk = 0; kk < 2; ++kk) {  // b16 holds 0..3 here, so its last two elements are 2 and 3
+        EXPECT_EQ(static_cast<int16_t>(kk+2), bTmp16[kk]);
+    }
+
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        b32[kk] = B[kk];
+        b16[kk] = (int16_t)B[kk];
+    }
+    WebRtcSpl_VectorBitShiftW32ToW16(bTmp16, kVectorSize, b32, 1);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((B[kk]>>1), bTmp16[kk]);
+    }
+    WebRtcSpl_VectorBitShiftW16(bTmp16, kVectorSize, b16, 1);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((B[kk]>>1), bTmp16[kk]);
+    }
+    WebRtcSpl_VectorBitShiftW32(bTmp32, kVectorSize, b32, 1);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((B[kk]>>1), bTmp32[kk]);
+    }
+
+    WebRtcSpl_MemCpyReversedOrder(&bTmp16[3], b16, kVectorSize);  // destination is passed as a pointer to the last element
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(b16[3-kk], bTmp16[kk]);
+    }
+}
+
+TEST_F(SplTest, MinMaxOperationsTest) {  // min/max value and index searches on 17-element vectors
+  const size_t kVectorSize = 17;
+
+  // Vectors to test the cases where minimum values have to be caught
+  // outside of the unrolled loops in ARM-Neon.
+  int16_t vector16[kVectorSize] = {-1, 7485, 0, 3333,
+      -18283, 0, 12334, -29871, 988, -3333,
+      345, -456, 222, 999,  888, 8774, WEBRTC_SPL_WORD16_MIN};
+  int32_t vector32[kVectorSize] = {-1, 0, 283211, 3333,
+      8712345, 0, -3333, 89345, -374585456, 222, 999, 122345334,
+      -12389756, -987329871, 888, -2, WEBRTC_SPL_WORD32_MIN};
+
+  EXPECT_EQ(WEBRTC_SPL_WORD16_MIN,
+            WebRtcSpl_MinValueW16(vector16, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD32_MIN,
+            WebRtcSpl_MinValueW32(vector32, kVectorSize));
+  EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MinIndexW16(vector16, kVectorSize));
+  EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MinIndexW32(vector32, kVectorSize));
+
+  // Test the cases where maximum values have to be caught
+  // outside of the unrolled loops in ARM-Neon.
+  vector16[kVectorSize - 1] = WEBRTC_SPL_WORD16_MAX;
+  vector32[kVectorSize - 1] = WEBRTC_SPL_WORD32_MAX;
+
+  EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
+            WebRtcSpl_MaxAbsValueW16(vector16, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
+            WebRtcSpl_MaxValueW16(vector16, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
+            WebRtcSpl_MaxAbsValueW32(vector32, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
+            WebRtcSpl_MaxValueW32(vector32, kVectorSize));
+  EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxAbsIndexW16(vector16, kVectorSize));
+  EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxIndexW16(vector16, kVectorSize));
+  EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxIndexW32(vector32, kVectorSize));
+
+  // Test the cases where multiple maximum and minimum values are present;
+  // the index checks below expect the first occurrence (index 1 or 6).
+  vector16[1] = WEBRTC_SPL_WORD16_MAX;
+  vector16[6] = WEBRTC_SPL_WORD16_MIN;
+  vector16[11] = WEBRTC_SPL_WORD16_MIN;
+  vector32[1] = WEBRTC_SPL_WORD32_MAX;
+  vector32[6] = WEBRTC_SPL_WORD32_MIN;
+  vector32[11] = WEBRTC_SPL_WORD32_MIN;
+
+  EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
+            WebRtcSpl_MaxAbsValueW16(vector16, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
+            WebRtcSpl_MaxValueW16(vector16, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD16_MIN,
+            WebRtcSpl_MinValueW16(vector16, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
+            WebRtcSpl_MaxAbsValueW32(vector32, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
+            WebRtcSpl_MaxValueW32(vector32, kVectorSize));
+  EXPECT_EQ(WEBRTC_SPL_WORD32_MIN,
+            WebRtcSpl_MinValueW32(vector32, kVectorSize));
+  EXPECT_EQ(6u, WebRtcSpl_MaxAbsIndexW16(vector16, kVectorSize));
+  EXPECT_EQ(1u, WebRtcSpl_MaxIndexW16(vector16, kVectorSize));
+  EXPECT_EQ(1u, WebRtcSpl_MaxIndexW32(vector32, kVectorSize));
+  EXPECT_EQ(6u, WebRtcSpl_MinIndexW16(vector16, kVectorSize));
+  EXPECT_EQ(6u, WebRtcSpl_MinIndexW32(vector32, kVectorSize));
+}
+
+TEST_F(SplTest, VectorOperationsTest) {  // scaling, affine and element-wise helpers vs. expressions computed inline
+    const size_t kVectorSize = 4;
+    int B[] = {4, 12, 133, 1100};
+    int16_t a16[kVectorSize];
+    int16_t b16[kVectorSize];
+    int16_t bTmp16[kVectorSize];
+
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        a16[kk] = B[kk];
+        b16[kk] = B[kk];
+    }
+
+    WebRtcSpl_AffineTransformVector(bTmp16, b16, 3, 7, 2, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((B[kk]*3+7)>>2, bTmp16[kk]);
+    }
+    WebRtcSpl_ScaleAndAddVectorsWithRound(b16, 3, b16, 2, 2, bTmp16, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((B[kk]*3+B[kk]*2+2)>>2, bTmp16[kk]);
+    }
+
+    WebRtcSpl_AddAffineVectorToVector(bTmp16, b16, 3, 7, 2, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {  // expected: the previous bTmp16 value plus the affine term
+        EXPECT_EQ(((B[kk]*3+B[kk]*2+2)>>2)+((b16[kk]*3+7)>>2), bTmp16[kk]);
+    }
+
+    WebRtcSpl_ScaleVector(b16, bTmp16, 13, kVectorSize, 2);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((b16[kk]*13)>>2, bTmp16[kk]);
+    }
+    WebRtcSpl_ScaleVectorWithSat(b16, bTmp16, 13, kVectorSize, 2);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((b16[kk]*13)>>2, bTmp16[kk]);
+    }
+    WebRtcSpl_ScaleAndAddVectors(a16, 13, 2, b16, 7, 2, bTmp16, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(((a16[kk]*13)>>2)+((b16[kk]*7)>>2), bTmp16[kk]);
+    }
+
+    WebRtcSpl_AddVectorsAndShift(bTmp16, a16, b16, kVectorSize, 2);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {  // a16 and b16 both equal B, so (a + b) >> 2 == B >> 1
+        EXPECT_EQ(B[kk] >> 1, bTmp16[kk]);
+    }
+    WebRtcSpl_ReverseOrderMultArrayElements(bTmp16, a16, &b16[3], kVectorSize, 2);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((a16[kk]*b16[3-kk])>>2, bTmp16[kk]);
+    }
+    WebRtcSpl_ElementwiseVectorMult(bTmp16, a16, b16, kVectorSize, 6);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ((a16[kk]*b16[kk])>>6, bTmp16[kk]);
+    }
+
+    WebRtcSpl_SqrtOfOneMinusXSquared(b16, kVectorSize, bTmp16);
+    for (size_t kk = 0; kk < kVectorSize - 1; ++kk) {
+        EXPECT_EQ(32767, bTmp16[kk]);
+    }
+    EXPECT_EQ(32749, bTmp16[kVectorSize - 1]);
+
+    EXPECT_EQ(0, WebRtcSpl_GetScalingSquare(b16, kVectorSize, 1));
+}
+
+TEST_F(SplTest, EstimatorsTest) {  // WebRtcSpl_LevinsonDurbin return code and outputs at order 2
+  const size_t kOrder = 2;
+  const int32_t unstable_filter[] = { 4, 12, 133, 1100 };
+  const int32_t stable_filter[] = { 1100, 133, 12, 4 };
+  int16_t lpc[kOrder + 2] = { 0 };
+  int16_t refl[kOrder + 2] = { 0 };
+  int16_t lpc_result[] = { 4096, -497, 15, 0 };
+  int16_t refl_result[] = { -3962, 123, 0, 0 };
+
+  EXPECT_EQ(0, WebRtcSpl_LevinsonDurbin(unstable_filter, lpc, refl, kOrder));
+  EXPECT_EQ(1, WebRtcSpl_LevinsonDurbin(stable_filter, lpc, refl, kOrder));
+  for (size_t i = 0; i < kOrder + 2; ++i) {  // compares the outputs of the second (stable) call
+    EXPECT_EQ(lpc_result[i], lpc[i]);
+    EXPECT_EQ(refl_result[i], refl[i]);
+  }
+}
+
+TEST_F(SplTest, FilterTest) {  // FilterMAFastQ12, FilterARFastQ12 and FilterAR on 4-sample buffers
+    const size_t kVectorSize = 4;
+    const size_t kFilterOrder = 3;
+    int16_t A[] = {1, 2, 33, 100};
+    int16_t A5[] = {1, 2, 33, 100, -5};
+    int16_t B[] = {4, 12, 133, 110};
+    int16_t data_in[kVectorSize];
+    int16_t data_out[kVectorSize];
+    int16_t bTmp16Low[kVectorSize];
+    int16_t bState[kVectorSize];
+    int16_t bStateLow[kVectorSize];
+
+    WebRtcSpl_ZerosArrayW16(bState, kVectorSize);
+    WebRtcSpl_ZerosArrayW16(bStateLow, kVectorSize);
+
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        data_in[kk] = A[kk];
+        data_out[kk] = 0;
+    }
+
+    // MA filters.
+    // Note that the input data has |kFilterOrder| states before the actual
+    // data (one sample).
+    WebRtcSpl_FilterMAFastQ12(&data_in[kFilterOrder], data_out, B,
+                              kFilterOrder + 1, 1);
+    EXPECT_EQ(0, data_out[0]);
+    // AR filters.
+    // Note that the output data has |kFilterOrder| states before the actual
+    // data (one sample).
+    WebRtcSpl_FilterARFastQ12(data_in, &data_out[kFilterOrder], A,
+                              kFilterOrder + 1, 1);
+    EXPECT_EQ(0, data_out[kFilterOrder]);
+    // Only the return value of WebRtcSpl_FilterAR is checked below.
+    EXPECT_EQ(kVectorSize, WebRtcSpl_FilterAR(A5,
+                                              5,
+                                              data_in,
+                                              kVectorSize,
+                                              bState,
+                                              kVectorSize,
+                                              bStateLow,
+                                              kVectorSize,
+                                              data_out,
+                                              bTmp16Low,
+                                              kVectorSize));
+}
+
+TEST_F(SplTest, RandTest) {  // RandU, RandN and RandUArray from seed 100000 against fixed reference values
+    const int kVectorSize = 4;
+    int16_t BU[] = {3653, 12446, 8525, 30691};
+    int16_t b16[kVectorSize];
+    uint32_t bSeed = 100000;
+
+    EXPECT_EQ(7086, WebRtcSpl_RandU(&bSeed));
+    EXPECT_EQ(31565, WebRtcSpl_RandU(&bSeed));
+    EXPECT_EQ(-9786, WebRtcSpl_RandN(&bSeed));
+    EXPECT_EQ(kVectorSize, WebRtcSpl_RandUArray(b16, kVectorSize, &bSeed));
+    for (int kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(BU[kk], b16[kk]);
+    }
+}
+
+TEST_F(SplTest, DotProductWithScaleTest) {  // dot product of |vector16| with itself, last argument 2
+  EXPECT_EQ(605362796, WebRtcSpl_DotProductWithScale(vector16,
+      vector16, kVector16Size, 2));
+}
+
+TEST_F(SplTest, CrossCorrelationTest) {
+  // Note the function arguments relation specified by API.
+  const size_t kCrossCorrelationDimension = 3;
+  const int kShift = 2;
+  const int kStep = 1;
+  const size_t kSeqDimension = 6;
+
+  const int16_t kVector16[kVector16Size] = {1, 4323, 1963,
+    WEBRTC_SPL_WORD16_MAX, WEBRTC_SPL_WORD16_MIN + 5, -3333, -876, 8483, 142};
+  int32_t vector32[kCrossCorrelationDimension] = {0};
+
+  WebRtcSpl_CrossCorrelation(vector32, vector16, kVector16, kSeqDimension,
+                             kCrossCorrelationDimension, kShift, kStep);
+
+  // WebRtcSpl_CrossCorrelationC() and WebRtcSpl_CrossCorrelationNeon()
+  // are not bit-exact, so the reference depends on which one is installed.
+  const int32_t kExpected[kCrossCorrelationDimension] =
+      {-266947903, -15579555, -171282001};
+  const int32_t* expected = kExpected;
+#if !defined(MIPS32_LE)
+  const int32_t kExpectedNeon[kCrossCorrelationDimension] =
+      {-266947901, -15579553, -171281999};
+  if (WebRtcSpl_CrossCorrelation != WebRtcSpl_CrossCorrelationC) {  // a non-C implementation is installed
+    expected = kExpectedNeon;
+  }
+#endif
+  for (size_t i = 0; i < kCrossCorrelationDimension; ++i) {
+    EXPECT_EQ(expected[i], vector32[i]);
+  }
+}
+
+TEST_F(SplTest, AutoCorrelationTest) {  // checks the return value, |scale| and all kVector16Size output values
+  int scale = 0;
+  int32_t vector32[kVector16Size];
+  const int32_t expected[kVector16Size] = {302681398, 14223410, -121705063,
+    -85221647, -17104971, 61806945, 6644603, -669329, 43};
+
+  EXPECT_EQ(kVector16Size,
+            WebRtcSpl_AutoCorrelation(vector16, kVector16Size,
+                                      kVector16Size - 1, vector32, &scale));
+  EXPECT_EQ(3, scale);
+  for (size_t i = 0; i < kVector16Size; ++i) {
+    EXPECT_EQ(expected[i], vector32[i]);
+  }
+}
+
+TEST_F(SplTest, SignalProcessingTest) {  // GetHanningWindow and Energy; reflection-coefficient checks are disabled
+    const size_t kVectorSize = 4;
+    int A[] = {1, 2, 33, 100};
+    const int16_t kHanning[4] = { 2399, 8192, 13985, 16384 };
+    int16_t b16[kVectorSize];
+
+    int16_t bTmp16[kVectorSize];
+
+    int bScale = 0;
+
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        b16[kk] = A[kk];
+    }
+
+    // TODO(bjornv): Activate the Reflection Coefficient tests when refactoring.
+//    WebRtcSpl_ReflCoefToLpc(b16, kVectorSize, bTmp16);
+////    for (int kk = 0; kk < kVectorSize; ++kk) {
+////        EXPECT_EQ(aTmp16[kk], bTmp16[kk]);
+////    }
+//    WebRtcSpl_LpcToReflCoef(bTmp16, kVectorSize, b16);
+////    for (int kk = 0; kk < kVectorSize; ++kk) {
+////        EXPECT_EQ(a16[kk], b16[kk]);
+////    }
+//    WebRtcSpl_AutoCorrToReflCoef(b32, kVectorSize, bTmp16);
+////    for (int kk = 0; kk < kVectorSize; ++kk) {
+////        EXPECT_EQ(aTmp16[kk], bTmp16[kk]);
+////    }
+
+    WebRtcSpl_GetHanningWindow(bTmp16, kVectorSize);
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        EXPECT_EQ(kHanning[kk], bTmp16[kk]);
+    }
+
+    for (size_t kk = 0; kk < kVectorSize; ++kk) {
+        b16[kk] = A[kk];
+    }
+    EXPECT_EQ(11094 , WebRtcSpl_Energy(b16, kVectorSize, &bScale));  // 1 + 4 + 1089 + 10000
+    EXPECT_EQ(0, bScale);
+}
+
+TEST_F(SplTest, FFTTest) {  // only return codes are checked; the element-wise comparisons are commented out
+    int16_t B[] = {1, 2, 33, 100,
+            2, 3, 34, 101,
+            3, 4, 35, 102,
+            4, 5, 36, 103};
+
+    EXPECT_EQ(0, WebRtcSpl_ComplexFFT(B, 3, 1));
+//    for (int kk = 0; kk < 16; ++kk) {
+//        EXPECT_EQ(A[kk], B[kk]);
+//    }
+    EXPECT_EQ(0, WebRtcSpl_ComplexIFFT(B, 3, 1));
+//    for (int kk = 0; kk < 16; ++kk) {
+//        EXPECT_EQ(A[kk], B[kk]);
+//    }
+    WebRtcSpl_ComplexBitReverse(B, 3);
+    for (int kk = 0; kk < 16; ++kk) {  // loop body is empty: its only statement is commented out
+        //EXPECT_EQ(A[kk], B[kk]);
+    }
+}
+
+TEST_F(SplTest, Resample48WithSaturationTest) {
+  // The test resamples 3*kBlockSize number of samples to 2*kBlockSize number
+  // of samples.
+  const size_t kBlockSize = 16;
+
+  // Saturated input vector: 3 * kBlockSize + 7 = 55 samples (24 at -32768, then 31 at 32767).
+  const int32_t kVectorSaturated[3 * kBlockSize + 7] = {
+     -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768,
+     -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768,
+     -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768,
+     32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+     32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+     32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+     32767, 32767, 32767, 32767, 32767, 32767, 32767
+  };
+
+  // Expected |out_vector| values: |kRefValue32kHz1| for the first 12 outputs, |kRefValue32kHz2| from index 16 on.
+  const int32_t kRefValue32kHz1 = -1077493760;
+  const int32_t kRefValue32kHz2 = 1077493645;
+
+  // After bit shift with saturation, |out_vector_w16| is saturated.
+
+  const int16_t kRefValue16kHz1 = -32768;
+  const int16_t kRefValue16kHz2 = 32767;
+  // Vector for storing output.
+  int32_t out_vector[2 * kBlockSize];
+  int16_t out_vector_w16[2 * kBlockSize];
+
+  WebRtcSpl_Resample48khzTo32khz(kVectorSaturated, out_vector, kBlockSize);
+  WebRtcSpl_VectorBitShiftW32ToW16(out_vector_w16, 2 * kBlockSize, out_vector,
+                                   15);
+
+  // Comparing output values against references. The values at position
+  // 12-15 are skipped to account for the filter lag.
+  for (size_t i = 0; i < 12; ++i) {
+    EXPECT_EQ(kRefValue32kHz1, out_vector[i]);
+    EXPECT_EQ(kRefValue16kHz1, out_vector_w16[i]);
+  }
+  for (size_t i = 16; i < 2 * kBlockSize; ++i) {
+    EXPECT_EQ(kRefValue32kHz2, out_vector[i]);
+    EXPECT_EQ(kRefValue16kHz2, out_vector_w16[i]);
+  }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_init.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_init.c
@ -0,0 +1,140 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* The global function contained in this file initializes SPL function
+ * pointers, currently only for ARM platforms.
+ *
+ * Some code came from common/rtcd.c in the WebM project.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+
+/* Define the global SPL function pointers. As file-scope objects they start
+ * out zero-initialized (null); InitFunctionPointers() below assigns each one
+ * the C, NEON or MIPS implementation selected for this build. */
+MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
+MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
+MaxValueW16 WebRtcSpl_MaxValueW16;
+MaxValueW32 WebRtcSpl_MaxValueW32;
+MinValueW16 WebRtcSpl_MinValueW16;
+MinValueW32 WebRtcSpl_MinValueW32;
+CrossCorrelation WebRtcSpl_CrossCorrelation;
+DownsampleFast WebRtcSpl_DownsampleFast;
+ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
+
+#if (defined(WEBRTC_DETECT_NEON) || !defined(WEBRTC_HAS_NEON)) && \
+    !defined(MIPS32_LE)
+/* Initialize function pointers to the generic C version. */
+/* Points every SPL function pointer at its generic C implementation.
+ * Declared with a (void) parameter list so the definition is a proper
+ * prototype in C, matching InitFunctionPointers(void). */
+static void InitPointersToC(void) {
+  /* Min/max searches. */
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;
+  /* Vector operations. */
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+}
+#endif
+
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+/* Initialize function pointers to the Neon version. */
+/* Points the SPL function pointers at their NEON implementations.
+ * Declared with a (void) parameter list so the definition is a proper
+ * prototype in C, matching InitFunctionPointers(void). */
+static void InitPointersToNeon(void) {
+  /* Min/max searches. */
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;
+  /* Vector operations. */
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
+  /* This one is assigned the generic C version, not a NEON one. */
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+}
+#endif
+
+#if defined(MIPS32_LE)
+/* Initialize function pointers to the MIPS version. */
+/* Points the SPL function pointers at their MIPS implementations.
+ * Declared with a (void) parameter list so the definition is a proper
+ * prototype in C, matching InitFunctionPointers(void). */
+static void InitPointersToMIPS(void) {
+  /* Assigned the _mips version whenever MIPS32_LE is defined. */
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
+  /* These two use the _mips version only when MIPS_DSP_R1_LE is defined;
+   * otherwise they fall back to the generic C version. */
+#if defined(MIPS_DSP_R1_LE)
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRound_mips;
+#else
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+#endif
+}
+#endif
+
+/* Selects the implementation set for the SPL function pointers:
+ *  - WEBRTC_DETECT_NEON: decided at run time from the ARM CPU feature flags;
+ *  - WEBRTC_HAS_NEON:    always NEON;
+ *  - MIPS32_LE:          always MIPS;
+ *  - otherwise:          generic C. */
+static void InitFunctionPointers(void) {
+#if defined(WEBRTC_DETECT_NEON)
+  const int has_neon = (WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0;
+
+  if (!has_neon) {
+    InitPointersToC();
+  } else {
+    InitPointersToNeon();
+  }
+#elif defined(WEBRTC_HAS_NEON)
+  InitPointersToNeon();
+#elif defined(MIPS32_LE)
+  InitPointersToMIPS();
+#else
+  InitPointersToC();
+#endif  /* WEBRTC_DETECT_NEON */
+}
+
+#if defined(WEBRTC_POSIX)
+#include <pthread.h>
+
+/* Hands |func| to pthread_once() together with a single function-local
+ * control object, so the control object is shared by every call of once(). */
+static void once(void (*func)(void)) {
+  static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+
+  (void)pthread_once(&once_control, func);
+}
+
+#elif defined(_WIN32)
+#include <windows.h>
+
+/* Runs |func| the first time once() is called; later calls only take and
+ * release the lock. */
+static void once(void (*func)(void)) {
+  /* The critical section is initialized statically rather than through
+   * InitializeCriticalSection(), since there's no race-free context in which
+   * to execute that call.
+   *
+   * TODO(kma): Change to different implementation (e.g.
+   * InterlockedCompareExchangePointer) to avoid issues similar to
+   * http://code.google.com/p/webm/issues/detail?id=467.
+   */
+  static CRITICAL_SECTION init_lock = {(void *)((size_t)-1), -1, 0, 0, 0, 0};
+  static int initialized = 0;
+
+  EnterCriticalSection(&init_lock);
+  if (initialized == 0) {
+    func();
+    initialized = 1;
+  }
+  LeaveCriticalSection(&init_lock);
+}
+
+/* There is deliberately no fallback version in an #else block here, to
+ * ensure thread safety. If neither WEBRTC_POSIX (pthread) nor _WIN32 is
+ * defined, once() is left undefined, so the build system should pick it up.
+ */
+#endif  /* WEBRTC_POSIX */
+
+/* Initializes the SPL function pointers by passing InitFunctionPointers to
+ * once(). The (void) parameter list makes this definition a proper prototype
+ * in C. */
+void WebRtcSpl_Init(void) {
+  once(InitFunctionPointers);
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt.c
@ -0,0 +1,184 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_Sqrt().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <assert.h>
+
+int32_t WebRtcSpl_SqrtLocal(int32_t in);
+
+/*
+ * Evaluates the polynomial |t| described in the block comment below on the
+ * fixed-point value |in| and returns it with a rounding bit (32768) added.
+ * WebRtcSpl_Sqrt() calls this with the absolute value of its normalized
+ * input and uses the upper 16 bits of the result.
+ */
+int32_t WebRtcSpl_SqrtLocal(int32_t in)
+{
+
+    int16_t x_half, t16;
+    int32_t A, B, x2;
+
+    /* The following block performs:
+     y=in/2
+     x=y-2^30
+     x_half=x/2^31
+     t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
+         + 0.875*((x_half)^5)
+     */
+
+    B = in / 2;
+
+    B = B - ((int32_t)0x40000000); // B = in/2 - 1/2
+    x_half = (int16_t)(B >> 16);  // x_half = x/2 = (in-1)/2
+    B = B + ((int32_t)0x40000000); // B = 1 + x/2
+    B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31)
+
+    x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // x2 = (x/2)^2
+    A = -x2; // A = -(x/2)^2
+    B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2
+
+    A >>= 16;
+    A = A * A * 2; // A = (x/2)^4
+    t16 = (int16_t)(A >> 16);
+    B += -20480 * t16 * 2;  // B = B - 0.625*A
+    // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4
+
+    A = x_half * t16 * 2;  // A = (x/2)^5
+    t16 = (int16_t)(A >> 16);
+    B += 28672 * t16 * 2;  // B = B + 0.875*A
+    // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5
+
+    t16 = (int16_t)(x2 >> 16);  // upper half of x2 = (x/2)^2
+    A = x_half * t16 * 2;  // A = (x/2)^3
+
+    B = B + (A >> 1); // B = B + 0.5*A
+    // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5
+
+    B = B + ((int32_t)32768); // Round off bit
+
+    return B;
+}
+
+/*
+ * Fixed-point square root.
+ *
+ * Input:
+ *    - value : 32-bit signed integer. A negative value is replaced by its
+ *              absolute value before the root is taken; the most negative
+ *              value, which has no positive counterpart in int32_t, is
+ *              treated as WEBRTC_SPL_WORD32_MAX.
+ *
+ * Return value: approximation of sqrt(abs(value)); 0 for |value| == 0.
+ */
+int32_t WebRtcSpl_Sqrt(int32_t value)
+{
+    /*
+     Algorithm:
+
+     Six term Taylor Series is used here to compute the square root of a number
+     y^0.5 = (1+x)^0.5 where x = y-1
+     = 1+(x/2)-0.5*((x/2)^2)+0.5*((x/2)^3)-0.625*((x/2)^4)+0.875*((x/2)^5)
+     0.5 <= y < 1
+
+     Example of how the algorithm works, with ut=sqrt(in), and
+     with in=73632 and ut=271 (even shift value case):
+
+     in=73632
+     y= in/131072
+     x=y-1
+     t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
+     ut=t*(1/sqrt(2))*512
+
+     or:
+
+     in=73632
+     in2=73632*2^14
+     y= in2/2^31
+     x=y-1
+     t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
+     ut=t*(1/sqrt(2))
+     ut2=ut*2^9
+
+     which gives:
+
+     in  = 73632
+     in2 = 1206386688
+     y   = 0.56176757812500
+     x   = -0.43823242187500
+     t   = 0.74973506527313
+     ut  = 0.53014274874797
+     ut2 = 2.714330873589594e+002
+
+     or:
+
+     in=73632
+     in2=73632*2^14
+     y=in2/2
+     x=y-2^30
+     x_half=x/2^31
+     t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
+         + 0.875*((x_half)^5)
+     ut=t*(1/sqrt(2))
+     ut2=ut*2^9
+
+     which gives:
+
+     in  = 73632
+     in2 = 1206386688
+     y   = 603193344
+     x   = -470548480
+     x_half =  -0.21911621093750
+     t   = 0.74973506527313
+     ut  = 0.53014274874797
+     ut2 = 2.714330873589594e+002
+
+     */
+
+    int16_t x_norm, nshift, t16, sh;
+    int32_t A;
+
+    int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82)
+
+    A = value;
+
+    // The rest of the function computes sqrt(abs(A)). Negate a negative input
+    // here, before normalizing: left-shifting a negative value and taking the
+    // absolute value afterwards can overflow (e.g. -4 normalizes to the most
+    // negative int32_t, whose negation is not representable).
+    if (A < 0)
+    {
+        if (A < -WEBRTC_SPL_WORD32_MAX)
+        {
+            // Most negative int32_t: cannot be negated, map it to the
+            // maximum positive value instead.
+            A = WEBRTC_SPL_WORD32_MAX;
+        } else
+        {
+            A = -A;
+        }
+    } else if (A == 0)
+    {
+        return (int32_t)0; // sqrt(0) = 0
+    }
+
+    sh = WebRtcSpl_NormW32(A); // # shifts to normalize A
+    A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A
+    if (A < (WEBRTC_SPL_WORD32_MAX - 32767))
+    {
+        A = A + ((int32_t)32768); // Round off bit
+    } else
+    {
+        A = WEBRTC_SPL_WORD32_MAX; // Adding the round off bit would overflow
+    }
+
+    x_norm = (int16_t)(A >> 16);  // x_norm = AH
+
+    nshift = (sh / 2);
+    assert(nshift >= 0);
+
+    A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16);
+    A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16)
+    A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A)
+
+    if (2 * nshift == sh) {
+        // Even shift value case
+
+        t16 = (int16_t)(A >> 16);  // t16 = AH
+
+        A = k_sqrt_2 * t16 * 2;  // A = 1/sqrt(2)*t16
+        A = A + ((int32_t)32768); // Round off
+        A = A & ((int32_t)0x7fff0000); // Round off
+
+        A >>= 15;  // A = A>>15
+
+    } else
+    {
+        A >>= 16;  // A = A>>16
+    }
+
+    A = A & ((int32_t)0x0000ffff);
+    A >>= nshift;  // De-normalize the result.
+
+    return A;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c
@ -0,0 +1,77 @@
+/*
+ * Written by Wilco Dijkstra, 1996. The following email exchange establishes the
+ * license.
+ *
+ * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
+ * Date: Fri, Jun 24, 2011 at 3:20 AM
+ * Subject: Re: sqrt routine
+ * To: Kevin Ma <kma@google.com>
+ * Hi Kevin,
+ * Thanks for asking. Those routines are public domain (originally posted to
+ * comp.sys.arm a long time ago), so you can use them freely for any purpose.
+ * Cheers,
+ * Wilco
+ *
+ * ----- Original Message -----
+ * From: "Kevin Ma" <kma@google.com>
+ * To: <Wilco.Dijkstra@ntlworld.com>
+ * Sent: Thursday, June 23, 2011 11:44 PM
+ * Subject: Fwd: sqrt routine
+ * Hi Wilco,
+ * I saw your sqrt routine from several web sites, including
+ * http://www.finesse.demon.co.uk/steven/sqrt.html.
+ * Just wonder if there's any copyright information with your Successive
+ * approximation routines, or if I can freely use it for any purpose.
+ * Thanks.
+ * Kevin
+ */
+
+// Minor modifications in code style for WebRTC, 2012.
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/*
+ * Algorithm:
+ * Successive approximation of the equation (root + delta) ^ 2 = N
+ * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
+ * Use delta = 2^i for i = 15 .. 0.
+ *
+ * Output precision is 16 bits. Note for large input values (close to
+ * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
+ * contains the MSB information (a non-sign value). Do with caution
+ * if you need to cast the output to int16_t type.
+ *
+ * If the input value is negative, it returns 0.
+ */
+
+int32_t WebRtcSpl_SqrtFloor(int32_t value)
+{
+  // |root_x2| accumulates twice the result: accepting step |bit| sets bit
+  // (bit + 1), and the final value is shifted right by one.
+  int32_t root_x2 = 0;
+  int bit;
+
+  for (bit = 15; bit >= 0; --bit)
+  {
+    // Amount that must still fit in |value| for this bit to be accepted.
+    const int32_t trial = (root_x2 + (1 << bit)) << bit;
+
+    if (value >= trial)
+    {
+      value -= trial;
+      root_x2 |= 2 << bit;
+    }
+  }
+
+  return root_x2 >> 1;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S
@ -0,0 +1,110 @@
+@
+@ Written by Wilco Dijkstra, 1996. The following email exchange establishes the
+@ license.
+@
+@ From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
+@ Date: Fri, Jun 24, 2011 at 3:20 AM
+@ Subject: Re: sqrt routine
+@ To: Kevin Ma <kma@google.com>
+@ Hi Kevin,
+@ Thanks for asking. Those routines are public domain (originally posted to
+@ comp.sys.arm a long time ago), so you can use them freely for any purpose.
+@ Cheers,
+@ Wilco
+@
+@ ----- Original Message -----
+@ From: "Kevin Ma" <kma@google.com>
+@ To: <Wilco.Dijkstra@ntlworld.com>
+@ Sent: Thursday, June 23, 2011 11:44 PM
+@ Subject: Fwd: sqrt routine
+@ Hi Wilco,
+@ I saw your sqrt routine from several web sites, including
+@ http://www.finesse.demon.co.uk/steven/sqrt.html.
+@ Just wonder if there's any copyright information with your Successive
+@ approximation routines, or if I can freely use it for any purpose.
+@ Thanks.
+@ Kevin
+
+@ Minor modifications in code style for WebRTC, 2012.
+@ Output is bit-exact with the reference C code in spl_sqrt_floor.c.
+
+@ Input :             r0 32 bit unsigned integer
+@ Output:             r0 = INT (SQRT (r0)), precision is 16 bits
+@ Registers touched:  r1, r2
+
+#include "webrtc/system_wrappers/interface/asm_defines.h"
+
+GLOBAL_FUNCTION WebRtcSpl_SqrtFloor
+.align  2
+DEFINE_FUNCTION WebRtcSpl_SqrtFloor
+  @ r1 is the constant added by every adc below; r2 is the first trial value.
+  mov    r1, #3 << 30
+  mov    r2, #1 << 30
+
+  @ unroll for i = 0 .. 15
+  @ Step i: compare r0 with r2 rotated right by 2 * i, subtract that rotated
+  @ value from r0 when r0 is unsigned higher-or-same (hs), then set
+  @ r2 = r1 + (r2 << 1) + carry, the carry being the one left by the cmp.
+
+  cmp    r0, r2, ror #2 * 0
+  subhs  r0, r0, r2, ror #2 * 0
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 1
+  subhs  r0, r0, r2, ror #2 * 1
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 2
+  subhs  r0, r0, r2, ror #2 * 2
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 3
+  subhs  r0, r0, r2, ror #2 * 3
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 4
+  subhs  r0, r0, r2, ror #2 * 4
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 5
+  subhs  r0, r0, r2, ror #2 * 5
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 6
+  subhs  r0, r0, r2, ror #2 * 6
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 7
+  subhs  r0, r0, r2, ror #2 * 7
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 8
+  subhs  r0, r0, r2, ror #2 * 8
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 9
+  subhs  r0, r0, r2, ror #2 * 9
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 10
+  subhs  r0, r0, r2, ror #2 * 10
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 11
+  subhs  r0, r0, r2, ror #2 * 11
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 12
+  subhs  r0, r0, r2, ror #2 * 12
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 13
+  subhs  r0, r0, r2, ror #2 * 13
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 14
+  subhs  r0, r0, r2, ror #2 * 14
+  adc    r2, r1, r2, lsl #1
+
+  cmp    r0, r2, ror #2 * 15
+  subhs  r0, r0, r2, ror #2 * 15
+  adc    r2, r1, r2, lsl #1
+
+  @ Clear the top two bits of r2 and return the remaining bits in r0.
+  bic    r0, r2, #3 << 30  @ for rounding add: cmp r0, r2  adc r2, #1
+  bx lr
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c
@ -0,0 +1,207 @@
+/*
+ * Written by Wilco Dijkstra, 1996. The following email exchange establishes the
+ * license.
+ *
+ * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
+ * Date: Fri, Jun 24, 2011 at 3:20 AM
+ * Subject: Re: sqrt routine
+ * To: Kevin Ma <kma@google.com>
+ * Hi Kevin,
+ * Thanks for asking. Those routines are public domain (originally posted to
+ * comp.sys.arm a long time ago), so you can use them freely for any purpose.
+ * Cheers,
+ * Wilco
+ *
+ * ----- Original Message -----
+ * From: "Kevin Ma" <kma@google.com>
+ * To: <Wilco.Dijkstra@ntlworld.com>
+ * Sent: Thursday, June 23, 2011 11:44 PM
+ * Subject: Fwd: sqrt routine
+ * Hi Wilco,
+ * I saw your sqrt routine from several web sites, including
+ * http://www.finesse.demon.co.uk/steven/sqrt.html.
+ * Just wonder if there's any copyright information with your Successive
+ * approximation routines, or if I can freely use it for any purpose.
+ * Thanks.
+ * Kevin
+ */
+
+// Minor modifications in code style for WebRTC, 2012.
+// Code optimizations for MIPS, 2013.
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/*
+ * Algorithm:
+ * Successive approximation of the equation (root + delta) ^ 2 = N
+ * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
+ * Use delta = 2^i for i = 15 .. 0.
+ *
+ * Output precision is 16 bits. Note for large input values (close to
+ * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
+ * contains the MSB information (a non-sign value). Do with caution
+ * if you need to cast the output to int16_t type.
+ *
+ * If the input value is negative, it returns 0.
+ */
+
+
+/*
+ * MIPS version of WebRtcSpl_SqrtFloor(). The asm block below is the 16 steps
+ * N = 15 .. 0 of the successive approximation, fully unrolled. In each step
+ *   tmp1 = (root + (1 << N)) << N      trial value
+ *   tmp2 = (value < tmp1)              1 when the trial does not fit
+ *   tmp3 = value - tmp1                candidate remainder
+ *   tmp4 = root | (2 << N)             candidate root
+ * and the two movz instructions commit tmp3/tmp4 only when tmp2 == 0.
+ * |root| holds twice the result, hence the final shift right by one.
+ *
+ * All subtractions use the non-trapping "subu". A trapping "sub" in the
+ * first step would raise an Integer Overflow exception for inputs below
+ * -2^30 instead of returning 0 as documented for negative input.
+ */
+int32_t WebRtcSpl_SqrtFloor(int32_t value)
+{
+  int32_t root = 0, tmp1, tmp2, tmp3, tmp4;
+
+  __asm __volatile(
+    ".set   push                                       \n\t"
+    ".set   noreorder                                  \n\t"
+
+    /* N = 15: root is still 0, so the trial value is the constant 1 << 30. */
+    "lui    %[tmp1],      0x4000                       \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "lui    %[tmp1],      0x1                          \n\t"
+    "or     %[tmp4],      %[root],      %[tmp1]        \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 14 */
+    "addiu  %[tmp1],      $0,           0x4000         \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      14                           \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x8000         \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 13 */
+    "addiu  %[tmp1],      $0,           0x2000         \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      13                           \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x4000         \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 12 */
+    "addiu  %[tmp1],      $0,           0x1000         \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      12                           \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x2000         \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 11 */
+    "addiu  %[tmp1],      $0,           0x800          \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      11                           \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x1000         \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 10 */
+    "addiu  %[tmp1],      $0,           0x400          \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      10                           \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x800          \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 9 */
+    "addiu  %[tmp1],      $0,           0x200          \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      9                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],       0x400         \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 8 */
+    "addiu  %[tmp1],      $0,           0x100          \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      8                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x200          \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 7 */
+    "addiu  %[tmp1],      $0,           0x80           \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      7                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x100          \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 6 */
+    "addiu  %[tmp1],      $0,           0x40           \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      6                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x80           \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 5 */
+    "addiu  %[tmp1],      $0,           0x20           \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      5                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x40           \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 4 */
+    "addiu  %[tmp1],      $0,           0x10           \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      4                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x20           \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 3 */
+    "addiu  %[tmp1],      $0,           0x8            \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      3                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x10           \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 2 */
+    "addiu  %[tmp1],      $0,           0x4            \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      2                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x8            \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 1 */
+    "addiu  %[tmp1],      $0,           0x2            \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "sll    %[tmp1],      1                            \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x4            \n\t"
+    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    /* N = 0: last step, so |value| is not updated any more. */
+    "addiu  %[tmp1],      $0,           0x1            \n\t"
+    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
+    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
+    "ori    %[tmp4],      %[root],      0x2            \n\t"
+    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"
+
+    ".set   pop                                        \n\t"
+
+    : [root] "+r" (root), [value] "+r" (value),
+      [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
+      [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
+    :
+  );
+
+  return root >> 1;
+}
+
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c
@ -0,0 +1,208 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file contains the splitting filter functions.
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include <assert.h>
+
+// Maximum number of samples in a low/high-band frame. Sizes the scratch
+// buffers in WebRtcSpl_AnalysisQMF() and WebRtcSpl_SynthesisQMF().
+enum
+{
+    kMaxBandFrameLength = 320  // 10 ms at 64 kHz.
+};
+
+// QMF filter coefficients in Q16. Each table holds the three coefficients
+// that WebRtcSpl_AllPassQMF() applies, one per cascaded all-pass section.
+static const uint16_t WebRtcSpl_kAllPassFilter1[3] = {6418, 36982, 57261};
+static const uint16_t WebRtcSpl_kAllPassFilter2[3] = {21333, 49062, 63010};
+
+///////////////////////////////////////////////////////////////////////////////////////////////
+// WebRtcSpl_AllPassQMF(...)
+//
+// Allpass filter used by the analysis and synthesis parts of the QMF filter.
+//
+// Input:
+//    - in_data             : Input data sequence (Q10)
+//    - data_length         : Length of data sequence (>2)
+//    - filter_coefficients : Filter coefficients (length 3, Q16)
+//
+// Input & Output:
+//    - filter_state        : Filter state (length 6, Q10).
+//
+// Output:
+//    - out_data            : Output data sequence (Q10), length equal to
+//                            |data_length|
+//
+
+// Runs one first-order all-pass section
+//
+//     dst[n] = src[n-1] + coefficient * (src[n] - dst[n-1])
+//
+// over |length| samples. |state[0]| supplies src[-1] and |state[1]| supplies
+// dst[-1]; on return they hold src[length-1] and dst[length-1] for the next
+// call. |src| and |dst| must be different buffers.
+static void AllPassQmfSection(const int32_t* src, size_t length, int32_t* dst,
+                              uint16_t coefficient, int32_t* state)
+{
+    size_t n;
+    int32_t diff;
+
+    // First sample: the previous input/output come from the stored state.
+    diff = WebRtcSpl_SubSatW32(src[0], state[1]);
+    dst[0] = WEBRTC_SPL_SCALEDIFF32(coefficient, diff, state[0]);
+
+    // Remaining samples: the previous input/output come from the buffers.
+    for (n = 1; n < length; n++)
+    {
+        diff = WebRtcSpl_SubSatW32(src[n], dst[n - 1]);
+        dst[n] = WEBRTC_SPL_SCALEDIFF32(coefficient, diff, src[n - 1]);
+    }
+
+    // Remember the last input and output sample for the next call.
+    state[0] = src[length - 1];
+    state[1] = dst[length - 1];
+}
+
+void WebRtcSpl_AllPassQMF(int32_t* in_data, size_t data_length,
+                          int32_t* out_data, const uint16_t* filter_coefficients,
+                          int32_t* filter_state)
+{
+    // The input is filtered by a cascade of three first-order all-pass
+    // sections:
+    //
+    //         a_3 + q^-1    a_2 + q^-1    a_1 + q^-1
+    // y[n] =  -----------   -----------   -----------   x[n]
+    //         1 + a_3q^-1   1 + a_2q^-1   1 + a_1q^-1
+    //
+    // |filter_coefficients| holds a_1, a_2, a_3. |filter_state| holds, for
+    // each section in turn, the previous input sample followed by the
+    // previous output sample (six values in total).
+    //
+    // To save memory the sections ping-pong between the two buffers:
+    // in_data -> out_data -> in_data -> out_data. The contents of |in_data|
+    // are therefore overwritten; the final result ends up in |out_data|.
+
+    // Section 1 (a_1): x -> y_1.
+    AllPassQmfSection(in_data, data_length, out_data,
+                      filter_coefficients[0], filter_state);
+    // Section 2 (a_2): y_1 -> y_2, stored in |in_data|.
+    AllPassQmfSection(out_data, data_length, in_data,
+                      filter_coefficients[1], filter_state + 2);
+    // Section 3 (a_3): y_2 -> y.
+    AllPassQmfSection(in_data, data_length, out_data,
+                      filter_coefficients[2], filter_state + 4);
+}
+
+// Splits |in_data| (|in_data_length| samples, an even count) into a low band
+// and a high band of in_data_length / 2 samples each. |filter_state1| and
+// |filter_state2| are the all-pass filter states for the odd-sample and the
+// even-sample branch, respectively, and are updated in place.
+void WebRtcSpl_AnalysisQMF(const int16_t* in_data, size_t in_data_length,
+                           int16_t* low_band, int16_t* high_band,
+                           int32_t* filter_state1, int32_t* filter_state2)
+{
+    int32_t odd_q10[kMaxBandFrameLength];
+    int32_t even_q10[kMaxBandFrameLength];
+    int32_t odd_filtered[kMaxBandFrameLength];
+    int32_t even_filtered[kMaxBandFrameLength];
+    const size_t band_length = in_data_length / 2;
+    size_t n;
+
+    assert(in_data_length % 2 == 0);
+    assert(band_length <= kMaxBandFrameLength);
+
+    // De-interleave into even and odd samples, shifted up to Q10.
+    for (n = 0; n < band_length; n++)
+    {
+        even_q10[n] = WEBRTC_SPL_LSHIFT_W32((int32_t)in_data[2 * n], 10);
+        odd_q10[n] = WEBRTC_SPL_LSHIFT_W32((int32_t)in_data[2 * n + 1], 10);
+    }
+
+    // All-pass filter each branch independently.
+    WebRtcSpl_AllPassQMF(odd_q10, band_length, odd_filtered,
+                         WebRtcSpl_kAllPassFilter1, filter_state1);
+    WebRtcSpl_AllPassQMF(even_q10, band_length, even_filtered,
+                         WebRtcSpl_kAllPassFilter2, filter_state2);
+
+    // Sum of the branches gives the low band, their difference the high
+    // band. Both are rounded, shifted down by 11 bits and saturated to 16
+    // bits.
+    for (n = 0; n < band_length; n++)
+    {
+        const int32_t sum = (odd_filtered[n] + even_filtered[n] + 1024) >> 11;
+        const int32_t difference =
+            (odd_filtered[n] - even_filtered[n] + 1024) >> 11;
+
+        low_band[n] = WebRtcSpl_SatW32ToW16(sum);
+        high_band[n] = WebRtcSpl_SatW32ToW16(difference);
+    }
+}
+
+// Recombines |low_band| and |high_band| (|band_length| samples each) into
+// 2 * band_length interleaved samples in |out_data|. |filter_state1| and
+// |filter_state2| are the all-pass filter states for the sum and the
+// difference channel, respectively, and are updated in place.
+void WebRtcSpl_SynthesisQMF(const int16_t* low_band, const int16_t* high_band,
+                            size_t band_length, int16_t* out_data,
+                            int32_t* filter_state1, int32_t* filter_state2)
+{
+    int32_t sum_q10[kMaxBandFrameLength];
+    int32_t diff_q10[kMaxBandFrameLength];
+    int32_t sum_filtered[kMaxBandFrameLength];
+    int32_t diff_filtered[kMaxBandFrameLength];
+    int32_t tmp;
+    size_t n;
+
+    assert(band_length <= kMaxBandFrameLength);
+
+    // Form the sum and difference channels from the two bands, shifted up to
+    // Q10.
+    for (n = 0; n < band_length; n++)
+    {
+        tmp = (int32_t)low_band[n] + (int32_t)high_band[n];
+        sum_q10[n] = WEBRTC_SPL_LSHIFT_W32(tmp, 10);
+        tmp = (int32_t)low_band[n] - (int32_t)high_band[n];
+        diff_q10[n] = WEBRTC_SPL_LSHIFT_W32(tmp, 10);
+    }
+
+    // All-pass filter the sum and difference channels.
+    WebRtcSpl_AllPassQMF(sum_q10, band_length, sum_filtered,
+                         WebRtcSpl_kAllPassFilter2, filter_state1);
+    WebRtcSpl_AllPassQMF(diff_q10, band_length, diff_filtered,
+                         WebRtcSpl_kAllPassFilter1, filter_state2);
+
+    // Interleave: the filtered difference channel supplies the even output
+    // samples and the filtered sum channel the odd ones. Each value is
+    // rounded, shifted from Q10 back to Q0 and saturated to 16 bits.
+    for (n = 0; n < band_length; n++)
+    {
+        tmp = (diff_filtered[n] + 512) >> 10;
+        out_data[2 * n] = WebRtcSpl_SatW32ToW16(tmp);
+
+        tmp = (sum_filtered[n] + 512) >> 10;
+        out_data[2 * n + 1] = WebRtcSpl_SatW32ToW16(tmp);
+    }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c
@ -0,0 +1,35 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_SqrtOfOneMinusXSquared().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Computes yQ15[m] = sqrt(1 - xQ15[m]^2) element by element; inputs and
+// outputs are in Q15.
+//
+// Input:
+//      - xQ15          : input vector (Q15)
+//      - vector_length : number of elements in |xQ15| and |yQ15|
+//
+// Output:
+//      - yQ15          : result vector (Q15)
+void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t *xQ15, size_t vector_length,
+                                      int16_t *yQ15)
+{
+    // 2^30 - 1: the value used for "1" in Q30 (~0.99999999906).
+    const int32_t one_q30 = 1073741823;
+    size_t idx;
+
+    for (idx = 0; idx < vector_length; idx++)
+    {
+        const int32_t x = xQ15[idx];
+        const int32_t one_minus_x2_q30 = one_q30 - x * x;  // x^2 is Q30
+
+        // The square root of the Q30 value is a Q15 value.
+        yQ15[idx] = (int16_t)WebRtcSpl_Sqrt(one_minus_x2_q30);
+    }
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c
@ -0,0 +1,165 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the functions
+ * WebRtcSpl_VectorBitShiftW16()
+ * WebRtcSpl_VectorBitShiftW32()
+ * WebRtcSpl_VectorBitShiftW32ToW16()
+ * WebRtcSpl_ScaleVector()
+ * WebRtcSpl_ScaleVectorWithSat()
+ * WebRtcSpl_ScaleAndAddVectors()
+ * WebRtcSpl_ScaleAndAddVectorsWithRoundC()
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Bit-shifts every element of a 16-bit vector.
+//
+// Input:
+//      - length       : number of elements in |in| and |res|
+//      - in           : input vector
+//      - right_shifts : > 0 shifts right by that many bits; <= 0 shifts left
+//                       by -right_shifts bits
+//
+// Output:
+//      - res          : shifted vector (no saturation is applied)
+void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length,
+                                 const int16_t *in, int16_t right_shifts)
+{
+    size_t n;
+
+    if (right_shifts <= 0)
+    {
+        const int left_shifts = -right_shifts;
+
+        for (n = 0; n < length; n++)
+        {
+            res[n] = (int16_t)(in[n] << left_shifts);
+        }
+        return;
+    }
+
+    for (n = 0; n < length; n++)
+    {
+        res[n] = (int16_t)(in[n] >> right_shifts);
+    }
+}
+
+// Bit-shifts every element of a 32-bit vector.
+//
+// Input:
+//      - vector_length : number of elements in |in_vector| and |out_vector|
+//      - in_vector     : input vector
+//      - right_shifts  : > 0 shifts right by that many bits; <= 0 shifts left
+//                        by -right_shifts bits
+//
+// Output:
+//      - out_vector    : shifted vector (no saturation is applied)
+void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector,
+                                 size_t vector_length,
+                                 const int32_t *in_vector,
+                                 int16_t right_shifts)
+{
+    size_t n;
+
+    if (right_shifts <= 0)
+    {
+        const int left_shifts = -right_shifts;
+
+        for (n = 0; n < vector_length; n++)
+        {
+            out_vector[n] = in_vector[n] << left_shifts;
+        }
+        return;
+    }
+
+    for (n = 0; n < vector_length; n++)
+    {
+        out_vector[n] = in_vector[n] >> right_shifts;
+    }
+}
+
+// Bit-shifts every element of a 32-bit vector and stores the result, saturated
+// to 16 bits, in |out|.
+//
+// Input:
+//      - length       : number of elements in |in| and |out|
+//      - in           : input vector
+//      - right_shifts : >= 0 shifts right by that many bits; < 0 shifts left
+//                       by -right_shifts bits
+//
+// Output:
+//      - out          : shifted and saturated vector
+void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length,
+                                      const int32_t* in, int right_shifts) {
+  size_t n;
+  int32_t shifted;
+
+  if (right_shifts < 0) {
+    const int left_shifts = -right_shifts;
+
+    for (n = 0; n < length; n++) {
+      shifted = in[n] << left_shifts;
+      out[n] = WebRtcSpl_SatW32ToW16(shifted);
+    }
+    return;
+  }
+
+  for (n = 0; n < length; n++) {
+    shifted = in[n] >> right_shifts;
+    out[n] = WebRtcSpl_SatW32ToW16(shifted);
+  }
+}
+
+// Scales a vector: out_vector[n] = (gain * in_vector[n]) >> right_shifts.
+// The result is truncated to 16 bits without saturation.
+//
+// Input:
+//      - in_vector        : input vector
+//      - gain             : scaling factor
+//      - in_vector_length : number of elements in both vectors
+//      - right_shifts     : right shift applied to each product
+//
+// Output:
+//      - out_vector       : scaled vector
+void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector,
+                           int16_t gain, size_t in_vector_length,
+                           int16_t right_shifts)
+{
+    size_t n;
+
+    for (n = 0; n < in_vector_length; n++)
+    {
+      const int32_t product = in_vector[n] * gain;
+
+      out_vector[n] = (int16_t)(product >> right_shifts);
+    }
+}
+
+// Scales a vector: out_vector[n] = (gain * in_vector[n]) >> right_shifts,
+// with each result saturated to the 16-bit range.
+//
+// Input:
+//      - in_vector        : input vector
+//      - gain             : scaling factor
+//      - in_vector_length : number of elements in both vectors
+//      - right_shifts     : right shift applied to each product
+//
+// Output:
+//      - out_vector       : scaled and saturated vector
+void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector,
+                                 int16_t gain, size_t in_vector_length,
+                                 int16_t right_shifts)
+{
+    size_t n;
+
+    for (n = 0; n < in_vector_length; n++) {
+      const int32_t scaled = (in_vector[n] * gain) >> right_shifts;
+
+      out_vector[n] = WebRtcSpl_SatW32ToW16(scaled);
+    }
+}
+
+// Adds two scaled vectors:
+//   out[n] = ((gain1 * in1[n]) >> shift1) + ((gain2 * in2[n]) >> shift2)
+// Each scaled term is truncated to 16 bits before the addition, and the sum
+// is truncated to 16 bits again when stored. No saturation is applied.
+//
+// Input:
+//      - in1, in2       : input vectors
+//      - gain1, gain2   : scaling factor for |in1| / |in2|
+//      - shift1, shift2 : right shift applied to the scaled |in1| / |in2|
+//      - vector_length  : number of elements in all three vectors
+//
+// Output:
+//      - out            : resulting vector
+void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1,
+                                  const int16_t *in2, int16_t gain2, int shift2,
+                                  int16_t *out, size_t vector_length)
+{
+    size_t n;
+
+    for (n = 0; n < vector_length; n++)
+    {
+      const int16_t term1 = (int16_t)((gain1 * in1[n]) >> shift1);
+      const int16_t term2 = (int16_t)((gain2 * in2[n]) >> shift2);
+
+      out[n] = term1 + term2;
+    }
+}
+
+// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
+// Adds two scaled vectors with rounding:
+//   out_vector[i] = (in_vector1[i] * in_vector1_scale +
+//                    in_vector2[i] * in_vector2_scale +
+//                    round_value) >> right_shifts
+// where round_value is half of (1 << right_shifts). The result is truncated
+// to 16 bits without saturation.
+//
+// Input:
+//      - in_vector1       : first input vector
+//      - in_vector1_scale : scaling factor for |in_vector1|
+//      - in_vector2       : second input vector
+//      - in_vector2_scale : scaling factor for |in_vector2|
+//      - right_shifts     : number of right shifts after the addition; must
+//                           be non-negative
+//      - length           : number of elements in all three vectors
+//
+// Output:
+//      - out_vector       : resulting vector
+//
+// Return value            : 0 on success, -1 if any pointer is NULL, |length|
+//                           is 0, or |right_shifts| is negative. |out_vector|
+//                           is not written on failure.
+int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
+                                           int16_t in_vector1_scale,
+                                           const int16_t* in_vector2,
+                                           int16_t in_vector2_scale,
+                                           int right_shifts,
+                                           int16_t* out_vector,
+                                           size_t length) {
+  size_t i = 0;
+  int round_value = 0;
+
+  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
+      length == 0 || right_shifts < 0) {
+    return -1;
+  }
+
+  // Computed only after validation: shifting by a negative count is undefined
+  // behavior, so this must not run before |right_shifts| has been checked.
+  round_value = (1 << right_shifts) >> 1;
+
+  for (i = 0; i < length; i++) {
+    out_vector[i] = (int16_t)((
+        in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale +
+        round_value) >> right_shifts);
+  }
+
+  return 0;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c
+++ b/third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c
@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the functions
+ * WebRtcSpl_ScaleAndAddVectorsWithRound_mips()
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// MIPS inline-assembly implementation of the scale-and-add-with-round vector
+// operation: for each element, in_vector1[i] * in_vector1_scale +
+// in_vector2[i] * in_vector2_scale is accumulated, shifted right by
+// |right_shifts| and stored as a 16-bit value in out_vector[i].
+//
+// Returns 0 on success, or -1 (without touching |out_vector|) if any pointer
+// is NULL, |length| is 0, or |right_shifts| is negative.
+int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,
+                                               int16_t in_vector1_scale,
+                                               const int16_t* in_vector2,
+                                               int16_t in_vector2_scale,
+                                               int right_shifts,
+                                               int16_t* out_vector,
+                                               size_t length) {
+  int16_t r0 = 0, r1 = 0;
+  // The asm block advances these pointers itself ("+r" operands), so local
+  // non-const copies of the inputs are needed.
+  int16_t *in1 = (int16_t*)in_vector1;
+  int16_t *in2 = (int16_t*)in_vector2;
+  int16_t *out = out_vector;
+  size_t i = 0;
+  int value32 = 0;
+
+  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
+      length == 0 || right_shifts < 0) {
+    return -1;
+  }
+  // One element per iteration:
+  //   lh/lh      load the current 16-bit samples from in1 and in2
+  //   mult/madd  form r0 * scale1 + r1 * scale2 in the hi/lo accumulator
+  //   extrv_r.w  extract the accumulator shifted right by |right_shifts|
+  //              (NOTE(review): the "_r" form is the rounding variant per the
+  //              MIPS DSP ASE; confirm it matches the C version's
+  //              round_value for right_shifts == 0)
+  //   sh         store the low 16 bits to out
+  //   addiu      advance in1, in2 and out by one int16_t (2 bytes)
+  for (i = 0; i < length; i++) {
+    __asm __volatile (
+      "lh         %[r0],          0(%[in1])                               \n\t"
+      "lh         %[r1],          0(%[in2])                               \n\t"
+      "mult       %[r0],          %[in_vector1_scale]                     \n\t"
+      "madd       %[r1],          %[in_vector2_scale]                     \n\t"
+      "extrv_r.w  %[value32],     $ac0,               %[right_shifts]     \n\t"
+      "addiu      %[in1],         %[in1],             2                   \n\t"
+      "addiu      %[in2],         %[in2],             2                   \n\t"
+      "sh         %[value32],     0(%[out])                               \n\t"
+      "addiu      %[out],         %[out],             2                   \n\t"
+      : [value32] "=&r" (value32), [out] "+r" (out), [in1] "+r" (in1),
+        [in2] "+r" (in2), [r0] "=&r" (r0), [r1] "=&r" (r1)
+      : [in_vector1_scale] "r" (in_vector1_scale),
+        [in_vector2_scale] "r" (in_vector2_scale),
+        [right_shifts] "r" (right_shifts)
+      // hi/lo are overwritten by mult/madd; "memory" because the block stores
+      // through |out|.
+      : "hi", "lo", "memory"
+    );
+  }
+  return 0;
+}
--- a/third_party/webrtc/src/webrtc/common_audio/wav_file.h
+++ b/third_party/webrtc/src/webrtc/common_audio/wav_file.h
@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_WAV_FILE_H_
+#define WEBRTC_COMMON_AUDIO_WAV_FILE_H_
+
+#ifdef __cplusplus
+
+#include <stdint.h>
+#include <cstddef>
+#include <string>
+
+#include "webrtc/base/constructormagic.h"
+
+namespace webrtc {
+
+// Interface to provide access to WAV file parameters.
+// Pure abstract base: every accessor must be supplied by the derived class.
+// The destructor is virtual so objects can be destroyed through a WavFile
+// pointer.
+class WavFile {
+ public:
+  virtual ~WavFile() {}
+
+  virtual int sample_rate() const = 0;
+  virtual int num_channels() const = 0;
+  virtual uint32_t num_samples() const = 0;
+};
+
+// Simple C++ class for writing 16-bit PCM WAV files. All error handling is
+// by calls to RTC_CHECK(), making it unsuitable for anything but debug code.
+class WavWriter final : public WavFile {
+ public:
+  // Open a new WAV file for writing.
+  WavWriter(const std::string& filename, int sample_rate, int num_channels);
+
+  // Close the WAV file, after writing its header.
+  ~WavWriter();
+
+  // Write additional samples to the file. Each sample is in the range
+  // [-32768,32767], and there must be the previously specified number of
+  // interleaved channels.
+  void WriteSamples(const float* samples, size_t num_samples);
+  void WriteSamples(const int16_t* samples, size_t num_samples);
+
+  // WavFile accessors: return the values held in the members below.
+  int sample_rate() const override { return sample_rate_; }
+  int num_channels() const override { return num_channels_; }
+  uint32_t num_samples() const override { return num_samples_; }
+
+ private:
+  // Defined outside this header.
+  void Close();
+  // const: fixed for the lifetime of the writer.
+  const int sample_rate_;
+  const int num_channels_;
+  uint32_t num_samples_;  // Total number of samples written to file.
+  // NOTE(review): FILE is used here, but none of the standard headers
+  // included above is <stdio.h>/<cstdio>; this relies on another include
+  // (possibly constructormagic.h) providing it - confirm.
+  FILE* file_handle_;  // Output file, owned by this class
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(WavWriter);
+};
+
+// Follows the conventions of WavWriter.
+class WavReader final : public WavFile {
+ public:
+  // Opens an existing WAV file for reading.
+  explicit WavReader(const std::string& filename);
+
+  // Close the WAV file.
+  ~WavReader();
+
+  // Returns the number of samples read. If this is less than requested,
+  // verifies that the end of the file was reached.
+  size_t ReadSamples(size_t num_samples, float* samples);
+  size_t ReadSamples(size_t num_samples, int16_t* samples);
+
+  // WavFile accessors: return the values held in the members below.
+  int sample_rate() const override { return sample_rate_; }
+  int num_channels() const override { return num_channels_; }
+  uint32_t num_samples() const override { return num_samples_; }
+
+ private:
+  // Defined outside this header.
+  void Close();
+  // Not const, unlike WavWriter's counterparts. NOTE(review): presumably
+  // filled in from the file's header by the constructor - confirm in the
+  // implementation file.
+  int sample_rate_;
+  int num_channels_;
+  uint32_t num_samples_;  // Total number of samples in the file.
+  uint32_t num_samples_remaining_;
+  // NOTE(review): FILE is used here, but none of the standard headers
+  // included above is <stdio.h>/<cstdio>; this relies on another include
+  // (possibly constructormagic.h) providing it - confirm.
+  FILE* file_handle_;  // Input file, owned by this class.
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(WavReader);
+};
+
+}  // namespace webrtc
+
+extern "C" {
+#endif  // __cplusplus
+
+// C wrappers for the WavWriter class.
+typedef struct rtc_WavWriter rtc_WavWriter;
+rtc_WavWriter* rtc_WavOpen(const char* filename,
+                           int sample_rate,
+                           int num_channels);
+void rtc_WavClose(rtc_WavWriter* wf);
+void rtc_WavWriteSamples(rtc_WavWriter* wf,
+                         const float* samples,
+                         size_t num_samples);
+int rtc_WavSampleRate(const rtc_WavWriter* wf);
+int rtc_WavNumChannels(const rtc_WavWriter* wf);
+uint32_t rtc_WavNumSamples(const rtc_WavWriter* wf);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // WEBRTC_COMMON_AUDIO_WAV_FILE_H_
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_common.h
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_common.h
@ -0,0 +1,32 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
+
+#include "webrtc/typedefs.h"
+
+#ifdef _MSC_VER /* visual c++ */
+#define ALIGN16_BEG __declspec(align(16))
+#define ALIGN16_END
+#else /* gcc or icc */
+#define ALIGN16_BEG
+#define ALIGN16_END __attribute__((aligned(16)))
+#endif
+
+extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65];
+extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65];
+extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65];
+extern const float WebRtcAec_kExtendedSmoothingCoefficients[2][2];
+extern const float WebRtcAec_kNormalSmoothingCoefficients[2][2];
+extern const float WebRtcAec_kMinFarendPSD;
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
+
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.c
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.h
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core.h
@ -0,0 +1,129 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * Specifies the interface for the AEC core.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
+
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"
+
+#define FRAME_LEN 80
+#define PART_LEN 64               // Length of partition
+#define PART_LEN1 (PART_LEN + 1)  // Unique fft coefficients
+#define PART_LEN2 (PART_LEN * 2)  // Length of partition * 2
+#define NUM_HIGH_BANDS_MAX  2     // Max number of high bands
+
+typedef float complex_t[2];
+// For performance reasons, some arrays of complex numbers are replaced by twice
+// as long arrays of float, all the real parts followed by all the imaginary
+// ones (complex_t[SIZE] -> float[2][SIZE]). This allows SIMD optimizations and
+// is better than two arrays (one for the real parts and one for the imaginary
+// parts) as this other way would require two pointers instead of one and cause
+// extra register spilling. This also allows the offsets to be calculated at
+// compile time.
+
+// Metrics
+enum {
+  kOffsetLevel = -100
+};
+
+// Statistics record for a single echo metric. WebRtcAec_GetEchoStats()
+// reports one of these for each of ERL, ERLE and A_NLP.
+// NOTE(review): this header does not show how the fields are updated; their
+// meaning is suggested by the names only - confirm against aec_core.c.
+typedef struct Stats {
+  float instant;
+  float average;
+  float min;
+  float max;
+  float sum;
+  float hisum;
+  float himean;
+  int counter;
+  int hicounter;
+} Stats;
+
+typedef struct AecCore AecCore;
+
+AecCore* WebRtcAec_CreateAec();  // Returns NULL on error.
+void WebRtcAec_FreeAec(AecCore* aec);
+int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
+void WebRtcAec_InitAec_SSE2(void);
+#if defined(MIPS_FPU_LE)
+void WebRtcAec_InitAec_mips(void);
+#endif
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+void WebRtcAec_InitAec_neon(void);
+#endif
+
+void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend);
+void WebRtcAec_ProcessFrames(AecCore* aec,
+                             const float* const* nearend,
+                             size_t num_bands,
+                             size_t num_samples,
+                             int knownDelay,
+                             float* const* out);
+
+// A helper function to call WebRtc_MoveReadPtr() for all far-end buffers.
+// Returns the number of elements moved, and adjusts |system_delay| by the
+// corresponding amount in ms.
+int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements);
+
+// Calculates the median, standard deviation and amount of poor values among the
+// delay estimates aggregated up to the first call to the function. After that
+// first call the metrics are aggregated and updated every second. With poor
+// values we mean values that most likely will cause the AEC to perform poorly.
+// TODO(bjornv): Consider changing tests and tools to handle a constant
+// aggregation window throughout the session instead.
+int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std,
+                                  float* fraction_poor_delays);
+
+// Returns the echo state (1: echo, 0: no echo).
+int WebRtcAec_echo_state(AecCore* self);
+
+// Gets statistics of the echo metrics ERL, ERLE, A_NLP.
+void WebRtcAec_GetEchoStats(AecCore* self,
+                            Stats* erl,
+                            Stats* erle,
+                            Stats* a_nlp);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+void* WebRtcAec_far_time_buf(AecCore* self);
+#endif
+
+// Sets local configuration modes.
+void WebRtcAec_SetConfigCore(AecCore* self,
+                             int nlp_mode,
+                             int metrics_mode,
+                             int delay_logging);
+
+// Non-zero enables, zero disables.
+void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable);
+
+// Returns non-zero if delay agnostic (i.e., signal based delay estimation) is
+// enabled and zero if disabled.
+int WebRtcAec_delay_agnostic_enabled(AecCore* self);
+
+// Enables or disables extended filter mode. Non-zero enables, zero disables.
+void WebRtcAec_enable_extended_filter(AecCore* self, int enable);
+
+// Returns non-zero if extended filter mode is enabled and zero if disabled.
+int WebRtcAec_extended_filter_enabled(AecCore* self);
+
+// Returns the current |system_delay|, i.e., the buffered difference between
+// far-end and near-end.
+int WebRtcAec_system_delay(AecCore* self);
+
+// Sets the |system_delay| to |value|.  Note that if the value is changed
+// improperly, there can be a performance regression.  So it should be used with
+// care.
+void WebRtcAec_SetSystemDelay(AecCore* self, int delay);
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h
@ -0,0 +1,202 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/wav_file.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+#include "webrtc/typedefs.h"
+
+// Number of partitions for the extended filter mode. The first one is an enum
+// to be used in array declarations, as it represents the maximum filter length.
+enum {
+  kExtendedNumPartitions = 32
+};
+static const int kNormalNumPartitions = 12;
+
+// Delay estimator constants, used for logging and delay compensation if
+// reported delays are disabled.
+enum {
+  kLookaheadBlocks = 15
+};
+enum {
+  // 500 ms for 16 kHz which is equivalent with the limit of reported delays.
+  kHistorySizeBlocks = 125
+};
+
+// Extended filter adaptation parameters.
+// TODO(ajm): No narrowband tuning yet.
+static const float kExtendedMu = 0.4f;
+static const float kExtendedErrorThreshold = 1.0e-6f;
+
+// Level-tracking record. AecCore keeps one instance each for the far-end,
+// near-end, linear-output and NLP-output signals (farlevel, nearlevel,
+// linoutlevel, nlpoutlevel).
+// NOTE(review): this header does not show how the fields are updated; their
+// meaning is suggested by the names only - confirm against aec_core.c.
+typedef struct PowerLevel {
+  float sfrsum;
+  int sfrcounter;
+  float framelevel;
+  float frsum;
+  int frcounter;
+  float minlevel;
+  float averagelevel;
+} PowerLevel;
+
+// Complete state of one AEC instance. aec_core.h only forward-declares this
+// type, so code that includes just that header treats it as opaque; this
+// definition is for the AEC implementation files.
+struct AecCore {
+  int farBufWritePos, farBufReadPos;
+
+  int knownDelay;
+  int inSamples, outSamples;
+  int delayEstCtr;
+
+  RingBuffer* nearFrBuf;
+  RingBuffer* outFrBuf;
+
+  // One ring buffer per high band, up to NUM_HIGH_BANDS_MAX.
+  RingBuffer* nearFrBufH[NUM_HIGH_BANDS_MAX];
+  RingBuffer* outFrBufH[NUM_HIGH_BANDS_MAX];
+
+  float dBuf[PART_LEN2];  // nearend
+  float eBuf[PART_LEN2];  // error
+
+  float dBufH[NUM_HIGH_BANDS_MAX][PART_LEN2];  // nearend
+
+  float xPow[PART_LEN1];
+  float dPow[PART_LEN1];
+  float dMinPow[PART_LEN1];
+  float dInitMinPow[PART_LEN1];
+  float* noisePow;
+
+  // Sized for the extended filter (kExtendedNumPartitions), the maximum
+  // filter length. NOTE(review): the float[2][N] shape follows the layout
+  // described next to complex_t in aec_core.h (all real parts, then all
+  // imaginary parts); presumably [0] = real and [1] = imaginary - confirm.
+  float xfBuf[2][kExtendedNumPartitions * PART_LEN1];  // farend fft buffer
+  float wfBuf[2][kExtendedNumPartitions * PART_LEN1];  // filter fft
+  complex_t sde[PART_LEN1];  // cross-psd of nearend and error
+  complex_t sxd[PART_LEN1];  // cross-psd of farend and nearend
+  // Farend windowed fft buffer.
+  complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1];
+
+  float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1];  // far, near, error psd
+  float hNs[PART_LEN1];
+  float hNlFbMin, hNlFbLocalMin;
+  float hNlXdAvgMin;
+  int hNlNewMin, hNlMinCtr;
+  float overDrive, overDriveSm;
+  int nlp_mode;
+  float outBuf[PART_LEN];
+  int delayIdx;
+
+  short stNearState, echoState;
+  short divergeState;
+
+  int xfBufBlockPos;
+
+  RingBuffer* far_buf;
+  RingBuffer* far_buf_windowed;
+  int system_delay;  // Current system delay buffered in AEC.
+
+  int mult;  // sampling frequency multiple
+  int sampFreq;
+  size_t num_bands;
+  uint32_t seed;
+
+  float normal_mu;               // stepsize
+  float normal_error_threshold;  // error threshold
+
+  int noiseEstCtr;
+
+  PowerLevel farlevel;
+  PowerLevel nearlevel;
+  PowerLevel linoutlevel;
+  PowerLevel nlpoutlevel;
+
+  int metricsMode;
+  int stateCounter;
+  Stats erl;
+  Stats erle;
+  Stats aNlp;
+  Stats rerl;
+
+  // Quantities to control H band scaling for SWB input
+  int freq_avg_ic;       // initial bin for averaging nlp gain
+  int flag_Hband_cn;     // for comfort noise
+  float cn_scale_Hband;  // scale for comfort noise in H band
+
+  int delay_metrics_delivered;
+  int delay_histogram[kHistorySizeBlocks];
+  int num_delay_values;
+  int delay_median;
+  int delay_std;
+  float fraction_poor_delays;
+  int delay_logging_enabled;
+  void* delay_estimator_farend;
+  void* delay_estimator;
+  // Variables associated with delay correction through signal based delay
+  // estimation feedback.
+  int signal_delay_correction;
+  int previous_delay;
+  int delay_correction_count;
+  int shift_offset;
+  float delay_quality_threshold;
+  int frame_count;
+
+  // 0 = delay agnostic mode (signal based delay correction) disabled.
+  // Otherwise enabled.
+  int delay_agnostic_enabled;
+  // 1 = extended filter mode enabled, 0 = disabled.
+  int extended_filter_enabled;
+  // Runtime selection of number of filter partitions.
+  int num_partitions;
+
+  // Debug-dump state; present only when WEBRTC_AEC_DEBUG_DUMP is defined, so
+  // the struct layout differs between builds with and without it.
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  // Sequence number of this AEC instance, so that different instances can
+  // choose different dump file names.
+  int instance_index;
+
+  // Number of times we've restarted dumping; used to pick new dump file names
+  // each time.
+  int debug_dump_count;
+
+  RingBuffer* far_time_buf;
+  rtc_WavWriter* farFile;
+  rtc_WavWriter* nearFile;
+  rtc_WavWriter* outFile;
+  rtc_WavWriter* outLinearFile;
+  FILE* e_fft_file;
+#endif
+};
+
+typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]);
+extern WebRtcAecFilterFar WebRtcAec_FilterFar;
+typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]);
+extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
+typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec,
+                                          float* fft,
+                                          float ef[2][PART_LEN1]);
+extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
+typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec,
+                                              float hNl[PART_LEN1],
+                                              const float hNlFb,
+                                              float efw[2][PART_LEN1]);
+extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
+
+typedef void (*WebRtcAecComfortNoise)(AecCore* aec,
+                                      float efw[2][PART_LEN1],
+                                      complex_t* comfortNoiseHband,
+                                      const float* noisePow,
+                                      const float* lambda);
+extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
+
+typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec,
+                                          float efw[2][PART_LEN1],
+                                          float xfw[2][PART_LEN1],
+                                          float* fft,
+                                          float* cohde,
+                                          float* cohxd);
+extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_mips.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_mips.c
@ -0,0 +1,774 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The core AEC algorithm, which is presented with time-aligned signals.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+#include <math.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+static const int flagHbandCn = 1; // flag for adding comfort noise in H band (tested in WebRtcAec_ComfortNoise_mips)
+extern const float WebRtcAec_weightCurve[65];  // declaration only; the table is defined elsewhere
+extern const float WebRtcAec_overDriveCurve[65];  // declaration only; the table is defined elsewhere
+/*
+ * MIPS inline-assembly variant of the comfort-noise step.
+ *
+ * Builds a random-phase spectrum u[] whose bin k is
+ * sqrt(noisePow[k]) * (cos(phase), -sin(phase)), then adds it to efw weighted
+ * by sqrt(1 - lambda[k]^2). Bins below PART_LEN are updated only when the
+ * comparison lambda[k] < 1 holds; bin PART_LEN is always updated, with the
+ * radicand clamped at 0. When aec->sampFreq is 32000 or 48000 (and
+ * flagHbandCn is 1) comfortNoiseHband is additionally filled from averaged
+ * noise and weight values.
+ *
+ *   aec               - supplies sampFreq and seed (seed is passed by address
+ *                       to WebRtcSpl_RandUArray).
+ *   efw               - modified in place; efw[0][k] receives the u[k][0]
+ *                       term and efw[1][k] the u[k][1] term.
+ *   comfortNoiseHband - written (PART_LEN1 entries) only on the 32000/48000
+ *                       path; untouched otherwise.
+ *   noisePow          - read at indices 1..PART_LEN.
+ *   lambda            - read at indices 0..PART_LEN.
+ *
+ * The local copies of noisePow and lambda are advanced inside the assembly
+ * and rewound in C afterwards.
+ * NOTE(review): the unrolled loops assume PART_LEN is a multiple of 4 and
+ * PART_LEN1 == PART_LEN + 1; both macros are defined outside this file -
+ * confirm.
+ */
+void WebRtcAec_ComfortNoise_mips(AecCore* aec,
+                                 float efw[2][PART_LEN1],
+                                 complex_t* comfortNoiseHband,
+                                 const float* noisePow,
+                                 const float* lambda) {
+  int i, num;
+  float rand[PART_LEN];
+  float noise, noiseAvg, tmp, tmpAvg;
+  int16_t randW16[PART_LEN];
+  complex_t u[PART_LEN1];
+  // pi2t turns a 16-bit random value into a phase: value * 2*pi / 32768.
+  const float pi2 = 6.28318530717959f;
+  const float pi2t = pi2 / 32768;
+
+  // Generate a uniform random array on [0 1]
+  WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
+  // randWptr walks randW16; the assembly advances it by 8 bytes per pass.
+  int16_t* randWptr = randW16;
+  float randTemp, randTemp2, randTemp3, randTemp4;
+  int32_t tmp1s, tmp2s, tmp3s, tmp4s;
+  // Per pass: load 4 halfwords, convert int -> float, multiply by pi2t.
+  for (i = 0; i < PART_LEN; i+=4) {
+    __asm __volatile (
+      ".set     push                                           \n\t"
+      ".set     noreorder                                      \n\t"
+      "lh       %[tmp1s],       0(%[randWptr])                 \n\t"
+      "lh       %[tmp2s],       2(%[randWptr])                 \n\t"
+      "lh       %[tmp3s],       4(%[randWptr])                 \n\t"
+      "lh       %[tmp4s],       6(%[randWptr])                 \n\t"
+      "mtc1     %[tmp1s],       %[randTemp]                    \n\t"
+      "mtc1     %[tmp2s],       %[randTemp2]                   \n\t"
+      "mtc1     %[tmp3s],       %[randTemp3]                   \n\t"
+      "mtc1     %[tmp4s],       %[randTemp4]                   \n\t"
+      "cvt.s.w  %[randTemp],    %[randTemp]                    \n\t"
+      "cvt.s.w  %[randTemp2],   %[randTemp2]                   \n\t"
+      "cvt.s.w  %[randTemp3],   %[randTemp3]                   \n\t"
+      "cvt.s.w  %[randTemp4],   %[randTemp4]                   \n\t"
+      "addiu    %[randWptr],    %[randWptr],      8            \n\t"
+      "mul.s    %[randTemp],    %[randTemp],      %[pi2t]      \n\t"
+      "mul.s    %[randTemp2],   %[randTemp2],     %[pi2t]      \n\t"
+      "mul.s    %[randTemp3],   %[randTemp3],     %[pi2t]      \n\t"
+      "mul.s    %[randTemp4],   %[randTemp4],     %[pi2t]      \n\t"
+      ".set     pop                                            \n\t"
+      : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),
+        [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),
+        [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),
+        [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),
+        [tmp4s] "=&r" (tmp4s)
+      : [pi2t] "f" (pi2t)
+      : "memory"
+    );
+    // Store cos/sin of the four phases at u[i+1]..u[i+4]; u[0] is set later.
+    u[i+1][0] = cosf(randTemp);
+    u[i+1][1] = sinf(randTemp);
+    u[i+2][0] = cosf(randTemp2);
+    u[i+2][1] = sinf(randTemp2);
+    u[i+3][0] = cosf(randTemp3);
+    u[i+3][1] = sinf(randTemp3);
+    u[i+4][0] = cosf(randTemp4);
+    u[i+4][1] = sinf(randTemp4);
+  }
+
+  // Reject LF noise
+  float* u_ptr = &u[1][0];
+  float noise2, noise3, noise4;
+  float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;
+  /* Bin 0 is zeroed. Each pass below takes four bins starting at u_ptr,
+   * multiplies both components by sqrt of the matching noisePow entry
+   * (offsets 4..16 from the running pointer, so bin k uses noisePow[k]) and
+   * negates the second component before storing. */
+  u[0][0] = 0;
+  u[0][1] = 0;
+  for (i = 1; i < PART_LEN1; i+=4) {
+    __asm __volatile (
+      ".set     push                                            \n\t"
+      ".set     noreorder                                       \n\t"
+      "lwc1     %[noise],       4(%[noisePow])                  \n\t"
+      "lwc1     %[noise2],      8(%[noisePow])                  \n\t"
+      "lwc1     %[noise3],      12(%[noisePow])                 \n\t"
+      "lwc1     %[noise4],      16(%[noisePow])                 \n\t"
+      "sqrt.s   %[noise],       %[noise]                        \n\t"
+      "sqrt.s   %[noise2],      %[noise2]                       \n\t"
+      "sqrt.s   %[noise3],      %[noise3]                       \n\t"
+      "sqrt.s   %[noise4],      %[noise4]                       \n\t"
+      "lwc1     %[tmp1f],       0(%[u_ptr])                     \n\t"
+      "lwc1     %[tmp2f],       4(%[u_ptr])                     \n\t"
+      "lwc1     %[tmp3f],       8(%[u_ptr])                     \n\t"
+      "lwc1     %[tmp4f],       12(%[u_ptr])                    \n\t"
+      "lwc1     %[tmp5f],       16(%[u_ptr])                    \n\t"
+      "lwc1     %[tmp6f],       20(%[u_ptr])                    \n\t"
+      "lwc1     %[tmp7f],       24(%[u_ptr])                    \n\t"
+      "lwc1     %[tmp8f],       28(%[u_ptr])                    \n\t"
+      "addiu    %[noisePow],    %[noisePow],      16            \n\t"
+      "mul.s    %[tmp1f],       %[tmp1f],         %[noise]      \n\t"
+      "mul.s    %[tmp2f],       %[tmp2f],         %[noise]      \n\t"
+      "mul.s    %[tmp3f],       %[tmp3f],         %[noise2]     \n\t"
+      "mul.s    %[tmp4f],       %[tmp4f],         %[noise2]     \n\t"
+      "mul.s    %[tmp5f],       %[tmp5f],         %[noise3]     \n\t"
+      "mul.s    %[tmp6f],       %[tmp6f],         %[noise3]     \n\t"
+      "swc1     %[tmp1f],       0(%[u_ptr])                     \n\t"
+      "swc1     %[tmp3f],       8(%[u_ptr])                     \n\t"
+      "mul.s    %[tmp8f],       %[tmp8f],         %[noise4]     \n\t"
+      "mul.s    %[tmp7f],       %[tmp7f],         %[noise4]     \n\t"
+      "neg.s    %[tmp2f]                                        \n\t"
+      "neg.s    %[tmp4f]                                        \n\t"
+      "neg.s    %[tmp6f]                                        \n\t"
+      "neg.s    %[tmp8f]                                        \n\t"
+      "swc1     %[tmp5f],       16(%[u_ptr])                    \n\t"
+      "swc1     %[tmp7f],       24(%[u_ptr])                    \n\t"
+      "swc1     %[tmp2f],       4(%[u_ptr])                     \n\t"
+      "swc1     %[tmp4f],       12(%[u_ptr])                    \n\t"
+      "swc1     %[tmp6f],       20(%[u_ptr])                    \n\t"
+      "swc1     %[tmp8f],       28(%[u_ptr])                    \n\t"
+      "addiu    %[u_ptr],       %[u_ptr],         32            \n\t"
+      ".set     pop                                             \n\t"
+      : [u_ptr] "+r" (u_ptr),  [noisePow] "+r" (noisePow),
+        [noise] "=&f" (noise), [noise2] "=&f" (noise2),
+        [noise3] "=&f" (noise3), [noise4] "=&f" (noise4),
+        [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),
+        [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),
+        [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),
+        [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)
+      :
+      : "memory"
+    );
+  }
+  u[PART_LEN][1] = 0;
+  noisePow -= PART_LEN;  // rewind the pointer the assembly advanced (4 floats per pass)
+  // Next: add weighted u to efw, two bins per pass, for bins below PART_LEN.
+  u_ptr = &u[0][0];
+  float* u_ptr_end = &u[PART_LEN][0];
+  float* efw_ptr_0 = &efw[0][0];
+  float* efw_ptr_1 = &efw[1][0];
+  float tmp9f, tmp10f;
+  const float tmp1c = 1.0;
+  /* Label map for the assembly below:
+   *   1: loop head - load lambda for the two bins and compare each with 1.0
+   *   2: both bins pass the "lambda < 1" test
+   *   3: only the first bin passes
+   *   4: first bin failed; test the second bin and update it if it passes
+   *   5: advance the pointers and loop until u_ptr reaches u_ptr_end
+   * Inside this loop the radicand 1 - lambda^2 is not clamped at 0. */
+  __asm __volatile (
+    ".set     push                                                        \n\t"
+    ".set     noreorder                                                   \n\t"
+   "1:                                                                    \n\t"
+    "lwc1     %[tmp1f],       0(%[lambda])                                \n\t"
+    "lwc1     %[tmp6f],       4(%[lambda])                                \n\t"
+    "addiu    %[lambda],      %[lambda],        8                         \n\t"
+    "c.lt.s   %[tmp1f],       %[tmp1c]                                    \n\t"
+    "bc1f     4f                                                          \n\t"
+    " nop                                                                 \n\t"
+    "c.lt.s   %[tmp6f],       %[tmp1c]                                    \n\t"
+    "bc1f     3f                                                          \n\t"
+    " nop                                                                 \n\t"
+   "2:                                                                    \n\t"
+    "mul.s    %[tmp1f],       %[tmp1f],         %[tmp1f]                  \n\t"
+    "mul.s    %[tmp6f],       %[tmp6f],         %[tmp6f]                  \n\t"
+    "sub.s    %[tmp1f],       %[tmp1c],         %[tmp1f]                  \n\t"
+    "sub.s    %[tmp6f],       %[tmp1c],         %[tmp6f]                  \n\t"
+    "sqrt.s   %[tmp1f],       %[tmp1f]                                    \n\t"
+    "sqrt.s   %[tmp6f],       %[tmp6f]                                    \n\t"
+    "lwc1     %[tmp2f],       0(%[efw_ptr_0])                             \n\t"
+    "lwc1     %[tmp3f],       0(%[u_ptr])                                 \n\t"
+    "lwc1     %[tmp7f],       4(%[efw_ptr_0])                             \n\t"
+    "lwc1     %[tmp8f],       8(%[u_ptr])                                 \n\t"
+    "lwc1     %[tmp4f],       0(%[efw_ptr_1])                             \n\t"
+    "lwc1     %[tmp5f],       4(%[u_ptr])                                 \n\t"
+    "lwc1     %[tmp9f],       4(%[efw_ptr_1])                             \n\t"
+    "lwc1     %[tmp10f],      12(%[u_ptr])                                \n\t"
+#if !defined(MIPS32_R2_LE)
+    "mul.s    %[tmp3f],       %[tmp1f],         %[tmp3f]                  \n\t"
+    "add.s    %[tmp2f],       %[tmp2f],         %[tmp3f]                  \n\t"
+    "mul.s    %[tmp3f],       %[tmp1f],         %[tmp5f]                  \n\t"
+    "add.s    %[tmp4f],       %[tmp4f],         %[tmp3f]                  \n\t"
+    "mul.s    %[tmp3f],       %[tmp6f],         %[tmp8f]                  \n\t"
+    "add.s    %[tmp7f],       %[tmp7f],         %[tmp3f]                  \n\t"
+    "mul.s    %[tmp3f],       %[tmp6f],         %[tmp10f]                 \n\t"
+    "add.s    %[tmp9f],       %[tmp9f],         %[tmp3f]                  \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+    "madd.s   %[tmp2f],       %[tmp2f],         %[tmp1f],     %[tmp3f]    \n\t"
+    "madd.s   %[tmp4f],       %[tmp4f],         %[tmp1f],     %[tmp5f]    \n\t"
+    "madd.s   %[tmp7f],       %[tmp7f],         %[tmp6f],     %[tmp8f]    \n\t"
+    "madd.s   %[tmp9f],       %[tmp9f],         %[tmp6f],     %[tmp10f]   \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+    "swc1     %[tmp2f],       0(%[efw_ptr_0])                             \n\t"
+    "swc1     %[tmp4f],       0(%[efw_ptr_1])                             \n\t"
+    "swc1     %[tmp7f],       4(%[efw_ptr_0])                             \n\t"
+    "b        5f                                                          \n\t"
+    " swc1    %[tmp9f],       4(%[efw_ptr_1])                             \n\t"
+   "3:                                                                    \n\t"
+    "mul.s    %[tmp1f],       %[tmp1f],         %[tmp1f]                  \n\t"
+    "sub.s    %[tmp1f],       %[tmp1c],         %[tmp1f]                  \n\t"
+    "sqrt.s   %[tmp1f],       %[tmp1f]                                    \n\t"
+    "lwc1     %[tmp2f],       0(%[efw_ptr_0])                             \n\t"
+    "lwc1     %[tmp3f],       0(%[u_ptr])                                 \n\t"
+    "lwc1     %[tmp4f],       0(%[efw_ptr_1])                             \n\t"
+    "lwc1     %[tmp5f],       4(%[u_ptr])                                 \n\t"
+#if !defined(MIPS32_R2_LE)
+    "mul.s    %[tmp3f],       %[tmp1f],         %[tmp3f]                  \n\t"
+    "add.s    %[tmp2f],       %[tmp2f],         %[tmp3f]                  \n\t"
+    "mul.s    %[tmp3f],       %[tmp1f],         %[tmp5f]                  \n\t"
+    "add.s    %[tmp4f],       %[tmp4f],         %[tmp3f]                  \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+    "madd.s   %[tmp2f],       %[tmp2f],         %[tmp1f],     %[tmp3f]    \n\t"
+    "madd.s   %[tmp4f],       %[tmp4f],         %[tmp1f],     %[tmp5f]    \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+    "swc1     %[tmp2f],       0(%[efw_ptr_0])                             \n\t"
+    "b        5f                                                          \n\t"
+    " swc1    %[tmp4f],       0(%[efw_ptr_1])                             \n\t"
+   "4:                                                                    \n\t"
+    "c.lt.s   %[tmp6f],       %[tmp1c]                                    \n\t"
+    "bc1f     5f                                                          \n\t"
+    " nop                                                                 \n\t"
+    "mul.s    %[tmp6f],       %[tmp6f],         %[tmp6f]                  \n\t"
+    "sub.s    %[tmp6f],       %[tmp1c],         %[tmp6f]                  \n\t"
+    "sqrt.s   %[tmp6f],       %[tmp6f]                                    \n\t"
+    "lwc1     %[tmp7f],       4(%[efw_ptr_0])                             \n\t"
+    "lwc1     %[tmp8f],       8(%[u_ptr])                                 \n\t"
+    "lwc1     %[tmp9f],       4(%[efw_ptr_1])                             \n\t"
+    "lwc1     %[tmp10f],      12(%[u_ptr])                                \n\t"
+#if !defined(MIPS32_R2_LE)
+    "mul.s    %[tmp3f],       %[tmp6f],         %[tmp8f]                  \n\t"
+    "add.s    %[tmp7f],       %[tmp7f],         %[tmp3f]                  \n\t"
+    "mul.s    %[tmp3f],       %[tmp6f],         %[tmp10f]                 \n\t"
+    "add.s    %[tmp9f],       %[tmp9f],         %[tmp3f]                  \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+    "madd.s   %[tmp7f],       %[tmp7f],         %[tmp6f],     %[tmp8f]    \n\t"
+    "madd.s   %[tmp9f],       %[tmp9f],         %[tmp6f],     %[tmp10f]   \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+    "swc1     %[tmp7f],       4(%[efw_ptr_0])                             \n\t"
+    "swc1     %[tmp9f],       4(%[efw_ptr_1])                             \n\t"
+   "5:                                                                    \n\t"
+    "addiu    %[u_ptr],       %[u_ptr],         16                        \n\t"
+    "addiu    %[efw_ptr_0],   %[efw_ptr_0],     8                         \n\t"
+    "bne      %[u_ptr],       %[u_ptr_end],     1b                        \n\t"
+    " addiu   %[efw_ptr_1],   %[efw_ptr_1],     8                         \n\t"
+    ".set     pop                                                         \n\t"
+    : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),
+      [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),
+      [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),
+      [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),
+      [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),
+      [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)
+    : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)
+    : "memory"
+  );
+  // Rewind lambda, then handle the last bin (index PART_LEN) in plain C.
+  lambda -= PART_LEN;
+  tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
+  //tmp = 1 - lambda[i];
+  efw[0][PART_LEN] += tmp * u[PART_LEN][0];
+  efw[1][PART_LEN] += tmp * u[PART_LEN][1];
+
+  // For H band comfort noise
+  // TODO: don't compute noise and "tmp" twice. Use the previous results.
+  noiseAvg = 0.0;
+  tmpAvg = 0.0;
+  num = 0;
+  if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) {
+    for (i = 0; i < PART_LEN; i++) {
+      rand[i] = ((float)randW16[i]) / 32768;
+    }
+
+    // average noise scale
+    // average over second half of freq spectrum (i.e., 4->8khz)
+    // TODO: we shouldn't need num. We know how many elements we're summing.
+    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
+      num++;
+      noiseAvg += sqrtf(noisePow[i]);
+    }
+    noiseAvg /= (float)num;
+
+    // average nlp scale
+    // average over second half of freq spectrum (i.e., 4->8khz)
+    // TODO: we shouldn't need num. We know how many elements we're summing.
+    num = 0;
+    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
+      num++;
+      tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
+    }
+    tmpAvg /= (float)num;
+
+    // Use average noise for H band
+    // TODO: we should probably have a new random vector here.
+    // Reject LF noise
+    u[0][0] = 0;
+    u[0][1] = 0;
+    for (i = 1; i < PART_LEN1; i++) {
+      tmp = pi2 * rand[i - 1];
+
+      // Use average noise for H band
+      u[i][0] = noiseAvg * (float)cos(tmp);
+      u[i][1] = -noiseAvg * (float)sin(tmp);
+    }
+    u[PART_LEN][1] = 0;
+
+    for (i = 0; i < PART_LEN1; i++) {
+      // Use average NLP weight for H band
+      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
+      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
+    }
+  }
+}
+/*
+ * MIPS inline-assembly variant of the far-end filtering step.
+ *
+ * For every partition i in [0, aec->num_partitions) it accumulates into yf
+ * the complex product of one xfBuf segment and one wfBuf segment:
+ *   yf[0][k] += xRe * wRe - xIm * wIm
+ *   yf[1][k] += xRe * wIm + xIm * wRe
+ * where xRe/xIm are read from aec->xfBuf[0]/[1] at offset xPos + k and
+ * wRe/wIm from aec->wfBuf[0]/[1] at offset pos + k. yf is only added to, so
+ * its starting contents are whatever the caller supplies.
+ *
+ * xPos starts at (i + aec->xfBufBlockPos) * PART_LEN1 and is reduced by
+ * num_partitions * PART_LEN1 when i + xfBufBlockPos reaches num_partitions.
+ *
+ * The assembly loop handles two bins per pass (PART_LEN1 >> 1 passes); the
+ * instructions after the loop handle one further bin.
+ * NOTE(review): that split covers exactly PART_LEN1 bins only if PART_LEN1
+ * is odd - confirm against its definition.
+ */
+void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) {
+  int i;
+  for (i = 0; i < aec->num_partitions; i++) {
+    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int pos = i * PART_LEN1;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >=  aec->num_partitions) {
+      xPos -=  aec->num_partitions * (PART_LEN1);
+    }
+    float* yf0 = yf[0];
+    float* yf1 = yf[1];
+    float* aRe = aec->xfBuf[0] + xPos;
+    float* aIm = aec->xfBuf[1] + xPos;
+    float* bRe = aec->wfBuf[0] + pos;
+    float* bIm = aec->wfBuf[1] + pos;
+    float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
+    int len = PART_LEN1 >> 1;
+    /* f0..f13 are scratch FP operands. f10 and f13 appear in the operand list
+     * but are not referenced by any instruction below; f11 and f12 are used
+     * only when MIPS32_R2_LE is not defined. */
+    __asm __volatile (
+      ".set       push                                                \n\t"
+      ".set       noreorder                                           \n\t"
+     "1:                                                              \n\t"
+      "lwc1       %[f0],      0(%[aRe])                               \n\t"
+      "lwc1       %[f1],      0(%[bRe])                               \n\t"
+      "lwc1       %[f2],      0(%[bIm])                               \n\t"
+      "lwc1       %[f3],      0(%[aIm])                               \n\t"
+      "lwc1       %[f4],      4(%[aRe])                               \n\t"
+      "lwc1       %[f5],      4(%[bRe])                               \n\t"
+      "lwc1       %[f6],      4(%[bIm])                               \n\t"
+      "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
+      "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
+      "mul.s      %[f9],      %[f4],          %[f5]                   \n\t"
+      "mul.s      %[f4],      %[f4],          %[f6]                   \n\t"
+      "lwc1       %[f7],      4(%[aIm])                               \n\t"
+#if !defined(MIPS32_R2_LE)
+      "mul.s      %[f12],     %[f2],          %[f3]                   \n\t"
+      "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
+      "mul.s      %[f11],     %[f6],          %[f7]                   \n\t"
+      "addiu      %[aRe],     %[aRe],         8                       \n\t"
+      "addiu      %[aIm],     %[aIm],         8                       \n\t"
+      "addiu      %[len],     %[len],         -1                      \n\t"
+      "sub.s      %[f8],      %[f8],          %[f12]                  \n\t"
+      "mul.s      %[f12],     %[f7],          %[f5]                   \n\t"
+      "lwc1       %[f2],      0(%[yf0])                               \n\t"
+      "add.s      %[f1],      %[f0],          %[f1]                   \n\t"
+      "lwc1       %[f3],      0(%[yf1])                               \n\t"
+      "sub.s      %[f9],      %[f9],          %[f11]                  \n\t"
+      "lwc1       %[f6],      4(%[yf0])                               \n\t"
+      "add.s      %[f4],      %[f4],          %[f12]                  \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+      "addiu      %[aRe],     %[aRe],         8                       \n\t"
+      "addiu      %[aIm],     %[aIm],         8                       \n\t"
+      "addiu      %[len],     %[len],         -1                      \n\t"
+      "nmsub.s    %[f8],      %[f8],          %[f2],      %[f3]       \n\t"
+      "lwc1       %[f2],      0(%[yf0])                               \n\t"
+      "madd.s     %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
+      "lwc1       %[f3],      0(%[yf1])                               \n\t"
+      "nmsub.s    %[f9],      %[f9],          %[f6],      %[f7]       \n\t"
+      "lwc1       %[f6],      4(%[yf0])                               \n\t"
+      "madd.s     %[f4],      %[f4],          %[f7],      %[f5]       \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+      "lwc1       %[f5],      4(%[yf1])                               \n\t"
+      "add.s      %[f2],      %[f2],          %[f8]                   \n\t"
+      "addiu      %[bRe],     %[bRe],         8                       \n\t"
+      "addiu      %[bIm],     %[bIm],         8                       \n\t"
+      "add.s      %[f3],      %[f3],          %[f1]                   \n\t"
+      "add.s      %[f6],      %[f6],          %[f9]                   \n\t"
+      "add.s      %[f5],      %[f5],          %[f4]                   \n\t"
+      "swc1       %[f2],      0(%[yf0])                               \n\t"
+      "swc1       %[f3],      0(%[yf1])                               \n\t"
+      "swc1       %[f6],      4(%[yf0])                               \n\t"
+      "swc1       %[f5],      4(%[yf1])                               \n\t"
+      "addiu      %[yf0],     %[yf0],         8                       \n\t"
+      "bgtz       %[len],     1b                                      \n\t"
+      " addiu     %[yf1],     %[yf1],         8                       \n\t"
+      "lwc1       %[f0],      0(%[aRe])                               \n\t"
+      "lwc1       %[f1],      0(%[bRe])                               \n\t"
+      "lwc1       %[f2],      0(%[bIm])                               \n\t"
+      "lwc1       %[f3],      0(%[aIm])                               \n\t"
+      "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
+      "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
+#if !defined(MIPS32_R2_LE)
+      "mul.s      %[f12],     %[f2],          %[f3]                   \n\t"
+      "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
+      "sub.s      %[f8],      %[f8],          %[f12]                  \n\t"
+      "lwc1       %[f2],      0(%[yf0])                               \n\t"
+      "add.s      %[f1],      %[f0],          %[f1]                   \n\t"
+      "lwc1       %[f3],      0(%[yf1])                               \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+      "nmsub.s    %[f8],      %[f8],          %[f2],      %[f3]       \n\t"
+      "lwc1       %[f2],      0(%[yf0])                               \n\t"
+      "madd.s     %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
+      "lwc1       %[f3],      0(%[yf1])                               \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+      "add.s      %[f2],      %[f2],          %[f8]                   \n\t"
+      "add.s      %[f3],      %[f3],          %[f1]                   \n\t"
+      "swc1       %[f2],      0(%[yf0])                               \n\t"
+      "swc1       %[f3],      0(%[yf1])                               \n\t"
+      ".set       pop                                                 \n\t"
+      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
+        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
+        [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
+        [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe),
+        [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm),
+        [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len)
+      :
+      : "memory"
+    );
+  }
+}
+
+void WebRtcAec_FilterAdaptation_mips(AecCore* aec,
+                                     float* fft,
+                                     float ef[2][PART_LEN1]) {
+  int i;
+  for (i = 0; i < aec->num_partitions; i++) {
+    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
+    int pos;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
+      xPos -= aec->num_partitions * PART_LEN1;
+    }
+
+    pos = i * PART_LEN1;
+    float* aRe = aec->xfBuf[0] + xPos;
+    float* aIm = aec->xfBuf[1] + xPos;
+    float* bRe = ef[0];
+    float* bIm = ef[1];
+    float* fft_tmp;
+
+    float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12;
+    int len = PART_LEN >> 1;
+
+    __asm __volatile (
+      ".set       push                                                \n\t"
+      ".set       noreorder                                           \n\t"
+      "addiu      %[fft_tmp], %[fft],         0                       \n\t"
+     "1:                                                              \n\t"
+      "lwc1       %[f0],      0(%[aRe])                               \n\t"
+      "lwc1       %[f1],      0(%[bRe])                               \n\t"
+      "lwc1       %[f2],      0(%[bIm])                               \n\t"
+      "lwc1       %[f4],      4(%[aRe])                               \n\t"
+      "lwc1       %[f5],      4(%[bRe])                               \n\t"
+      "lwc1       %[f6],      4(%[bIm])                               \n\t"
+      "addiu      %[aRe],     %[aRe],         8                       \n\t"
+      "addiu      %[bRe],     %[bRe],         8                       \n\t"
+      "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
+      "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
+      "lwc1       %[f3],      0(%[aIm])                               \n\t"
+      "mul.s      %[f9],      %[f4],          %[f5]                   \n\t"
+      "lwc1       %[f7],      4(%[aIm])                               \n\t"
+      "mul.s      %[f4],      %[f4],          %[f6]                   \n\t"
+#if !defined(MIPS32_R2_LE)
+      "mul.s      %[f10],     %[f3],          %[f2]                   \n\t"
+      "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
+      "mul.s      %[f11],     %[f7],          %[f6]                   \n\t"
+      "mul.s      %[f5],      %[f7],          %[f5]                   \n\t"
+      "addiu      %[aIm],     %[aIm],         8                       \n\t"
+      "addiu      %[bIm],     %[bIm],         8                       \n\t"
+      "addiu      %[len],     %[len],         -1                      \n\t"
+      "add.s      %[f8],      %[f8],          %[f10]                  \n\t"
+      "sub.s      %[f1],      %[f0],          %[f1]                   \n\t"
+      "add.s      %[f9],      %[f9],          %[f11]                  \n\t"
+      "sub.s      %[f5],      %[f4],          %[f5]                   \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+      "addiu      %[aIm],     %[aIm],         8                       \n\t"
+      "addiu      %[bIm],     %[bIm],         8                       \n\t"
+      "addiu      %[len],     %[len],         -1                      \n\t"
+      "madd.s     %[f8],      %[f8],          %[f3],      %[f2]       \n\t"
+      "nmsub.s    %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
+      "madd.s     %[f9],      %[f9],          %[f7],      %[f6]       \n\t"
+      "nmsub.s    %[f5],      %[f4],          %[f7],      %[f5]       \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+      "swc1       %[f8],      0(%[fft_tmp])                           \n\t"
+      "swc1       %[f1],      4(%[fft_tmp])                           \n\t"
+      "swc1       %[f9],      8(%[fft_tmp])                           \n\t"
+      "swc1       %[f5],      12(%[fft_tmp])                          \n\t"
+      "bgtz       %[len],     1b                                      \n\t"
+      " addiu     %[fft_tmp], %[fft_tmp],     16                      \n\t"
+      "lwc1       %[f0],      0(%[aRe])                               \n\t"
+      "lwc1       %[f1],      0(%[bRe])                               \n\t"
+      "lwc1       %[f2],      0(%[bIm])                               \n\t"
+      "lwc1       %[f3],      0(%[aIm])                               \n\t"
+      "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
+#if !defined(MIPS32_R2_LE)
+      "mul.s      %[f10],     %[f3],          %[f2]                   \n\t"
+      "add.s      %[f8],      %[f8],          %[f10]                  \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+      "madd.s     %[f8],      %[f8],          %[f3],      %[f2]       \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+      "swc1       %[f8],      4(%[fft])                               \n\t"
+      ".set       pop                                                 \n\t"
+      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
+        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
+        [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
+        [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm),
+        [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp),
+        [len] "+r" (len)
+      : [fft] "r" (fft)
+      : "memory"
+    );
+
+    aec_rdft_inverse_128(fft);
+    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
+
+    // fft scaling
+    {
+      float scale = 2.0f / PART_LEN2;
+      __asm __volatile (
+        ".set     push                                    \n\t"
+        ".set     noreorder                               \n\t"
+        "addiu    %[fft_tmp], %[fft],        0            \n\t"
+        "addiu    %[len],     $zero,         8            \n\t"
+       "1:                                                \n\t"
+        "addiu    %[len],     %[len],        -1           \n\t"
+        "lwc1     %[f0],      0(%[fft_tmp])               \n\t"
+        "lwc1     %[f1],      4(%[fft_tmp])               \n\t"
+        "lwc1     %[f2],      8(%[fft_tmp])               \n\t"
+        "lwc1     %[f3],      12(%[fft_tmp])              \n\t"
+        "mul.s    %[f0],      %[f0],         %[scale]     \n\t"
+        "mul.s    %[f1],      %[f1],         %[scale]     \n\t"
+        "mul.s    %[f2],      %[f2],         %[scale]     \n\t"
+        "mul.s    %[f3],      %[f3],         %[scale]     \n\t"
+        "lwc1     %[f4],      16(%[fft_tmp])              \n\t"
+        "lwc1     %[f5],      20(%[fft_tmp])              \n\t"
+        "lwc1     %[f6],      24(%[fft_tmp])              \n\t"
+        "lwc1     %[f7],      28(%[fft_tmp])              \n\t"
+        "mul.s    %[f4],      %[f4],         %[scale]     \n\t"
+        "mul.s    %[f5],      %[f5],         %[scale]     \n\t"
+        "mul.s    %[f6],      %[f6],         %[scale]     \n\t"
+        "mul.s    %[f7],      %[f7],         %[scale]     \n\t"
+        "swc1     %[f0],      0(%[fft_tmp])               \n\t"
+        "swc1     %[f1],      4(%[fft_tmp])               \n\t"
+        "swc1     %[f2],      8(%[fft_tmp])               \n\t"
+        "swc1     %[f3],      12(%[fft_tmp])              \n\t"
+        "swc1     %[f4],      16(%[fft_tmp])              \n\t"
+        "swc1     %[f5],      20(%[fft_tmp])              \n\t"
+        "swc1     %[f6],      24(%[fft_tmp])              \n\t"
+        "swc1     %[f7],      28(%[fft_tmp])              \n\t"
+        "bgtz     %[len],     1b                          \n\t"
+        " addiu   %[fft_tmp], %[fft_tmp],    32           \n\t"
+        ".set     pop                                     \n\t"
+        : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+          [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
+          [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
+          [fft_tmp] "=&r" (fft_tmp)
+        : [scale] "f" (scale), [fft] "r" (fft)
+        : "memory"
+      );
+    }
+    aec_rdft_forward_128(fft);
+    aRe = aec->wfBuf[0] + pos;
+    aIm = aec->wfBuf[1] + pos;
+    __asm __volatile (
+      ".set     push                                    \n\t"
+      ".set     noreorder                               \n\t"
+      "addiu    %[fft_tmp], %[fft],        0            \n\t"
+      "addiu    %[len],     $zero,         31           \n\t"
+      "lwc1     %[f0],      0(%[aRe])                   \n\t"
+      "lwc1     %[f1],      0(%[fft_tmp])               \n\t"
+      "lwc1     %[f2],      256(%[aRe])                 \n\t"
+      "lwc1     %[f3],      4(%[fft_tmp])               \n\t"
+      "lwc1     %[f4],      4(%[aRe])                   \n\t"
+      "lwc1     %[f5],      8(%[fft_tmp])               \n\t"
+      "lwc1     %[f6],      4(%[aIm])                   \n\t"
+      "lwc1     %[f7],      12(%[fft_tmp])              \n\t"
+      "add.s    %[f0],      %[f0],         %[f1]        \n\t"
+      "add.s    %[f2],      %[f2],         %[f3]        \n\t"
+      "add.s    %[f4],      %[f4],         %[f5]        \n\t"
+      "add.s    %[f6],      %[f6],         %[f7]        \n\t"
+      "addiu    %[fft_tmp], %[fft_tmp],    16           \n\t"
+      "swc1     %[f0],      0(%[aRe])                   \n\t"
+      "swc1     %[f2],      256(%[aRe])                 \n\t"
+      "swc1     %[f4],      4(%[aRe])                   \n\t"
+      "addiu    %[aRe],     %[aRe],        8            \n\t"
+      "swc1     %[f6],      4(%[aIm])                   \n\t"
+      "addiu    %[aIm],     %[aIm],        8            \n\t"
+     "1:                                                \n\t"
+      "lwc1     %[f0],      0(%[aRe])                   \n\t"
+      "lwc1     %[f1],      0(%[fft_tmp])               \n\t"
+      "lwc1     %[f2],      0(%[aIm])                   \n\t"
+      "lwc1     %[f3],      4(%[fft_tmp])               \n\t"
+      "lwc1     %[f4],      4(%[aRe])                   \n\t"
+      "lwc1     %[f5],      8(%[fft_tmp])               \n\t"
+      "lwc1     %[f6],      4(%[aIm])                   \n\t"
+      "lwc1     %[f7],      12(%[fft_tmp])              \n\t"
+      "add.s    %[f0],      %[f0],         %[f1]        \n\t"
+      "add.s    %[f2],      %[f2],         %[f3]        \n\t"
+      "add.s    %[f4],      %[f4],         %[f5]        \n\t"
+      "add.s    %[f6],      %[f6],         %[f7]        \n\t"
+      "addiu    %[len],     %[len],        -1           \n\t"
+      "addiu    %[fft_tmp], %[fft_tmp],    16           \n\t"
+      "swc1     %[f0],      0(%[aRe])                   \n\t"
+      "swc1     %[f2],      0(%[aIm])                   \n\t"
+      "swc1     %[f4],      4(%[aRe])                   \n\t"
+      "addiu    %[aRe],     %[aRe],        8            \n\t"
+      "swc1     %[f6],      4(%[aIm])                   \n\t"
+      "bgtz     %[len],     1b                          \n\t"
+      " addiu   %[aIm],     %[aIm],        8            \n\t"
+      ".set     pop                                     \n\t"
+      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
+        [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
+        [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm)
+      : [fft] "r" (fft)
+      : "memory"
+    );
+  }
+}
+
+// MIPS implementation of the overdrive-and-suppress step.
+//
+// For each of the PART_LEN1 bins this function:
+//   1. replaces hNl[i] by wC * hNlFb + (1 - wC) * hNl[i] when hNl[i] > hNlFb,
+//      where wC is the matching entry of WebRtcAec_weightCurve,
+//   2. raises hNl[i] to the power
+//      aec->overDriveSm * WebRtcAec_overDriveCurve[i] (plain C, via powf),
+//   3. multiplies efw[0][i] and efw[1][i] by the new hNl[i] and negates the
+//      efw[1][i] result.
+//
+// hNl and efw are updated in place.
+void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
+                                         float hNl[PART_LEN1],
+                                         const float hNlFb,
+                                         float efw[2][PART_LEN1]) {
+  int i;
+  const float one = 1.0;
+  // Running pointers advanced by the asm blocks (one float per iteration).
+  float* p_hNl;
+  float* p_efw0;
+  float* p_efw1;
+  float* p_WebRtcAec_wC;
+  float temp1, temp2, temp3, temp4;
+
+  p_hNl = &hNl[0];
+  p_efw0 = &efw[0][0];
+  p_efw1 = &efw[1][0];
+  p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0];
+
+  for (i = 0; i < PART_LEN1; i++) {
+    // Weight subbands
+    // temp1 = *p_hNl, temp2 = *p_wC. When hNlFb < temp1 the weighted value
+    // temp2 * hNlFb + (1 - temp2) * temp1 is stored back to *p_hNl; otherwise
+    // the bc1f branch skips the store. The mul.s sits in the branch delay
+    // slot, so temp3 is computed on both paths. p_wC is advanced here, while
+    // p_hNl is only advanced by the second asm block below.
+    __asm __volatile (
+      ".set      push                                              \n\t"
+      ".set      noreorder                                         \n\t"
+      "lwc1      %[temp1],    0(%[p_hNl])                          \n\t"
+      "lwc1      %[temp2],    0(%[p_wC])                           \n\t"
+      "c.lt.s    %[hNlFb],    %[temp1]                             \n\t"
+      "bc1f      1f                                                \n\t"
+      " mul.s    %[temp3],    %[temp2],     %[hNlFb]               \n\t"
+      "sub.s     %[temp4],    %[one],       %[temp2]               \n\t"
+#if !defined(MIPS32_R2_LE)
+      "mul.s     %[temp1],    %[temp1],     %[temp4]               \n\t"
+      "add.s     %[temp1],    %[temp3],     %[temp1]               \n\t"
+#else // #if !defined(MIPS32_R2_LE)
+      "madd.s    %[temp1],    %[temp3],     %[temp1],   %[temp4]   \n\t"
+#endif // #if !defined(MIPS32_R2_LE)
+      "swc1      %[temp1],    0(%[p_hNl])                          \n\t"
+     "1:                                                           \n\t"
+      "addiu     %[p_wC],     %[p_wC],      4                      \n\t"
+      ".set      pop                                               \n\t"
+      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
+        [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC)
+      : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl)
+      : "memory"
+    );
+
+    // Overdrive: done in C because it needs the libm powf().
+    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
+
+    // Suppress error signal: reload the updated gain (temp1) and scale both
+    // efw components by it. The efw[1] product is negated (neg.s) before it
+    // is stored. All three pointers are post-incremented, hence the -4
+    // offsets on the stores.
+    __asm __volatile (
+      "lwc1      %[temp1],    0(%[p_hNl])              \n\t"
+      "lwc1      %[temp3],    0(%[p_efw1])             \n\t"
+      "lwc1      %[temp2],    0(%[p_efw0])             \n\t"
+      "addiu     %[p_hNl],    %[p_hNl],     4          \n\t"
+      "mul.s     %[temp3],    %[temp3],     %[temp1]   \n\t"
+      "mul.s     %[temp2],    %[temp2],     %[temp1]   \n\t"
+      "addiu     %[p_efw0],   %[p_efw0],    4          \n\t"
+      "addiu     %[p_efw1],   %[p_efw1],    4          \n\t"
+      "neg.s     %[temp4],    %[temp3]                 \n\t"
+      "swc1      %[temp2],    -4(%[p_efw0])            \n\t"
+      "swc1      %[temp4],    -4(%[p_efw1])            \n\t"
+      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
+        [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1),
+        [p_hNl] "+r" (p_hNl)
+      :
+      : "memory"
+    );
+  }
+}
+
+// MIPS implementation of the error-signal scaling step.
+//
+// For each of the PART_LEN1 bins, ef[0][i] and ef[1][i] are divided by
+// (aec->xPow[i] + 1e-10f). If the squared magnitude of the result exceeds
+// error_threshold^2, both components are additionally multiplied by
+// error_threshold / (magnitude + 1e-10f). Finally both are multiplied by the
+// step size mu. The step size and threshold are taken from the extended
+// filter constants or from the aec instance, depending on
+// aec->extended_filter_enabled. ef is updated in place.
+void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
+  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+  const float error_threshold = aec->extended_filter_enabled
+                                    ? kExtendedErrorThreshold
+                                    : aec->normal_error_threshold;
+  // Loop counter, decremented by the asm loop until it reaches zero.
+  int len = (PART_LEN1);
+  float* ef0 = ef[0];
+  float* ef1 = ef[1];
+  float* xPow = aec->xPow;
+  float fac1 = 1e-10f;
+  // The asm compares squared magnitudes, so the sqrt is only computed for
+  // bins that actually exceed the threshold.
+  float err_th2 = error_threshold * error_threshold;
+  float f0, f1, f2;
+#if !defined(MIPS32_R2_LE)
+  // Scratch register only needed when madd.s is not used.
+  float f3;
+#endif
+
+  // Label 1 is the per-bin loop head; label 2 is the join point that skips
+  // the threshold rescaling when f1^2 + f2^2 <= err_th2. The final pointer
+  // increment (ef1) sits in the delay slot of the loop branch.
+  __asm __volatile (
+    ".set       push                                   \n\t"
+    ".set       noreorder                              \n\t"
+   "1:                                                 \n\t"
+    "lwc1       %[f0],     0(%[xPow])                  \n\t"
+    "lwc1       %[f1],     0(%[ef0])                   \n\t"
+    "lwc1       %[f2],     0(%[ef1])                   \n\t"
+    "add.s      %[f0],     %[f0],       %[fac1]        \n\t"
+    "div.s      %[f1],     %[f1],       %[f0]          \n\t"
+    "div.s      %[f2],     %[f2],       %[f0]          \n\t"
+    "mul.s      %[f0],     %[f1],       %[f1]          \n\t"
+#if defined(MIPS32_R2_LE)
+    "madd.s     %[f0],     %[f0],       %[f2],   %[f2] \n\t"
+#else
+    "mul.s      %[f3],     %[f2],       %[f2]          \n\t"
+    "add.s      %[f0],     %[f0],       %[f3]          \n\t"
+#endif
+    "c.le.s     %[f0],     %[err_th2]                  \n\t"
+    "nop                                               \n\t"
+    "bc1t       2f                                     \n\t"
+    " nop                                              \n\t"
+    "sqrt.s     %[f0],     %[f0]                       \n\t"
+    "add.s      %[f0],     %[f0],       %[fac1]        \n\t"
+    "div.s      %[f0],     %[err_th],   %[f0]          \n\t"
+    "mul.s      %[f1],     %[f1],       %[f0]          \n\t"
+    "mul.s      %[f2],     %[f2],       %[f0]          \n\t"
+   "2:                                                 \n\t"
+    "mul.s      %[f1],     %[f1],       %[mu]          \n\t"
+    "mul.s      %[f2],     %[f2],       %[mu]          \n\t"
+    "swc1       %[f1],     0(%[ef0])                   \n\t"
+    "swc1       %[f2],     0(%[ef1])                   \n\t"
+    "addiu      %[len],    %[len],      -1             \n\t"
+    "addiu      %[xPow],   %[xPow],     4              \n\t"
+    "addiu      %[ef0],    %[ef0],      4              \n\t"
+    "bgtz       %[len],    1b                          \n\t"
+    " addiu     %[ef1],    %[ef1],      4              \n\t"
+    ".set       pop                                    \n\t"
+    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
+#if !defined(MIPS32_R2_LE)
+      [f3] "=&f" (f3),
+#endif
+      [xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
+      [len] "+r" (len)
+    : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
+      [err_th] "f" (error_threshold)
+    : "memory"
+  );
+}
+
+// Points the AEC routine hooks at the MIPS implementations provided by this
+// file. The assignments are independent of one another.
+void WebRtcAec_InitAec_mips(void) {
+  WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
+  WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
+  WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
+  WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
+  WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
+}
+
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_neon.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_neon.c
@ -0,0 +1,736 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The core AEC algorithm, neon version of speed-critical functions.
+ *
+ * Based on aec_core_sse2.c.
+ */
+
+#include <arm_neon.h>
+#include <math.h>
+#include <string.h>  // memset
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+enum { kShiftExponentIntoTopMantissa = 8 };
+enum { kFloatExponentShift = 23 };
+
+// Real part of the complex product (aRe + i*aIm) * (bRe + i*bIm).
+__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
+  const float re_times_re = aRe * bRe;
+  const float im_times_im = aIm * bIm;
+  return re_times_re - im_times_im;
+}
+
+// Imaginary part of the complex product (aRe + i*aIm) * (bRe + i*bIm).
+__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
+  const float re_times_im = aRe * bIm;
+  const float im_times_re = aIm * bRe;
+  return re_times_im + im_times_re;
+}
+
+// Accumulates into yf, for every filter partition, the complex product of the
+// partition's far-end spectrum (aec->xfBuf) with its filter coefficients
+// (aec->wfBuf). yf[0] holds the real parts and yf[1] the imaginary parts.
+static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) {
+  const int num_partitions = aec->num_partitions;
+  int part;
+  for (part = 0; part < num_partitions; part++) {
+    // Far-end block used by this partition; the offset wraps back to the start
+    // of xfBuf once the block index reaches num_partitions.
+    const int far_block = part + aec->xfBufBlockPos;
+    const int x_off = (far_block >= num_partitions)
+                          ? far_block * PART_LEN1 - num_partitions * PART_LEN1
+                          : far_block * PART_LEN1;
+    const int w_off = part * PART_LEN1;
+    const float* x_re = &aec->xfBuf[0][x_off];
+    const float* x_im = &aec->xfBuf[1][x_off];
+    const float* w_re = &aec->wfBuf[0][w_off];
+    const float* w_im = &aec->wfBuf[1][w_off];
+    int k;
+
+    // Four bins per iteration.
+    for (k = 0; k + 3 < PART_LEN1; k += 4) {
+      const float32x4_t xr = vld1q_f32(x_re + k);
+      const float32x4_t xi = vld1q_f32(x_im + k);
+      const float32x4_t wr = vld1q_f32(w_re + k);
+      const float32x4_t wi = vld1q_f32(w_im + k);
+      const float32x4_t acc_re = vld1q_f32(&yf[0][k]);
+      const float32x4_t acc_im = vld1q_f32(&yf[1][k]);
+      // re = xr * wr - xi * wi, im = xr * wi + xi * wr
+      const float32x4_t prod_re = vmlsq_f32(vmulq_f32(xr, wr), xi, wi);
+      const float32x4_t prod_im = vmlaq_f32(vmulq_f32(xr, wi), xi, wr);
+      vst1q_f32(&yf[0][k], vaddq_f32(acc_re, prod_re));
+      vst1q_f32(&yf[1][k], vaddq_f32(acc_im, prod_im));
+    }
+    // Leftover bins that do not fill a whole vector.
+    for (; k < PART_LEN1; k++) {
+      yf[0][k] += MulRe(x_re[k], x_im[k], w_re[k], w_im[k]);
+      yf[1][k] += MulIm(x_re[k], x_im[k], w_re[k], w_im[k]);
+    }
+  }
+}
+
+// ARM64's arm_neon.h has already defined vdivq_f32 vsqrtq_f32.
+#if !defined (WEBRTC_ARCH_ARM64)
+// Lane-wise a / b for targets whose arm_neon.h lacks vdivq_f32, computed as
+// a * (1 / b) with a refined reciprocal estimate.
+static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) {
+  // Start from the hardware reciprocal estimate of b.
+  float32x4_t recip = vrecpeq_f32(b);
+  // Two Newton-Raphson refinements (from the ARM documentation):
+  //     x[n+1] = x[n] * (2 - d * x[n])
+  // which converges to 1/d when x0 comes from VRECPE applied to d.
+  // Note: The precision did not improve after 2 iterations.
+  recip = vmulq_f32(vrecpsq_f32(b, recip), recip);
+  recip = vmulq_f32(vrecpsq_f32(b, recip), recip);
+  return vmulq_f32(a, recip);
+}
+
+// Lane-wise square root for targets whose arm_neon.h lacks vsqrtq_f32,
+// computed as s * (1 / sqrt(s)) with a refined reciprocal-square-root
+// estimate. Lanes whose estimate is +infinity are forced to return zero.
+static float32x4_t vsqrtq_f32(float32x4_t s) {
+  int i;
+  float32x4_t x = vrsqrteq_f32(s);
+
+  // Code to handle sqrt(0).
+  // If the input to sqrtf() is zero, a zero will be returned.
+  // If the input to vrsqrteq_f32() is zero, positive infinity is returned.
+  const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000);
+  // check for divide by zero
+  const uint32x4_t div_by_zero = vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(x));
+  // zero out the positive infinity results
+  x = vreinterpretq_f32_u32(vandq_u32(vmvnq_u32(div_by_zero),
+                                      vreinterpretq_u32_f32(x)));
+  // from arm documentation
+  // The Newton-Raphson iteration:
+  //     x[n+1] = x[n] * (3 - d * (x[n] * x[n])) / 2
+  // converges to (1/√d) if x0 is the result of VRSQRTE applied to d.
+  //
+  // Note: The precision did not improve after 2 iterations.
+  for (i = 0; i < 2; i++) {
+    x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);
+  }
+  // sqrt(s) = s * 1/sqrt(s)
+  return vmulq_f32(s, x);
+}
+#endif  // WEBRTC_ARCH_ARM64
+
+// NEON implementation of the error-signal scaling step.
+//
+// For each bin, ef is divided by (aec->xPow[i] + 1e-10f). If the magnitude of
+// the result exceeds the error threshold, it is rescaled by
+// threshold / (magnitude + 1e-10f). The result is then multiplied by the step
+// size mu. Step size and threshold come from the extended-filter constants or
+// from the aec instance, depending on aec->extended_filter_enabled.
+// ef is updated in place.
+static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
+  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+  const float error_threshold = aec->extended_filter_enabled ?
+      kExtendedErrorThreshold : aec->normal_error_threshold;
+  const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);
+  const float32x4_t kMu = vmovq_n_f32(mu);
+  const float32x4_t kThresh = vmovq_n_f32(error_threshold);
+  int i;
+  // vectorized code (four at once)
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    const float32x4_t xPow = vld1q_f32(&aec->xPow[i]);
+    const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);
+    const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);
+    const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f);
+    float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);
+    float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);
+    const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);
+    const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im);
+    const float32x4_t absEf = vsqrtq_f32(ef_sum2);
+    // Per-lane mask: all ones where the magnitude exceeds the threshold.
+    const uint32x4_t bigger = vcgtq_f32(absEf, kThresh);
+    const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f);
+    const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus);
+    // Rescaled candidates, computed for every lane and selected below.
+    uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv));
+    uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv));
+    // Branch-free select: keep the unscaled value where the mask is clear...
+    uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger),
+                                     vreinterpretq_u32_f32(ef_re));
+    uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger),
+                                     vreinterpretq_u32_f32(ef_im));
+    // ...and the rescaled value where it is set, then merge the two.
+    ef_re_if = vandq_u32(bigger, ef_re_if);
+    ef_im_if = vandq_u32(bigger, ef_im_if);
+    ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if);
+    ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if);
+    // Stepsize factor
+    ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu);
+    ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu);
+    vst1q_f32(&ef[0][i], ef_re);
+    vst1q_f32(&ef[1][i], ef_im);
+  }
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
+    float abs_ef;
+    ef[0][i] /= (aec->xPow[i] + 1e-10f);
+    ef[1][i] /= (aec->xPow[i] + 1e-10f);
+    abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
+
+    if (abs_ef > error_threshold) {
+      abs_ef = error_threshold / (abs_ef + 1e-10f);
+      ef[0][i] *= abs_ef;
+      ef[1][i] *= abs_ef;
+    }
+
+    // Stepsize factor
+    ef[0][i] *= mu;
+    ef[1][i] *= mu;
+  }
+}
+
+// NEON implementation of the filter adaptation step.
+//
+// For every partition: builds conj(xfBuf) * ef as interleaved re/im pairs in
+// fft, runs the inverse transform, zeroes the second half of the buffer,
+// scales the first half by 2 / PART_LEN2, runs the forward transform and adds
+// the result to the partition's coefficients in aec->wfBuf.
+//
+// fft is used as scratch; the loops below touch indices
+// [0, 2 * PART_LEN).
+static void FilterAdaptationNEON(AecCore* aec,
+                                 float* fft,
+                                 float ef[2][PART_LEN1]) {
+  int i;
+  const int num_partitions = aec->num_partitions;
+  for (i = 0; i < num_partitions; i++) {
+    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int pos = i * PART_LEN1;
+    int j;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >= num_partitions) {
+      xPos -= num_partitions * PART_LEN1;
+    }
+
+    // Process the whole array...
+    for (j = 0; j < PART_LEN; j += 4) {
+      // Load xfBuf and ef.
+      const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
+      const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
+      const float32x4_t ef_re = vld1q_f32(&ef[0][j]);
+      const float32x4_t ef_im = vld1q_f32(&ef[1][j]);
+      // Calculate the product of conjugate(xfBuf) by ef.
+      //   re(conjugate(a) * b) = aRe * bRe + aIm * bIm
+      //   im(conjugate(a) * b)=  aRe * bIm - aIm * bRe
+      const float32x4_t a = vmulq_f32(xfBuf_re, ef_re);
+      const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im);
+      const float32x4_t c = vmulq_f32(xfBuf_re, ef_im);
+      const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re);
+      // Interleave real and imaginary parts.
+      const float32x4x2_t g_n_h = vzipq_f32(e, f);
+      // Store
+      vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]);
+      vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]);
+    }
+    // ... and fixup the first imaginary entry.
+    // fft[1] is overwritten with the real part of conj(xfBuf) * ef for bin
+    // PART_LEN (the negated imaginary argument performs the conjugation).
+    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
+                   -aec->xfBuf[1][xPos + PART_LEN],
+                   ef[0][PART_LEN],
+                   ef[1][PART_LEN]);
+
+    aec_rdft_inverse_128(fft);
+    // Clear the second half of the buffer before transforming back.
+    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
+
+    // fft scaling
+    {
+      const float scale = 2.0f / PART_LEN2;
+      const float32x4_t scale_ps = vmovq_n_f32(scale);
+      for (j = 0; j < PART_LEN; j += 4) {
+        const float32x4_t fft_ps = vld1q_f32(&fft[j]);
+        const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps);
+        vst1q_f32(&fft[j], fft_scale);
+      }
+    }
+    aec_rdft_forward_128(fft);
+
+    {
+      // The vector loop below also adds fft[1] into wfBuf[1][pos]. fft[1] is
+      // accumulated into wfBuf[0][pos + PART_LEN] instead, so the original
+      // wfBuf[1][pos] is saved here and restored after the loop.
+      const float wt1 = aec->wfBuf[1][pos];
+      aec->wfBuf[0][pos + PART_LEN] += fft[1];
+      for (j = 0; j < PART_LEN; j += 4) {
+        float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
+        float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
+        const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]);
+        const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]);
+        // De-interleave: val[0] = even entries (re), val[1] = odd entries (im).
+        const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4);
+        wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]);
+        wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]);
+
+        vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re);
+        vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im);
+      }
+      aec->wfBuf[1][pos] = wt1;
+    }
+  }
+}
+
+// Lane-wise approximation of a^b, evaluated as exp2(b * log2(a)) with
+// polynomial approximations for both log2 and exp2. The result is
+// approximate; see the error figures quoted in the comments below.
+static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) {
+  // a^b = exp2(b * log2(a))
+  //   exp2(x) and log2(x) are calculated using polynomial approximations.
+  float32x4_t log2_a, b_log2_a, a_exp_b;
+
+  // Calculate log2(x), x = a.
+  {
+    // To calculate log2(x), we decompose x like this:
+    //   x = y * 2^n
+    //     n is an integer
+    //     y is in the [1.0, 2.0) range
+    //
+    //   log2(x) = log2(y) + n
+    //     n       can be evaluated by playing with float representation.
+    //     log2(y) in a small range can be approximated, this code uses an order
+    //             five polynomial approximation. The coefficients have been
+    //             estimated with the Remez algorithm and the resulting
+    //             polynomial has a maximum relative error of 0.00086%.
+
+    // Compute n.
+    //    This is done by masking the exponent, shifting it into the top bit of
+    //    the mantissa, putting eight into the biased exponent (to shift/
+    //    compensate the fact that the exponent has been shifted in the top/
+    //    fractional part) and finally getting rid of the implicit leading one
+    //    from the mantissa by subtracting it out.
+    const uint32x4_t vec_float_exponent_mask = vdupq_n_u32(0x7F800000);
+    const uint32x4_t vec_eight_biased_exponent = vdupq_n_u32(0x43800000);
+    const uint32x4_t vec_implicit_leading_one = vdupq_n_u32(0x43BF8000);
+    const uint32x4_t two_n = vandq_u32(vreinterpretq_u32_f32(a),
+                                       vec_float_exponent_mask);
+    const uint32x4_t n_1 = vshrq_n_u32(two_n, kShiftExponentIntoTopMantissa);
+    const uint32x4_t n_0 = vorrq_u32(n_1, vec_eight_biased_exponent);
+    const float32x4_t n =
+        vsubq_f32(vreinterpretq_f32_u32(n_0),
+                  vreinterpretq_f32_u32(vec_implicit_leading_one));
+    // Compute y.
+    //    Keep the mantissa bits of a and force the exponent field to the
+    //    bit pattern of 1.0f (0x3F800000).
+    const uint32x4_t vec_mantissa_mask = vdupq_n_u32(0x007FFFFF);
+    const uint32x4_t vec_zero_biased_exponent_is_one = vdupq_n_u32(0x3F800000);
+    const uint32x4_t mantissa = vandq_u32(vreinterpretq_u32_f32(a),
+                                          vec_mantissa_mask);
+    const float32x4_t y =
+        vreinterpretq_f32_u32(vorrq_u32(mantissa,
+                                        vec_zero_biased_exponent_is_one));
+    // Approximate log2(y) ~= (y - 1) * pol5(y).
+    //    pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
+    const float32x4_t C5 = vdupq_n_f32(-3.4436006e-2f);
+    const float32x4_t C4 = vdupq_n_f32(3.1821337e-1f);
+    const float32x4_t C3 = vdupq_n_f32(-1.2315303f);
+    const float32x4_t C2 = vdupq_n_f32(2.5988452f);
+    const float32x4_t C1 = vdupq_n_f32(-3.3241990f);
+    const float32x4_t C0 = vdupq_n_f32(3.1157899f);
+    // Horner evaluation: each step computes C_k + y * (previous value).
+    float32x4_t pol5_y = C5;
+    pol5_y = vmlaq_f32(C4, y, pol5_y);
+    pol5_y = vmlaq_f32(C3, y, pol5_y);
+    pol5_y = vmlaq_f32(C2, y, pol5_y);
+    pol5_y = vmlaq_f32(C1, y, pol5_y);
+    pol5_y = vmlaq_f32(C0, y, pol5_y);
+    const float32x4_t y_minus_one =
+        vsubq_f32(y, vreinterpretq_f32_u32(vec_zero_biased_exponent_is_one));
+    const float32x4_t log2_y = vmulq_f32(y_minus_one, pol5_y);
+
+    // Combine parts.
+    log2_a = vaddq_f32(n, log2_y);
+  }
+
+  // b * log2(a)
+  b_log2_a = vmulq_f32(b, log2_a);
+
+  // Calculate exp2(x), x = b * log2(a).
+  {
+    // To calculate 2^x, we decompose x like this:
+    //   x = n + y
+    //     n is an integer, the value of x - 0.5 rounded down, therefore
+    //     y is in the [0.5, 1.5) range
+    //
+    //   2^x = 2^n * 2^y
+    //     2^n can be evaluated by playing with float representation.
+    //     2^y in a small range can be approximated, this code uses an order two
+    //         polynomial approximation. The coefficients have been estimated
+    //         with the Remez algorithm and the resulting polynomial has a
+    //         maximum relative error of 0.17%.
+    // To avoid over/underflow, we reduce the range of input to ]-127, 129].
+    const float32x4_t max_input = vdupq_n_f32(129.f);
+    const float32x4_t min_input = vdupq_n_f32(-126.99999f);
+    // Despite the names, x_min is the input clamped from above and x_max is
+    // that value additionally clamped from below.
+    const float32x4_t x_min = vminq_f32(b_log2_a, max_input);
+    const float32x4_t x_max = vmaxq_f32(x_min, min_input);
+    // Compute n.
+    // NOTE(review): vcvtq_s32_f32 is documented as rounding toward zero,
+    // which differs from the "rounded down" described above when
+    // x - 0.5 is negative; confirm the quoted y range and error bound
+    // still hold for such inputs.
+    const float32x4_t half = vdupq_n_f32(0.5f);
+    const float32x4_t x_minus_half = vsubq_f32(x_max, half);
+    const int32x4_t x_minus_half_floor = vcvtq_s32_f32(x_minus_half);
+
+    // Compute 2^n.
+    //    Add the exponent bias and shift the result into the exponent field.
+    const int32x4_t float_exponent_bias = vdupq_n_s32(127);
+    const int32x4_t two_n_exponent =
+        vaddq_s32(x_minus_half_floor, float_exponent_bias);
+    const float32x4_t two_n =
+        vreinterpretq_f32_s32(vshlq_n_s32(two_n_exponent, kFloatExponentShift));
+    // Compute y.
+    const float32x4_t y = vsubq_f32(x_max, vcvtq_f32_s32(x_minus_half_floor));
+
+    // Approximate 2^y ~= C2 * y^2 + C1 * y + C0.
+    const float32x4_t C2 = vdupq_n_f32(3.3718944e-1f);
+    const float32x4_t C1 = vdupq_n_f32(6.5763628e-1f);
+    const float32x4_t C0 = vdupq_n_f32(1.0017247f);
+    float32x4_t exp2_y = C2;
+    exp2_y = vmlaq_f32(C1, y, exp2_y);
+    exp2_y = vmlaq_f32(C0, y, exp2_y);
+
+    // Combine parts.
+    a_exp_b = vmulq_f32(exp2_y, two_n);
+  }
+
+  return a_exp_b;
+}
+
+// NEON implementation of the overdrive-and-suppress step.
+//
+// For each bin: where hNl[i] > hNlFb, hNl[i] is replaced by
+// weightCurve[i] * hNlFb + (1 - weightCurve[i]) * hNl[i]; hNl[i] is then
+// raised to the power aec->overDriveSm * WebRtcAec_overDriveCurve[i]; finally
+// efw is multiplied by hNl[i] and its imaginary row (efw[1]) is negated.
+// hNl and efw are updated in place.
+//
+// The vectorized path uses vpowq_f32 while the scalar tail uses powf.
+static void OverdriveAndSuppressNEON(AecCore* aec,
+                                     float hNl[PART_LEN1],
+                                     const float hNlFb,
+                                     float efw[2][PART_LEN1]) {
+  int i;
+  const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
+  const float32x4_t vec_one = vdupq_n_f32(1.0f);
+  const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
+  const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm);
+
+  // vectorized code (four at once)
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    // Weight subbands
+    float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
+    const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);
+    // Per-lane mask: all ones where hNl > hNlFb.
+    const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);
+    const float32x4_t vec_weightCurve_hNlFb = vmulq_f32(vec_weightCurve,
+                                                        vec_hNlFb);
+    const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);
+    const float32x4_t vec_one_weightCurve_hNl = vmulq_f32(vec_one_weightCurve,
+                                                          vec_hNl);
+    // Branch-free select: original hNl where the mask is clear...
+    const uint32x4_t vec_if0 = vandq_u32(vmvnq_u32(bigger),
+                                         vreinterpretq_u32_f32(vec_hNl));
+    const float32x4_t vec_one_weightCurve_add =
+        vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);
+    // ...and the weighted value where it is set.
+    const uint32x4_t vec_if1 =
+        vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));
+
+    vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));
+
+    // Overdrive: hNl = hNl ^ (overDriveSm * overDriveCurve).
+    {
+      const float32x4_t vec_overDriveCurve =
+          vld1q_f32(&WebRtcAec_overDriveCurve[i]);
+      const float32x4_t vec_overDriveSm_overDriveCurve =
+          vmulq_f32(vec_overDriveSm, vec_overDriveCurve);
+      vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
+      vst1q_f32(&hNl[i], vec_hNl);
+    }
+
+    // Suppress error signal
+    {
+      float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
+      float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
+      vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
+      vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
+
+      // Ooura fft returns incorrect sign on imaginary component. It matters
+      // here because we are making an additive change with comfort noise.
+      vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
+      vst1q_f32(&efw[0][i], vec_efw_re);
+      vst1q_f32(&efw[1][i], vec_efw_im);
+    }
+  }
+
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
+    // Weight subbands
+    if (hNl[i] > hNlFb) {
+      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
+               (1 - WebRtcAec_weightCurve[i]) * hNl[i];
+    }
+
+    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
+
+    // Suppress error signal
+    efw[0][i] *= hNl[i];
+    efw[1][i] *= hNl[i];
+
+    // Ooura fft returns incorrect sign on imaginary component. It matters
+    // here because we are making an additive change with comfort noise.
+    efw[1][i] *= -1;
+  }
+}
+
+// Returns the index of the filter partition whose coefficients (aec->wfBuf)
+// carry the most energy. Ties keep the lowest index, and 0 is returned when
+// no partition has energy above zero.
+// TODO(bjornv): Spread computational cost by computing one partition per
+// block?
+static int PartitionDelay(const AecCore* aec) {
+  float max_energy = 0;
+  int best_partition = 0;
+  int part;
+
+  for (part = 0; part < aec->num_partitions; part++) {
+    const int base = part * PART_LEN1;
+    float32x4_t acc = vdupq_n_f32(0.0f);
+    float32x2_t folded;
+    float energy = 0;
+    int k;
+
+    // Four bins per iteration: acc += re^2, then acc += im^2.
+    for (k = 0; k + 3 < PART_LEN1; k += 4) {
+      const float32x4_t re = vld1q_f32(&aec->wfBuf[0][base + k]);
+      const float32x4_t im = vld1q_f32(&aec->wfBuf[1][base + k]);
+      acc = vmlaq_f32(vmlaq_f32(acc, re, re), im, im);
+    }
+
+    // Horizontal add of the four lanes: (A+B, C+D), then A+B+C+D.
+    folded = vpadd_f32(vget_low_f32(acc), vget_high_f32(acc));
+    folded = vpadd_f32(folded, folded);
+    energy = vget_lane_f32(folded, 0);
+
+    // Leftover bins that do not fill a whole vector.
+    for (; k < PART_LEN1; k++) {
+      energy += aec->wfBuf[0][base + k] * aec->wfBuf[0][base + k] +
+                aec->wfBuf[1][base + k] * aec->wfBuf[1][base + k];
+    }
+
+    if (energy > max_energy) {
+      best_partition = part;
+      max_energy = energy;
+    }
+  }
+  return best_partition;
+}
+
+// Updates the following smoothed  Power Spectral Densities (PSD):
+//  - sd  : near-end
+//  - se  : residual echo
+//  - sx  : far-end
+//  - sde : cross-PSD of near-end and residual echo
+//  - sxd : cross-PSD of near-end and far-end
+//
+// In addition to updating the PSDs, the filter divergence state is also
+// determined, upon which actions are taken.
+static void SmoothedPSD(AecCore* aec,
+                        float efw[2][PART_LEN1],
+                        float dfw[2][PART_LEN1],
+                        float xfw[2][PART_LEN1]) {
+  // Power estimate smoothing coefficients.
+  const float* ptrGCoh = aec->extended_filter_enabled
+      ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
+      : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
+  int i;
+  float sdSum = 0, seSum = 0;
+  const float32x4_t vec_15 =  vdupq_n_f32(WebRtcAec_kMinFarendPSD);
+  float32x4_t vec_sdSum = vdupq_n_f32(0.0f);
+  float32x4_t vec_seSum = vdupq_n_f32(0.0f);
+
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]);
+    const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]);
+    const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]);
+    const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
+    const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
+    const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
+    float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]);
+    float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]);
+    float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]);
+    float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
+    float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
+    float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);
+
+    vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1);
+    vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1);
+    vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1);
+    vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15);
+    vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]);
+    vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
+    vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);
+
+    vst1q_f32(&aec->sd[i], vec_sd);
+    vst1q_f32(&aec->se[i], vec_se);
+    vst1q_f32(&aec->sx[i], vec_sx);
+
+    {
+      float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
+      float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
+      float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
+      vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
+      vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]);
+      vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1);
+      vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0);
+      vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]);
+      vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]);
+      vst2q_f32(&aec->sde[i][0], vec_sde);
+    }
+
+    {
+      float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
+      float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0);
+      float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1);
+      vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]);
+      vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]);
+      vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1);
+      vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0);
+      vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]);
+      vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]);
+      vst2q_f32(&aec->sxd[i][0], vec_sxd);
+    }
+
+    vec_sdSum = vaddq_f32(vec_sdSum, vec_sd);
+    vec_seSum = vaddq_f32(vec_seSum, vec_se);
+  }
+  {
+    float32x2_t vec_sdSum_total;
+    float32x2_t vec_seSum_total;
+    // A B C D
+    vec_sdSum_total = vpadd_f32(vget_low_f32(vec_sdSum),
+                                vget_high_f32(vec_sdSum));
+    vec_seSum_total = vpadd_f32(vget_low_f32(vec_seSum),
+                                vget_high_f32(vec_seSum));
+    // A+B C+D
+    vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total);
+    vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total);
+    // A+B+C+D A+B+C+D
+    sdSum = vget_lane_f32(vec_sdSum_total, 0);
+    seSum = vget_lane_f32(vec_seSum_total, 0);
+  }
+
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
+    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
+                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
+    aec->se[i] = ptrGCoh[0] * aec->se[i] +
+                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
+    // We threshold here to protect against the ill-effects of a zero farend.
+    // The threshold is not arbitrarily chosen, but balances protection and
+    // adverse interaction with the algorithm's tuning.
+    // TODO(bjornv): investigate further why this is so sensitive.
+    aec->sx[i] =
+        ptrGCoh[0] * aec->sx[i] +
+        ptrGCoh[1] * WEBRTC_SPL_MAX(
+            xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
+            WebRtcAec_kMinFarendPSD);
+
+    aec->sde[i][0] =
+        ptrGCoh[0] * aec->sde[i][0] +
+        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
+    aec->sde[i][1] =
+        ptrGCoh[0] * aec->sde[i][1] +
+        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
+
+    aec->sxd[i][0] =
+        ptrGCoh[0] * aec->sxd[i][0] +
+        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
+    aec->sxd[i][1] =
+        ptrGCoh[0] * aec->sxd[i][1] +
+        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
+
+    sdSum += aec->sd[i];
+    seSum += aec->se[i];
+  }
+
+  // Divergent filter safeguard.
+  aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
+
+  if (aec->divergeState)
+    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
+
+  // Reset if error is significantly larger than nearend (13 dB).
+  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
+    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+}
+
+/* Window time domain data to be used by the fft.
+ *
+ * The first PART_LEN samples of x are multiplied by WebRtcAec_sqrtHanning read
+ * forwards; the next PART_LEN samples are multiplied by the same table read
+ * backwards starting at index PART_LEN. The products go to x_windowed.
+ * Four samples are handled per iteration and there is no scalar tail, so
+ * PART_LEN is presumably a multiple of four (TODO confirm).
+ */
+__inline static void WindowData(float* x_windowed, const float* x) {
+  int i;
+  for (i = 0; i < PART_LEN; i += 4) {
+    const float32x4_t vec_Buf1 = vld1q_f32(&x[i]);
+    const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]);
+    const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]);
+    // A B C D (table entries PART_LEN - i - 3 up to PART_LEN - i)
+    float32x4_t vec_sqrtHanning_rev =
+        vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]);
+    // B A D C
+    vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev);
+    // D C B A (entry PART_LEN - i now sits in lane 0)
+    vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev),
+                                       vget_low_f32(vec_sqrtHanning_rev));
+    vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning));
+    vst1q_f32(&x_windowed[PART_LEN + i],
+            vmulq_f32(vec_Buf2, vec_sqrtHanning_rev));
+  }
+}
+
+/* Puts fft output data into a complex valued array.
+ *
+ * data is read as interleaved pairs: even-indexed floats are stored in
+ * data_complex[0] and odd-indexed floats in data_complex[1]. Afterwards the
+ * first and last bins are patched: data[0] and data[1] become
+ * data_complex[0][0] and data_complex[0][PART_LEN], and data_complex[1] is
+ * set to zero at both of those bins.
+ */
+__inline static void StoreAsComplex(const float* data,
+                                    float data_complex[2][PART_LEN1]) {
+  int i;
+  for (i = 0; i < PART_LEN; i += 4) {
+    const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]);
+    vst1q_f32(&data_complex[0][i], vec_data.val[0]);
+    vst1q_f32(&data_complex[1][i], vec_data.val[1]);
+  }
+  // fix beginning/end values (the loop stored data[1] in data_complex[1][0])
+  data_complex[1][0] = 0;
+  data_complex[1][PART_LEN] = 0;
+  data_complex[0][0] = data[0];
+  data_complex[0][PART_LEN] = data[1];
+}
+/* Computes the per-bin coherence values cohde and cohxd.
+ *
+ * Steps: refresh aec->delayIdx via PartitionDelay() when aec->delayEstCtr is
+ * zero, copy the far-end spectrum selected by delayIdx into xfw, window and
+ * transform aec->dBuf (into a local dfw) and aec->eBuf (into efw), update the
+ * smoothed PSDs with SmoothedPSD(), then per bin:
+ *   cohde = (sde[0]^2 + sde[1]^2) / (sd * se + 1e-10)
+ *   cohxd = (sxd[0]^2 + sxd[1]^2) / (sx * sd + 1e-10)
+ *
+ * efw and xfw are overwritten; fft is scratch space for the transforms.
+ */
+static void SubbandCoherenceNEON(AecCore* aec,
+                                 float efw[2][PART_LEN1],
+                                 float xfw[2][PART_LEN1],
+                                 float* fft,
+                                 float* cohde,
+                                 float* cohxd) {
+  float dfw[2][PART_LEN1];
+  int i;
+
+  if (aec->delayEstCtr == 0)
+    aec->delayIdx = PartitionDelay(aec);
+
+  // Use delayed far.
+  memcpy(xfw,
+         aec->xfwBuf + aec->delayIdx * PART_LEN1,
+         sizeof(xfw[0][0]) * 2 * PART_LEN1);
+
+  // Windowed near fft
+  WindowData(fft, aec->dBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, dfw);
+
+  // Windowed error fft
+  WindowData(fft, aec->eBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, efw);
+
+  SmoothedPSD(aec, efw, dfw, xfw);
+
+  {
+    const float32x4_t vec_1eminus10 =  vdupq_n_f32(1e-10f);
+
+    /* Subband coherence, four bins per iteration. vdivq_f32 is declared
+     * elsewhere; presumably a lane-wise divide matching the scalar loop below
+     * (TODO confirm).
+     */
+    for (i = 0; i + 3 < PART_LEN1; i += 4) {
+      const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]);
+      const float32x4_t vec_se = vld1q_f32(&aec->se[i]);
+      const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]);
+      const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se);
+      const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx);
+      float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
+      float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
+      float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]);
+      float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]);
+      vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]);
+      vec_cohde = vdivq_f32(vec_cohde, vec_sdse);
+      vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]);
+      vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx);
+
+      vst1q_f32(&cohde[i], vec_cohde);
+      vst1q_f32(&cohxd[i], vec_cohxd);
+    }
+  }
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
+    cohde[i] =
+        (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
+        (aec->sd[i] * aec->se[i] + 1e-10f);
+    cohxd[i] =
+        (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
+        (aec->sx[i] * aec->sd[i] + 1e-10f);
+  }
+}
+
+// Points the five WebRtcAec_* function hooks at their NEON counterparts.
+void WebRtcAec_InitAec_neon(void) {
+  WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
+  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
+  WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
+  WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
+  WebRtcAec_FilterFar = FilterFarNEON;
+}
+
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_sse2.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_sse2.c
@ -0,0 +1,731 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The core AEC algorithm, SSE2 version of speed-critical functions.
+ */
+
+#include <emmintrin.h>
+#include <math.h>
+#include <string.h>  // memset
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+// Real part of the complex product (aRe + j*aIm) * (bRe + j*bIm).
+__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
+  const float real_part = aRe * bRe - aIm * bIm;
+  return real_part;
+}
+
+// Imaginary part of the complex product (aRe + j*aIm) * (bRe + j*bIm).
+__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
+  const float imag_part = aRe * bIm + aIm * bRe;
+  return imag_part;
+}
+/* Accumulates the filter output into yf: for every partition i, the complex
+ * product of aec->xfBuf (at the wrapped block position) and aec->wfBuf is
+ * added to yf. yf is only added to here, never cleared. Row 0 of each array
+ * holds the real parts and row 1 the imaginary parts.
+ */
+static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) {
+  int i;
+  const int num_partitions = aec->num_partitions;
+  for (i = 0; i < num_partitions; i++) {
+    int j;
+    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;  // offset into xfBuf
+    int pos = i * PART_LEN1;  // offset into wfBuf
+    // Check for wrap: xfBuf is indexed circularly over num_partitions blocks.
+    if (i + aec->xfBufBlockPos >= num_partitions) {
+      xPos -= num_partitions * (PART_LEN1);
+    }
+
+    // vectorized code (four at once)
+    for (j = 0; j + 3 < PART_LEN1; j += 4) {
+      const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
+      const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
+      const __m128 wfBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
+      const __m128 wfBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
+      const __m128 yf_re = _mm_loadu_ps(&yf[0][j]);
+      const __m128 yf_im = _mm_loadu_ps(&yf[1][j]);
+      const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re);
+      const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im);
+      const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im);
+      const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re);
+      const __m128 e = _mm_sub_ps(a, b);  // real part of xfBuf * wfBuf
+      const __m128 f = _mm_add_ps(c, d);  // imaginary part of xfBuf * wfBuf
+      const __m128 g = _mm_add_ps(yf_re, e);
+      const __m128 h = _mm_add_ps(yf_im, f);
+      _mm_storeu_ps(&yf[0][j], g);
+      _mm_storeu_ps(&yf[1][j], h);
+    }
+    // scalar code for the remaining items.
+    for (; j < PART_LEN1; j++) {
+      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
+      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
+    }
+  }
+}
+/* Normalizes and scales the error spectrum ef in place. Per bin:
+ *   1. divide both rows by (aec->xPow[i] + 1e-10);
+ *   2. if the magnitude exceeds the error threshold, rescale both rows by
+ *      threshold / (magnitude + 1e-10);
+ *   3. multiply both rows by the step size mu.
+ * mu and the threshold are kExtendedMu and kExtendedErrorThreshold when the
+ * extended filter is enabled, otherwise aec->normal_mu and
+ * aec->normal_error_threshold.
+ */
+static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
+  const __m128 k1e_10f = _mm_set1_ps(1e-10f);
+  const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
+                                                  : _mm_set1_ps(aec->normal_mu);
+  const __m128 kThresh = aec->extended_filter_enabled
+                             ? _mm_set1_ps(kExtendedErrorThreshold)
+                             : _mm_set1_ps(aec->normal_error_threshold);
+
+  int i;
+  // vectorized code (four at once)
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]);
+    const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]);
+    const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]);
+
+    const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f);
+    __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus);
+    __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus);
+    const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re);
+    const __m128 ef_im2 = _mm_mul_ps(ef_im, ef_im);
+    const __m128 ef_sum2 = _mm_add_ps(ef_re2, ef_im2);
+    const __m128 absEf = _mm_sqrt_ps(ef_sum2);
+    const __m128 bigger = _mm_cmpgt_ps(absEf, kThresh);  // per-lane mask
+    __m128 absEfPlus = _mm_add_ps(absEf, k1e_10f);
+    const __m128 absEfInv = _mm_div_ps(kThresh, absEfPlus);
+    __m128 ef_re_if = _mm_mul_ps(ef_re, absEfInv);
+    __m128 ef_im_if = _mm_mul_ps(ef_im, absEfInv);
+    ef_re_if = _mm_and_ps(bigger, ef_re_if);  // limited value where bigger
+    ef_im_if = _mm_and_ps(bigger, ef_im_if);
+    ef_re = _mm_andnot_ps(bigger, ef_re);  // unlimited value elsewhere
+    ef_im = _mm_andnot_ps(bigger, ef_im);
+    ef_re = _mm_or_ps(ef_re, ef_re_if);  // merge the two selections
+    ef_im = _mm_or_ps(ef_im, ef_im_if);
+    ef_re = _mm_mul_ps(ef_re, kMu);  // stepsize factor
+    ef_im = _mm_mul_ps(ef_im, kMu);
+
+    _mm_storeu_ps(&ef[0][i], ef_re);
+    _mm_storeu_ps(&ef[1][i], ef_im);
+  }
+  // scalar code for the remaining items.
+  {
+    const float mu =
+        aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+    const float error_threshold = aec->extended_filter_enabled
+                                      ? kExtendedErrorThreshold
+                                      : aec->normal_error_threshold;
+    for (; i < (PART_LEN1); i++) {
+      float abs_ef;
+      ef[0][i] /= (aec->xPow[i] + 1e-10f);
+      ef[1][i] /= (aec->xPow[i] + 1e-10f);
+      abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
+
+      if (abs_ef > error_threshold) {
+        abs_ef = error_threshold / (abs_ef + 1e-10f);  // reused as the scale
+        ef[0][i] *= abs_ef;
+        ef[1][i] *= abs_ef;
+      }
+
+      // Stepsize factor
+      ef[0][i] *= mu;
+      ef[1][i] *= mu;
+    }
+  }
+}
+/* Updates the filter weights aec->wfBuf of every partition from the error
+ * spectrum ef. Per partition: form conj(xfBuf) * ef in fft (interleaved
+ * real/imaginary), inverse transform, zero the last PART_LEN floats, scale
+ * the first PART_LEN floats by 2 / PART_LEN2, forward transform, and add the
+ * result to wfBuf. fft is scratch space and is overwritten.
+ */
+static void FilterAdaptationSSE2(AecCore* aec,
+                                 float* fft,
+                                 float ef[2][PART_LEN1]) {
+  int i, j;
+  const int num_partitions = aec->num_partitions;
+  for (i = 0; i < num_partitions; i++) {
+    int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
+    int pos = i * PART_LEN1;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >= num_partitions) {
+      xPos -= num_partitions * PART_LEN1;
+    }
+
+    // Process the whole array...
+    for (j = 0; j < PART_LEN; j += 4) {
+      // Load xfBuf and ef.
+      const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
+      const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
+      const __m128 ef_re = _mm_loadu_ps(&ef[0][j]);
+      const __m128 ef_im = _mm_loadu_ps(&ef[1][j]);
+      // Calculate the product of conjugate(xfBuf) by ef.
+      //   re(conjugate(a) * b) = aRe * bRe + aIm * bIm
+      //   im(conjugate(a) * b)=  aRe * bIm - aIm * bRe
+      const __m128 a = _mm_mul_ps(xfBuf_re, ef_re);
+      const __m128 b = _mm_mul_ps(xfBuf_im, ef_im);
+      const __m128 c = _mm_mul_ps(xfBuf_re, ef_im);
+      const __m128 d = _mm_mul_ps(xfBuf_im, ef_re);
+      const __m128 e = _mm_add_ps(a, b);
+      const __m128 f = _mm_sub_ps(c, d);
+      // Interleave real and imaginary parts.
+      const __m128 g = _mm_unpacklo_ps(e, f);
+      const __m128 h = _mm_unpackhi_ps(e, f);
+      // Store
+      _mm_storeu_ps(&fft[2 * j + 0], g);
+      _mm_storeu_ps(&fft[2 * j + 4], h);
+    }
+    /* ... and fixup the first imaginary entry: fft[1] receives the real part
+     * of conjugate(xfBuf) * ef for bin PART_LEN.
+     */
+    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
+                   -aec->xfBuf[1][xPos + PART_LEN],
+                   ef[0][PART_LEN],
+                   ef[1][PART_LEN]);
+    /* Inverse transform, then clear the last PART_LEN floats of fft. */
+    aec_rdft_inverse_128(fft);
+    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
+
+    // fft scaling
+    {
+      float scale = 2.0f / PART_LEN2;
+      const __m128 scale_ps = _mm_load_ps1(&scale);
+      for (j = 0; j < PART_LEN; j += 4) {
+        const __m128 fft_ps = _mm_loadu_ps(&fft[j]);
+        const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);
+        _mm_storeu_ps(&fft[j], fft_scale);
+      }
+    }
+    aec_rdft_forward_128(fft);
+    /* Add the transformed update to the weights. The vector loop also adds
+     * fft[1] to wfBuf[1][pos]; that element is restored from wt1 afterwards,
+     * and fft[1] is instead added to wfBuf[0][pos + PART_LEN].
+     */
+    {
+      float wt1 = aec->wfBuf[1][pos];
+      aec->wfBuf[0][pos + PART_LEN] += fft[1];
+      for (j = 0; j < PART_LEN; j += 4) {
+        __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
+        __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
+        const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
+        const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
+        const __m128 fft_re =
+            _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0));  // even floats
+        const __m128 fft_im =
+            _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));  // odd floats
+        wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);
+        wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);
+        _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re);
+        _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im);
+      }
+      aec->wfBuf[1][pos] = wt1;
+    }
+  }
+}
+/* Approximates a^b independently for each of the four lanes and returns the
+ * result. The sign bit of a is covered by neither the exponent mask nor the
+ * mantissa mask used below, so it does not influence the result.
+ */
+static __m128 mm_pow_ps(__m128 a, __m128 b) {
+  // a^b = exp2(b * log2(a))
+  //   exp2(x) and log2(x) are calculated using polynomial approximations.
+  __m128 log2_a, b_log2_a, a_exp_b;
+
+  // Calculate log2(x), x = a.
+  {
+    // To calculate log2(x), we decompose x like this:
+    //   x = y * 2^n
+    //     n is an integer
+    //     y is in the [1.0, 2.0) range
+    //
+    //   log2(x) = log2(y) + n
+    //     n       can be evaluated by playing with float representation.
+    //     log2(y) in a small range can be approximated, this code uses an order
+    //             five polynomial approximation. The coefficients have been
+    //             estimated with the Remez algorithm and the resulting
+    //             polynomial has a maximum relative error of 0.00086%.
+
+    // Compute n.
+    //    This is done by masking the exponent, shifting it into the top bit of
+    //    the mantissa, putting eight into the biased exponent (to shift/
+    //    compensate the fact that the exponent has been shifted in the top/
+    //    fractional part and finally getting rid of the implicit leading one
+    //    from the mantissa by subtracting it out.
+    static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = {
+        0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};
+    static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = {
+        0x43800000, 0x43800000, 0x43800000, 0x43800000};
+    static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = {
+        0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};
+    static const int shift_exponent_into_top_mantissa = 8;
+    const __m128 two_n = _mm_and_ps(a, *((__m128*)float_exponent_mask));
+    const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(
+        _mm_castps_si128(two_n), shift_exponent_into_top_mantissa));
+    const __m128 n_0 = _mm_or_ps(n_1, *((__m128*)eight_biased_exponent));
+    const __m128 n = _mm_sub_ps(n_0, *((__m128*)implicit_leading_one));
+
+    // Compute y: keep the mantissa of a and force the exponent bits of 1.0f.
+    static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = {
+        0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};
+    static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = {
+        0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000};
+    const __m128 mantissa = _mm_and_ps(a, *((__m128*)mantissa_mask));
+    const __m128 y =
+        _mm_or_ps(mantissa, *((__m128*)zero_biased_exponent_is_one));
+
+    // Approximate log2(y) ~= (y - 1) * pol5(y).
+    //    pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
+    static const ALIGN16_BEG float ALIGN16_END C5[4] = {
+        -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f};
+    static const ALIGN16_BEG float ALIGN16_END
+        C4[4] = {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f};
+    static const ALIGN16_BEG float ALIGN16_END
+        C3[4] = {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f};
+    static const ALIGN16_BEG float ALIGN16_END
+        C2[4] = {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f};
+    static const ALIGN16_BEG float ALIGN16_END
+        C1[4] = {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f};
+    static const ALIGN16_BEG float ALIGN16_END
+        C0[4] = {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f};
+    const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128*)C5));
+    const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128*)C4));
+    const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y);
+    const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128*)C3));
+    const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y);
+    const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128*)C2));
+    const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y);
+    const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128*)C1));
+    const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y);
+    const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128*)C0));
+    const __m128 y_minus_one =
+        _mm_sub_ps(y, *((__m128*)zero_biased_exponent_is_one));
+    const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y);
+
+    // Combine parts.
+    log2_a = _mm_add_ps(n, log2_y);
+  }
+
+  // b * log2(a)
+  b_log2_a = _mm_mul_ps(b, log2_a);
+
+  // Calculate exp2(x), x = b * log2(a).
+  {
+    // To calculate 2^x, we decompose x like this:
+    //   x = n + y
+    //     n is an integer, the value of x - 0.5 rounded down, therefore
+    //     y is in the [0.5, 1.5) range
+    //
+    //   2^x = 2^n * 2^y
+    //     2^n can be evaluated by playing with float representation.
+    //     2^y in a small range can be approximated, this code uses an order two
+    //         polynomial approximation. The coefficients have been estimated
+    //         with the Remez algorithm and the resulting polynomial has a
+    //         maximum relative error of 0.17%.
+
+    // To avoid over/underflow, we reduce the range of input to ]-127, 129].
+    static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f,
+                                                               129.f, 129.f};
+    static const ALIGN16_BEG float min_input[4] ALIGN16_END = {
+        -126.99999f, -126.99999f, -126.99999f, -126.99999f};
+    const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128*)max_input));
+    const __m128 x_max = _mm_max_ps(x_min, *((__m128*)min_input));
+    /* Compute n.
+     * NOTE(review): _mm_cvtps_epi32 converts using the current rounding mode
+     * (round-to-nearest unless it was changed) rather than an explicit floor;
+     * confirm that this matches the rounded-down description above.
+     */
+    static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f,
+                                                          0.5f, 0.5f};
+    const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128*)half));
+    const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half);
+    // Compute 2^n: add the exponent bias and shift into the exponent field.
+    static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = {
+        127, 127, 127, 127};
+    static const int float_exponent_shift = 23;
+    const __m128i two_n_exponent =
+        _mm_add_epi32(x_minus_half_floor, *((__m128i*)float_exponent_bias));
+    const __m128 two_n =
+        _mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift));
+    // Compute y.
+    const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor));
+    // Approximate 2^y ~= C2 * y^2 + C1 * y + C0.
+    static const ALIGN16_BEG float C2[4] ALIGN16_END = {
+        3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f};
+    static const ALIGN16_BEG float C1[4] ALIGN16_END = {
+        6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f};
+    static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f,
+                                                        1.0017247f, 1.0017247f};
+    const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128*)C2));
+    const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128*)C1));
+    const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y);
+    const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128*)C0));
+
+    // Combine parts.
+    a_exp_b = _mm_mul_ps(exp2_y, two_n);
+  }
+  return a_exp_b;
+}
+/* Adjusts the per-bin factors hNl and applies them to efw, both in place.
+ * Per bin: if hNl[i] > hNlFb, replace it by
+ *   weightCurve[i] * hNlFb + (1 - weightCurve[i]) * hNl[i];
+ * raise it to the power aec->overDriveSm * WebRtcAec_overDriveCurve[i];
+ * multiply both rows of efw by the result; finally negate row 1 of efw.
+ * The vector loop uses the approximate mm_pow_ps(), the scalar tail powf().
+ */
+static void OverdriveAndSuppressSSE2(AecCore* aec,
+                                     float hNl[PART_LEN1],
+                                     const float hNlFb,
+                                     float efw[2][PART_LEN1]) {
+  int i;
+  const __m128 vec_hNlFb = _mm_set1_ps(hNlFb);
+  const __m128 vec_one = _mm_set1_ps(1.0f);
+  const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
+  const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm);
+  // vectorized code (four at once)
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    // Weight subbands
+    __m128 vec_hNl = _mm_loadu_ps(&hNl[i]);
+    const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]);
+    const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb);  // per-lane mask
+    const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb);
+    const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve);
+    const __m128 vec_one_weightCurve_hNl =
+        _mm_mul_ps(vec_one_weightCurve, vec_hNl);
+    const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl);  // unchanged lanes
+    const __m128 vec_if1 = _mm_and_ps(
+        bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));
+    vec_hNl = _mm_or_ps(vec_if0, vec_if1);
+
+    {
+      const __m128 vec_overDriveCurve =
+          _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
+      const __m128 vec_overDriveSm_overDriveCurve =
+          _mm_mul_ps(vec_overDriveSm, vec_overDriveCurve);
+      vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
+      _mm_storeu_ps(&hNl[i], vec_hNl);
+    }
+
+    // Suppress error signal
+    {
+      __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);
+      __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);
+      vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);
+      vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);
+
+      // Ooura fft returns incorrect sign on imaginary component. It matters
+      // here because we are making an additive change with comfort noise.
+      vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);
+      _mm_storeu_ps(&efw[0][i], vec_efw_re);
+      _mm_storeu_ps(&efw[1][i], vec_efw_im);
+    }
+  }
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
+    // Weight subbands
+    if (hNl[i] > hNlFb) {
+      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
+               (1 - WebRtcAec_weightCurve[i]) * hNl[i];
+    }
+    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
+
+    // Suppress error signal
+    efw[0][i] *= hNl[i];
+    efw[1][i] *= hNl[i];
+
+    // Ooura fft returns incorrect sign on imaginary component. It matters
+    // here because we are making an additive change with comfort noise.
+    efw[1][i] *= -1;
+  }
+}
+
+// Horizontal add: stores the sum of the four lanes of |sum| in *dst.
+__inline static void _mm_add_ps_4x1(__m128 sum, float *dst) {
+  // Fold the upper pair onto the lower pair:
+  // lane 0 = s0 + s2, lane 1 = s1 + s3.
+  const __m128 upper_pair = _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2));
+  const __m128 pair_sums = _mm_add_ps(sum, upper_pair);
+  // Add lane 1 onto lane 0; only lane 0 ends up holding the full total.
+  const __m128 lane1_splat =
+      _mm_shuffle_ps(pair_sums, pair_sums, _MM_SHUFFLE(1, 1, 1, 1));
+  _mm_store_ss(dst, _mm_add_ps(pair_sums, lane1_splat));
+}
+static int PartitionDelay(const AecCore* aec) {
+  // Measures the energy in each filter partition and returns the partition with
+  // highest energy.
+  // TODO(bjornv): Spread computational cost by computing one partition per
+  // block?
+  float wfEnMax = 0;  // largest partition energy seen so far
+  int i;
+  int delay = 0;  // stays 0 when no partition has energy above zero
+
+  for (i = 0; i < aec->num_partitions; i++) {
+    int j;
+    int pos = i * PART_LEN1;  // offset of partition i inside wfBuf
+    float wfEn = 0;
+    __m128 vec_wfEn = _mm_set1_ps(0.0f);
+    // vectorized code (four at once): sums wfBuf[0]^2 + wfBuf[1]^2 per lane
+    for (j = 0; j + 3 < PART_LEN1; j += 4) {
+      const __m128 vec_wfBuf0 = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
+      const __m128 vec_wfBuf1 = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
+      vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf0, vec_wfBuf0));
+      vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf1, vec_wfBuf1));
+    }
+    _mm_add_ps_4x1(vec_wfEn, &wfEn);  // wfEn = sum of the four lanes
+
+    // scalar code for the remaining items.
+    for (; j < PART_LEN1; j++) {
+      wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
+              aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
+    }
+    /* Strict comparison: on equal energy the earlier partition is kept. */
+    if (wfEn > wfEnMax) {
+      wfEnMax = wfEn;
+      delay = i;
+    }
+  }
+  return delay;
+}
+
+// Updates the following smoothed Power Spectral Densities (PSD):
+//  - sd  : near-end
+//  - se  : residual echo
+//  - sx  : far-end
+//  - sde : cross-PSD of near-end and residual echo
+//  - sxd : cross-PSD of near-end and far-end
+//
+// In addition to updating the PSDs, the filter diverge state is determined,
+// upon which actions are taken.
+static void SmoothedPSD(AecCore* aec,
+                        float efw[2][PART_LEN1],
+                        float dfw[2][PART_LEN1],
+                        float xfw[2][PART_LEN1]) {
+  // Power estimate smoothing coefficients.
+  const float* ptrGCoh = aec->extended_filter_enabled
+      ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
+      : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
+  int i;
+  float sdSum = 0, seSum = 0;
+  const __m128 vec_15 =  _mm_set1_ps(WebRtcAec_kMinFarendPSD);
+  const __m128 vec_GCoh0 = _mm_set1_ps(ptrGCoh[0]);
+  const __m128 vec_GCoh1 = _mm_set1_ps(ptrGCoh[1]);
+  __m128 vec_sdSum = _mm_set1_ps(0.0f);
+  __m128 vec_seSum = _mm_set1_ps(0.0f);
+
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    const __m128 vec_dfw0 = _mm_loadu_ps(&dfw[0][i]);
+    const __m128 vec_dfw1 = _mm_loadu_ps(&dfw[1][i]);
+    const __m128 vec_efw0 = _mm_loadu_ps(&efw[0][i]);
+    const __m128 vec_efw1 = _mm_loadu_ps(&efw[1][i]);
+    const __m128 vec_xfw0 = _mm_loadu_ps(&xfw[0][i]);
+    const __m128 vec_xfw1 = _mm_loadu_ps(&xfw[1][i]);
+    __m128 vec_sd = _mm_mul_ps(_mm_loadu_ps(&aec->sd[i]), vec_GCoh0);
+    __m128 vec_se = _mm_mul_ps(_mm_loadu_ps(&aec->se[i]), vec_GCoh0);
+    __m128 vec_sx = _mm_mul_ps(_mm_loadu_ps(&aec->sx[i]), vec_GCoh0);
+    __m128 vec_dfw_sumsq = _mm_mul_ps(vec_dfw0, vec_dfw0);
+    __m128 vec_efw_sumsq = _mm_mul_ps(vec_efw0, vec_efw0);
+    __m128 vec_xfw_sumsq = _mm_mul_ps(vec_xfw0, vec_xfw0);
+    vec_dfw_sumsq = _mm_add_ps(vec_dfw_sumsq, _mm_mul_ps(vec_dfw1, vec_dfw1));
+    vec_efw_sumsq = _mm_add_ps(vec_efw_sumsq, _mm_mul_ps(vec_efw1, vec_efw1));
+    vec_xfw_sumsq = _mm_add_ps(vec_xfw_sumsq, _mm_mul_ps(vec_xfw1, vec_xfw1));
+    vec_xfw_sumsq = _mm_max_ps(vec_xfw_sumsq, vec_15);
+    vec_sd = _mm_add_ps(vec_sd, _mm_mul_ps(vec_dfw_sumsq, vec_GCoh1));
+    vec_se = _mm_add_ps(vec_se, _mm_mul_ps(vec_efw_sumsq, vec_GCoh1));
+    vec_sx = _mm_add_ps(vec_sx, _mm_mul_ps(vec_xfw_sumsq, vec_GCoh1));
+    _mm_storeu_ps(&aec->sd[i], vec_sd);
+    _mm_storeu_ps(&aec->se[i], vec_se);
+    _mm_storeu_ps(&aec->sx[i], vec_sx);
+
+    {
+      const __m128 vec_3210 = _mm_loadu_ps(&aec->sde[i][0]);
+      const __m128 vec_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
+      __m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
+                                    _MM_SHUFFLE(2, 0, 2, 0));
+      __m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
+                                    _MM_SHUFFLE(3, 1, 3, 1));
+      __m128 vec_dfwefw0011 = _mm_mul_ps(vec_dfw0, vec_efw0);
+      __m128 vec_dfwefw0110 = _mm_mul_ps(vec_dfw0, vec_efw1);
+      vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
+      vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
+      vec_dfwefw0011 = _mm_add_ps(vec_dfwefw0011,
+                                  _mm_mul_ps(vec_dfw1, vec_efw1));
+      vec_dfwefw0110 = _mm_sub_ps(vec_dfwefw0110,
+                                  _mm_mul_ps(vec_dfw1, vec_efw0));
+      vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwefw0011, vec_GCoh1));
+      vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwefw0110, vec_GCoh1));
+      _mm_storeu_ps(&aec->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b));
+      _mm_storeu_ps(&aec->sde[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
+    }
+
+    {
+      const __m128 vec_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
+      const __m128 vec_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
+      __m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
+                                    _MM_SHUFFLE(2, 0, 2, 0));
+      __m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
+                                    _MM_SHUFFLE(3, 1, 3, 1));
+      __m128 vec_dfwxfw0011 = _mm_mul_ps(vec_dfw0, vec_xfw0);
+      __m128 vec_dfwxfw0110 = _mm_mul_ps(vec_dfw0, vec_xfw1);
+      vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
+      vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
+      vec_dfwxfw0011 = _mm_add_ps(vec_dfwxfw0011,
+                                  _mm_mul_ps(vec_dfw1, vec_xfw1));
+      vec_dfwxfw0110 = _mm_sub_ps(vec_dfwxfw0110,
+                                  _mm_mul_ps(vec_dfw1, vec_xfw0));
+      vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwxfw0011, vec_GCoh1));
+      vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwxfw0110, vec_GCoh1));
+      _mm_storeu_ps(&aec->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b));
+      _mm_storeu_ps(&aec->sxd[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
+    }
+
+    vec_sdSum = _mm_add_ps(vec_sdSum, vec_sd);
+    vec_seSum = _mm_add_ps(vec_seSum, vec_se);
+  }
+
+  _mm_add_ps_4x1(vec_sdSum, &sdSum);
+  _mm_add_ps_4x1(vec_seSum, &seSum);
+
+  for (; i < PART_LEN1; i++) {
+    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
+                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
+    aec->se[i] = ptrGCoh[0] * aec->se[i] +
+                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
+    // We threshold here to protect against the ill-effects of a zero farend.
+    // The threshold is not arbitrarily chosen, but balances protection and
+    // adverse interaction with the algorithm's tuning.
+    // TODO(bjornv): investigate further why this is so sensitive.
+    aec->sx[i] =
+        ptrGCoh[0] * aec->sx[i] +
+        ptrGCoh[1] * WEBRTC_SPL_MAX(
+            xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
+            WebRtcAec_kMinFarendPSD);
+
+    aec->sde[i][0] =
+        ptrGCoh[0] * aec->sde[i][0] +
+        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
+    aec->sde[i][1] =
+        ptrGCoh[0] * aec->sde[i][1] +
+        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
+
+    aec->sxd[i][0] =
+        ptrGCoh[0] * aec->sxd[i][0] +
+        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
+    aec->sxd[i][1] =
+        ptrGCoh[0] * aec->sxd[i][1] +
+        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
+
+    sdSum += aec->sd[i];
+    seSum += aec->se[i];
+  }
+
+  // Divergent filter safeguard.
+  aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
+
+  if (aec->divergeState)
+    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
+
+  // Reset if error is significantly larger than nearend (13 dB).
+  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
+    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+}
+
+// Multiplies 2 * PART_LEN time-domain samples from |x| by the
+// WebRtcAec_sqrtHanning window and writes the products to |x_windowed|, ready
+// to be used by the fft. The first PART_LEN samples use the coefficients in
+// table order, the second PART_LEN samples use them in reverse order.
+__inline static void WindowData(float* x_windowed, const float* x) {
+  int n;
+  for (n = 0; n < PART_LEN; n += 4) {
+    // Four coefficients in table order (aligned load).
+    const __m128 win_fwd = _mm_load_ps(&WebRtcAec_sqrtHanning[n]);
+    // Four coefficients ending at index PART_LEN - n: A B C D
+    const __m128 win_tail =
+        _mm_loadu_ps(&WebRtcAec_sqrtHanning[PART_LEN - n - 3]);
+    // Same four coefficients with the lanes reversed: D C B A
+    const __m128 win_rev =
+        _mm_shuffle_ps(win_tail, win_tail, _MM_SHUFFLE(0, 1, 2, 3));
+    const __m128 first_half = _mm_loadu_ps(&x[n]);
+    const __m128 second_half = _mm_loadu_ps(&x[PART_LEN + n]);
+
+    _mm_storeu_ps(&x_windowed[n], _mm_mul_ps(first_half, win_fwd));
+    _mm_storeu_ps(&x_windowed[PART_LEN + n],
+                  _mm_mul_ps(second_half, win_rev));
+  }
+}
+
+// Splits the interleaved fft output in |data| into a complex valued array:
+// data_complex[0] receives the even-indexed floats and data_complex[1] the
+// odd-indexed floats. Afterwards bins 0 and PART_LEN are patched: their
+// entries in row 0 come from data[0] and data[1], and their entries in row 1
+// are set to zero.
+__inline static void StoreAsComplex(const float* data,
+                                    float data_complex[2][PART_LEN1]) {
+  int bin;
+  for (bin = 0; bin < PART_LEN; bin += 4) {
+    const float* pairs = &data[2 * bin];
+    const __m128 pairs_lo = _mm_loadu_ps(pairs);
+    const __m128 pairs_hi = _mm_loadu_ps(pairs + 4);
+    // Lanes 0 and 2 of each load: the even-indexed floats.
+    const __m128 evens = _mm_shuffle_ps(pairs_lo, pairs_hi,
+                                        _MM_SHUFFLE(2, 0, 2, 0));
+    // Lanes 1 and 3 of each load: the odd-indexed floats.
+    const __m128 odds = _mm_shuffle_ps(pairs_lo, pairs_hi,
+                                       _MM_SHUFFLE(3, 1, 3, 1));
+    _mm_storeu_ps(&data_complex[0][bin], evens);
+    _mm_storeu_ps(&data_complex[1][bin], odds);
+  }
+  // fix beginning/end values
+  data_complex[1][0] = 0.0f;
+  data_complex[1][PART_LEN] = 0.0f;
+  data_complex[0][0] = data[0];
+  data_complex[0][PART_LEN] = data[1];
+}
+/*
+ * Computes the per-bin coherence values |cohde| and |cohxd| (PART_LEN1
+ * entries each) from the smoothed spectra stored in |aec|.
+ *
+ * Steps, in order:
+ *  - when aec->delayEstCtr is zero, refreshes aec->delayIdx through
+ *    PartitionDelay();
+ *  - copies 2 * PART_LEN1 floats starting at
+ *    aec->xfwBuf + aec->delayIdx * PART_LEN1 into |xfw|;
+ *  - windows aec->dBuf and aec->eBuf, transforms each with
+ *    aec_rdft_forward_128() and stores the results in the local dfw and in
+ *    |efw|;
+ *  - calls SmoothedPSD(), which updates aec->sd, se, sx, sde and sxd;
+ *  - cohde[i] = (sde[i][0]^2 + sde[i][1]^2) / (sd[i] * se[i] + 1e-10),
+ *    cohxd[i] = (sxd[i][0]^2 + sxd[i][1]^2) / (sx[i] * sd[i] + 1e-10).
+ *
+ * |fft| is scratch space; WindowData() writes 2 * PART_LEN floats into it.
+ * |efw| and |xfw| are overwritten.
+ */
+static void SubbandCoherenceSSE2(AecCore* aec,
+                                 float efw[2][PART_LEN1],
+                                 float xfw[2][PART_LEN1],
+                                 float* fft,
+                                 float* cohde,
+                                 float* cohxd) {
+  float dfw[2][PART_LEN1];
+  int i;
+
+  if (aec->delayEstCtr == 0)
+    aec->delayIdx = PartitionDelay(aec);
+
+  // Use delayed far.
+  memcpy(xfw,
+         aec->xfwBuf + aec->delayIdx * PART_LEN1,
+         sizeof(xfw[0][0]) * 2 * PART_LEN1);
+
+  // Windowed near fft
+  WindowData(fft, aec->dBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, dfw);
+
+  // Windowed error fft
+  WindowData(fft, aec->eBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, efw);
+
+  SmoothedPSD(aec, efw, dfw, xfw);
+
+  {
+    const __m128 vec_1eminus10 =  _mm_set1_ps(1e-10f);  // Keeps the divisors non-zero.
+
+    // Subband coherence
+    for (i = 0; i + 3 < PART_LEN1; i += 4) {  // Four bins per iteration.
+      const __m128 vec_sd = _mm_loadu_ps(&aec->sd[i]);
+      const __m128 vec_se = _mm_loadu_ps(&aec->se[i]);
+      const __m128 vec_sx = _mm_loadu_ps(&aec->sx[i]);
+      const __m128 vec_sdse = _mm_add_ps(vec_1eminus10,
+                                         _mm_mul_ps(vec_sd, vec_se));
+      const __m128 vec_sdsx = _mm_add_ps(vec_1eminus10,
+                                         _mm_mul_ps(vec_sd, vec_sx));
+      const __m128 vec_sde_3210 = _mm_loadu_ps(&aec->sde[i][0]);  // Bins i, i + 1 (two floats each).
+      const __m128 vec_sde_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);  // Bins i + 2, i + 3.
+      const __m128 vec_sxd_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
+      const __m128 vec_sxd_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
+      const __m128 vec_sde_0 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654,
+                                              _MM_SHUFFLE(2, 0, 2, 0));  // [..][0] of the four bins.
+      const __m128 vec_sde_1 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654,
+                                              _MM_SHUFFLE(3, 1, 3, 1));  // [..][1] of the four bins.
+      const __m128 vec_sxd_0 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654,
+                                              _MM_SHUFFLE(2, 0, 2, 0));
+      const __m128 vec_sxd_1 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654,
+                                              _MM_SHUFFLE(3, 1, 3, 1));
+      __m128 vec_cohde = _mm_mul_ps(vec_sde_0, vec_sde_0);
+      __m128 vec_cohxd = _mm_mul_ps(vec_sxd_0, vec_sxd_0);
+      vec_cohde = _mm_add_ps(vec_cohde, _mm_mul_ps(vec_sde_1, vec_sde_1));
+      vec_cohde = _mm_div_ps(vec_cohde, vec_sdse);
+      vec_cohxd = _mm_add_ps(vec_cohxd, _mm_mul_ps(vec_sxd_1, vec_sxd_1));
+      vec_cohxd = _mm_div_ps(vec_cohxd, vec_sdsx);
+      _mm_storeu_ps(&cohde[i], vec_cohde);
+      _mm_storeu_ps(&cohxd[i], vec_cohxd);
+    }
+
+    // scalar code for the remaining items.
+    for (; i < PART_LEN1; i++) {
+      cohde[i] =
+          (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
+          (aec->sd[i] * aec->se[i] + 1e-10f);
+      cohxd[i] =
+          (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
+          (aec->sx[i] * aec->sd[i] + 1e-10f);
+    }
+  }
+}
+
+// Points the AEC processing hooks at their SSE2 implementations.
+void WebRtcAec_InitAec_SSE2(void) {
+  WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
+  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
+  WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
+  WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
+  WebRtcAec_FilterFar = FilterFarSSE2;
+}
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.c
@ -0,0 +1,589 @@
+/*
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ * Copyright Takuya OOURA, 1996-2001
+ *
+ * You may use, copy, modify and distribute this code for any purpose (include
+ * commercial use) and without fee. Please refer to this package when you modify
+ * this code.
+ *
+ * Changes by the WebRTC authors:
+ *    - Trivial type modifications.
+ *    - Minimal code subset to do rdft of length 128.
+ *    - Optimizations because of known length.
+ *
+ *  All changes are covered by the WebRTC license and IP grant:
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+#include <math.h>
+
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+// These tables used to be computed at run-time. For example, refer to:
+// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564
+// to see the initialization code.
+const float rdft_w[64] = {  // [0..31]: twiddle pairs read by cft1st_128_C / cftmdl_128_C.
+    1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f,
+    0.9238795638f, 0.3826834559f, 0.3826834559f, 0.9238795638f,
+    0.9807852507f, 0.1950903237f, 0.5555702448f, 0.8314695954f,
+    0.8314695954f, 0.5555702448f, 0.1950903237f, 0.9807852507f,
+    0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
+    0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f,
+    0.9569403529f, 0.2902846634f, 0.4713967443f, 0.8819212914f,
+    0.7730104327f, 0.6343933344f, 0.0980171412f, 0.9951847196f,
+    0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f,  // [32..63]: read through rdft_w + 32 by rftfsub_128_C / rftbsub_128_C.
+    0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
+    0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f,
+    0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f,
+    0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f,
+    0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f,
+    0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
+    0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
+};
+const float rdft_wk3ri_first[16] = {  // (wk3r, wk3i) pairs for the first butterfly group in cft1st_128_C / cftmdl_128_C.
+    1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f,
+    0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
+    0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f,
+    0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
+};
+const float rdft_wk3ri_second[16] = {  // (wk3r, wk3i) pairs for the second butterfly group in the same loops.
+    -0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f,
+    -0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f,
+    -0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f,
+    -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = {  // 16-byte aligned tables for the SIMD paths; not read by the C kernels in this file.
+    1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f,
+    0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f,
+    0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f,
+    0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f,
+    0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f,
+    0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f,
+    0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f,
+    0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = {
+    1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f,
+    0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f,
+    0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f,
+    0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
+    0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f,
+    0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f,
+    0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f,
+    0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = {
+    1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f,
+    0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
+    0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f,
+    -0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f,
+    0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f,
+    0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f,
+    0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f,
+    -0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = {  // The *i tables alternate the sign of each duplicated value.
+    -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
+    -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
+    -0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f,
+    -0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f,
+    -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f,
+    -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f,
+    -0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f,
+    -0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = {
+    -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f,
+    -0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f,
+    -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
+    -0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f,
+    -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f,
+    -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f,
+    -0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f,
+    -0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = {
+    -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
+    -0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f,
+    -0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f,
+    -0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f,
+    -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f,
+    -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f,
+    -0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f,
+    -0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f,
+};
+ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = {  // NOTE(review): no reader in this file; presumably for the SIMD cftmdl kernels - confirm.
+    0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f,
+};
+/*
+ * C implementation installed behind the bitrv2_128 pointer by
+ * aec_rdft_init(). Reorders |a| in place by swapping pairs of adjacent floats
+ * (xr/xi with yr/yi); every access stays within the first 128 floats of |a|.
+ * NOTE(review): judging by the name this is the bit-reversal reordering of
+ * the Ooura FFT; the permutation itself is not verified here.
+ */
+static void bitrv2_128_C(float* a) {
+  /*
+      Following things have been attempted but are no faster:
+      (a) Storing the swap indexes in a LUT (index calculations are done
+          for 'free' while waiting on memory/L1).
+      (b) Consolidate the load/store of two consecutive floats by a 64 bit
+          integer (execution is memory/L1 bound).
+      (c) Do a mix of floats and 64 bit integer to maximize register
+          utilization (execution is memory/L1 bound).
+      (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
+      (e) Hard-coding of the offsets to completely eliminates index
+          calculations.
+  */
+
+  unsigned int j, j1, k, k1;
+  float xr, xi, yr, yi;
+
+  static const int ip[4] = {0, 64, 32, 96};  // Base offsets (in floats) added to the pair indexes.
+  for (k = 0; k < 4; k++) {
+    for (j = 0; j < k; j++) {  // Four swaps for every j < k.
+      j1 = 2 * j + ip[k];
+      k1 = 2 * k + ip[j];
+      xr = a[j1 + 0];
+      xi = a[j1 + 1];
+      yr = a[k1 + 0];
+      yi = a[k1 + 1];
+      a[j1 + 0] = yr;
+      a[j1 + 1] = yi;
+      a[k1 + 0] = xr;
+      a[k1 + 1] = xi;
+      j1 += 8;
+      k1 += 16;
+      xr = a[j1 + 0];
+      xi = a[j1 + 1];
+      yr = a[k1 + 0];
+      yi = a[k1 + 1];
+      a[j1 + 0] = yr;
+      a[j1 + 1] = yi;
+      a[k1 + 0] = xr;
+      a[k1 + 1] = xi;
+      j1 += 8;
+      k1 -= 8;
+      xr = a[j1 + 0];
+      xi = a[j1 + 1];
+      yr = a[k1 + 0];
+      yi = a[k1 + 1];
+      a[j1 + 0] = yr;
+      a[j1 + 1] = yi;
+      a[k1 + 0] = xr;
+      a[k1 + 1] = xi;
+      j1 += 8;
+      k1 += 16;
+      xr = a[j1 + 0];
+      xi = a[j1 + 1];
+      yr = a[k1 + 0];
+      yi = a[k1 + 1];
+      a[j1 + 0] = yr;
+      a[j1 + 1] = yi;
+      a[k1 + 0] = xr;
+      a[k1 + 1] = xi;
+    }
+    j1 = 2 * k + 8 + ip[k];  // One extra swap per k, between pairs 8 floats apart.
+    k1 = j1 + 8;
+    xr = a[j1 + 0];
+    xi = a[j1 + 1];
+    yr = a[k1 + 0];
+    yi = a[k1 + 1];
+    a[j1 + 0] = yr;
+    a[j1 + 1] = yi;
+    a[k1 + 0] = xr;
+    a[k1 + 1] = xi;
+  }
+}
+/*
+ * C implementation installed behind the cft1st_128 pointer by
+ * aec_rdft_init(). Works in place on the 128 floats of |a|, treated as
+ * interleaved (real, imag) pairs: every group of four consecutive pairs is
+ * combined with a 4-point butterfly (sums and differences x0..x3), and the
+ * outputs are scaled by twiddle factors from rdft_w and rdft_wk3ri_first /
+ * rdft_wk3ri_second. The first 16 floats are handled by simplified code
+ * before the loop.
+ */
+static void cft1st_128_C(float* a) {
+  const int n = 128;
+  int j, k1, k2;
+  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  // The processing of the first set of elements was simplified in C to avoid
+  // some operations (multiplication by zero or one, addition of two elements
+  // multiplied by the same weight, ...).
+  x0r = a[0] + a[2];
+  x0i = a[1] + a[3];
+  x1r = a[0] - a[2];
+  x1i = a[1] - a[3];
+  x2r = a[4] + a[6];
+  x2i = a[5] + a[7];
+  x3r = a[4] - a[6];
+  x3i = a[5] - a[7];
+  a[0] = x0r + x2r;
+  a[1] = x0i + x2i;
+  a[4] = x0r - x2r;
+  a[5] = x0i - x2i;
+  a[2] = x1r - x3i;
+  a[3] = x1i + x3r;
+  a[6] = x1r + x3i;
+  a[7] = x1i - x3r;
+  wk1r = rdft_w[2];  // 0.7071067691f, the only weight needed for floats 8..15.
+  x0r = a[8] + a[10];
+  x0i = a[9] + a[11];
+  x1r = a[8] - a[10];
+  x1i = a[9] - a[11];
+  x2r = a[12] + a[14];
+  x2i = a[13] + a[15];
+  x3r = a[12] - a[14];
+  x3i = a[13] - a[15];
+  a[8] = x0r + x2r;
+  a[9] = x0i + x2i;
+  a[12] = x2i - x0i;
+  a[13] = x0r - x2r;
+  x0r = x1r - x3i;
+  x0i = x1i + x3r;
+  a[10] = wk1r * (x0r - x0i);
+  a[11] = wk1r * (x0r + x0i);
+  x0r = x3i + x1r;
+  x0i = x3r - x1i;
+  a[14] = wk1r * (x0i - x0r);
+  a[15] = wk1r * (x0i + x0r);
+  k1 = 0;
+  for (j = 16; j < n; j += 16) {  // Remaining 16-float blocks use full complex weights.
+    k1 += 2;
+    k2 = 2 * k1;
+    wk2r = rdft_w[k1 + 0];
+    wk2i = rdft_w[k1 + 1];
+    wk1r = rdft_w[k2 + 0];
+    wk1i = rdft_w[k2 + 1];
+    wk3r = rdft_wk3ri_first[k1 + 0];
+    wk3i = rdft_wk3ri_first[k1 + 1];
+    x0r = a[j + 0] + a[j + 2];  // First group of the block: floats j .. j + 7.
+    x0i = a[j + 1] + a[j + 3];
+    x1r = a[j + 0] - a[j + 2];
+    x1i = a[j + 1] - a[j + 3];
+    x2r = a[j + 4] + a[j + 6];
+    x2i = a[j + 5] + a[j + 7];
+    x3r = a[j + 4] - a[j + 6];
+    x3i = a[j + 5] - a[j + 7];
+    a[j + 0] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    x0r -= x2r;
+    x0i -= x2i;
+    a[j + 4] = wk2r * x0r - wk2i * x0i;
+    a[j + 5] = wk2r * x0i + wk2i * x0r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j + 2] = wk1r * x0r - wk1i * x0i;
+    a[j + 3] = wk1r * x0i + wk1i * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j + 6] = wk3r * x0r - wk3i * x0i;
+    a[j + 7] = wk3r * x0i + wk3i * x0r;
+    wk1r = rdft_w[k2 + 2];
+    wk1i = rdft_w[k2 + 3];
+    wk3r = rdft_wk3ri_second[k1 + 0];
+    wk3i = rdft_wk3ri_second[k1 + 1];
+    x0r = a[j + 8] + a[j + 10];  // Second group of the block: floats j + 8 .. j + 15.
+    x0i = a[j + 9] + a[j + 11];
+    x1r = a[j + 8] - a[j + 10];
+    x1i = a[j + 9] - a[j + 11];
+    x2r = a[j + 12] + a[j + 14];
+    x2i = a[j + 13] + a[j + 15];
+    x3r = a[j + 12] - a[j + 14];
+    x3i = a[j + 13] - a[j + 15];
+    a[j + 8] = x0r + x2r;
+    a[j + 9] = x0i + x2i;
+    x0r -= x2r;
+    x0i -= x2i;
+    a[j + 12] = -wk2i * x0r - wk2r * x0i;
+    a[j + 13] = -wk2i * x0i + wk2r * x0r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j + 10] = wk1r * x0r - wk1i * x0i;
+    a[j + 11] = wk1r * x0i + wk1i * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j + 14] = wk3r * x0r - wk3i * x0i;
+    a[j + 15] = wk3r * x0i + wk3i * x0r;
+  }
+}
+/*
+ * C implementation installed behind the cftmdl_128 pointer by
+ * aec_rdft_init(). Works in place on the 128 floats of |a|. Each of the four
+ * 32-float blocks is handled by one loop that combines the (real, imag)
+ * pairs found at offsets 0, 8, 16 and 24 floats from j0 with a 4-point
+ * butterfly. The block at 0 needs no weights, the block at 32 only
+ * rdft_w[2], and the blocks at 64 and 96 use weights from rdft_w and
+ * rdft_wk3ri_first / rdft_wk3ri_second.
+ */
+static void cftmdl_128_C(float* a) {
+  const int l = 8;
+  const int n = 128;
+  const int m = 32;
+  int j0, j1, j2, j3, k, k1, k2, m2;
+  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  for (j0 = 0; j0 < l; j0 += 2) {  // Block starting at float 0.
+    j1 = j0 + 8;
+    j2 = j0 + 16;
+    j3 = j0 + 24;
+    x0r = a[j0 + 0] + a[j1 + 0];
+    x0i = a[j0 + 1] + a[j1 + 1];
+    x1r = a[j0 + 0] - a[j1 + 0];
+    x1i = a[j0 + 1] - a[j1 + 1];
+    x2r = a[j2 + 0] + a[j3 + 0];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2 + 0] - a[j3 + 0];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j0 + 0] = x0r + x2r;
+    a[j0 + 1] = x0i + x2i;
+    a[j2 + 0] = x0r - x2r;
+    a[j2 + 1] = x0i - x2i;
+    a[j1 + 0] = x1r - x3i;
+    a[j1 + 1] = x1i + x3r;
+    a[j3 + 0] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+  }
+  wk1r = rdft_w[2];  // 0.7071067691f
+  for (j0 = m; j0 < l + m; j0 += 2) {  // Block starting at float 32.
+    j1 = j0 + 8;
+    j2 = j0 + 16;
+    j3 = j0 + 24;
+    x0r = a[j0 + 0] + a[j1 + 0];
+    x0i = a[j0 + 1] + a[j1 + 1];
+    x1r = a[j0 + 0] - a[j1 + 0];
+    x1i = a[j0 + 1] - a[j1 + 1];
+    x2r = a[j2 + 0] + a[j3 + 0];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2 + 0] - a[j3 + 0];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j0 + 0] = x0r + x2r;
+    a[j0 + 1] = x0i + x2i;
+    a[j2 + 0] = x2i - x0i;
+    a[j2 + 1] = x0r - x2r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j1 + 0] = wk1r * (x0r - x0i);
+    a[j1 + 1] = wk1r * (x0r + x0i);
+    x0r = x3i + x1r;
+    x0i = x3r - x1i;
+    a[j3 + 0] = wk1r * (x0i - x0r);
+    a[j3 + 1] = wk1r * (x0i + x0r);
+  }
+  k1 = 0;
+  m2 = 2 * m;
+  for (k = m2; k < n; k += m2) {  // With n = 128 and m2 = 64 this body runs once, for k = 64.
+    k1 += 2;
+    k2 = 2 * k1;
+    wk2r = rdft_w[k1 + 0];
+    wk2i = rdft_w[k1 + 1];
+    wk1r = rdft_w[k2 + 0];
+    wk1i = rdft_w[k2 + 1];
+    wk3r = rdft_wk3ri_first[k1 + 0];
+    wk3i = rdft_wk3ri_first[k1 + 1];
+    for (j0 = k; j0 < l + k; j0 += 2) {  // Block starting at float k.
+      j1 = j0 + 8;
+      j2 = j0 + 16;
+      j3 = j0 + 24;
+      x0r = a[j0 + 0] + a[j1 + 0];
+      x0i = a[j0 + 1] + a[j1 + 1];
+      x1r = a[j0 + 0] - a[j1 + 0];
+      x1i = a[j0 + 1] - a[j1 + 1];
+      x2r = a[j2 + 0] + a[j3 + 0];
+      x2i = a[j2 + 1] + a[j3 + 1];
+      x3r = a[j2 + 0] - a[j3 + 0];
+      x3i = a[j2 + 1] - a[j3 + 1];
+      a[j0 + 0] = x0r + x2r;
+      a[j0 + 1] = x0i + x2i;
+      x0r -= x2r;
+      x0i -= x2i;
+      a[j2 + 0] = wk2r * x0r - wk2i * x0i;
+      a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+      x0r = x1r - x3i;
+      x0i = x1i + x3r;
+      a[j1 + 0] = wk1r * x0r - wk1i * x0i;
+      a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+      x0r = x1r + x3i;
+      x0i = x1i - x3r;
+      a[j3 + 0] = wk3r * x0r - wk3i * x0i;
+      a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+    }
+    wk1r = rdft_w[k2 + 2];
+    wk1i = rdft_w[k2 + 3];
+    wk3r = rdft_wk3ri_second[k1 + 0];
+    wk3i = rdft_wk3ri_second[k1 + 1];
+    for (j0 = k + m; j0 < l + (k + m); j0 += 2) {  // Block starting at float k + 32.
+      j1 = j0 + 8;
+      j2 = j0 + 16;
+      j3 = j0 + 24;
+      x0r = a[j0 + 0] + a[j1 + 0];
+      x0i = a[j0 + 1] + a[j1 + 1];
+      x1r = a[j0 + 0] - a[j1 + 0];
+      x1i = a[j0 + 1] - a[j1 + 1];
+      x2r = a[j2 + 0] + a[j3 + 0];
+      x2i = a[j2 + 1] + a[j3 + 1];
+      x3r = a[j2 + 0] - a[j3 + 0];
+      x3i = a[j2 + 1] - a[j3 + 1];
+      a[j0 + 0] = x0r + x2r;
+      a[j0 + 1] = x0i + x2i;
+      x0r -= x2r;
+      x0i -= x2i;
+      a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
+      a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+      x0r = x1r - x3i;
+      x0i = x1i + x3r;
+      a[j1 + 0] = wk1r * x0r - wk1i * x0i;
+      a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+      x0r = x1r + x3i;
+      x0i = x1i - x3r;
+      a[j3 + 0] = wk3r * x0r - wk3i * x0i;
+      a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+    }
+  }
+}
+/*
+ * C implementation installed behind the cftfsub_128 pointer by
+ * aec_rdft_init(); aec_rdft_forward_128() calls it. Runs cft1st_128 and
+ * cftmdl_128 through their function pointers (so whichever implementation is
+ * installed is used), then finishes with an unweighted 4-point butterfly over
+ * the (real, imag) pairs that sit 32 floats apart in |a|. Works in place on
+ * 128 floats.
+ */
+static void cftfsub_128_C(float* a) {
+  int j, j1, j2, j3, l;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  cft1st_128(a);
+  cftmdl_128(a);
+  l = 32;  // Distance in floats between the four butterfly operands.
+  for (j = 0; j < l; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = a[j + 1] + a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = a[j + 1] - a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i - x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i + x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+  }
+}
+/*
+ * C implementation installed behind the cftbsub_128 pointer by
+ * aec_rdft_init(); aec_rdft_inverse_128() calls it. Same stage sequence as
+ * cftfsub_128_C (cft1st_128, cftmdl_128, then a final butterfly over pairs
+ * 32 floats apart), but the final pass negates the imaginary parts read from
+ * a[j + 1] and a[j1 + 1]. Algebraically this leaves the real outputs equal to
+ * those of the forward final pass and negates the imaginary outputs.
+ */
+static void cftbsub_128_C(float* a) {
+  int j, j1, j2, j3, l;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  cft1st_128(a);
+  cftmdl_128(a);
+  l = 32;  // Distance in floats between the four butterfly operands.
+
+  for (j = 0; j < l; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = -a[j + 1] - a[j1 + 1];  // Sign-flipped relative to cftfsub_128_C.
+    x1r = a[j] - a[j1];
+    x1i = -a[j + 1] + a[j1 + 1];  // Sign-flipped relative to cftfsub_128_C.
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i - x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i + x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i - x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i + x3r;
+  }
+}
+/*
+ * C implementation installed behind the rftfsub_128 pointer by
+ * aec_rdft_init(); aec_rdft_forward_128() calls it after cftfsub_128. For
+ * j2 = 2, 4, ..., 62 it updates the (real, imag) pair at a[j2] together with
+ * the mirrored pair at a[128 - j2], using weights taken from the upper half
+ * of rdft_w. Works in place; a[0], a[1], a[64] and a[65] are not touched.
+ * NOTE(review): presumably the real-FFT post-processing step of the Ooura
+ * rdft; the math is not verified here.
+ */
+static void rftfsub_128_C(float* a) {
+  const float* c = rdft_w + 32;
+  int j1, j2, k1, k2;
+  float wkr, wki, xr, xi, yr, yi;
+
+  for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
+    k2 = 128 - j2;  // Index of the mirrored pair.
+    k1 = 32 - j1;
+    wkr = 0.5f - c[k1];
+    wki = c[j1];
+    xr = a[j2 + 0] - a[k2 + 0];
+    xi = a[j2 + 1] + a[k2 + 1];
+    yr = wkr * xr - wki * xi;
+    yi = wkr * xi + wki * xr;
+    a[j2 + 0] -= yr;
+    a[j2 + 1] -= yi;
+    a[k2 + 0] += yr;
+    a[k2 + 1] -= yi;
+  }
+}
+/*
+ * C implementation installed behind the rftbsub_128 pointer by
+ * aec_rdft_init(); aec_rdft_inverse_128() calls it first. Negates a[1] and
+ * a[65], and for j2 = 2, 4, ..., 62 updates the (real, imag) pair at a[j2]
+ * together with the mirrored pair at a[128 - j2]. It uses the same weights
+ * as rftfsub_128_C but with the sign of the wki terms flipped, and it stores
+ * yi minus the old imaginary parts. Works in place on 128 floats.
+ */
+static void rftbsub_128_C(float* a) {
+  const float* c = rdft_w + 32;
+  int j1, j2, k1, k2;
+  float wkr, wki, xr, xi, yr, yi;
+
+  a[1] = -a[1];
+  for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
+    k2 = 128 - j2;  // Index of the mirrored pair.
+    k1 = 32 - j1;
+    wkr = 0.5f - c[k1];
+    wki = c[j1];
+    xr = a[j2 + 0] - a[k2 + 0];
+    xi = a[j2 + 1] + a[k2 + 1];
+    yr = wkr * xr + wki * xi;
+    yi = wkr * xi - wki * xr;
+    a[j2 + 0] = a[j2 + 0] - yr;
+    a[j2 + 1] = yi - a[j2 + 1];
+    a[k2 + 0] = yr + a[k2 + 0];
+    a[k2 + 1] = yi - a[k2 + 1];
+  }
+  a[65] = -a[65];
+}
+
+// In-place forward transform of the 128 floats in |a|. Runs the bitrv2,
+// cftfsub and rftfsub kernels through their function pointers, then replaces
+// a[0] with a[0] + a[1] and a[1] with a[0] - a[1].
+void aec_rdft_forward_128(float* a) {
+  float first_minus_second;
+  bitrv2_128(a);
+  cftfsub_128(a);
+  rftfsub_128(a);
+  first_minus_second = a[0] - a[1];
+  a[0] = a[0] + a[1];
+  a[1] = first_minus_second;
+}
+
+// In-place inverse transform of the 128 floats in |a|. First replaces a[1]
+// with half of a[0] - a[1] and subtracts that new a[1] from a[0], then runs
+// the rftbsub, bitrv2 and cftbsub kernels through their function pointers.
+void aec_rdft_inverse_128(float* a) {
+  a[1] = (a[0] - a[1]) * 0.5f;
+  a[0] = a[0] - a[1];
+  rftbsub_128(a);
+  bitrv2_128(a);
+  cftbsub_128(a);
+}
+
+/* Code path selection: the transform kernels are reached through these
+ * pointers. They are assigned in aec_rdft_init(), which must therefore run
+ * before aec_rdft_forward_128() or aec_rdft_inverse_128(). */
+RftSub128 cft1st_128;
+RftSub128 cftmdl_128;
+RftSub128 rftfsub_128;
+RftSub128 rftbsub_128;
+RftSub128 cftfsub_128;
+RftSub128 cftbsub_128;
+RftSub128 bitrv2_128;
+
+// Selects the rdft kernels. The portable C versions are installed first;
+// after that every SIMD init routine compiled into this build is called,
+// guarded by a run-time CPU check where one is coded below.
+void aec_rdft_init(void) {
+  bitrv2_128 = bitrv2_128_C;
+  cftfsub_128 = cftfsub_128_C;
+  cftbsub_128 = cftbsub_128_C;
+  rftfsub_128 = rftfsub_128_C;
+  rftbsub_128 = rftbsub_128_C;
+  cftmdl_128 = cftmdl_128_C;
+  cft1st_128 = cft1st_128_C;
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+  if (WebRtc_GetCPUInfo(kSSE2) != 0) {
+    aec_rdft_init_sse2();
+  }
+#endif
+#if defined(MIPS_FPU_LE)
+  aec_rdft_init_mips();
+#endif
+#if defined(WEBRTC_HAS_NEON)
+  aec_rdft_init_neon();
+#elif defined(WEBRTC_DETECT_NEON)
+  if (WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) {
+    aec_rdft_init_neon();
+  }
+#endif
+}
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.h
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft.h
@ -0,0 +1,61 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
+
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+
+/* These cast intrinsics were unavailable before VS 2008 (_MSC_VER < 1500),
+ * so older MSVC builds get replacements that reinterpret the argument
+ * through a pointer cast. */
+// TODO(andrew): move to a common file.
+#if defined(_MSC_VER) && _MSC_VER < 1500
+#include <emmintrin.h>
+static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; }
+static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; }
+#endif
+
+// Constants shared by all paths (C, SSE2, NEON).
+extern const float rdft_w[64];
+// Constants used by the C path.
+extern const float rdft_wk3ri_first[16];
+extern const float rdft_wk3ri_second[16];
+/* Constants used by the SSE2 and NEON paths. Like the tables above, they are
+ * defined as constant data in aec_rdft.c. */
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32];
+extern ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4];
+
+/* Code path selection function pointers. Each kernel takes the float buffer
+ * to transform in place; aec_rdft_init() assigns all seven pointers. */
+typedef void (*RftSub128)(float* a);
+extern RftSub128 rftfsub_128;
+extern RftSub128 rftbsub_128;
+extern RftSub128 cft1st_128;
+extern RftSub128 cftmdl_128;
+extern RftSub128 cftfsub_128;
+extern RftSub128 cftbsub_128;
+extern RftSub128 bitrv2_128;
+
+// entry points
+void aec_rdft_init(void);  // Assigns the kernel pointers; call before either transform.
+void aec_rdft_init_sse2(void);  // Called by aec_rdft_init() on x86 when SSE2 is reported.
+void aec_rdft_forward_128(float* a);  // In-place forward transform of 128 floats.
+void aec_rdft_inverse_128(float* a);  // In-place inverse transform of 128 floats.
+
+#if defined(MIPS_FPU_LE)
+void aec_rdft_init_mips(void);  // Called by aec_rdft_init() when MIPS_FPU_LE is defined.
+#endif
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+void aec_rdft_init_neon(void);  // Called by aec_rdft_init(); run-time checked under WEBRTC_DETECT_NEON.
+#endif
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_neon.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_neon.c
@ -0,0 +1,355 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The rdft AEC algorithm, neon version of speed-critical functions.
+ *
+ * Based on the sse2 version.
+ */
+
+
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+#include <arm_neon.h>
+
+static const ALIGN16_BEG float ALIGN16_END
+    k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};  // As a multiplier: negates lanes 0 and 2 only.
+/* cft1st_128_neon: NEON kernel installed as cft1st_128.
+ *
+ * Processes a[0..127] in place, 16 floats per iteration (index j), while
+ * stepping through the rdft_wk{1,2,3}{r,i} tables four entries at a time
+ * (index k2, 0..28). Each group is split into four vectors, combined with
+ * adds and subtracts, multiplied by the table entries and stored back to
+ * the same 16 slots.
+ * Presumably this is the first butterfly pass of the 128-point FFT; the
+ * scalar reference implementation is not visible in this file.
+ */
+static void cft1st_128_neon(float* a) {
+  const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
+  int j, k2;
+
+  for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
+    float32x4_t a00v = vld1q_f32(&a[j + 0]);
+    float32x4_t a04v = vld1q_f32(&a[j + 4]);
+    float32x4_t a08v = vld1q_f32(&a[j + 8]);
+    float32x4_t a12v = vld1q_f32(&a[j + 12]);
+    float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v));  // a[j+0], a[j+1], a[j+8], a[j+9]
+    float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v));  // a[j+2], a[j+3], a[j+10], a[j+11]
+    float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v));  // a[j+4], a[j+5], a[j+12], a[j+13]
+    float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v));  // a[j+6], a[j+7], a[j+14], a[j+15]
+    const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]);
+    const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]);
+    const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]);
+    const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]);
+    const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]);
+    const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]);
+    float32x4_t x0v = vaddq_f32(a01v, a23v);
+    const float32x4_t x1v = vsubq_f32(a01v, a23v);
+    const float32x4_t x2v = vaddq_f32(a45v, a67v);
+    const float32x4_t x3v = vsubq_f32(a45v, a67v);
+    const float32x4_t x3w = vrev64q_f32(x3v);  // x3v with the two floats of each pair swapped.
+    float32x4_t x0w;
+    a01v = vaddq_f32(x0v, x2v);  // First output needs no table multiply.
+    x0v = vsubq_f32(x0v, x2v);
+    x0w = vrev64q_f32(x0v);
+    a45v = vmulq_f32(wk2rv, x0v);
+    a45v = vmlaq_f32(a45v, wk2iv, x0w);  // a45v = wk2r * x0v + wk2i * swap(x0v)
+    x0v = vmlaq_f32(x1v, x3w, vec_swap_sign);  // x1v + x3w * {-1, 1, -1, 1}
+    x0w = vrev64q_f32(x0v);
+    a23v = vmulq_f32(wk1rv, x0v);
+    a23v = vmlaq_f32(a23v, wk1iv, x0w);  // a23v = wk1r * x0v + wk1i * swap(x0v)
+    x0v = vmlsq_f32(x1v, x3w, vec_swap_sign);  // x1v - x3w * {-1, 1, -1, 1}
+    x0w = vrev64q_f32(x0v);
+    a67v = vmulq_f32(wk3rv, x0v);
+    a67v = vmlaq_f32(a67v, wk3iv, x0w);  // a67v = wk3r * x0v + wk3i * swap(x0v)
+    a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v));  // Undo the load-time regrouping.
+    a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v));
+    a08v = vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v));
+    a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v));
+    vst1q_f32(&a[j + 0], a00v);
+    vst1q_f32(&a[j + 4], a04v);
+    vst1q_f32(&a[j + 8], a08v);
+    vst1q_f32(&a[j + 12], a12v);
+  }
+}
+/* cftmdl_128_neon: NEON kernel installed as cftmdl_128.
+ *
+ * Works in place on a and consists of two passes over pairs of floats that
+ * sit 8 apart (offsets 0, 8, 16, ..., 56 from j):
+ *  - the first loop runs for j = 0, 2, 4, 6 and multiplies only by
+ *    cftmdl_wk1r;
+ *  - the second block runs for j = 64, 66, 68, 70 (so it reaches a[127])
+ *    and multiplies by entries 4..7 of the rdft_wk{1,2,3}{r,i} tables.
+ * In both passes two float pairs are packed into one 4-lane vector (low
+ * half from the lower offset, high half from the offset 32 further on) so
+ * that two butterflies are computed per instruction.
+ */
+static void cftmdl_128_neon(float* a) {
+  int j;
+  const int l = 8;
+  const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
+  float32x4_t wk1rv = vld1q_f32(cftmdl_wk1r);
+
+  for (j = 0; j < l; j += 2) {
+    const float32x2_t a_00 = vld1_f32(&a[j + 0]);
+    const float32x2_t a_08 = vld1_f32(&a[j + 8]);
+    const float32x2_t a_32 = vld1_f32(&a[j + 32]);
+    const float32x2_t a_40 = vld1_f32(&a[j + 40]);
+    const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
+    const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
+    const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
+    const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
+    const float32x2_t a_16 = vld1_f32(&a[j + 16]);
+    const float32x2_t a_24 = vld1_f32(&a[j + 24]);
+    const float32x2_t a_48 = vld1_f32(&a[j + 48]);
+    const float32x2_t a_56 = vld1_f32(&a[j + 56]);
+    const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
+    const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
+    const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
+    const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
+    const float32x4_t xx0 = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+    const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+    const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);  // Swap the floats of each pair.
+    const float32x4_t x1_x3_add =
+        vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);  // x1 + {-1, 1, -1, 1} * swapped x3
+    const float32x4_t x1_x3_sub =
+        vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);  // x1 - {-1, 1, -1, 1} * swapped x3
+    const float32x2_t yy0_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 0);  // add[2], add[2]
+    const float32x2_t yy0_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 0);  // sub[2], sub[2]
+    const float32x4_t yy0_as = vcombine_f32(yy0_a, yy0_s);
+    const float32x2_t yy1_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 1);  // add[3], add[3]
+    const float32x2_t yy1_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 1);  // sub[3], sub[3]
+    const float32x4_t yy1_as = vcombine_f32(yy1_a, yy1_s);
+    const float32x4_t yy0 = vmlaq_f32(yy0_as, vec_swap_sign, yy1_as);  // add2-add3, add2+add3, sub2-sub3, sub2+sub3
+    const float32x4_t yy4 = vmulq_f32(wk1rv, yy0);
+    const float32x4_t xx1_rev = vrev64q_f32(xx1);
+    const float32x4_t yy4_rev = vrev64q_f32(yy4);
+
+    vst1_f32(&a[j + 0], vget_low_f32(xx0));
+    vst1_f32(&a[j + 32], vget_high_f32(xx0));
+    vst1_f32(&a[j + 16], vget_low_f32(xx1));
+    vst1_f32(&a[j + 48], vget_high_f32(xx1_rev));  // Stores xx1[3], xx1[2].
+
+    a[j + 48] = -a[j + 48];  // Only the first of the two floats just stored is negated.
+
+    vst1_f32(&a[j + 8], vget_low_f32(x1_x3_add));
+    vst1_f32(&a[j + 24], vget_low_f32(x1_x3_sub));
+    vst1_f32(&a[j + 40], vget_low_f32(yy4));
+    vst1_f32(&a[j + 56], vget_high_f32(yy4_rev));  // Stores yy4[3], yy4[2].
+  }
+
+  {
+    const int k = 64;
+    const int k1 = 2;
+    const int k2 = 2 * k1;  // == 4: first table entry used by this pass.
+    const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2 + 0]);
+    const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2 + 0]);
+    const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2 + 0]);
+    const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2 + 0]);
+    const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2 + 0]);
+    wk1rv = vld1q_f32(&rdft_wk1r[k2 + 0]);  // Replaces the cftmdl_wk1r value used by the first pass.
+    for (j = k; j < l + k; j += 2) {
+      const float32x2_t a_00 = vld1_f32(&a[j + 0]);
+      const float32x2_t a_08 = vld1_f32(&a[j + 8]);
+      const float32x2_t a_32 = vld1_f32(&a[j + 32]);
+      const float32x2_t a_40 = vld1_f32(&a[j + 40]);
+      const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
+      const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
+      const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
+      const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
+      const float32x2_t a_16 = vld1_f32(&a[j + 16]);
+      const float32x2_t a_24 = vld1_f32(&a[j + 24]);
+      const float32x2_t a_48 = vld1_f32(&a[j + 48]);
+      const float32x2_t a_56 = vld1_f32(&a[j + 56]);
+      const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
+      const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
+      const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
+      const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
+      const float32x4_t xx = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+      const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+      const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
+      const float32x4_t x1_x3_add =
+          vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
+      const float32x4_t x1_x3_sub =
+          vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
+      float32x4_t xx4 = vmulq_f32(wk2rv, xx1);
+      float32x4_t xx12 = vmulq_f32(wk1rv, x1_x3_add);
+      float32x4_t xx22 = vmulq_f32(wk3rv, x1_x3_sub);
+      xx4 = vmlaq_f32(xx4, wk2iv, vrev64q_f32(xx1));  // wk2r * xx1 + wk2i * swap(xx1)
+      xx12 = vmlaq_f32(xx12, wk1iv, vrev64q_f32(x1_x3_add));  // wk1r * add + wk1i * swap(add)
+      xx22 = vmlaq_f32(xx22, wk3iv, vrev64q_f32(x1_x3_sub));  // wk3r * sub + wk3i * swap(sub)
+
+      vst1_f32(&a[j + 0], vget_low_f32(xx));
+      vst1_f32(&a[j + 32], vget_high_f32(xx));
+      vst1_f32(&a[j + 16], vget_low_f32(xx4));
+      vst1_f32(&a[j + 48], vget_high_f32(xx4));
+      vst1_f32(&a[j + 8], vget_low_f32(xx12));
+      vst1_f32(&a[j + 40], vget_high_f32(xx12));
+      vst1_f32(&a[j + 24], vget_low_f32(xx22));
+      vst1_f32(&a[j + 56], vget_high_f32(xx22));
+    }
+  }
+}
+
+// Returns |in| with its four lanes in the opposite order: A B C D -> D C B A.
+__inline static float32x4_t reverse_order_f32x4(float32x4_t in) {
+  // Exchange the two 64-bit halves: A B C D -> C D A B.
+  const float32x2_t upper_half = vget_high_f32(in);
+  const float32x2_t lower_half = vget_low_f32(in);
+  const float32x4_t halves_swapped = vcombine_f32(upper_half, lower_half);
+  // Exchange the two floats inside each half: C D A B -> D C B A.
+  return vrev64q_f32(halves_swapped);
+}
+/* rftfsub_128_neon: NEON kernel installed as rftfsub_128.
+ *
+ * Updates a in place. Every step combines the pair a[j2], a[j2 + 1] with
+ * its mirror pair a[k2], a[k2 + 1], k2 = 128 - j2, using weights taken
+ * from the upper half of rdft_w (c = rdft_w + 32):
+ *    wkr = 0.5 - c[32 - j1], wki = c[j1].
+ * The vector loop covers four pairs per iteration for j2 = 2, 10, ..., 50;
+ * the scalar loop finishes j2 = 58, 60, 62. a[0], a[1], a[64] and a[65]
+ * are not touched here.
+ */
+static void rftfsub_128_neon(float* a) {
+  const float* c = rdft_w + 32;
+  int j1, j2;
+  const float32x4_t mm_half = vdupq_n_f32(0.5f);
+
+  // Vectorized code (four at once).
+  // Note: commented number are indexes for the first iteration of the loop.
+  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+    // Load 'wk'.
+    const float32x4_t c_j1 = vld1q_f32(&c[j1]);          //  1,  2,  3,  4,
+    const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]);     // 28, 29, 30, 31,
+    const float32x4_t wkrt = vsubq_f32(mm_half, c_k1);   // 28, 29, 30, 31,
+    const float32x4_t wkr_ = reverse_order_f32x4(wkrt);  // 31, 30, 29, 28,
+    const float32x4_t wki_ = c_j1;                       //  1,  2,  3,  4,
+    // Load and shuffle 'a'.
+    //   2,   4,   6,   8,   3,   5,   7,   9 (vld2q de-interleaves even/odd indexes)
+    float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
+    // 120, 122, 124, 126, 121, 123, 125, 127,
+    const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
+    // 126, 124, 122, 120
+    const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
+    // 127, 125, 123, 121
+    const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
+    // Calculate 'x'.
+    const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
+    // 2-126, 4-124, 6-122, 8-120,
+    const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
+    // 3+127, 5+125, 7+123, 9+121,
+    // Calculate product into 'y'.
+    //    yr = wkr * xr - wki * xi;
+    //    yi = wkr * xi + wki * xr;
+    const float32x4_t a_ = vmulq_f32(wkr_, xr_);
+    const float32x4_t b_ = vmulq_f32(wki_, xi_);
+    const float32x4_t c_ = vmulq_f32(wkr_, xi_);
+    const float32x4_t d_ = vmulq_f32(wki_, xr_);
+    const float32x4_t yr_ = vsubq_f32(a_, b_);  // 2-126, 4-124, 6-122, 8-120,
+    const float32x4_t yi_ = vaddq_f32(c_, d_);  // 3-127, 5-125, 7-123, 9-121,
+    // Update 'a'.
+    //    a[j2 + 0] -= yr;
+    //    a[j2 + 1] -= yi;
+    //    a[k2 + 0] += yr;
+    //    a[k2 + 1] -= yi;
+    // 126, 124, 122, 120,
+    const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
+    // 127, 125, 123, 121,
+    const float32x4_t a_k2_p1n = vsubq_f32(a_k2_p1, yi_);
+    // Shuffle in right order and store.
+    const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);  // 124, 126, 120, 122
+    const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);  // 125, 127, 121, 123
+    // 124, 125, 126, 127, 120, 121, 122, 123
+    const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
+    //   2,   4,   6,   8,
+    a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
+    //   3,   5,   7,   9,
+    a_j2_p.val[1] = vsubq_f32(a_j2_p.val[1], yi_);
+    //   2,   3,   4,   5,   6,   7,   8,   9,
+    vst2q_f32(&a[0 + j2], a_j2_p);
+
+    vst1q_f32(&a[122 - j2], a_k2_n.val[1]);  // 120, 121, 122, 123
+    vst1q_f32(&a[126 - j2], a_k2_n.val[0]);  // 124, 125, 126, 127
+  }
+
+  // Scalar code for the remaining items.
+  for (; j2 < 64; j1 += 1, j2 += 2) {
+    const int k2 = 128 - j2;
+    const int k1 = 32 - j1;
+    const float wkr = 0.5f - c[k1];
+    const float wki = c[j1];
+    const float xr = a[j2 + 0] - a[k2 + 0];
+    const float xi = a[j2 + 1] + a[k2 + 1];
+    const float yr = wkr * xr - wki * xi;
+    const float yi = wkr * xi + wki * xr;
+    a[j2 + 0] -= yr;
+    a[j2 + 1] -= yi;
+    a[k2 + 0] += yr;
+    a[k2 + 1] -= yi;
+  }
+}
+/* rftbsub_128_neon: NEON kernel installed as rftbsub_128.
+ *
+ * Counterpart of rftfsub_128_neon with the signs of the wki terms flipped
+ * and with the odd-indexed results stored as yi - a[...]. It also negates
+ * a[1] before the loops and a[65] after them. As in the forward kernel,
+ * pairs at j2 and k2 = 128 - j2 are combined with
+ *    wkr = 0.5 - c[32 - j1], wki = c[j1], c = rdft_w + 32;
+ * the vector loop covers j2 = 2, 10, ..., 50 and the scalar loop finishes
+ * j2 = 58, 60, 62.
+ */
+static void rftbsub_128_neon(float* a) {
+  const float* c = rdft_w + 32;
+  int j1, j2;
+  const float32x4_t mm_half = vdupq_n_f32(0.5f);
+
+  a[1] = -a[1];
+  // Vectorized code (four at once).
+  //    Note: commented number are indexes for the first iteration of the loop.
+  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+    // Load 'wk'.
+    const float32x4_t c_j1 = vld1q_f32(&c[j1]);         //  1,  2,  3,  4,
+    const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]);    // 28, 29, 30, 31,
+    const float32x4_t wkrt = vsubq_f32(mm_half, c_k1);  // 28, 29, 30, 31,
+    const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
+    const float32x4_t wki_ = c_j1;                      //  1,  2,  3,  4,
+    // Load and shuffle 'a'.
+    //   2,   4,   6,   8,   3,   5,   7,   9 (vld2q de-interleaves even/odd indexes)
+    float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
+    // 120, 122, 124, 126, 121, 123, 125, 127,
+    const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
+    // 126, 124, 122, 120
+    const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
+    // 127, 125, 123, 121
+    const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
+    // Calculate 'x'.
+    const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
+    // 2-126, 4-124, 6-122, 8-120,
+    const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
+    // 3+127, 5+125, 7+123, 9+121,
+    // Calculate product into 'y'.
+    //    yr = wkr * xr + wki * xi;
+    //    yi = wkr * xi - wki * xr;
+    const float32x4_t a_ = vmulq_f32(wkr_, xr_);
+    const float32x4_t b_ = vmulq_f32(wki_, xi_);
+    const float32x4_t c_ = vmulq_f32(wkr_, xi_);
+    const float32x4_t d_ = vmulq_f32(wki_, xr_);
+    const float32x4_t yr_ = vaddq_f32(a_, b_);  // 2-126, 4-124, 6-122, 8-120,
+    const float32x4_t yi_ = vsubq_f32(c_, d_);  // 3-127, 5-125, 7-123, 9-121,
+    // Update 'a'.
+    //    a[j2 + 0] = a[j2 + 0] - yr;
+    //    a[j2 + 1] = yi - a[j2 + 1];
+    //    a[k2 + 0] = yr + a[k2 + 0];
+    //    a[k2 + 1] = yi - a[k2 + 1];
+    // 126, 124, 122, 120,
+    const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
+    // 127, 125, 123, 121,
+    const float32x4_t a_k2_p1n = vsubq_f32(yi_, a_k2_p1);
+    // Shuffle in right order and store.
+    // vrev64q swaps within each pair: 124, 126, 120, 122 and 125, 127, 121, 123.
+    const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
+    const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
+    // 124, 125, 126, 127, 120, 121, 122, 123
+    const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
+    //   2,   4,   6,   8,
+    a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
+    //   3,   5,   7,   9,
+    a_j2_p.val[1] = vsubq_f32(yi_, a_j2_p.val[1]);
+    //   2,   3,   4,   5,   6,   7,   8,   9,
+    vst2q_f32(&a[0 + j2], a_j2_p);
+
+    vst1q_f32(&a[122 - j2], a_k2_n.val[1]);  // 120, 121, 122, 123
+    vst1q_f32(&a[126 - j2], a_k2_n.val[0]);  // 124, 125, 126, 127
+  }
+
+  // Scalar code for the remaining items.
+  for (; j2 < 64; j1 += 1, j2 += 2) {
+    const int k2 = 128 - j2;
+    const int k1 = 32 - j1;
+    const float wkr = 0.5f - c[k1];
+    const float wki = c[j1];
+    const float xr = a[j2 + 0] - a[k2 + 0];
+    const float xi = a[j2 + 1] + a[k2 + 1];
+    const float yr = wkr * xr + wki * xi;
+    const float yi = wkr * xi - wki * xr;
+    a[j2 + 0] = a[j2 + 0] - yr;
+    a[j2 + 1] = yi - a[j2 + 1];
+    a[k2 + 0] = yr + a[k2 + 0];
+    a[k2 + 1] = yi - a[k2 + 1];
+  }
+  a[65] = -a[65];
+}
+
+// Routes the shared FFT kernel hooks to the NEON implementations defined in
+// this file. Only these four hooks are replaced; every other hook keeps the
+// value it had before the call.
+void aec_rdft_init_neon(void) {
+  rftbsub_128 = rftbsub_128_neon;
+  rftfsub_128 = rftfsub_128_neon;
+  cftmdl_128 = cftmdl_128_neon;
+  cft1st_128 = cft1st_128_neon;
+}
+
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c
@ -0,0 +1,427 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+#include <emmintrin.h>
+
+static const ALIGN16_BEG float ALIGN16_END
+    k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};  // As a multiplier: negates lanes 0 and 2 only. Read with _mm_load_ps (aligned).
+/* cft1st_128_SSE2: SSE2 kernel installed as cft1st_128.
+ *
+ * Processes a[0..127] in place, 16 floats per iteration (index j), while
+ * stepping through the rdft_wk{1,2,3}{r,i} tables four entries at a time
+ * (index k2, 0..28). a is accessed with unaligned loads/stores; the tables
+ * and k_swap_sign are read with aligned loads.
+ * Presumably this is the first butterfly pass of the 128-point FFT; the
+ * scalar reference implementation is not visible in this file.
+ */
+static void cft1st_128_SSE2(float* a) {
+  const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
+  int j, k2;
+
+  for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
+    __m128 a00v = _mm_loadu_ps(&a[j + 0]);
+    __m128 a04v = _mm_loadu_ps(&a[j + 4]);
+    __m128 a08v = _mm_loadu_ps(&a[j + 8]);
+    __m128 a12v = _mm_loadu_ps(&a[j + 12]);
+    __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0));  // a[j+0], a[j+1], a[j+8], a[j+9]
+    __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2));  // a[j+2], a[j+3], a[j+10], a[j+11]
+    __m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0));  // a[j+4], a[j+5], a[j+12], a[j+13]
+    __m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2));  // a[j+6], a[j+7], a[j+14], a[j+15]
+
+    const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]);
+    const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]);
+    const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]);
+    const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]);
+    const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]);
+    const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]);
+    __m128 x0v = _mm_add_ps(a01v, a23v);
+    const __m128 x1v = _mm_sub_ps(a01v, a23v);
+    const __m128 x2v = _mm_add_ps(a45v, a67v);
+    const __m128 x3v = _mm_sub_ps(a45v, a67v);
+    __m128 x0w;
+    a01v = _mm_add_ps(x0v, x2v);  // First output needs no table multiply.
+    x0v = _mm_sub_ps(x0v, x2v);
+    x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));  // x0v with the floats of each pair swapped.
+    {
+      const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
+      const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
+      a45v = _mm_add_ps(a45_0v, a45_1v);  // wk2r * x0v + wk2i * swap(x0v)
+    }
+    {
+      __m128 a23_0v, a23_1v;
+      const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1));
+      const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w);  // {-1, 1, -1, 1} * swap(x3v)
+      x0v = _mm_add_ps(x1v, x3s);
+      x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
+      a23_0v = _mm_mul_ps(wk1rv, x0v);
+      a23_1v = _mm_mul_ps(wk1iv, x0w);
+      a23v = _mm_add_ps(a23_0v, a23_1v);  // wk1r * x0v + wk1i * swap(x0v)
+
+      x0v = _mm_sub_ps(x1v, x3s);  // x0v/x0w set here feed the wk3 product in the next scope.
+      x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
+    }
+    {
+      const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
+      const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w);
+      a67v = _mm_add_ps(a67_0v, a67_1v);  // wk3r * x0v + wk3i * swap(x0v)
+    }
+
+    a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0));  // Undo the load-time regrouping.
+    a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0));
+    a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2));
+    a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2));
+    _mm_storeu_ps(&a[j + 0], a00v);
+    _mm_storeu_ps(&a[j + 4], a04v);
+    _mm_storeu_ps(&a[j + 8], a08v);
+    _mm_storeu_ps(&a[j + 12], a12v);
+  }
+}
+/* cftmdl_128_SSE2: SSE2 kernel installed as cftmdl_128.
+ *
+ * Works in place on a and consists of two passes over pairs of floats that
+ * sit 8 apart (offsets 0, 8, 16, ..., 56 from j0):
+ *  - the first loop runs for j0 = 0, 2, 4, 6 and multiplies only by
+ *    cftmdl_wk1r;
+ *  - the second block runs for j0 = 64, 66, 68, 70 (so it reaches a[127])
+ *    and multiplies by entries 4..7 of the rdft_wk{1,2,3}{r,i} tables.
+ * Float pairs are moved as 64-bit integers (_mm_loadl_epi64 /
+ * _mm_storel_epi64) and reinterpreted with the cast intrinsics, so two
+ * pairs share one 4-lane vector (low half from the lower offset, high half
+ * from the offset 32 further on).
+ */
+static void cftmdl_128_SSE2(float* a) {
+  const int l = 8;
+  const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
+  int j0;
+
+  __m128 wk1rv = _mm_load_ps(cftmdl_wk1r);
+  for (j0 = 0; j0 < l; j0 += 2) {
+    const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
+    const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
+    const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
+    const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
+    const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
+                                          _mm_castsi128_ps(a_32),
+                                          _MM_SHUFFLE(1, 0, 1, 0));
+    const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
+                                          _mm_castsi128_ps(a_40),
+                                          _MM_SHUFFLE(1, 0, 1, 0));
+    __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
+    const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
+
+    const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
+    const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
+    const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
+    const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
+    const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
+                                          _mm_castsi128_ps(a_48),
+                                          _MM_SHUFFLE(1, 0, 1, 0));
+    const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
+                                          _mm_castsi128_ps(a_56),
+                                          _MM_SHUFFLE(1, 0, 1, 0));
+    const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
+    const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
+
+    const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+    const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+
+    const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
+        _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));  // Swap the floats of each pair.
+    const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
+    const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+    const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+
+    const __m128 yy0 =
+        _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2));  // add[2], add[2], sub[2], sub[2]
+    const __m128 yy1 =
+        _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3));  // add[3], add[3], sub[3], sub[3]
+    const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1);
+    const __m128 yy3 = _mm_add_ps(yy0, yy2);  // add2-add3, add2+add3, sub2-sub3, sub2+sub3
+    const __m128 yy4 = _mm_mul_ps(wk1rv, yy3);
+
+    _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0));
+    _mm_storel_epi64(
+        (__m128i*)&a[j0 + 32],
+        _mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2)));  // High half of xx0.
+
+    _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1));
+    _mm_storel_epi64(
+        (__m128i*)&a[j0 + 48],
+        _mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3)));  // Stores xx1[3], xx1[2].
+    a[j0 + 48] = -a[j0 + 48];  // Only the first of the two floats just stored is negated.
+
+    _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add));
+    _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub));
+
+    _mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4));
+    _mm_storel_epi64(
+        (__m128i*)&a[j0 + 56],
+        _mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3)));  // Stores yy4[3], yy4[2].
+  }
+
+  {
+    int k = 64;
+    int k1 = 2;
+    int k2 = 2 * k1;  // == 4: first table entry used by this pass.
+    const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]);
+    const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]);
+    const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]);
+    const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]);
+    const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]);
+    wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]);  // Replaces the cftmdl_wk1r value used by the first pass.
+    for (j0 = k; j0 < l + k; j0 += 2) {
+      const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
+      const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
+      const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
+      const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
+      const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
+                                            _mm_castsi128_ps(a_32),
+                                            _MM_SHUFFLE(1, 0, 1, 0));
+      const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
+                                            _mm_castsi128_ps(a_40),
+                                            _MM_SHUFFLE(1, 0, 1, 0));
+      __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
+      const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
+
+      const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
+      const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
+      const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
+      const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
+      const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
+                                            _mm_castsi128_ps(a_48),
+                                            _MM_SHUFFLE(1, 0, 1, 0));
+      const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
+                                            _mm_castsi128_ps(a_56),
+                                            _MM_SHUFFLE(1, 0, 1, 0));
+      const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
+      const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
+
+      const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+      const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+      const __m128 xx2 = _mm_mul_ps(xx1, wk2rv);
+      const __m128 xx3 =
+          _mm_mul_ps(wk2iv,
+                     _mm_castsi128_ps(_mm_shuffle_epi32(
+                         _mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1))));
+      const __m128 xx4 = _mm_add_ps(xx2, xx3);  // wk2r * xx1 + wk2i * swap(xx1)
+
+      const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
+          _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
+      const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
+      const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+      const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
+
+      const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
+      const __m128 xx11 = _mm_mul_ps(
+          wk1iv,
+          _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
+                                             _MM_SHUFFLE(2, 3, 0, 1))));
+      const __m128 xx12 = _mm_add_ps(xx10, xx11);  // wk1r * add + wk1i * swap(add)
+
+      const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
+      const __m128 xx21 = _mm_mul_ps(
+          wk3iv,
+          _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
+                                             _MM_SHUFFLE(2, 3, 0, 1))));
+      const __m128 xx22 = _mm_add_ps(xx20, xx21);  // wk3r * sub + wk3i * swap(sub)
+
+      _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
+      _mm_storel_epi64(
+          (__m128i*)&a[j0 + 32],
+          _mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2)));
+
+      _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4));
+      _mm_storel_epi64(
+          (__m128i*)&a[j0 + 48],
+          _mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2)));
+
+      _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12));
+      _mm_storel_epi64(
+          (__m128i*)&a[j0 + 40],
+          _mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2)));
+
+      _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22));
+      _mm_storel_epi64(
+          (__m128i*)&a[j0 + 56],
+          _mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2)));
+    }
+  }
+}
+/* rftfsub_128_SSE2: SSE2 kernel installed as rftfsub_128.
+ *
+ * Updates a in place. Every step combines the pair a[j2], a[j2 + 1] with
+ * its mirror pair a[k2], a[k2 + 1], k2 = 128 - j2, using weights taken
+ * from the upper half of rdft_w (c = rdft_w + 32):
+ *    wkr = 0.5 - c[32 - j1], wki = c[j1].
+ * The vector loop covers four pairs per iteration for j2 = 2, 10, ..., 50;
+ * the scalar loop finishes j2 = 58, 60, 62. a[0], a[1], a[64] and a[65]
+ * are not touched here.
+ */
+static void rftfsub_128_SSE2(float* a) {
+  const float* c = rdft_w + 32;
+  int j1, j2, k1, k2;
+  float wkr, wki, xr, xi, yr, yi;
+
+  static const ALIGN16_BEG float ALIGN16_END
+      k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
+  const __m128 mm_half = _mm_load_ps(k_half);
+
+  // Vectorized code (four at once).
+  //    Note: commented number are indexes for the first iteration of the loop.
+  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+    // Load 'wk'.
+    const __m128 c_j1 = _mm_loadu_ps(&c[j1]);       //  1,  2,  3,  4,
+    const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]);  // 28, 29, 30, 31,
+    const __m128 wkrt = _mm_sub_ps(mm_half, c_k1);  // 28, 29, 30, 31,
+    const __m128 wkr_ =
+        _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3));  // 31, 30, 29, 28,
+    const __m128 wki_ = c_j1;                                 //  1,  2,  3,  4,
+    // Load and shuffle 'a'.
+    const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]);    //   2,   3,   4,   5,
+    const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]);    //   6,   7,   8,   9,
+    const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]);  // 120, 121, 122, 123,
+    const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]);  // 124, 125, 126, 127,
+    const __m128 a_j2_p0 = _mm_shuffle_ps(
+        a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0));  //   2,   4,   6,   8,
+    const __m128 a_j2_p1 = _mm_shuffle_ps(
+        a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1));  //   3,   5,   7,   9,
+    const __m128 a_k2_p0 = _mm_shuffle_ps(
+        a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2));  // 126, 124, 122, 120,
+    const __m128 a_k2_p1 = _mm_shuffle_ps(
+        a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3));  // 127, 125, 123, 121,
+    // Calculate 'x'.
+    const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
+    // 2-126, 4-124, 6-122, 8-120,
+    const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
+    // 3+127, 5+125, 7+123, 9+121,
+    // Calculate product into 'y'.
+    //    yr = wkr * xr - wki * xi;
+    //    yi = wkr * xi + wki * xr;
+    const __m128 a_ = _mm_mul_ps(wkr_, xr_);
+    const __m128 b_ = _mm_mul_ps(wki_, xi_);
+    const __m128 c_ = _mm_mul_ps(wkr_, xi_);
+    const __m128 d_ = _mm_mul_ps(wki_, xr_);
+    const __m128 yr_ = _mm_sub_ps(a_, b_);  // 2-126, 4-124, 6-122, 8-120,
+    const __m128 yi_ = _mm_add_ps(c_, d_);  // 3-127, 5-125, 7-123, 9-121,
+    // Update 'a'.
+    //    a[j2 + 0] -= yr;
+    //    a[j2 + 1] -= yi;
+    //    a[k2 + 0] += yr;
+    //    a[k2 + 1] -= yi;
+    const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_);  //   2,   4,   6,   8,
+    const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_);  //   3,   5,   7,   9,
+    const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_);  // 126, 124, 122, 120,
+    const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_);  // 127, 125, 123, 121,
+    // Shuffle in right order and store.
+    const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
+    //   2,   3,   4,   5,
+    const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
+    //   6,   7,   8,   9,
+    const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
+    // 122, 123, 120, 121,
+    const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
+    // 126, 127, 124, 125,
+    const __m128 a_k2_0n = _mm_shuffle_ps(
+        a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2));  // 120, 121, 122, 123,
+    const __m128 a_k2_4n = _mm_shuffle_ps(
+        a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2));  // 124, 125, 126, 127,
+    _mm_storeu_ps(&a[0 + j2], a_j2_0n);
+    _mm_storeu_ps(&a[4 + j2], a_j2_4n);
+    _mm_storeu_ps(&a[122 - j2], a_k2_0n);
+    _mm_storeu_ps(&a[126 - j2], a_k2_4n);
+  }
+  // Scalar code for the remaining items.
+  for (; j2 < 64; j1 += 1, j2 += 2) {
+    k2 = 128 - j2;
+    k1 = 32 - j1;
+    wkr = 0.5f - c[k1];
+    wki = c[j1];
+    xr = a[j2 + 0] - a[k2 + 0];
+    xi = a[j2 + 1] + a[k2 + 1];
+    yr = wkr * xr - wki * xi;
+    yi = wkr * xi + wki * xr;
+    a[j2 + 0] -= yr;
+    a[j2 + 1] -= yi;
+    a[k2 + 0] += yr;
+    a[k2 + 1] -= yi;
+  }
+}
+/* rftbsub_128_SSE2: SSE2 kernel installed as rftbsub_128.
+ *
+ * Counterpart of rftfsub_128_SSE2 with the signs of the wki terms flipped
+ * and with the odd-indexed results stored as yi - a[...]. It also negates
+ * a[1] before the loops and a[65] after them. As in the forward kernel,
+ * pairs at j2 and k2 = 128 - j2 are combined with
+ *    wkr = 0.5 - c[32 - j1], wki = c[j1], c = rdft_w + 32;
+ * the vector loop covers j2 = 2, 10, ..., 50 and the scalar loop finishes
+ * j2 = 58, 60, 62.
+ */
+static void rftbsub_128_SSE2(float* a) {
+  const float* c = rdft_w + 32;
+  int j1, j2, k1, k2;
+  float wkr, wki, xr, xi, yr, yi;
+
+  static const ALIGN16_BEG float ALIGN16_END
+      k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
+  const __m128 mm_half = _mm_load_ps(k_half);
+
+  a[1] = -a[1];
+  // Vectorized code (four at once).
+  //    Note: commented number are indexes for the first iteration of the loop.
+  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+    // Load 'wk'.
+    const __m128 c_j1 = _mm_loadu_ps(&c[j1]);       //  1,  2,  3,  4,
+    const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]);  // 28, 29, 30, 31,
+    const __m128 wkrt = _mm_sub_ps(mm_half, c_k1);  // 28, 29, 30, 31,
+    const __m128 wkr_ =
+        _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3));  // 31, 30, 29, 28,
+    const __m128 wki_ = c_j1;                                 //  1,  2,  3,  4,
+    // Load and shuffle 'a'.
+    const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]);    //   2,   3,   4,   5,
+    const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]);    //   6,   7,   8,   9,
+    const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]);  // 120, 121, 122, 123,
+    const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]);  // 124, 125, 126, 127,
+    const __m128 a_j2_p0 = _mm_shuffle_ps(
+        a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0));  //   2,   4,   6,   8,
+    const __m128 a_j2_p1 = _mm_shuffle_ps(
+        a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1));  //   3,   5,   7,   9,
+    const __m128 a_k2_p0 = _mm_shuffle_ps(
+        a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2));  // 126, 124, 122, 120,
+    const __m128 a_k2_p1 = _mm_shuffle_ps(
+        a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3));  // 127, 125, 123, 121,
+    // Calculate 'x'.
+    const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
+    // 2-126, 4-124, 6-122, 8-120,
+    const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
+    // 3+127, 5+125, 7+123, 9+121,
+    // Calculate product into 'y'.
+    //    yr = wkr * xr + wki * xi;
+    //    yi = wkr * xi - wki * xr;
+    const __m128 a_ = _mm_mul_ps(wkr_, xr_);
+    const __m128 b_ = _mm_mul_ps(wki_, xi_);
+    const __m128 c_ = _mm_mul_ps(wkr_, xi_);
+    const __m128 d_ = _mm_mul_ps(wki_, xr_);
+    const __m128 yr_ = _mm_add_ps(a_, b_);  // 2-126, 4-124, 6-122, 8-120,
+    const __m128 yi_ = _mm_sub_ps(c_, d_);  // 3-127, 5-125, 7-123, 9-121,
+    // Update 'a'.
+    //    a[j2 + 0] = a[j2 + 0] - yr;
+    //    a[j2 + 1] = yi - a[j2 + 1];
+    //    a[k2 + 0] = yr + a[k2 + 0];
+    //    a[k2 + 1] = yi - a[k2 + 1];
+    const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_);  //   2,   4,   6,   8,
+    const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1);  //   3,   5,   7,   9,
+    const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_);  // 126, 124, 122, 120,
+    const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1);  // 127, 125, 123, 121,
+    // Shuffle in right order and store.
+    const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
+    //   2,   3,   4,   5,
+    const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
+    //   6,   7,   8,   9,
+    const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
+    // 122, 123, 120, 121,
+    const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
+    // 126, 127, 124, 125,
+    const __m128 a_k2_0n = _mm_shuffle_ps(
+        a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2));  // 120, 121, 122, 123,
+    const __m128 a_k2_4n = _mm_shuffle_ps(
+        a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2));  // 124, 125, 126, 127,
+    _mm_storeu_ps(&a[0 + j2], a_j2_0n);
+    _mm_storeu_ps(&a[4 + j2], a_j2_4n);
+    _mm_storeu_ps(&a[122 - j2], a_k2_0n);
+    _mm_storeu_ps(&a[126 - j2], a_k2_4n);
+  }
+  // Scalar code for the remaining items.
+  for (; j2 < 64; j1 += 1, j2 += 2) {
+    k2 = 128 - j2;
+    k1 = 32 - j1;
+    wkr = 0.5f - c[k1];
+    wki = c[j1];
+    xr = a[j2 + 0] - a[k2 + 0];
+    xi = a[j2 + 1] + a[k2 + 1];
+    yr = wkr * xr + wki * xi;
+    yi = wkr * xi - wki * xr;
+    a[j2 + 0] = a[j2 + 0] - yr;
+    a[j2 + 1] = yi - a[j2 + 1];
+    a[k2 + 0] = yr + a[k2 + 0];
+    a[k2 + 1] = yi - a[k2 + 1];
+  }
+  a[65] = -a[65];
+}
+
+// Routes the shared FFT kernel hooks to the SSE2 implementations defined in
+// this file. Only these four hooks are replaced; every other hook keeps the
+// value it had before the call.
+void aec_rdft_init_sse2(void) {
+  rftbsub_128 = rftbsub_128_SSE2;
+  rftfsub_128 = rftfsub_128_SSE2;
+  cftmdl_128 = cftmdl_128_SSE2;
+  cft1st_128 = cft1st_128_SSE2;
+}
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.c
@ -0,0 +1,209 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for
+ * clock skew by resampling the farend signal.
+ */
+
+#include "webrtc/modules/audio_processing/aec/aec_resampler.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+// Number of raw skew measurements WebRtcAec_GetSkew() collects before it runs
+// EstimateSkew() over them; also the capacity of the |skewData| array in
+// AecResampler.
+enum {
+  kEstimateLengthFrames = 400
+};
+
+// State of one resampler instance. Allocated by WebRtcAec_CreateResampler()
+// and reset by WebRtcAec_InitResampler().
+typedef struct {
+  // Sample storage read and shifted by WebRtcAec_ResampleLinear().
+  float buffer[kResamplerBufferSize];
+  // Fractional read offset carried over between WebRtcAec_ResampleLinear()
+  // calls.
+  float position;
+
+  // Rate handed to WebRtcAec_InitResampler(); EstimateSkew() derives its
+  // outlier limits from it.
+  int deviceSampleRateHz;
+  // Raw skew measurements gathered by WebRtcAec_GetSkew().
+  int skewData[kEstimateLengthFrames];
+  // Number of measurements stored in |skewData|; incremented one more time
+  // once the estimate has been computed.
+  int skewDataIndex;
+  // Estimate cached by WebRtcAec_GetSkew() after it has been computed.
+  float skewEstimate;
+} AecResampler;
+
+// Estimates the skew from the |size| raw measurements in |rawSkew| and writes
+// the result to |skewEst|. The outlier limits are derived from
+// |deviceSampleRateHz|. Returns 0 on success, or -1 (with |*skewEst| set to 0)
+// when no measurement passes the outlier checks.
+static int EstimateSkew(const int* rawSkew,
+                        int size,
+                        int deviceSampleRateHz,
+                        float* skewEst);
+
+// Allocates storage for one resampler instance. The memory is not
+// initialized; call WebRtcAec_InitResampler() before using it. Returns NULL
+// if the allocation fails.
+void* WebRtcAec_CreateResampler() {
+  AecResampler* instance = (AecResampler*)malloc(sizeof(AecResampler));
+  return instance;
+}
+
+// Resets |resampInst| to its start state and records |deviceSampleRateHz| for
+// later skew estimation. Always returns 0.
+int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz) {
+  AecResampler* state = (AecResampler*)resampInst;
+
+  // Interpolation state.
+  state->position = 0.0f;
+  memset(state->buffer, 0, sizeof(state->buffer));
+
+  // Skew estimation state.
+  state->deviceSampleRateHz = deviceSampleRateHz;
+  state->skewDataIndex = 0;
+  state->skewEstimate = 0.0f;
+  memset(state->skewData, 0, sizeof(state->skewData));
+
+  return 0;
+}
+
+// Releases an instance obtained from WebRtcAec_CreateResampler(). The
+// instance owns no further allocations, so a single free() is sufficient.
+void WebRtcAec_FreeResampler(void* resampInst) {
+  free(resampInst);
+}
+
+// Resamples the |size| samples in |inspeech| by the ratio (1 + |skew|) using
+// linear interpolation. The result is written to |outspeech| and the number
+// of produced samples to |size_out|. |size| must not exceed 2 * FRAME_LEN
+// (asserted); the caller provides room in |outspeech| for the output.
+void WebRtcAec_ResampleLinear(void* resampInst,
+                              const float* inspeech,
+                              size_t size,
+                              float skew,
+                              float* outspeech,
+                              size_t* size_out) {
+  AecResampler* state = (AecResampler*)resampInst;
+
+  const float* frame;
+  float ratio, readPos;
+  size_t readIdx;
+  size_t produced = 0;
+
+  assert(size <= 2 * FRAME_LEN);
+  assert(resampInst != NULL);
+  assert(inspeech != NULL);
+  assert(outspeech != NULL);
+  assert(size_out != NULL);
+
+  // Sample rate ratio
+  ratio = 1 + skew;
+
+  // Store the incoming frame |kResamplingDelay| samples past the start of the
+  // current frame.
+  memcpy(&state->buffer[FRAME_LEN + kResamplingDelay],
+         inspeech,
+         size * sizeof(inspeech[0]));
+
+  // Point at current frame
+  frame = &state->buffer[FRAME_LEN];
+
+  // Walk the fractional read position over the frame, one output sample per
+  // step, until it leaves the new data.
+  readPos = ratio * produced + state->position;
+  readIdx = (size_t)readPos;
+  while (readIdx < size) {
+    // Interpolate between the two samples surrounding |readPos|.
+    outspeech[produced] =
+        frame[readIdx] +
+        (readPos - readIdx) * (frame[readIdx + 1] - frame[readIdx]);
+    ++produced;
+
+    readPos = ratio * produced + state->position;
+    readIdx = (int)readPos;
+  }
+
+  // Report the output length and keep the fractional remainder for the next
+  // call.
+  *size_out = produced;
+  state->position += produced * ratio - size;
+
+  // Shift buffer
+  memmove(state->buffer,
+          &state->buffer[size],
+          (kResamplerBufferSize - size) * sizeof(state->buffer[0]));
+}
+
+// Feeds one raw skew measurement to the estimator.
+//
+// The first kEstimateLengthFrames calls only store |rawSkew| and leave
+// |*skewEst| untouched. The following call runs EstimateSkew() over the stored
+// data (its own |rawSkew| is not stored) and caches the result. Every later
+// call returns the cached estimate.
+//
+// Returns the EstimateSkew() status on the call that computes the estimate
+// and 0 otherwise.
+int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst) {
+  AecResampler* state = (AecResampler*)resampInst;
+  int status;
+
+  if (state->skewDataIndex > kEstimateLengthFrames) {
+    // Estimate already available.
+    *skewEst = state->skewEstimate;
+    return 0;
+  }
+
+  if (state->skewDataIndex == kEstimateLengthFrames) {
+    // Enough data collected: compute the estimate once and cache it.
+    status = EstimateSkew(state->skewData,
+                          kEstimateLengthFrames,
+                          state->deviceSampleRateHz,
+                          skewEst);
+    state->skewEstimate = *skewEst;
+    state->skewDataIndex++;
+    return status;
+  }
+
+  // Still collecting measurements.
+  state->skewData[state->skewDataIndex] = rawSkew;
+  state->skewDataIndex++;
+  return 0;
+}
+
+// Returns non-zero when |value| lies strictly between -|limit| and |limit|.
+static int SkewWithinLimit(int value, int limit) {
+  return value > -limit && value < limit;
+}
+
+// Estimates the skew as the least-squares slope of the cumulative sum of the
+// accepted raw measurements.
+//
+// Measurements outside +/-4% of |deviceSampleRateHz| are ignored when the mean
+// and mean absolute deviation are computed. The slope is then fitted over the
+// measurements that are either within +/-0.25% of |deviceSampleRateHz| or
+// within five mean absolute deviations (widened by one) of the mean.
+//
+// Writes the estimate to |skewEst| and returns 0. Returns -1 with |*skewEst|
+// set to 0 if either selection step leaves no measurement.
+int EstimateSkew(const int* rawSkew,
+                 int size,
+                 int deviceSampleRateHz,
+                 float* skewEst) {
+  const int absLimitOuter = (int)(0.04f * deviceSampleRateHz);
+  const int absLimitInner = (int)(0.0025f * deviceSampleRateHz);
+  int idx;
+  int count = 0;
+  int upperLimit;
+  int lowerLimit;
+  float mean = 0;
+  float meanAbsDev = 0;
+  float deviation;
+  float runningSum = 0;
+  float sumX = 0;
+  float sumX2 = 0;
+  float sumY = 0;
+  float sumXY = 0;
+  float meanX;
+  float denom;
+
+  *skewEst = 0;  // Set in case of error below.
+
+  // Pass 1: mean of the measurements inside the outer limit.
+  for (idx = 0; idx < size; idx++) {
+    if (!SkewWithinLimit(rawSkew[idx], absLimitOuter)) {
+      continue;
+    }
+    count++;
+    mean += rawSkew[idx];
+  }
+  if (count == 0) {
+    return -1;
+  }
+  assert(count > 0);
+  mean /= count;
+
+  // Pass 2: mean absolute deviation of the same measurements.
+  for (idx = 0; idx < size; idx++) {
+    if (!SkewWithinLimit(rawSkew[idx], absLimitOuter)) {
+      continue;
+    }
+    deviation = rawSkew[idx] - mean;
+    if (deviation >= 0) {
+      meanAbsDev += deviation;
+    } else {
+      meanAbsDev += -deviation;
+    }
+  }
+  assert(count > 0);
+  meanAbsDev /= count;
+  upperLimit = (int)(mean + 5 * meanAbsDev + 1);  // +1 for ceiling.
+  lowerLimit = (int)(mean - 5 * meanAbsDev - 1);  // -1 for floor.
+
+  // Pass 3: accumulate the regression sums, with x the running count of
+  // accepted measurements and y their cumulative sum.
+  count = 0;
+  for (idx = 0; idx < size; idx++) {
+    if (!SkewWithinLimit(rawSkew[idx], absLimitInner) &&
+        !(rawSkew[idx] < upperLimit && rawSkew[idx] > lowerLimit)) {
+      continue;
+    }
+    count++;
+    runningSum += rawSkew[idx];
+    sumX += count;
+    sumX2 += count * count;
+    sumY += runningSum;
+    sumXY += count * runningSum;
+  }
+  if (count == 0) {
+    return -1;
+  }
+  assert(count > 0);
+  meanX = sumX / count;
+  denom = sumX2 - meanX * sumX;
+
+  // A zero denominator leaves the estimate at 0.
+  if (denom != 0) {
+    *skewEst = (sumXY - meanX * sumY) / denom;
+  }
+  return 0;
+}
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.h
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_resampler.h
@ -0,0 +1,39 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
+
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+// Offset, in samples past the start of the current frame, at which
+// WebRtcAec_ResampleLinear() stores incoming data in its buffer.
+enum {
+  kResamplingDelay = 1
+};
+// Number of samples held in the resampler's internal buffer.
+enum {
+  kResamplerBufferSize = FRAME_LEN * 4
+};
+
+// Unless otherwise specified, functions return 0 on success and -1 on error.
+
+// Allocates a resampler instance. Returns NULL on error. The instance has to
+// be set up with WebRtcAec_InitResampler() before it is used.
+void* WebRtcAec_CreateResampler(void);
+// Resets |resampInst| and stores |deviceSampleRateHz| for skew estimation.
+int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz);
+// Releases an instance returned by WebRtcAec_CreateResampler().
+void WebRtcAec_FreeResampler(void* resampInst);
+
+// Estimates skew from raw measurement. |*skewEst| is only written once
+// enough raw measurements have been collected; until then it is left
+// unchanged and 0 is returned.
+int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst);
+
+// Resamples input using linear interpolation. Reads |size| samples (at most
+// 2 * FRAME_LEN) from |inspeech|, resamples them by the ratio (1 + |skew|)
+// and writes the result to |outspeech| and its length to |size_out|.
+void WebRtcAec_ResampleLinear(void* resampInst,
+                              const float* inspeech,
+                              size_t size,
+                              float skew,
+                              float* outspeech,
+                              size_t* size_out);
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c
@ -0,0 +1,923 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * Contains the API functions for the AEC.
+ */
+#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
+
+#include <math.h>
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+#include <stdio.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+#include "webrtc/modules/audio_processing/aec/aec_resampler.h"
+#include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h"
+#include "webrtc/typedefs.h"
+
+// Measured delays [ms]
+// Device                Chrome  GTP
+// MacBook Air           10
+// MacBook Retina        10      100
+// MacPro                30?
+//
+// Win7 Desktop          70      80?
+// Win7 T430s            110
+// Win8 T420s            70
+//
+// Daisy                 50
+// Pixel (w/ preproc?)           240
+// Pixel (w/o preproc?)  110     110
+
+// The extended filter mode gives us the flexibility to ignore the system's
+// reported delays. We do this for platforms which we believe provide results
+// which are incompatible with the AEC's expectations. Based on measurements
+// (some provided above) we set a conservative (i.e. lower than measured)
+// fixed delay.
+//
+// WEBRTC_UNTRUSTED_DELAY will only have an impact when |extended_filter_mode|
+// is enabled. See the note along with |DelayCorrection| in
+// echo_cancellation_impl.h for more details on the mode.
+//
+// Justification:
+// Chromium/Mac: Here, the true latency is so low (~10-20 ms), that it plays
+// havoc with the AEC's buffering. To avoid this, we set a fixed delay of 20 ms
+// and then compensate by rewinding by 10 ms (in wideband) through
+// kDelayDiffOffsetSamples. This trick does not seem to work for larger rewind
+// values, but fortunately this is sufficient.
+//
+// Chromium/Linux(ChromeOS): The values we get on this platform don't correspond
+// well to reality. The variance doesn't match the AEC's buffer changes, and the
+// bulk values tend to be too low. However, the range across different hardware
+// appears to be too large to choose a single value.
+//
+// GTP/Linux(ChromeOS): TBD, but for the moment we will trust the values.
+// The reported delay is only treated as untrusted for Chromium builds on Mac.
+#if defined(WEBRTC_CHROMIUM_BUILD) && defined(WEBRTC_MAC)
+#define WEBRTC_UNTRUSTED_DELAY
+#endif
+
+// Offset added to |knownDelay| in ProcessExtended() before the frame is
+// processed (the result is floored at 0).
+#if defined(WEBRTC_UNTRUSTED_DELAY) && defined(WEBRTC_MAC)
+static const int kDelayDiffOffsetSamples = -160;
+#else
+// Not enabled for now.
+static const int kDelayDiffOffsetSamples = 0;
+#endif
+
+// Delay used by ProcessExtended() in place of the reported delay when that
+// delay is untrusted or reaches |kMaxTrustedDelayMs|; also the lower bound of
+// its startup buffer size.
+#if defined(WEBRTC_MAC)
+static const int kFixedDelayMs = 20;
+#else
+static const int kFixedDelayMs = 50;
+#endif
+// Lower clamp applied to the reported delay in ProcessExtended(). Only
+// defined when the reported delay is trusted.
+#if !defined(WEBRTC_UNTRUSTED_DELAY)
+static const int kMinTrustedDelayMs = 20;
+#endif
+// Largest reported delay accepted without a warning by WebRtcAec_Process().
+static const int kMaxTrustedDelayMs = 500;
+
+// Maximum length of resampled signal. Must be an integer multiple of frames
+// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN
+// The factor of 2 handles wb, and the + 1 is as a safety margin
+// TODO(bjornv): Replace with kResamplerBufferSize
+#define MAX_RESAMP_LEN (5 * FRAME_LEN)
+
+// Upper bound for the startup far-end buffer size chosen in ProcessNormal().
+static const int kMaxBufSizeStart = 62;  // In partitions
+static const int sampMsNb = 8;           // samples per ms in nb
+// Marker stored in |initFlag| by WebRtcAec_Init(); the API functions compare
+// against it to reject instances that have not been initialized.
+static const int initCheck = 42;
+
+// Running count of created instances, used by WebRtcAec_Create() to number
+// the debug dump files.
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+int webrtc_aec_instance_count = 0;
+#endif
+
+// Estimates delay to set the position of the far-end buffer read pointer
+// (controlled by knownDelay)
+static void EstBufDelayNormal(Aec* aecInst);
+static void EstBufDelayExtended(Aec* aecInst);
+// Frame processing used by WebRtcAec_Process() when the extended filter is
+// disabled. Returns 0 on success and -1 if skew estimation failed.
+static int ProcessNormal(Aec* self,
+                         const float* const* near,
+                         size_t num_bands,
+                         float* const* out,
+                         size_t num_samples,
+                         int16_t reported_delay_ms,
+                         int32_t skew);
+// Frame processing used by WebRtcAec_Process() when the extended filter is
+// enabled.
+static void ProcessExtended(Aec* self,
+                            const float* const* near,
+                            size_t num_bands,
+                            float* const* out,
+                            size_t num_samples,
+                            int16_t reported_delay_ms,
+                            int32_t skew);
+
+// Allocates an AEC instance together with its core, its resampler and the
+// far-end pre-buffer. Returns NULL if any of the allocations fails; nothing
+// is leaked in that case. The returned instance has to be initialized with
+// WebRtcAec_Init() before use and released with WebRtcAec_Free().
+void* WebRtcAec_Create() {
+  Aec* aecpc = malloc(sizeof(Aec));
+
+  if (!aecpc) {
+    return NULL;
+  }
+
+  // |aecpc| comes straight from malloc(), so its members are indeterminate
+  // until they are assigned. On failure we therefore release exactly what has
+  // been created so far instead of calling WebRtcAec_Free(), which would free
+  // (and, with WEBRTC_AEC_DEBUG_DUMP, fclose) members that were never set.
+  aecpc->aec = WebRtcAec_CreateAec();
+  if (!aecpc->aec) {
+    free(aecpc);
+    return NULL;
+  }
+  aecpc->resampler = WebRtcAec_CreateResampler();
+  if (!aecpc->resampler) {
+    WebRtcAec_FreeAec(aecpc->aec);
+    free(aecpc);
+    return NULL;
+  }
+  // Create far-end pre-buffer. The buffer size has to be large enough for
+  // largest possible drift compensation (kResamplerBufferSize) + "almost" an
+  // FFT buffer (PART_LEN2 - 1).
+  aecpc->far_pre_buf =
+      WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, sizeof(float));
+  if (!aecpc->far_pre_buf) {
+    WebRtcAec_FreeResampler(aecpc->resampler);
+    WebRtcAec_FreeAec(aecpc->aec);
+    free(aecpc);
+    return NULL;
+  }
+
+  aecpc->initFlag = 0;
+  aecpc->lastError = 0;
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  {
+    // One set of dump files per instance, numbered by creation order.
+    // NOTE(review): the fopen() results are not checked here; the writers in
+    // WebRtcAec_Process() and the fclose() calls in WebRtcAec_Free() assume
+    // the files were opened successfully.
+    char filename[64];
+    sprintf(filename, "aec_buf%d.dat", webrtc_aec_instance_count);
+    aecpc->bufFile = fopen(filename, "wb");
+    sprintf(filename, "aec_skew%d.dat", webrtc_aec_instance_count);
+    aecpc->skewFile = fopen(filename, "wb");
+    sprintf(filename, "aec_delay%d.dat", webrtc_aec_instance_count);
+    aecpc->delayFile = fopen(filename, "wb");
+    webrtc_aec_instance_count++;
+  }
+#endif
+
+  return aecpc;
+}
+
+// Releases an instance created by WebRtcAec_Create() together with
+// everything it owns. A NULL |aecInst| is ignored.
+void WebRtcAec_Free(void* aecInst) {
+  Aec* instance = aecInst;
+
+  if (instance != NULL) {
+    WebRtc_FreeBuffer(instance->far_pre_buf);
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+    // Close the per-instance dump files opened in WebRtcAec_Create().
+    fclose(instance->bufFile);
+    fclose(instance->skewFile);
+    fclose(instance->delayFile);
+#endif
+
+    WebRtcAec_FreeAec(instance->aec);
+    WebRtcAec_FreeResampler(instance->resampler);
+    free(instance);
+  }
+}
+
+// Initializes (or re-initializes) an AEC instance and applies the default
+// configuration.
+//
+// |sampFreq| must be 8000, 16000, 32000 or 48000 and |scSampFreq| must lie in
+// [1, 96000]. Returns 0 on success. On failure returns -1 and stores the
+// reason in |lastError|.
+int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) {
+  Aec* self = aecInst;
+  AecConfig defaultConfig;
+
+  switch (sampFreq) {
+    case 8000:
+    case 16000:
+    case 32000:
+    case 48000:
+      break;
+    default:
+      self->lastError = AEC_BAD_PARAMETER_ERROR;
+      return -1;
+  }
+  self->sampFreq = sampFreq;
+
+  if (!(scSampFreq >= 1 && scSampFreq <= 96000)) {
+    self->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+  self->scSampFreq = scSampFreq;
+
+  // Initialize echo canceller core
+  if (WebRtcAec_InitAec(self->aec, self->sampFreq) == -1) {
+    self->lastError = AEC_UNSPECIFIED_ERROR;
+    return -1;
+  }
+
+  // Initialize the drift-compensation resampler
+  if (WebRtcAec_InitResampler(self->resampler, self->scSampFreq) == -1) {
+    self->lastError = AEC_UNSPECIFIED_ERROR;
+    return -1;
+  }
+
+  WebRtc_InitBuffer(self->far_pre_buf);
+  WebRtc_MoveReadPtr(self->far_pre_buf, -PART_LEN);  // Start overlap.
+
+  // Mark the instance as initialized. This has to precede the
+  // WebRtcAec_set_config() call below, which rejects uninitialized instances.
+  self->initFlag = initCheck;
+
+  // |splitSampFreq| is 16000 for 32000 and 48000 Hz input and equals
+  // |sampFreq| otherwise.
+  self->splitSampFreq =
+      (self->sampFreq == 32000 || self->sampFreq == 48000) ? 16000 : sampFreq;
+
+  self->delayCtr = 0;
+  self->sampFactor = (self->scSampFreq * 1.0f) / self->splitSampFreq;
+  // Sampling frequency multiplier (SWB is processed as 160 frame size).
+  self->rate_factor = self->splitSampFreq / 8000;
+
+  // State of the startup buffer-size search in ProcessNormal().
+  self->sum = 0;
+  self->counter = 0;
+  self->checkBuffSize = 1;
+  self->firstVal = 0;
+
+  // We skip the startup_phase completely (setting to 0) if DA-AEC is enabled,
+  // but not extended_filter mode.
+  self->startup_phase = WebRtcAec_extended_filter_enabled(self->aec) ||
+      !WebRtcAec_delay_agnostic_enabled(self->aec);
+  self->bufSizeStart = 0;
+  self->checkBufSizeCtr = 0;
+  self->msInSndCardBuf = 0;
+  self->filtDelay = -1;  // -1 indicates an initialized state.
+  self->timeForDelayChange = 0;
+  self->knownDelay = 0;
+  self->lastDelayDiff = 0;
+
+  // Skew compensation starts out disabled.
+  self->skewFrCtr = 0;
+  self->resample = kAecFalse;
+  self->highSkewCtr = 0;
+  self->skew = 0;
+
+  self->farend_started = 0;
+
+  // Default settings.
+  defaultConfig.nlpMode = kAecNlpModerate;
+  defaultConfig.skewMode = kAecFalse;
+  defaultConfig.metricsMode = kAecFalse;
+  defaultConfig.delay_logging = kAecFalse;
+
+  if (WebRtcAec_set_config(self, defaultConfig) == -1) {
+    self->lastError = AEC_UNSPECIFIED_ERROR;
+    return -1;
+  }
+
+  return 0;
+}
+
+// Buffers one far-end frame (only the L band is buffered for farend).
+//
+// |nrOfSamples| must be 80 or 160. When skew compensation is active the frame
+// is resampled first. The samples are appended to |far_pre_buf| and every
+// complete block of PART_LEN2 samples is handed to the AEC core, with an
+// overlap of PART_LEN samples between consecutive blocks.
+//
+// Returns 0 on success. On failure returns -1 and stores the reason in
+// |lastError|.
+int32_t WebRtcAec_BufferFarend(void* aecInst,
+                               const float* farend,
+                               size_t nrOfSamples) {
+  Aec* self = aecInst;
+  float resampled[MAX_RESAMP_LEN];
+  const float* samples = farend;
+  size_t numSamples = nrOfSamples;
+
+  if (farend == NULL) {
+    self->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+
+  if (self->initFlag != initCheck) {
+    self->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  // number of samples == 160 for SWB input
+  if (!(nrOfSamples == 80 || nrOfSamples == 160)) {
+    self->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+
+  if (self->skewMode == kAecTrue && self->resample == kAecTrue) {
+    // Resample and get a new number of samples
+    WebRtcAec_ResampleLinear(self->resampler,
+                             farend,
+                             nrOfSamples,
+                             self->skew,
+                             resampled,
+                             &numSamples);
+    samples = resampled;
+  }
+
+  self->farend_started = 1;
+  WebRtcAec_SetSystemDelay(
+      self->aec, WebRtcAec_system_delay(self->aec) + (int)numSamples);
+
+  // Write the time-domain data to |far_pre_buf|.
+  WebRtc_WriteBuffer(self->far_pre_buf, samples, numSamples);
+
+  // Transform to frequency domain if we have enough data.
+  while (WebRtc_available_read(self->far_pre_buf) >= PART_LEN2) {
+    // We have enough data to pass to the FFT, hence read PART_LEN2 samples.
+    float scratch[PART_LEN2];
+    float* block = NULL;
+
+    WebRtc_ReadBuffer(self->far_pre_buf, (void**)&block, scratch, PART_LEN2);
+    WebRtcAec_BufferFarendPartition(self->aec, block);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+    WebRtc_WriteBuffer(
+        WebRtcAec_far_time_buf(self->aec), &block[PART_LEN], 1);
+#endif
+
+    // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing.
+    WebRtc_MoveReadPtr(self->far_pre_buf, -PART_LEN);
+  }
+
+  return 0;
+}
+
+// Runs the echo canceller on one near-end frame and writes the result to
+// |out|.
+//
+// |nrOfSamples| must be 80 or 160. A reported delay |msInSndCardBuf| below 0
+// or above |kMaxTrustedDelayMs| is flagged with AEC_BAD_PARAMETER_WARNING and
+// makes the call return -1, but the frame is still processed.
+//
+// Returns 0 on success and -1 on error or warning; the reason is stored in
+// |lastError|.
+int32_t WebRtcAec_Process(void* aecInst,
+                          const float* const* nearend,
+                          size_t num_bands,
+                          float* const* out,
+                          size_t nrOfSamples,
+                          int16_t msInSndCardBuf,
+                          int32_t skew) {
+  Aec* self = aecInst;
+  int32_t status = 0;
+
+  if (out == NULL) {
+    self->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+
+  if (self->initFlag != initCheck) {
+    self->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  // number of samples == 160 for SWB input
+  if (!(nrOfSamples == 80 || nrOfSamples == 160)) {
+    self->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+
+  if (msInSndCardBuf < 0 || msInSndCardBuf > kMaxTrustedDelayMs) {
+    // Negative delays are raised to 0 here. The clamping of too large values
+    // is now done in ProcessExtended/Normal().
+    if (msInSndCardBuf < 0) {
+      msInSndCardBuf = 0;
+    }
+    self->lastError = AEC_BAD_PARAMETER_WARNING;
+    status = -1;
+  }
+
+  // This returns the value of aec->extended_filter_enabled.
+  if (!WebRtcAec_extended_filter_enabled(self->aec)) {
+    if (ProcessNormal(self,
+                      nearend,
+                      num_bands,
+                      out,
+                      nrOfSamples,
+                      msInSndCardBuf,
+                      skew) != 0) {
+      status = -1;
+    }
+  } else {
+    ProcessExtended(self,
+                    nearend,
+                    num_bands,
+                    out,
+                    nrOfSamples,
+                    msInSndCardBuf,
+                    skew);
+  }
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  {
+    // Dump the far-end buffer size (in ms) and the current known delay.
+    int16_t far_buf_size_ms = (int16_t)(WebRtcAec_system_delay(self->aec) /
+                                        (sampMsNb * self->rate_factor));
+    (void)fwrite(&far_buf_size_ms, 2, 1, self->bufFile);
+    (void)fwrite(
+        &self->knownDelay, sizeof(self->knownDelay), 1, self->delayFile);
+  }
+#endif
+
+  return status;
+}
+
+// Validates |config| and applies it to an initialized instance.
+//
+// Returns 0 on success. Returns -1 and sets |lastError| if the instance is
+// not initialized or a field holds an unknown value. Note that |skewMode| is
+// stored as soon as it has been validated, i.e. before the remaining fields
+// are checked.
+int WebRtcAec_set_config(void* handle, AecConfig config) {
+  Aec* aecpc = (Aec*)handle;
+
+  if (aecpc->initFlag != initCheck) {
+    aecpc->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  if (!(config.skewMode == kAecFalse || config.skewMode == kAecTrue)) {
+    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+  aecpc->skewMode = config.skewMode;
+
+  if (!(config.nlpMode == kAecNlpConservative ||
+        config.nlpMode == kAecNlpModerate ||
+        config.nlpMode == kAecNlpAggressive)) {
+    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+
+  if (!(config.metricsMode == kAecFalse || config.metricsMode == kAecTrue)) {
+    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+
+  if (!(config.delay_logging == kAecFalse ||
+        config.delay_logging == kAecTrue)) {
+    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+
+  // The remaining settings live in the AEC core.
+  WebRtcAec_SetConfigCore(
+      aecpc->aec, config.nlpMode, config.metricsMode, config.delay_logging);
+  return 0;
+}
+
+// Writes the echo state reported by the AEC core to |status|.
+//
+// Returns 0 on success. Returns -1 and sets |lastError| if |status| is NULL
+// or the instance has not been initialized.
+int WebRtcAec_get_echo_status(void* handle, int* status) {
+  Aec* aecpc = (Aec*)handle;
+
+  if (status == NULL) {
+    aecpc->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+  if (aecpc->initFlag == initCheck) {
+    *status = WebRtcAec_echo_state(aecpc->aec);
+    return 0;
+  }
+
+  aecpc->lastError = AEC_UNINITIALIZED_ERROR;
+  return -1;
+}
+
+// Returns the average to report for |stats|: a mix between the regular
+// average and the upper part average, or kOffsetLevel unless both of them
+// exceed kOffsetLevel.
+static int MetricsReportedAverage(const Stats* stats) {
+  const float kUpWeight = 0.7f;
+  float mixed;
+
+  if (!((stats->himean > kOffsetLevel) && (stats->average > kOffsetLevel))) {
+    return kOffsetLevel;
+  }
+  mixed = kUpWeight * stats->himean + (1 - kUpWeight) * stats->average;
+  return (int)mixed;
+}
+
+// Returns the minimum to report for |stats|: the measured minimum if it lies
+// below -kOffsetLevel, kOffsetLevel otherwise.
+static int MetricsReportedMin(const Stats* stats) {
+  if (stats->min < (kOffsetLevel * (-1))) {
+    return (int)stats->min;
+  }
+  return kOffsetLevel;
+}
+
+// Fills |metrics| with the ERL, ERLE, RERL and A_NLP statistics of the
+// instance.
+//
+// Returns 0 on success and -1 if |handle| or |metrics| is NULL or the
+// instance has not been initialized (|lastError| is set in the latter two
+// cases).
+int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) {
+  Aec* self = (Aec*)handle;
+  Stats erl;
+  Stats erle;
+  Stats a_nlp;
+  int rerl;
+
+  if (handle == NULL) {
+    return -1;
+  }
+  if (metrics == NULL) {
+    self->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+  if (self->initFlag != initCheck) {
+    self->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp);
+
+  // ERL
+  metrics->erl.instant = (int)erl.instant;
+  metrics->erl.average = MetricsReportedAverage(&erl);
+  metrics->erl.max = (int)erl.max;
+  metrics->erl.min = MetricsReportedMin(&erl);
+
+  // ERLE
+  metrics->erle.instant = (int)erle.instant;
+  metrics->erle.average = MetricsReportedAverage(&erle);
+  metrics->erle.max = (int)erle.max;
+  metrics->erle.min = MetricsReportedMin(&erle);
+
+  // RERL: sum of the reported ERL and ERLE averages when both exceed
+  // kOffsetLevel.
+  if ((metrics->erl.average > kOffsetLevel) &&
+      (metrics->erle.average > kOffsetLevel)) {
+    rerl = metrics->erl.average + metrics->erle.average;
+  } else {
+    rerl = kOffsetLevel;
+  }
+  metrics->rerl.average = rerl;
+
+  // No other statistics needed, but returned for completeness.
+  metrics->rerl.instant = rerl;
+  metrics->rerl.max = rerl;
+  metrics->rerl.min = rerl;
+
+  // A_NLP
+  metrics->aNlp.instant = (int)a_nlp.instant;
+  metrics->aNlp.average = MetricsReportedAverage(&a_nlp);
+  metrics->aNlp.max = (int)a_nlp.max;
+  metrics->aNlp.min = MetricsReportedMin(&a_nlp);
+
+  return 0;
+}
+
+// Retrieves the delay metrics from the AEC core into |median|, |std| and
+// |fraction_poor_delays|.
+//
+// Returns 0 on success. Returns -1 and sets |lastError| if |median| or |std|
+// is NULL, if the instance has not been initialized, or if the core reports
+// an error.
+int WebRtcAec_GetDelayMetrics(void* handle,
+                              int* median,
+                              int* std,
+                              float* fraction_poor_delays) {
+  Aec* aecpc = handle;
+
+  if (median == NULL || std == NULL) {
+    aecpc->lastError = AEC_NULL_POINTER_ERROR;
+    return -1;
+  }
+  if (aecpc->initFlag != initCheck) {
+    aecpc->lastError = AEC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+  if (WebRtcAec_GetDelayMetricsCore(
+          aecpc->aec, median, std, fraction_poor_delays) != -1) {
+    return 0;
+  }
+
+  // Logging disabled.
+  aecpc->lastError = AEC_UNSUPPORTED_FUNCTION_ERROR;
+  return -1;
+}
+
+int32_t WebRtcAec_get_error_code(void* aecInst) {
+  Aec* aecpc = aecInst;
+  return aecpc->lastError;
+}
+
+// Returns the AEC core owned by |handle|, or NULL if |handle| is NULL.
+AecCore* WebRtcAec_aec_core(void* handle) {
+  if (handle) {
+    return ((Aec*)handle)->aec;
+  }
+  return NULL;
+}
+
+// Processes one near-end frame in normal (non-extended) filter mode.
+//
+// The reported delay is clamped to |kMaxTrustedDelayMs|, increased by 10 ms
+// and stored in |aecpc->msInSndCardBuf|. With skew mode enabled the skew
+// estimate and the |resample| flag are updated. While |startup_phase| is set
+// the near-end signal is passed through unprocessed and the start size of the
+// far-end buffer is determined; afterwards every frame is handed to the AEC
+// core.
+//
+// Returns 0, or the -1 reported by WebRtcAec_GetSkew() if the skew
+// estimation failed on this call (the frame is still processed).
+static int ProcessNormal(Aec* aecpc,
+                         const float* const* nearend,
+                         size_t num_bands,
+                         float* const* out,
+                         size_t nrOfSamples,
+                         int16_t msInSndCardBuf,
+                         int32_t skew) {
+  int retVal = 0;
+  size_t i;
+  size_t nBlocks10ms;
+  // Limit resampling to doubling/halving of signal
+  const float minSkewEst = -0.5f;
+  const float maxSkewEst = 1.0f;
+
+  msInSndCardBuf =
+      msInSndCardBuf > kMaxTrustedDelayMs ? kMaxTrustedDelayMs : msInSndCardBuf;
+  // TODO(andrew): we need to investigate if this +10 is really wanted.
+  msInSndCardBuf += 10;
+  aecpc->msInSndCardBuf = msInSndCardBuf;
+
+  if (aecpc->skewMode == kAecTrue) {
+    // The first 25 calls are skipped before raw skew values are passed on to
+    // the estimator.
+    if (aecpc->skewFrCtr < 25) {
+      aecpc->skewFrCtr++;
+    } else {
+      retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew);
+      if (retVal == -1) {
+        aecpc->skew = 0;
+        aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
+      }
+
+      // Scale the estimate by the rate ratio and the frame length.
+      aecpc->skew /= aecpc->sampFactor * nrOfSamples;
+
+      // Resampling is only switched on for skews of at least 1e-3 in
+      // magnitude; this is decided before the estimate is clamped below.
+      if (aecpc->skew < 1.0e-3 && aecpc->skew > -1.0e-3) {
+        aecpc->resample = kAecFalse;
+      } else {
+        aecpc->resample = kAecTrue;
+      }
+
+      if (aecpc->skew < minSkewEst) {
+        aecpc->skew = minSkewEst;
+      } else if (aecpc->skew > maxSkewEst) {
+        aecpc->skew = maxSkewEst;
+      }
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+      (void)fwrite(&aecpc->skew, sizeof(aecpc->skew), 1, aecpc->skewFile);
+#endif
+    }
+  }
+
+  nBlocks10ms = nrOfSamples / (FRAME_LEN * aecpc->rate_factor);
+
+  if (aecpc->startup_phase) {
+    // Pass the near-end signal through unprocessed during startup.
+    for (i = 0; i < num_bands; ++i) {
+      // Only needed if they don't already point to the same place.
+      if (nearend[i] != out[i]) {
+        memcpy(out[i], nearend[i], sizeof(nearend[i][0]) * nrOfSamples);
+      }
+    }
+
+    // The AEC is in the start up mode
+    // AEC is disabled until the system delay is OK
+
+    // Mechanism to ensure that the system delay is reasonably stable.
+    if (aecpc->checkBuffSize) {
+      aecpc->checkBufSizeCtr++;
+      // Before we fill up the far-end buffer we require the system delay
+      // to be stable (+/-8 ms) compared to the first value. This
+      // comparison is made during the following 6 consecutive 10 ms
+      // blocks. If it seems to be stable then we start to fill up the
+      // far-end buffer.
+      if (aecpc->counter == 0) {
+        aecpc->firstVal = aecpc->msInSndCardBuf;
+        aecpc->sum = 0;
+      }
+
+      // A value counts as stable if it deviates from the first value by less
+      // than 20% of the current delay or |sampMsNb|, whichever is larger.
+      if (abs(aecpc->firstVal - aecpc->msInSndCardBuf) <
+          WEBRTC_SPL_MAX(0.2 * aecpc->msInSndCardBuf, sampMsNb)) {
+        aecpc->sum += aecpc->msInSndCardBuf;
+        aecpc->counter++;
+      } else {
+        aecpc->counter = 0;
+      }
+
+      if (aecpc->counter * nBlocks10ms >= 6) {
+        // The far-end buffer size is determined in partitions of
+        // PART_LEN samples. Use 75% of the average value of the system
+        // delay as buffer size to start with.
+        aecpc->bufSizeStart =
+            WEBRTC_SPL_MIN((3 * aecpc->sum * aecpc->rate_factor * 8) /
+                               (4 * aecpc->counter * PART_LEN),
+                           kMaxBufSizeStart);
+        // Buffer size has now been determined.
+        aecpc->checkBuffSize = 0;
+      }
+
+      if (aecpc->checkBufSizeCtr * nBlocks10ms > 50) {
+        // For really bad systems, don't disable the echo canceller for
+        // more than 0.5 sec.
+        aecpc->bufSizeStart = WEBRTC_SPL_MIN(
+            (aecpc->msInSndCardBuf * aecpc->rate_factor * 3) / 40,
+            kMaxBufSizeStart);
+        aecpc->checkBuffSize = 0;
+      }
+    }
+
+    // If |checkBuffSize| changed in the if-statement above.
+    if (!aecpc->checkBuffSize) {
+      // The system delay is now reasonably stable (or has been unstable
+      // for too long). When the far-end buffer is filled with
+      // approximately the same amount of data as reported by the system
+      // we end the startup phase.
+      int overhead_elements =
+          WebRtcAec_system_delay(aecpc->aec) / PART_LEN - aecpc->bufSizeStart;
+      // A negative value means the far-end buffer does not hold enough data
+      // yet; the startup phase then continues with the next frame.
+      if (overhead_elements == 0) {
+        // Enable the AEC
+        aecpc->startup_phase = 0;
+      } else if (overhead_elements > 0) {
+        // TODO(bjornv): Do we need a check on how much we actually
+        // moved the read pointer? It should always be possible to move
+        // the pointer |overhead_elements| since we have only added data
+        // to the buffer and no delay compensation nor AEC processing
+        // has been done.
+        WebRtcAec_MoveFarReadPtr(aecpc->aec, overhead_elements);
+
+        // Enable the AEC
+        aecpc->startup_phase = 0;
+      }
+    }
+  } else {
+    // AEC is enabled.
+    EstBufDelayNormal(aecpc);
+
+    // Call the AEC.
+    // TODO(bjornv): Re-structure such that we don't have to pass
+    // |aecpc->knownDelay| as input. Change name to something like
+    // |system_buffer_diff|.
+    WebRtcAec_ProcessFrames(aecpc->aec,
+                            nearend,
+                            num_bands,
+                            nrOfSamples,
+                            aecpc->knownDelay,
+                            out);
+  }
+
+  return retVal;
+}
+
+// Processes one near-end frame in extended filter mode.
+//
+// The reported delay is sanitized and stored in |self->msInSndCardBuf|.
+// Until the first far-end frame has been buffered the near-end signal is
+// passed through unprocessed. On the first processed frame the far-end read
+// pointer is aligned once; after that every frame is handed to the AEC core.
+// |skew| is not used in this mode.
+static void ProcessExtended(Aec* self,
+                            const float* const* near,
+                            size_t num_bands,
+                            float* const* out,
+                            size_t num_samples,
+                            int16_t reported_delay_ms,
+                            int32_t skew) {
+  size_t band;
+  int adjusted_known_delay;
+#if defined(WEBRTC_UNTRUSTED_DELAY)
+  reported_delay_ms = kFixedDelayMs;
+#else
+  // This is the usual mode where we trust the reported system delay values.
+  // Due to the longer filter, we no longer add 10 ms to the reported delay
+  // to reduce chance of non-causality. Instead we apply a minimum here to avoid
+  // issues with the read pointer jumping around needlessly.
+  if (reported_delay_ms < kMinTrustedDelayMs) {
+    reported_delay_ms = kMinTrustedDelayMs;
+  }
+  // If the reported delay appears to be bogus, we attempt to recover by using
+  // the measured fixed delay values. We use >= here because higher layers
+  // may already clamp to this maximum value, and we would otherwise not
+  // detect it here.
+  if (reported_delay_ms >= kMaxTrustedDelayMs) {
+    reported_delay_ms = kFixedDelayMs;
+  }
+#endif
+  self->msInSndCardBuf = reported_delay_ms;
+
+  if (!self->farend_started) {
+    // No far-end data yet: pass the near-end signal through.
+    for (band = 0; band < num_bands; ++band) {
+      // Only needed if they don't already point to the same place.
+      if (near[band] == out[band]) {
+        continue;
+      }
+      memcpy(out[band], near[band], sizeof(near[band][0]) * num_samples);
+    }
+    return;
+  }
+
+  if (self->startup_phase) {
+    // In the extended mode, there isn't a startup "phase", just a special
+    // action on the first frame. In the trusted delay case, we'll take the
+    // current reported delay, unless it's less than our conservative
+    // measurement.
+    int startup_size_ms = reported_delay_ms;
+    int target_delay;
+    int overhead_elements;
+
+    if (startup_size_ms < kFixedDelayMs) {
+      startup_size_ms = kFixedDelayMs;
+    }
+#if defined(WEBRTC_ANDROID)
+    target_delay = startup_size_ms * self->rate_factor * 8;
+#else
+    // To avoid putting the AEC in a non-causal state we're being slightly
+    // conservative and scale by 2. On Android we use a fixed delay and
+    // therefore there is no need to scale the target_delay.
+    target_delay = startup_size_ms * self->rate_factor * 8 / 2;
+#endif
+    overhead_elements =
+        (WebRtcAec_system_delay(self->aec) - target_delay) / PART_LEN;
+    WebRtcAec_MoveFarReadPtr(self->aec, overhead_elements);
+    self->startup_phase = 0;
+  }
+
+  EstBufDelayExtended(self);
+
+  // |kDelayDiffOffsetSamples| gives us the option to manually rewind the delay
+  // on very low delay platforms which can't be expressed purely through
+  // |reported_delay_ms|.
+  adjusted_known_delay =
+      WEBRTC_SPL_MAX(0, self->knownDelay + kDelayDiffOffsetSamples);
+
+  WebRtcAec_ProcessFrames(self->aec,
+                          near,
+                          num_bands,
+                          num_samples,
+                          adjusted_known_delay,
+                          out);
+}
+
+// Updates |aecpc->filtDelay| and, with hysteresis, |aecpc->knownDelay| from
+// the latest reported sound-card delay (normal, non-extended path).
+static void EstBufDelayNormal(Aec* aecpc) {
+  const int sndcard_samples =
+      aecpc->msInSndCardBuf * sampMsNb * aecpc->rate_factor;
+  int delay_now = sndcard_samples - WebRtcAec_system_delay(aecpc->aec);
+  int diff;
+  int keep_counting;
+
+  // Adjust the raw estimate before filtering it:
+  // 1) add the frame(s) that are about to be read/processed,
+  // 2) subtract the resampler delay when drift compensation is active,
+  // 3) flush one block if the result would otherwise be non-causal, since the
+  //    estimated delay can't be negative.
+  delay_now += FRAME_LEN * aecpc->rate_factor;
+
+  if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
+    delay_now -= kResamplingDelay;
+  }
+
+  if (delay_now < PART_LEN) {
+    delay_now += WebRtcAec_MoveFarReadPtr(aecpc->aec, 1) * PART_LEN;
+  }
+
+  // The "extended" implementation marks its initial state with -1 (see the
+  // |filtDelay| == -1 check there); treat any negative value as 0 here.
+  if (aecpc->filtDelay < 0) {
+    aecpc->filtDelay = 0;
+  }
+  aecpc->filtDelay =
+      WEBRTC_SPL_MAX(0, (short)(0.8 * aecpc->filtDelay + 0.2 * delay_now));
+
+  // Hysteresis: the counter only advances while the difference stays on the
+  // same side of the [96, 224] band as on the previous call.
+  diff = aecpc->filtDelay - aecpc->knownDelay;
+  if (diff > 224) {
+    keep_counting = (aecpc->lastDelayDiff >= 96);
+  } else if (diff < 96 && aecpc->knownDelay > 0) {
+    keep_counting = (aecpc->lastDelayDiff <= 224);
+  } else {
+    keep_counting = 0;
+  }
+
+  if (keep_counting) {
+    aecpc->timeForDelayChange++;
+  } else {
+    aecpc->timeForDelayChange = 0;
+  }
+  aecpc->lastDelayDiff = diff;
+
+  // After more than 25 consecutive counts, move the known delay.
+  if (aecpc->timeForDelayChange > 25) {
+    aecpc->knownDelay = WEBRTC_SPL_MAX((int)aecpc->filtDelay - 160, 0);
+  }
+}
+
+// Extended-mode counterpart of EstBufDelayNormal(): updates |self->filtDelay|
+// and, with hysteresis, |self->knownDelay|. It differs in the filter
+// coefficients, the thresholds and the number of blocks flushed.
+static void EstBufDelayExtended(Aec* self) {
+  const int reported_samples =
+      self->msInSndCardBuf * sampMsNb * self->rate_factor;
+  int delay_now = reported_samples - WebRtcAec_system_delay(self->aec);
+  int diff;
+  int keep_counting;
+
+  // Adjust the raw estimate before filtering it:
+  // 1) add the frame(s) that are about to be read/processed,
+  // 2) subtract the resampler delay when drift compensation is active,
+  // 3) flush two blocks if the result would otherwise be non-causal, since
+  //    the estimated delay can't be negative.
+  delay_now += FRAME_LEN * self->rate_factor;
+
+  if (self->skewMode == kAecTrue && self->resample == kAecTrue) {
+    delay_now -= kResamplingDelay;
+  }
+
+  if (delay_now < PART_LEN) {
+    delay_now += WebRtcAec_MoveFarReadPtr(self->aec, 2) * PART_LEN;
+  }
+
+  // A |filtDelay| of -1 means no estimate has been formed yet: seed it with
+  // half the current delay. Otherwise apply the smoothing filter.
+  if (self->filtDelay != -1) {
+    self->filtDelay = WEBRTC_SPL_MAX(
+        0, (short)(0.95 * self->filtDelay + 0.05 * delay_now));
+  } else {
+    self->filtDelay = WEBRTC_SPL_MAX(0, 0.5 * delay_now);
+  }
+
+  // Hysteresis: the counter only advances while the difference stays on the
+  // same side of the [128, 384] band as on the previous call.
+  diff = self->filtDelay - self->knownDelay;
+  if (diff > 384) {
+    keep_counting = (self->lastDelayDiff >= 128);
+  } else if (diff < 128 && self->knownDelay > 0) {
+    keep_counting = (self->lastDelayDiff <= 384);
+  } else {
+    keep_counting = 0;
+  }
+
+  if (keep_counting) {
+    self->timeForDelayChange++;
+  } else {
+    self->timeForDelayChange = 0;
+  }
+  self->lastDelayDiff = diff;
+
+  // After more than 25 consecutive counts, move the known delay.
+  if (self->timeForDelayChange > 25) {
+    self->knownDelay = WEBRTC_SPL_MAX((int)self->filtDelay - 256, 0);
+  }
+}
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h
@ -0,0 +1,67 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+typedef struct {
+  int delayCtr;
+  int sampFreq;
+  int splitSampFreq;
+  int scSampFreq;
+  float sampFactor;  // scSampRate / sampFreq
+  short skewMode;  // compared against kAecTrue to enable resampling-delay compensation
+  int bufSizeStart;
+  int knownDelay;  // delay handed to WebRtcAec_ProcessFrames(); updated by EstBufDelay*()
+  int rate_factor;  // multiplier applied to ms->sample conversions and to FRAME_LEN
+
+  short initFlag;  // indicates if AEC has been initialized
+
+  // Variables used for averaging far end buffer size
+  short counter;
+  int sum;
+  short firstVal;
+  short checkBufSizeCtr;
+
+  // Variables used for delay shifts
+  short msInSndCardBuf;  // last reported delay in ms (after clamping in the extended path)
+  short filtDelay;  // Filtered delay estimate; -1 is the initial state in the extended path.
+  int timeForDelayChange;  // consecutive-call counter; knownDelay is updated once it exceeds 25
+  int startup_phase;  // non-zero until startup handling is done; extended path clears it on first use
+  int checkBuffSize;  // the unit tests expect 0 once a buffer size has been established
+  short lastDelayDiff;  // filtDelay - knownDelay from the previous estimate
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  FILE* bufFile;
+  FILE* delayFile;
+  FILE* skewFile;
+#endif
+
+  // Structures
+  void* resampler;
+
+  int skewFrCtr;
+  int resample;  // if the skew is small enough we don't resample
+  int highSkewCtr;
+  float skew;
+
+  RingBuffer* far_pre_buf;  // Time domain far-end pre-buffer.
+
+  int lastError;
+
+  int farend_started;  // while zero, the extended path copies near-end to output unprocessed
+
+  AecCore* aec;  // low-level core instance passed to the WebRtcAec_*() core functions
+} Aec;
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc
@ -0,0 +1,48 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// TODO(bjornv): Make this a comprehensive test.
+
+#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
+
+#include <stdlib.h>
+#include <time.h>
+
+extern "C" {
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+}
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/base/checks.h"
+
+namespace webrtc {
+
+// Creating an instance must succeed, and freeing both a null pointer and a
+// valid instance must be possible.
+TEST(EchoCancellationTest, CreateAndFreeHasExpectedBehavior) {
+  void* const aec_instance = WebRtcAec_Create();
+  ASSERT_TRUE(aec_instance != nullptr);
+  // Freeing a null instance is exercised first, then the real one.
+  WebRtcAec_Free(nullptr);
+  WebRtcAec_Free(aec_instance);
+}
+
+// The low-level core handle must be NULL for a NULL instance, non-NULL for a
+// real one, and usable for a simple set/get round trip.
+TEST(EchoCancellationTest, ApplyAecCoreHandle) {
+  void* const aec_instance = WebRtcAec_Create();
+  ASSERT_TRUE(aec_instance != nullptr);
+  EXPECT_TRUE(NULL == WebRtcAec_aec_core(NULL));
+  AecCore* const core = WebRtcAec_aec_core(aec_instance);
+  EXPECT_TRUE(NULL != core);
+  // Write a value through the lower level |core| handle and read it back.
+  const int kSystemDelay = 111;
+  WebRtcAec_SetSystemDelay(core, kSystemDelay);
+  EXPECT_EQ(kSystemDelay, WebRtcAec_system_delay(core));
+  WebRtcAec_Free(aec_instance);
+}
+
+}  // namespace webrtc
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
@ -0,0 +1,245 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
+
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"
+
+// Errors
+#define AEC_UNSPECIFIED_ERROR 12000
+#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001
+#define AEC_UNINITIALIZED_ERROR 12002
+#define AEC_NULL_POINTER_ERROR 12003
+#define AEC_BAD_PARAMETER_ERROR 12004
+
+// Warnings
+#define AEC_BAD_PARAMETER_WARNING 12050
+
+enum {  // Values for AecConfig::nlpMode.
+  kAecNlpConservative = 0,
+  kAecNlpModerate,  // Default nlpMode according to AecConfig.
+  kAecNlpAggressive
+};
+
+enum {  // Boolean-style values used for the AecConfig mode flags.
+  kAecFalse = 0,
+  kAecTrue
+};
+
+typedef struct {  // Settings passed (by value) to WebRtcAec_set_config().
+  int16_t nlpMode;      // kAecNlp* value; default kAecNlpModerate
+  int16_t skewMode;     // kAecTrue/kAecFalse; default kAecFalse
+  int16_t metricsMode;  // kAecTrue/kAecFalse; default kAecFalse
+  int delay_logging;    // kAecTrue/kAecFalse; default kAecFalse
+  // float realSkew;
+} AecConfig;
+
+typedef struct {  // One metric's values; AecMetrics holds one AecLevel per metric.
+  int instant;
+  int average;
+  int max;
+  int min;
+} AecLevel;
+
+typedef struct {  // Filled out by WebRtcAec_GetMetrics().
+  AecLevel rerl;
+  AecLevel erl;
+  AecLevel erle;
+  AecLevel aNlp;
+} AecMetrics;
+
+struct AecCore;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Allocates the memory needed by the AEC. The memory needs to be initialized
+ * separately using the WebRtcAec_Init() function. Returns a pointer to the
+ * object or NULL on error.
+ *
+ * Declared with an explicit (void) parameter list so that C translation units
+ * see a real prototype instead of a declaration with unspecified parameters.
+ */
+void* WebRtcAec_Create(void);
+
+/*
+ * This function releases the memory allocated by WebRtcAec_Create().
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*        aecInst         Pointer to the AEC instance
+ *
+ * NOTE(review): the unit test also calls this with a NULL pointer, so NULL
+ * is presumably tolerated - confirm against the implementation.
+ */
+void WebRtcAec_Free(void* aecInst);
+
+/*
+ * Initializes an AEC instance.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecInst       Pointer to the AEC instance
+ * int32_t        sampFreq      Sampling frequency of data, in Hz (the unit
+ *                              tests use 8000 and 16000)
+ * int32_t        scSampFreq    Soundcard sampling frequency, in Hz (the unit
+ *                              tests use 48000)
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq);
+
+/*
+ * Inserts an 80 or 160 sample block of data into the farend buffer.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecInst       Pointer to the AEC instance
+ * const float*   farend        In buffer containing one frame of
+ *                              farend signal for L band
+ * size_t         nrOfSamples   Number of samples in farend buffer
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAec_BufferFarend(void* aecInst,
+                               const float* farend,
+                               size_t nrOfSamples);
+
+/*
+ * Runs the echo canceller on an 80 or 160 sample block of data.
+ *
+ * Inputs                             Description
+ * -------------------------------------------------------------------
+ * void*               aecInst        Pointer to the AEC instance
+ * const float* const* nearend        In buffer containing one frame of
+ *                                    nearend+echo signal for each band
+ * size_t              num_bands      Number of bands in nearend buffer
+ * size_t              nrOfSamples    Number of samples in nearend buffer
+ * int16_t             msInSndCardBuf Delay estimate for sound card and
+ *                                    system buffers
+ * int32_t             skew           Difference between number of samples
+ *                                    played and recorded at the soundcard
+ *                                    (for clock skew compensation)
+ *
+ * Outputs                            Description
+ * -------------------------------------------------------------------
+ * float* const*       out            Out buffer, one frame of processed
+ *                                    nearend for each band
+ * int32_t             return         0: OK
+ *                                   -1: error
+ */
+int32_t WebRtcAec_Process(void* aecInst,
+                          const float* const* nearend,
+                          size_t num_bands,
+                          float* const* out,
+                          size_t nrOfSamples,
+                          int16_t msInSndCardBuf,
+                          int32_t skew);
+
+/*
+ * This function enables the user to set certain parameters on-the-fly.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          handle        Pointer to the AEC instance
+ * AecConfig      config        Config instance (passed by value) that
+ *                              contains all properties to be set; see the
+ *                              AecConfig definition for fields and defaults
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int            return         0: OK
+ *                              -1: error
+ */
+int WebRtcAec_set_config(void* handle, AecConfig config);
+
+/*
+ * Gets the current echo status of the nearend signal.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          handle        Pointer to the AEC instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int*           status        0: Almost certainly nearend single-talk
+ *                              1: Might not be nearend single-talk
+ * int            return         0: OK
+ *                              -1: error
+ */
+int WebRtcAec_get_echo_status(void* handle, int* status);
+
+/*
+ * Gets the current echo metrics for the session.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          handle        Pointer to the AEC instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * AecMetrics*    metrics       Struct which will be filled out with the
+ *                              current echo metrics (the rerl, erl, erle
+ *                              and aNlp levels of AecMetrics).
+ * int            return         0: OK
+ *                              -1: error
+ */
+int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics);
+
+/*
+ * Gets the current delay metrics for the session.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*   handle               Pointer to the AEC instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int*    median               Delay median value.
+ * int*    std                  Delay standard deviation.
+ * float*  fraction_poor_delays Fraction of the delay estimates that may
+ *                              cause the AEC to perform poorly.
+ *
+ * int     return                0: OK
+ *                              -1: error
+ *
+ * NOTE(review): the unit of |median| and |std| is not stated in this header
+ * (presumably ms) - confirm against the implementation.
+ */
+int WebRtcAec_GetDelayMetrics(void* handle,
+                              int* median,
+                              int* std,
+                              float* fraction_poor_delays);
+
+/*
+ * Gets the last error code.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecInst       Pointer to the AEC instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        error code; the AEC_* error and warning
+ *                              codes defined at the top of this header lie
+ *                              in the range 12000-12050
+ */
+int32_t WebRtcAec_get_error_code(void* aecInst);
+
+// Returns a pointer to the low level AEC handle.
+//
+// Input:
+//  - handle                    : Pointer to the AEC instance.
+//
+// Return value:
+//  - AecCore pointer           : NULL for error (the unit test expects NULL for a NULL |handle|).
+//
+struct AecCore* WebRtcAec_aec_core(void* handle);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
@ -0,0 +1,602 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "testing/gtest/include/gtest/gtest.h"
+extern "C" {
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+}
+#include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h"
+#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
+#include "webrtc/test/testsupport/gtest_disable.h"
+#include "webrtc/typedefs.h"
+
+namespace {
+
+class SystemDelayTest : public ::testing::Test {
+ protected:
+  SystemDelayTest();  // Fills the dummy far/near buffers and zeroes |out_|.
+  virtual void SetUp();  // Creates the AEC instance behind |handle_| / |self_|.
+  virtual void TearDown();  // Frees the AEC instance.
+
+  // Initialization of AEC handle with respect to |sample_rate_hz|. Since the
+  // device sample rate is unimportant we set that value to 48000 Hz.
+  void Init(int sample_rate_hz);
+
+  // Makes one render call and one capture call in that specific order.
+  void RenderAndCapture(int device_buffer_ms);
+
+  // Fills up the far-end buffer with respect to the default device buffer size.
+  size_t BufferFillUp();  // Returns the number of samples that were buffered.
+
+  // Runs and verifies the behavior in a stable startup procedure.
+  void RunStableStartup();
+
+  // Maps buffer size in ms into samples, taking the unprocessed frame into
+  // account.
+  int MapBufferSizeToSamples(int size_in_ms, bool extended_filter);
+
+  void* handle_;  // Opaque AEC instance returned by WebRtcAec_Create().
+  Aec* self_;  // Same object as |handle_|, typed for access to internal state.
+  size_t samples_per_frame_;  // Samples in one 10 ms frame; set by Init().
+  // Dummy input/output speech data.
+  static const int kSamplesPerChunk = 160;
+  float far_[kSamplesPerChunk];
+  float near_[kSamplesPerChunk];
+  float out_[kSamplesPerChunk];
+  const float* near_ptr_;  // Points at |near_|; passed as the single near-end band.
+  float* out_ptr_;  // Points at |out_|; passed as the single output band.
+};
+
+// Starts without an AEC instance and prepares the dummy audio buffers.
+SystemDelayTest::SystemDelayTest()
+    : handle_(NULL),
+      self_(NULL),
+      samples_per_frame_(0),
+      near_ptr_(near_),
+      out_ptr_(out_) {
+  // The inputs get more or less arbitrary non-zero values; the output buffer
+  // starts out as all zeros.
+  for (int n = 0; n < kSamplesPerChunk; ++n) {
+    far_[n] = 257.0;
+    near_[n] = 514.0;
+    out_[n] = 0.0f;
+  }
+}
+
+// Creates the AEC instance and keeps a typed view of it for state checks.
+void SystemDelayTest::SetUp() {
+  handle_ = WebRtcAec_Create();
+  ASSERT_TRUE(handle_ != NULL);
+  // |self_| is the same object as |handle_|, viewed as the internal struct.
+  self_ = static_cast<Aec*>(handle_);
+}
+
+// Releases the AEC instance created in SetUp().
+void SystemDelayTest::TearDown() {
+  // Free AEC
+  WebRtcAec_Free(handle_);
+  handle_ = NULL;
+  // |self_| aliases |handle_| (see SetUp()), so clear it as well rather than
+  // leaving a dangling pointer to the freed instance.
+  self_ = NULL;
+}
+
+// In SWB mode nothing is added to the buffer handling with respect to
+// functionality compared to WB. We therefore only verify behavior in NB and WB.
+static const int kSampleRateHz[] = {8000, 16000};
+static const size_t kNumSampleRates =
+    sizeof(kSampleRateHz) / sizeof(*kSampleRateHz);  // Element count of kSampleRateHz.
+
+// Default audio device buffer size, in ms, that the tests report to the AEC.
+static const int kDeviceBufMs = 100;
+
+// Requirement for a stable device convergence time in ms. Should converge in
+// less than |kStableConvergenceMs|.
+static const int kStableConvergenceMs = 100;
+
+// Maximum convergence time in ms. This means that we should leave the startup
+// phase after |kMaxConvergenceMs| independent of device buffer stability
+// conditions.
+static const int kMaxConvergenceMs = 500;
+
+// (Re)initializes the AEC for |sample_rate_hz| and derives the frame size.
+void SystemDelayTest::Init(int sample_rate_hz) {
+  // The device sample rate is unimportant for these tests.
+  const int kDeviceSampleRateHz = 48000;
+  EXPECT_EQ(0, WebRtcAec_Init(handle_, sample_rate_hz, kDeviceSampleRateHz));
+  // A freshly initialized AEC must report no system delay.
+  EXPECT_EQ(0, WebRtcAec_system_delay(self_->aec));
+
+  // A frame is 10 ms of data, i.e. 1/100 of a second.
+  samples_per_frame_ = static_cast<size_t>(sample_rate_hz / 100);
+}
+
+// Buffers one far-end frame, then processes one near-end frame while
+// reporting |device_buffer_ms| as the device delay and zero skew.
+void SystemDelayTest::RenderAndCapture(int device_buffer_ms) {
+  const int32_t render_result =
+      WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_);
+  EXPECT_EQ(0, render_result);
+
+  const int32_t capture_result = WebRtcAec_Process(
+      handle_, &near_ptr_, 1, &out_ptr_, samples_per_frame_, device_buffer_ms,
+      0);
+  EXPECT_EQ(0, capture_result);
+}
+
+// Pre-fills the far-end buffer with as much data as will later be reported
+// through Process(), checking the system delay after every frame. Returns the
+// number of samples buffered.
+size_t SystemDelayTest::BufferFillUp() {
+  const int kNumFrames = kDeviceBufMs / 10;
+  size_t samples_buffered = 0;
+  int frame = 0;
+  while (frame < kNumFrames) {
+    EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+    samples_buffered += samples_per_frame_;
+    // The system delay must track exactly what has been inserted so far.
+    EXPECT_EQ(static_cast<int>(samples_buffered),
+              WebRtcAec_system_delay(self_->aec));
+    ++frame;
+  }
+  return samples_buffered;
+}
+
+// Drives the AEC through a startup with a constant reported device buffer and
+// verifies that it leaves the startup phase in time.
+void SystemDelayTest::RunStableStartup() {
+  // Start from a far-end buffer holding as much data as will be reported
+  // through Process().
+  size_t total_samples = BufferFillUp();
+
+  if (WebRtcAec_delay_agnostic_enabled(self_->aec) != 1) {
+    // A stable device should be accepted and put in a regular process mode
+    // within |kStableConvergenceMs|.
+    int elapsed_ms = 0;
+    while (elapsed_ms < kStableConvergenceMs) {
+      RenderAndCapture(kDeviceBufMs);
+      total_samples += samples_per_frame_;
+      if (self_->startup_phase == 0) {
+        // Startup phase is over.
+        break;
+      }
+      elapsed_ms += 10;
+    }
+    // Verify convergence time.
+    EXPECT_GT(kStableConvergenceMs, elapsed_ms);
+  } else {
+    // With the delay-agnostic flag set, the startup phase is expected to be
+    // over after the first processed 10 ms chunk, so there is no need to wait
+    // for the reported system delay values to become stable.
+    RenderAndCapture(kDeviceBufMs);
+    total_samples += samples_per_frame_;
+    EXPECT_EQ(0, self_->startup_phase);
+  }
+  // Verify that the buffer has been flushed.
+  EXPECT_GE(static_cast<int>(total_samples),
+            WebRtcAec_system_delay(self_->aec));
+}
+
+// Converts |size_in_ms| to samples at the current frame size. Without the
+// extended filter an extra 10 ms is added for the unprocessed frame; that is
+// simply how the algorithm is constructed.
+int SystemDelayTest::MapBufferSizeToSamples(int size_in_ms,
+                                            bool extended_filter) {
+  const int unprocessed_ms = extended_filter ? 0 : 10;
+  return static_cast<int>((size_in_ms + unprocessed_ms) * samples_per_frame_ /
+                          10);
+}
+
+// The tests should meet basic requirements and not be adjusted to what is
+// actually implemented. If we don't get good code coverage this way we either
+// lack in tests or have unnecessary code.
+// General requirements:
+// 1) If we add far-end data the system delay should be increased with the same
+//    amount we add.
+// 2) If the far-end buffer is full we should flush the oldest data to make room
+//    for the new. In this case the system delay is unaffected.
+// 3) There should exist a startup phase in which the buffer size is to be
+//    determined. In this phase no cancellation should be performed.
+// 4) Under stable conditions (small variations in device buffer sizes) the AEC
+//    should determine an appropriate local buffer size within
+//    |kStableConvergenceMs| ms.
+// 5) Under unstable conditions the AEC should make a decision within
+//    |kMaxConvergenceMs| ms.
+// 6) If the local buffer runs out of data we should stuff the buffer with older
+//    frames.
+// 7) The system delay should within |kMaxConvergenceMs| ms heal from
+//    disturbances like drift, data glitches, toggling events and outliers.
+// 8) The system delay should never become negative.
+
+// Buffering far-end data must increase the system delay by exactly the amount
+// of data added, regardless of DA-AEC and extended_filter mode.
+TEST_F(SystemDelayTest, CorrectIncreaseWhenBufferFarend) {
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      for (size_t rate = 0; rate < kNumSampleRates; ++rate) {
+        Init(kSampleRateHz[rate]);
+        // Five consecutive insertions; the expected delay is the running
+        // total of everything inserted so far.
+        size_t expected_samples = 0;
+        for (int call = 0; call < 5; ++call) {
+          EXPECT_EQ(0,
+                    WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+          expected_samples += samples_per_frame_;
+          EXPECT_EQ(static_cast<int>(expected_samples),
+                    WebRtcAec_system_delay(self_->aec));
+        }
+      }
+    }
+  }
+}
+
+// TODO(bjornv): Add a test to verify behavior if the far-end buffer is full
+// when adding new data.
+
+// Runs a stable startup and then checks that the system delay lies within the
+// required interval, for every combination of DA-AEC and extended_filter mode.
+TEST_F(SystemDelayTest, CorrectDelayAfterStableStartup) {
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+
+        // Verify system delay with respect to requirements, i.e., the
+        // |system_delay| is in the interval [75%, 100%] of what's reported on
+        // the average.
+        // In extended_filter mode we target 50% and measure after one processed
+        // 10 ms chunk.
+        const int average_reported_delay =
+            static_cast<int>(kDeviceBufMs * samples_per_frame_ / 10);
+        // Convert once so the lower bound is computed entirely in signed
+        // arithmetic instead of mixing int with size_t.
+        const int frame_samples = static_cast<int>(samples_per_frame_);
+        EXPECT_GE(average_reported_delay, WebRtcAec_system_delay(self_->aec));
+        const int lower_bound =
+            WebRtcAec_extended_filter_enabled(self_->aec)
+                ? average_reported_delay / 2 - frame_samples
+                : average_reported_delay * 3 / 4;
+        EXPECT_LE(lower_bound, WebRtcAec_system_delay(self_->aec));
+      }
+    }
+  }
+}
+
+// With a device buffer size that keeps jumping, the AEC must still leave the
+// startup phase within |kMaxConvergenceMs| and end up with a system delay in
+// the interval [60%, 100%] of the last reported value.
+TEST_F(SystemDelayTest, CorrectDelayAfterUnstableStartup) {
+  // Not applicable in extended_filter mode, since only the first 10 ms chunk
+  // is used there to determine a reasonable buffer size, nor with DA-AEC on,
+  // because that overrides the startup procedure. Switch both off.
+  WebRtcAec_enable_extended_filter(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec));
+  WebRtcAec_enable_delay_agnostic(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(self_->aec));
+
+  // In an unstable system processing would start after |kMaxConvergenceMs|.
+  // On the last frame the AEC buffer is adjusted to 60% of the last reported
+  // device buffer size. The instability is produced by alternating the
+  // reported size between |kDeviceBufMs| + 25 ms and |kDeviceBufMs| - 25 ms.
+  for (size_t rate = 0; rate < kNumSampleRates; ++rate) {
+    Init(kSampleRateHz[rate]);
+
+    // Start from a far-end buffer holding as much data as will be reported on
+    // the average through Process().
+    size_t total_samples = BufferFillUp();
+
+    int offset_ms = 25;
+    int last_reported_ms = 0;
+    int elapsed_ms = 0;
+    while (elapsed_ms <= kMaxConvergenceMs) {
+      last_reported_ms = kDeviceBufMs + offset_ms;
+      RenderAndCapture(last_reported_ms);
+      total_samples += samples_per_frame_;
+      offset_ms = -offset_ms;
+      if (self_->startup_phase == 0) {
+        // Startup phase is over.
+        break;
+      }
+      elapsed_ms += 10;
+    }
+    // Verify convergence time.
+    EXPECT_GE(kMaxConvergenceMs, elapsed_ms);
+    // Verify that the buffer has been flushed.
+    EXPECT_GE(static_cast<int>(total_samples),
+              WebRtcAec_system_delay(self_->aec));
+
+    // The system delay must lie in [60%, 100%] of what was last reported.
+    const int upper_bound =
+        static_cast<int>(last_reported_ms * samples_per_frame_ / 10);
+    const int lower_bound =
+        static_cast<int>(last_reported_ms * samples_per_frame_ / 10 * 3 / 5);
+    EXPECT_GE(upper_bound, WebRtcAec_system_delay(self_->aec));
+    EXPECT_LE(lower_bound, WebRtcAec_system_delay(self_->aec));
+  }
+}
+
+// Establishes the device buffer size with an empty far-end buffer, then checks
+// that the far-end buffer is filled to 75% of the reported size before the
+// startup phase ends, and that the system delay stays constant afterwards.
+TEST_F(SystemDelayTest, CorrectDelayAfterStableBufferBuildUp) {
+  // This test does not apply in extended_filter mode, since we only use the
+  // the first 10 ms chunk to determine a reasonable buffer size. Neither does
+  // it apply if DA-AEC is on because that overrides the startup procedure.
+  WebRtcAec_enable_extended_filter(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec));
+  WebRtcAec_enable_delay_agnostic(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(self_->aec));
+
+  for (size_t i = 0; i < kNumSampleRates; i++) {
+    Init(kSampleRateHz[i]);
+
+    // We assume that running |kStableConvergenceMs| calls will put the
+    // algorithm in a state where the device buffer size has been determined. We
+    // can make that assumption since we have a separate stability test.
+    // Only Process() is called here, so no far-end data is buffered.
+    int process_time_ms = 0;
+    for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) {
+      EXPECT_EQ(0,
+                WebRtcAec_Process(handle_,
+                                  &near_ptr_,
+                                  1,
+                                  &out_ptr_,
+                                  samples_per_frame_,
+                                  kDeviceBufMs,
+                                  0));
+    }
+    // Verify that a buffer size has been established.
+    EXPECT_EQ(0, self_->checkBuffSize);
+
+    // We now have established the required buffer size. Let us verify that we
+    // fill up before leaving the startup phase for normal processing.
+    const size_t target_buffer_size =
+        kDeviceBufMs * samples_per_frame_ / 10 * 3 / 4;
+    process_time_ms = 0;
+    for (; process_time_ms <= kMaxConvergenceMs; process_time_ms += 10) {
+      RenderAndCapture(kDeviceBufMs);
+      if (self_->startup_phase == 0) {
+        // We have left the startup phase.
+        break;
+      }
+    }
+    // Verify convergence time.
+    EXPECT_GT(kMaxConvergenceMs, process_time_ms);
+    // Verify that the buffer has reached the desired size.
+    EXPECT_LE(static_cast<int>(target_buffer_size),
+              WebRtcAec_system_delay(self_->aec));
+
+    // Verify normal behavior (system delay is kept constant) after startup by
+    // running a couple of calls to BufferFarend() and Process().
+    for (int j = 0; j < 6; j++) {
+      const int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
+      RenderAndCapture(kDeviceBufMs);
+      EXPECT_EQ(system_delay_before_calls, WebRtcAec_system_delay(self_->aec));
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayWhenBufferUnderrun) {
+  // Here we test a buffer under run scenario. If we keep on calling
+  // WebRtcAec_Process() we will finally run out of data, but should
+  // automatically stuff the buffer. We verify this behavior by checking that
+  // the system delay never goes negative.
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+
+        // The AEC has now left the Startup phase. We now have at most
+        // |kStableConvergenceMs| in the buffer. Keep on calling Process() until
+        // we run out of data and verify that the system delay is non-negative.
+        for (int j = 0; j <= kStableConvergenceMs; j += 10) {
+          EXPECT_EQ(0, WebRtcAec_Process(handle_, &near_ptr_, 1, &out_ptr_,
+                                         samples_per_frame_, kDeviceBufMs, 0));
+          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        }
+      }
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayDuringDrift) {
+  // This drift test should verify that the system delay is never exceeding the
+  // device buffer. The drift is simulated by decreasing the reported device
+  // buffer size by 1 ms every 100 ms. If the device buffer size goes below 30
+  // ms we jump (add) 10 ms to give a repeated pattern.
+
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+
+        // We have left the startup phase and proceed with normal processing.
+        int jump = 0;
+        for (int j = 0; j < 1000; j++) {
+          // Drift = -1 ms per 100 ms of data.
+          int device_buf_ms = kDeviceBufMs - (j / 10) + jump;
+          int device_buf = MapBufferSizeToSamples(device_buf_ms,
+                                                  extended_filter == 1);
+
+          if (device_buf_ms < 30) {
+            // Add 10 ms data, taking effect next frame.
+            jump += 10;
+          }
+          RenderAndCapture(device_buf_ms);
+
+          // Verify that the system delay does not exceed the device buffer.
+          EXPECT_GE(device_buf, WebRtcAec_system_delay(self_->aec));
+
+          // Verify that the system delay is non-negative.
+          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        }
+      }
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, ShouldRecoverAfterGlitch) {
+  // This glitch test should verify that the system delay recovers if there is
+  // a glitch in data. The data glitch is constructed as 200 ms of buffering
+  // after which the stable procedure continues. The glitch is never reported by
+  // the device.
+  // The system is said to be in a non-causal state if the difference between
+  // the device buffer and system delay is less than a block (64 samples).
+
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+        int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
+                                                extended_filter == 1);
+        // Glitch state: buffer 20 far-end frames without any capture calls.
+        for (int j = 0; j < 20; j++) {
+          EXPECT_EQ(0,
+                    WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
+          // No need to verify system delay, since that is done in a separate
+          // test.
+        }
+        // Verify that we are in a non-causal state, i.e.,
+        // |system_delay| > |device_buf|.
+        EXPECT_LT(device_buf, WebRtcAec_system_delay(self_->aec));
+
+        // Recover state. Should recover at least 4 ms of data per 10 ms, hence
+        // a glitch of 200 ms will take at most 200 * 10 / 4 = 500 ms to recover
+        // from.
+        bool non_causal = true;  // We are currently in a non-causal state.
+        for (int j = 0; j < 50; j++) {
+          int system_delay_before = WebRtcAec_system_delay(self_->aec);
+          RenderAndCapture(kDeviceBufMs);
+          int system_delay_after = WebRtcAec_system_delay(self_->aec);
+          // We have recovered if
+          // |device_buf| - |system_delay_after| >= PART_LEN (1 block).
+          // During recovery, |system_delay_after| < |system_delay_before|,
+          // otherwise they are equal.
+          if (non_causal) {
+            EXPECT_LT(system_delay_after, system_delay_before);
+            if (device_buf - system_delay_after >= PART_LEN) {
+              non_causal = false;
+            }
+          } else {
+            EXPECT_EQ(system_delay_before, system_delay_after);
+          }
+          // Verify that the system delay is non-negative.
+          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        }
+        // Check that we have recovered.
+        EXPECT_FALSE(non_causal);
+      }
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, UnaffectedWhenSpuriousDeviceBufferValues) {
+  // This test does not apply in extended_filter mode, since we only use the
+  // first 10 ms chunk to determine a reasonable buffer size.
+  const int extended_filter = 0;
+  WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+  EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+
+  // Should be DA-AEC independent.
+  for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+    WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+    EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+    // This spurious device buffer data test aims at verifying that the system
+    // delay is unaffected by large outliers.
+    // The system is said to be in a non-causal state if the difference between
+    // the device buffer and system delay is less than a block (64 samples).
+    for (size_t i = 0; i < kNumSampleRates; i++) {
+      Init(kSampleRateHz[i]);
+      RunStableStartup();
+      int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
+                                              extended_filter == 1);
+
+      // Normal state. We are currently not in a non-causal state.
+      bool non_causal = false;
+
+      // Run 1 s and replace device buffer size with 500 ms every 100 ms.
+      for (int j = 0; j < 100; j++) {
+        int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
+        int device_buf_ms = j % 10 == 0 ? 500 : kDeviceBufMs;
+        RenderAndCapture(device_buf_ms);
+
+        // Check for non-causality.
+        if (device_buf - WebRtcAec_system_delay(self_->aec) < PART_LEN) {
+          non_causal = true;
+        }
+        EXPECT_FALSE(non_causal);
+        EXPECT_EQ(system_delay_before_calls,
+                  WebRtcAec_system_delay(self_->aec));
+
+        // Verify that the system delay is non-negative.
+        EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+      }
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, CorrectImpactWhenTogglingDeviceBufferValues) {
+  // This test aims at verifying that the system delay is "unaffected" by
+  // toggling values reported by the device.
+  // The test is constructed such that every other device buffer value is zero
+  // and then 2 * |kDeviceBufMs|, hence the size is constant on the average. The
+  // zero values will force us into a non-causal state and thereby lowering the
+  // system delay until we basically run out of data. Once that happens the
+  // buffer will be stuffed.
+  // TODO(bjornv): This test will have a better impact if we verified that the
+  // delay estimate goes up when the system delay goes down to meet the average
+  // device buffer size.
+
+  // This test does not apply if DA-AEC is enabled and extended_filter mode
+  // disabled.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      if (extended_filter == 0 && da_aec == 1) {
+        continue;
+      }
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+        const int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
+                                                      extended_filter == 1);
+
+        // Normal state. We are currently not in a non-causal state.
+        bool non_causal = false;
+
+        // Loop through 100 frames (both render and capture), which equals 1 s
+        // of data. Every odd frame we set the device buffer size to
+        // 2 * |kDeviceBufMs| and even frames we set the device buffer size to
+        // zero.
+        for (int j = 0; j < 100; j++) {
+          int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
+          int device_buf_ms = 2 * (j % 2) * kDeviceBufMs;
+          RenderAndCapture(device_buf_ms);
+
+          // Check for non-causality against the average device buffer size.
+          non_causal |=
+              (device_buf - WebRtcAec_system_delay(self_->aec) < PART_LEN);
+          EXPECT_GE(system_delay_before_calls,
+                    WebRtcAec_system_delay(self_->aec));
+
+          // Verify that the system delay is non-negative.
+          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
+        }
+        // Verify we are not in a non-causal state.
+        EXPECT_FALSE(non_causal);
+      }
+    }
+  }
+}
+
+}  // namespace
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.c
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.h
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core.h
@ -0,0 +1,434 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Performs echo control (suppression) with fft routines in fixed-point.
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aecm/aecm_defines.h"
+#include "webrtc/typedefs.h"
+
+#ifdef _MSC_VER  // Visual C++: ALIGN8_BEG carries the 8-byte alignment.
+#define ALIGN8_BEG __declspec(align(8))
+#define ALIGN8_END
+#else  // gcc or icc: ALIGN8_END carries the 8-byte alignment.
+#define ALIGN8_BEG
+#define ALIGN8_END __attribute__((aligned(8)))
+#endif  // _MSC_VER
+
+typedef struct {
+    int16_t real;  // Real part.
+    int16_t imag;  // Imaginary part.
+} ComplexInt16;
+
+typedef struct {
+    int farBufWritePos;
+    int farBufReadPos;
+    int knownDelay;
+    int lastKnownDelay;
+    int firstVAD;  // Parameter to control poorly initialized channels
+
+    RingBuffer* farFrameBuf;
+    RingBuffer* nearNoisyFrameBuf;
+    RingBuffer* nearCleanFrameBuf;
+    RingBuffer* outFrameBuf;
+
+    int16_t farBuf[FAR_BUF_LEN];
+
+    int16_t mult;
+    uint32_t seed;
+
+    // Delay estimation variables
+    void* delay_estimator_farend;
+    void* delay_estimator;
+    uint16_t currentDelay;
+    // Far end history variables
+    // TODO(bjornv): Replace |far_history| with ring_buffer.
+    uint16_t far_history[PART_LEN1 * MAX_DELAY];
+    int far_history_pos;
+    int far_q_domains[MAX_DELAY];
+
+    int16_t nlpFlag;
+    int16_t fixedDelay;
+
+    uint32_t totCount;
+
+    int16_t dfaCleanQDomain;
+    int16_t dfaCleanQDomainOld;
+    int16_t dfaNoisyQDomain;
+    int16_t dfaNoisyQDomainOld;
+
+    int16_t nearLogEnergy[MAX_BUF_LEN];
+    int16_t farLogEnergy;
+    int16_t echoAdaptLogEnergy[MAX_BUF_LEN];
+    int16_t echoStoredLogEnergy[MAX_BUF_LEN];
+
+    // The extra 16 or 32 bytes in the following buffers are for alignment based
+    // Neon code.
+    // It's designed this way since the current GCC compiler can't align a
+    // buffer in 16 or 32 byte boundaries properly.
+    int16_t channelStored_buf[PART_LEN1 + 8];
+    int16_t channelAdapt16_buf[PART_LEN1 + 8];
+    int32_t channelAdapt32_buf[PART_LEN1 + 8];
+    int16_t xBuf_buf[PART_LEN2 + 16];  // farend
+    int16_t dBufClean_buf[PART_LEN2 + 16];  // nearend
+    int16_t dBufNoisy_buf[PART_LEN2 + 16];  // nearend
+    int16_t outBuf_buf[PART_LEN + 8];
+
+    // Pointers to the above buffers
+    int16_t *channelStored;
+    int16_t *channelAdapt16;
+    int32_t *channelAdapt32;
+    int16_t *xBuf;
+    int16_t *dBufClean;
+    int16_t *dBufNoisy;
+    int16_t *outBuf;
+
+    int32_t echoFilt[PART_LEN1];
+    int16_t nearFilt[PART_LEN1];
+    int32_t noiseEst[PART_LEN1];
+    int           noiseEstTooLowCtr[PART_LEN1];
+    int           noiseEstTooHighCtr[PART_LEN1];
+    int16_t noiseEstCtr;
+    int16_t cngMode;
+
+    int32_t mseAdaptOld;
+    int32_t mseStoredOld;
+    int32_t mseThreshold;
+
+    int16_t farEnergyMin;
+    int16_t farEnergyMax;
+    int16_t farEnergyMaxMin;
+    int16_t farEnergyVAD;
+    int16_t farEnergyMSE;
+    int currentVADValue;
+    int16_t vadUpdateCount;
+
+    int16_t startupState;
+    int16_t mseChannelCount;
+    int16_t supGain;
+    int16_t supGainOld;
+
+    int16_t supGainErrParamA;
+    int16_t supGainErrParamD;
+    int16_t supGainErrParamDiffAB;
+    int16_t supGainErrParamDiffBD;
+
+    struct RealFFT* real_fft;
+
+#ifdef AEC_DEBUG  // NOTE(review): FILE needs <stdio.h>, not included directly.
+    FILE *farFile;
+    FILE *nearFile;
+    FILE *outFile;
+#endif  // AEC_DEBUG
+} AecmCore;
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CreateCore()
+//
+// Allocates the memory needed by the AECM. The memory needs to be
+// initialized separately using the WebRtcAecm_InitCore() function.
+// Returns a pointer to the instance, or NULL on failure.
+AecmCore* WebRtcAecm_CreateCore(void);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_InitCore(...)
+//
+// This function initializes the AECM instance created with
+// WebRtcAecm_CreateCore()
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//      - samplingFreq  : Sampling Frequency
+//
+// Output:
+//      - aecm          : Initialized instance
+//
+// Return value         :  0 - Ok
+//                        -1 - Error
+//
+int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_FreeCore(...)
+//
+// This function releases the memory allocated by WebRtcAecm_CreateCore()
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//
+void WebRtcAecm_FreeCore(AecmCore* aecm);
+
+int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_InitEchoPathCore(...)
+//
+// This function resets the echo channel adaptation with the specified channel.
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//      - echo_path     : Pointer to the data that should initialize the echo
+//                        path
+//
+// Output:
+//      - aecm          : Initialized instance
+//
+void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_ProcessFrame(...)
+//
+// This function processes frames and sends blocks to
+// WebRtcAecm_ProcessBlock(...)
+//
+// Inputs:
+//      - aecm          : Pointer to the AECM instance
+//      - farend        : In buffer containing one frame of echo signal
+//      - nearendNoisy  : In buffer containing one frame of nearend+echo signal
+//                        without NS
+//      - nearendClean  : In buffer containing one frame of nearend+echo signal
+//                        with NS
+//
+// Output:
+//      - out           : Out buffer, one frame of nearend signal
+//
+//
+int WebRtcAecm_ProcessFrame(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* nearendClean,
+                            int16_t* out);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_ProcessBlock(...)
+//
+// This function is called for every block within one frame
+// This function is called by WebRtcAecm_ProcessFrame(...)
+//
+// Inputs:
+//      - aecm          : Pointer to the AECM instance
+//      - farend        : In buffer containing one block of echo signal
+//      - nearendNoisy  : In buffer containing one block of nearend+echo signal
+//                        without NS
+//      - nearendClean  : In buffer containing one block of nearend+echo signal
+//                        with NS
+//
+// Output:
+//      - out           : Out buffer, one block of nearend signal
+//
+//
+int WebRtcAecm_ProcessBlock(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* nearendClean,
+                            int16_t* out);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_BufferFarFrame()
+//
+// Inserts a frame of data into farend buffer.
+//
+// Inputs:
+//      - aecm          : Pointer to the AECM instance
+//      - farend        : In buffer containing one frame of farend signal
+//      - farLen        : Length of frame
+//
+void WebRtcAecm_BufferFarFrame(AecmCore* const aecm,
+                               const int16_t* const farend,
+                               const int farLen);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_FetchFarFrame()
+//
+// Read the farend buffer to account for known delay
+//
+// Inputs:
+//      - aecm          : Pointer to the AECM instance
+//      - farend        : In buffer containing one frame of farend signal
+//      - farLen        : Length of frame
+//      - knownDelay    : known delay
+//
+void WebRtcAecm_FetchFarFrame(AecmCore* const aecm,
+                              int16_t* const farend,
+                              const int farLen,
+                              const int knownDelay);
+
+// All the functions below are intended to be private
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_UpdateFarHistory()
+//
+// Moves the pointer to the next entry and inserts |far_spectrum| and
+// corresponding Q-domain in its buffer.
+//
+// Inputs:
+//      - self          : Pointer to the AECM instance
+//      - far_spectrum  : Pointer to the far end spectrum
+//      - far_q         : Q-domain of far end spectrum
+//
+void WebRtcAecm_UpdateFarHistory(AecmCore* self,
+                                 uint16_t* far_spectrum,
+                                 int far_q);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_AlignedFarend()
+//
+// Returns a pointer to the far end spectrum aligned to current near end
+// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been
+// called before AlignedFarend(...). Otherwise, you get the pointer to the
+// previous frame. The memory is only valid until the next call of
+// WebRtc_DelayEstimatorProcessFix(...).
+//
+// Inputs:
+//      - self              : Pointer to the AECM instance.
+//      - delay             : Current delay estimate.
+//
+// Output:
+//      - far_q             : The Q-domain of the aligned far end spectrum
+//
+// Return value:
+//      - far_spectrum      : Pointer to the aligned far end spectrum
+//                            NULL - Error
+//
+const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, int* far_q, int delay);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CalcSuppressionGain()
+//
+// This function calculates the suppression gain that is used in the
+// Wiener filter.
+//
+// Inputs:
+//      - aecm              : Pointer to the AECM instance.
+//
+// Return value:
+//      - supGain           : Suppression gain with which to scale the noise
+//                            level (Q14).
+//
+int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CalcEnergies()
+//
+// This function calculates the log of energies for nearend, farend and
+// estimated echoes. There is also an update of energy decision levels,
+// i.e. internal VAD.
+//
+// Inputs:
+//      - aecm              : Pointer to the AECM instance.
+//      - far_spectrum      : Pointer to farend spectrum.
+//      - far_q             : Q-domain of farend spectrum.
+//      - nearEner          : Near end energy for current block in
+//                            Q(aecm->dfaQDomain).
+//
+// Output:
+//     - echoEst            : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_CalcEnergies(AecmCore* aecm,
+                             const uint16_t* far_spectrum,
+                             const int16_t far_q,
+                             const uint32_t nearEner,
+                             int32_t* echoEst);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_CalcStepSize()
+//
+// This function calculates the step size used in channel estimation
+//
+// Inputs:
+//      - aecm              : Pointer to the AECM instance.
+//
+// Return value:
+//      - mu                : Stepsize in log2(), i.e. number of shifts.
+//
+int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm);
+
+///////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_UpdateChannel(...)
+//
+// This function performs channel estimation.
+// NLMS and decision on channel storage.
+//
+// Inputs:
+//      - aecm              : Pointer to the AECM instance.
+//      - far_spectrum      : Absolute value of the farend signal in Q(far_q)
+//      - far_q             : Q-domain of the farend signal
+//      - dfa               : Absolute value of the nearend signal
+//                            (Q[aecm->dfaQDomain])
+//      - mu                : NLMS step size.
+// Input/Output:
+//      - echoEst           : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_UpdateChannel(AecmCore* aecm,
+                              const uint16_t* far_spectrum,
+                              const int16_t far_q,
+                              const uint16_t* const dfa,
+                              const int16_t mu,
+                              int32_t* echoEst);
+
+extern const int16_t WebRtcAecm_kCosTable[];
+extern const int16_t WebRtcAecm_kSinTable[];
+
+///////////////////////////////////////////////////////////////////////////////
+// Some function pointers, for internal functions shared by ARM NEON and
+// generic C code.
+//
+typedef void (*CalcLinearEnergies)(AecmCore* aecm,
+                                   const uint16_t* far_spectrum,
+                                   int32_t* echoEst,
+                                   uint32_t* far_energy,
+                                   uint32_t* echo_energy_adapt,
+                                   uint32_t* echo_energy_stored);
+extern CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
+
+typedef void (*StoreAdaptiveChannel)(AecmCore* aecm,
+                                     const uint16_t* far_spectrum,
+                                     int32_t* echo_est);
+extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
+
+typedef void (*ResetAdaptiveChannel)(AecmCore* aecm);
+extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
+
+// For the above function pointers, functions for generic platforms are declared
+// and defined as static in file aecm_core.c, while those for ARM Neon platforms
+// are declared below and defined in file aecm_core_neon.c.
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
+                                       const uint16_t* far_spectrum,
+                                       int32_t* echo_est,
+                                       uint32_t* far_energy,
+                                       uint32_t* echo_energy_adapt,
+                                       uint32_t* echo_energy_stored);
+
+void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
+                                         const uint16_t* far_spectrum,
+                                         int32_t* echo_est);
+
+void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm);
+#endif  // WEBRTC_DETECT_NEON || WEBRTC_HAS_NEON
+
+#if defined(MIPS32_LE)
+void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm,
+                                        const uint16_t* far_spectrum,
+                                        int32_t* echo_est,
+                                        uint32_t* far_energy,
+                                        uint32_t* echo_energy_adapt,
+                                        uint32_t* echo_energy_stored);
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm,
+                                          const uint16_t* far_spectrum,
+                                          int32_t* echo_est);
+
+void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm);
+#endif  // MIPS_DSP_R1_LE
+#endif  // MIPS32_LE
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_c.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_c.c
@ -0,0 +1,771 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+// Square root of Hanning window in Q14 (16384 == 1.0).
+// The table holds 65 entries (indices 0..64); this file indexes it both as
+// [i] and as [PART_LEN - i], so the rising half is reused mirrored for the
+// falling half of the window.
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+// Table is defined in an ARM assembly file.
+// NOTE(review): a C definition with the same name also exists in
+// aecm_core_neon.c -- confirm which one the NEON build links against.
+extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END;
+#else
+static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
+  0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
+  3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
+  6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
+  9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
+  11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
+  13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
+  15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
+  16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
+};
+#endif
+
+// Coefficient pairs for the piecewise magnitude approximation
+//   |z| ~= alpha * max(|re|,|im|) + beta * min(|re|,|im|)
+// used by TimeToFrequencyDomain() when AECM_WITH_ABS_APPROX is defined.
+// Pair 1 is selected when (max >> 2) > min, pair 2 when (max >> 1) > min,
+// pair 3 otherwise.
+#ifdef AECM_WITH_ABS_APPROX
+//Q15 alpha = 0.99439986968132  const Factor for magnitude approximation
+static const uint16_t kAlpha1 = 32584;
+//Q15 beta = 0.12967166976970   const Factor for magnitude approximation
+static const uint16_t kBeta1 = 4249;
+//Q15 alpha = 0.94234827210087  const Factor for magnitude approximation
+static const uint16_t kAlpha2 = 30879;
+//Q15 beta = 0.33787806009150   const Factor for magnitude approximation
+static const uint16_t kBeta2 = 11072;
+//Q15 alpha = 0.82247698684306  const Factor for magnitude approximation
+static const uint16_t kAlpha3 = 26951;
+//Q15 beta = 0.57762063060713   const Factor for magnitude approximation
+static const uint16_t kBeta3 = 18927;
+#endif
+
+// Q-domain in which ComfortNoise() keeps aecm->noiseEst[].
+static const int16_t kNoiseEstQDomain = 15;
+// Number of consecutive blocks the "too low"/"too high" counters must reach
+// before ComfortNoise() nudges a small noise estimate by one step.
+static const int16_t kNoiseEstIncCount = 5;
+
+// Forward declaration; the definition follows WebRtcAecm_ProcessBlock(), which
+// calls it when aecm->cngMode == AecmTrue.
+static void ComfortNoise(AecmCore* aecm,
+                         const uint16_t* dfa,
+                         ComplexInt16* out,
+                         const int16_t* lambda);
+
+// Applies the square-root Hanning window to 2 * PART_LEN time-domain samples
+// and transforms them to the frequency domain.
+//
+// aecm                 [in]    Instance providing the FFT state
+//                              (aecm->real_fft)
+// fft                  [out]   Work buffer that receives the 2 * PART_LEN
+//                              windowed samples fed to the forward FFT
+// time_signal          [in]    2 * PART_LEN time-domain samples
+// freq_signal          [out]   FFT output; the imaginary parts of the first
+//                              PART_LEN bins are negated after the transform
+// time_signal_scaling  [in]    Number of bits every sample is scaled up by
+//                              before windowing
+static void WindowAndFFT(AecmCore* aecm,
+                         int16_t* fft,
+                         const int16_t* time_signal,
+                         ComplexInt16* freq_signal,
+                         int time_signal_scaling) {
+  int i = 0;
+  // Scale by multiplication instead of "sample << scaling": left-shifting a
+  // negative value is undefined behavior in C, and samples are signed.
+  const int scale = 1 << time_signal_scaling;
+
+  // FFT of signal
+  for (i = 0; i < PART_LEN; i++) {
+    // Window time domain signal and insert into real part of
+    // transformation array |fft|
+    int16_t scaled_time_signal = (int16_t)(time_signal[i] * scale);
+    fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14);
+    // Second half of the block uses the window mirrored.
+    scaled_time_signal = (int16_t)(time_signal[i + PART_LEN] * scale);
+    fft[PART_LEN + i] = (int16_t)((
+        scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14);
+  }
+
+  // Do forward FFT, then take only the first PART_LEN complex samples,
+  // and change signs of the imaginary parts.
+  WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
+  for (i = 0; i < PART_LEN; i++) {
+    freq_signal[i].imag = -freq_signal[i].imag;
+  }
+}
+
+// Synthesizes one output block: runs the inverse FFT on |efw|, applies the
+// square-root Hanning window, overlap-adds with aecm->outBuf and finally
+// moves the newest halves of the input buffers to the front.
+//
+// aecm          [in/out] Instance; outBuf, xBuf, dBufNoisy and dBufClean are
+//                        updated
+// fft           [out]    Work buffer receiving the interleaved, conjugated
+//                        spectrum passed to the inverse FFT
+// efw           [in/out] Spectrum to synthesize (PART_LEN + 1 bins); its
+//                        storage is reused for the inverse FFT output
+// output        [out]    PART_LEN saturated time-domain samples
+// nearendClean  [in]     When non-NULL, aecm->dBufClean is shifted as well
+static void InverseFFTAndWindow(AecmCore* aecm,
+                                int16_t* fft,
+                                ComplexInt16* efw,
+                                int16_t* output,
+                                const int16_t* nearendClean) {
+  int bin, n, outCFFT, shift;
+  int32_t head, tail;
+  // The inverse FFT writes its result over |efw| once the spectrum has been
+  // copied into |fft|.
+  int16_t* ifft_out = (int16_t*)efw;
+
+  // Synthesis: interleave real/imaginary parts of bins 0..PART_LEN, with the
+  // imaginary parts sign-flipped.
+  for (bin = 0; bin <= PART_LEN; bin++) {
+    fft[2 * bin] = efw[bin].real;
+    fft[2 * bin + 1] = -efw[bin].imag;
+  }
+
+  // Inverse FFT. Keep outCFFT to scale the samples in the next block.
+  outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out);
+  shift = outCFFT - aecm->dfaCleanQDomain;
+  for (n = 0; n < PART_LEN; n++) {
+    // First half: window, rescale and overlap-add with the saved tail.
+    ifft_out[n] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                    ifft_out[n], WebRtcAecm_kSqrtHanning[n], 14);
+    head = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[n], shift);
+    output[n] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                                        head + aecm->outBuf[n],
+                                        WEBRTC_SPL_WORD16_MIN);
+
+    // Second half: window (mirrored), rescale and keep as the next tail.
+    tail = (ifft_out[PART_LEN + n] *
+        WebRtcAecm_kSqrtHanning[PART_LEN - n]) >> 14;
+    tail = WEBRTC_SPL_SHIFT_W32(tail, shift);
+    aecm->outBuf[n] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                                                tail,
+                                                WEBRTC_SPL_WORD16_MIN);
+  }
+
+  // Move the current block to the old position
+  // (aecm->outBuf is shifted elsewhere)
+  memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
+  memcpy(aecm->dBufNoisy,
+         aecm->dBufNoisy + PART_LEN,
+         sizeof(int16_t) * PART_LEN);
+  if (nearendClean != NULL) {
+    memcpy(aecm->dBufClean,
+           aecm->dBufClean + PART_LEN,
+           sizeof(int16_t) * PART_LEN);
+  }
+}
+
+// Transforms a time domain signal into the frequency domain, outputting the
+// complex valued signal, absolute value and sum of absolute values.
+//
+// aecm                 [in]    Pointer to the AECM instance
+// time_signal          [in]    Pointer to time domain signal
+// freq_signal          [out]   Pointer to the complex valued frequency domain
+//                              array (real and imaginary parts)
+// freq_signal_abs      [out]   Pointer to absolute value of frequency domain
+//                              array
+// freq_signal_sum_abs  [out]   Pointer to the sum of all absolute values in
+//                              the frequency domain array
+// return value                 The Q-domain of current frequency values
+//
+static int TimeToFrequencyDomain(AecmCore* aecm,
+                                 const int16_t* time_signal,
+                                 ComplexInt16* freq_signal,
+                                 uint16_t* freq_signal_abs,
+                                 uint32_t* freq_signal_sum_abs) {
+  int i = 0;
+  // Stays 0 unless AECM_DYNAMIC_Q is defined; this is also the return value.
+  int time_signal_scaling = 0;
+
+  int32_t tmp32no1 = 0;
+  int32_t tmp32no2 = 0;
+
+  // In fft_buf, +16 for 32-byte alignment.
+  int16_t fft_buf[PART_LEN4 + 16];
+  // Round the buffer address up to the next multiple of 32.
+  int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
+
+  int16_t tmp16no1;
+  // NOTE(review): tmp16no2 is not declared on WEBRTC_ARCH_ARM_V7, yet the
+  // AECM_WITH_ABS_APPROX branch below uses it -- that combination would not
+  // compile; confirm it is never configured.
+#ifndef WEBRTC_ARCH_ARM_V7
+  int16_t tmp16no2;
+#endif
+#ifdef AECM_WITH_ABS_APPROX
+  int16_t max_value = 0;
+  int16_t min_value = 0;
+  uint16_t alpha = 0;
+  uint16_t beta = 0;
+#endif
+
+#ifdef AECM_DYNAMIC_Q
+  // Scale the 2 * PART_LEN input samples up by the headroom of the largest
+  // absolute sample.
+  tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
+  time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
+#endif
+
+  WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
+
+  // Extract imaginary and real part, calculate the magnitude for
+  // all frequency bins
+  // Bins 0 and PART_LEN are forced to be purely real, so their magnitude is
+  // just |real|.
+  freq_signal[0].imag = 0;
+  freq_signal[PART_LEN].imag = 0;
+  freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
+  freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
+                                freq_signal[PART_LEN].real);
+  (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
+                           (uint32_t)(freq_signal_abs[PART_LEN]);
+
+  for (i = 1; i < PART_LEN; i++)
+  {
+    // Shortcuts: when one component is zero the magnitude is the absolute
+    // value of the other.
+    if (freq_signal[i].real == 0)
+    {
+      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+    }
+    else if (freq_signal[i].imag == 0)
+    {
+      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+    }
+    else
+    {
+      // Approximation for magnitude of complex fft output
+      // magn = sqrt(real^2 + imag^2)
+      // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
+      //
+      // The parameters alpha and beta are stored in Q15
+
+#ifdef AECM_WITH_ABS_APPROX
+      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+
+      if(tmp16no1 > tmp16no2)
+      {
+        max_value = tmp16no1;
+        min_value = tmp16no2;
+      } else
+      {
+        max_value = tmp16no2;
+        min_value = tmp16no1;
+      }
+
+      // Magnitude in Q(-6)
+      // Pick the coefficient pair from the ratio between max and min.
+      if ((max_value >> 2) > min_value)
+      {
+        alpha = kAlpha1;
+        beta = kBeta1;
+      } else if ((max_value >> 1) > min_value)
+      {
+        alpha = kAlpha2;
+        beta = kBeta2;
+      } else
+      {
+        alpha = kAlpha3;
+        beta = kBeta3;
+      }
+      tmp16no1 = (int16_t)((max_value * alpha) >> 15);
+      tmp16no2 = (int16_t)((min_value * beta) >> 15);
+      freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2;
+#else
+      // Exact path: tmp32no2 = real^2 + imag^2, then integer square root.
+#ifdef WEBRTC_ARCH_ARM_V7
+      __asm __volatile(
+        "smulbb %[tmp32no1], %[real], %[real]\n\t"
+        "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t"
+        :[tmp32no1]"+&r"(tmp32no1),
+         [tmp32no2]"=r"(tmp32no2)
+        :[real]"r"(freq_signal[i].real),
+         [imag]"r"(freq_signal[i].imag)
+      );
+#else
+      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
+      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
+      tmp32no1 = tmp16no1 * tmp16no1;
+      tmp32no2 = tmp16no2 * tmp16no2;
+      // Saturating add keeps the sum of squares inside int32_t.
+      tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
+#endif // WEBRTC_ARCH_ARM_V7
+      tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
+
+      freq_signal_abs[i] = (uint16_t)tmp32no1;
+#endif // AECM_WITH_ABS_APPROX
+    }
+    (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
+  }
+
+  return time_signal_scaling;
+}
+
+// Processes one block of PART_LEN samples: buffers the inputs, transforms
+// them to the frequency domain, estimates the delay and the echo channel,
+// derives a Wiener suppression filter, optionally adds comfort noise and
+// synthesizes the output block.
+//
+// aecm          [in/out] AECM instance
+// farend        [in]     PART_LEN far-end samples
+// nearendNoisy  [in]     PART_LEN near-end samples
+// nearendClean  [in]     PART_LEN near-end samples, or NULL; when NULL the
+//                        noisy near-end spectrum is used in its place
+// output        [out]    PART_LEN output samples
+// return value           0 on success; -1 when adding the far-end spectrum
+//                        or the delay estimation fails, or when no aligned
+//                        far-end spectrum is available
+int WebRtcAecm_ProcessBlock(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* nearendClean,
+                            int16_t* output) {
+  int i;
+
+  uint32_t xfaSum;
+  uint32_t dfaNoisySum;
+  uint32_t dfaCleanSum;
+  uint32_t echoEst32Gained;
+  uint32_t tmpU32;
+
+  int32_t tmp32no1;
+
+  uint16_t xfa[PART_LEN1];
+  uint16_t dfaNoisy[PART_LEN1];
+  uint16_t dfaClean[PART_LEN1];
+  uint16_t* ptrDfaClean = dfaClean;
+  const uint16_t* far_spectrum_ptr = NULL;
+
+  // 32 byte aligned buffers (with +8 or +16).
+  // TODO(kma): define fft with ComplexInt16.
+  int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
+  int32_t echoEst32_buf[PART_LEN1 + 8];
+  int32_t dfw_buf[PART_LEN2 + 8];
+  int32_t efw_buf[PART_LEN2 + 8];
+
+  int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31);
+  int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31);
+  ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31);
+  ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31);
+
+  int16_t hnl[PART_LEN1];
+  int16_t numPosCoef = 0;
+  int16_t nlpGain = ONE_Q14;
+  int delay;
+  int16_t tmp16no1;
+  int16_t tmp16no2;
+  int16_t mu;
+  int16_t supGain;
+  int16_t zeros32, zeros16;
+  int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
+  int far_q;
+  int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;
+
+  const int kMinPrefBand = 4;
+  const int kMaxPrefBand = 24;
+  int32_t avgHnl32 = 0;
+
+  // Determine startup state. There are three states:
+  // (0) the first CONV_LEN blocks
+  // (1) another CONV_LEN blocks
+  // (2) the rest
+
+  if (aecm->startupState < 2)
+  {
+    aecm->startupState = (aecm->totCount >= CONV_LEN) +
+                         (aecm->totCount >= CONV_LEN2);
+  }
+  // END: Determine startup state
+
+  // Buffer near and far end signals
+  memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
+  memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
+  if (nearendClean != NULL)
+  {
+    memcpy(aecm->dBufClean + PART_LEN,
+           nearendClean,
+           sizeof(int16_t) * PART_LEN);
+  }
+
+  // Transform far end signal from time domain to frequency domain.
+  // Only the magnitudes |xfa| are kept; |dfw| is overwritten by the near-end
+  // transforms below.
+  far_q = TimeToFrequencyDomain(aecm,
+                                aecm->xBuf,
+                                dfw,
+                                xfa,
+                                &xfaSum);
+
+  // Transform noisy near end signal from time domain to frequency domain.
+  zerosDBufNoisy = TimeToFrequencyDomain(aecm,
+                                         aecm->dBufNoisy,
+                                         dfw,
+                                         dfaNoisy,
+                                         &dfaNoisySum);
+  aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
+  aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
+
+
+  if (nearendClean == NULL)
+  {
+    // No clean signal: reuse the noisy spectrum and its Q-domains.
+    ptrDfaClean = dfaNoisy;
+    aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
+    aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
+    dfaCleanSum = dfaNoisySum;
+  } else
+  {
+    // Transform clean near end signal from time domain to frequency domain.
+    zerosDBufClean = TimeToFrequencyDomain(aecm,
+                                           aecm->dBufClean,
+                                           dfw,
+                                           dfaClean,
+                                           &dfaCleanSum);
+    aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
+    aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
+  }
+
+  // Get the delay
+  // Save far-end history and estimate delay
+  WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
+  if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend,
+                               xfa,
+                               PART_LEN1,
+                               far_q) == -1) {
+    return -1;
+  }
+  delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
+                                          dfaNoisy,
+                                          PART_LEN1,
+                                          zerosDBufNoisy);
+  if (delay == -1)
+  {
+    return -1;
+  }
+  else if (delay == -2)
+  {
+    // If the delay is unknown, we assume zero.
+    // NOTE: this will have to be adjusted if we ever add lookahead.
+    delay = 0;
+  }
+
+  if (aecm->fixedDelay >= 0)
+  {
+    // Use fixed delay
+    delay = aecm->fixedDelay;
+  }
+
+  // Get aligned far end spectrum
+  far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
+  zerosXBuf = (int16_t) far_q;
+  if (far_spectrum_ptr == NULL)
+  {
+    return -1;
+  }
+
+  // Calculate log(energy) and update energy threshold levels
+  WebRtcAecm_CalcEnergies(aecm,
+                          far_spectrum_ptr,
+                          zerosXBuf,
+                          dfaNoisySum,
+                          echoEst32);
+
+  // Calculate stepsize
+  mu = WebRtcAecm_CalcStepSize(aecm);
+
+  // Update counters
+  aecm->totCount++;
+
+  // This is the channel estimation algorithm.
+  // It is based on NLMS but has a variable step length,
+  // which was calculated above.
+  WebRtcAecm_UpdateChannel(aecm,
+                           far_spectrum_ptr,
+                           zerosXBuf,
+                           dfaNoisy,
+                           mu,
+                           echoEst32);
+  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+
+  // Calculate Wiener filter hnl[]
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    // Far end signal through channel estimate in Q8
+    // How much can we shift right to preserve resolution
+    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+    aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;
+
+    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+    if (zeros32 + zeros16 > 16)
+    {
+      // Multiplication is safe
+      // Result in
+      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
+      //   aecm->xfaQDomainBuf[diff])
+      echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                              (uint16_t)supGain);
+      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+    } else
+    {
+      // Not enough headroom: drop |tmp16no1| bits from one of the factors
+      // and compensate in |resolutionDiff|.
+      tmp16no1 = 17 - zeros32 - zeros16;
+      resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
+                       RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+      if (zeros32 > tmp16no1)
+      {
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                                supGain >> tmp16no1);
+      } else
+      {
+        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+        echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
+      }
+    }
+
+    // Smooth the near-end magnitude into aecm->nearFilt[i], bringing the old
+    // value and the new spectrum into a common Q-domain first.
+    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+    assert(zeros16 >= 0);  // |zeros16| is a norm, hence non-negative.
+    dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
+    if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
+      tmp16no1 = aecm->nearFilt[i] << zeros16;
+      qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
+      tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
+    } else {
+      tmp16no1 = dfa_clean_q_domain_diff < 0
+          ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
+          : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
+      qDomainDiff = 0;
+      tmp16no2 = ptrDfaClean[i];
+    }
+    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+    tmp16no2 = (int16_t)(tmp32no1 >> 4);
+    tmp16no2 += tmp16no1;
+    zeros16 = WebRtcSpl_NormW16(tmp16no2);
+    // Saturate when the value is non-zero and the left shift needed to get
+    // back to the current Q-domain exceeds the available headroom. This must
+    // be a logical AND: a bitwise AND against the 0/1 comparison result would
+    // only test the lowest bit of |tmp16no2|.
+    if ((tmp16no2) && (-qDomainDiff > zeros16)) {
+      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+    } else {
+      aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff
+                                          : tmp16no2 >> qDomainDiff;
+    }
+
+    // Wiener filter coefficients, resulting hnl in Q14
+    if (echoEst32Gained == 0)
+    {
+      hnl[i] = ONE_Q14;
+    } else if (aecm->nearFilt[i] == 0)
+    {
+      hnl[i] = 0;
+    } else
+    {
+      // Multiply the suppression gain
+      // Rounding
+      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
+      tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
+                                   (uint16_t)aecm->nearFilt[i]);
+
+      // Current resolution is
+      // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
+      // Make sure we are in Q14
+      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
+      if (tmp32no1 > ONE_Q14)
+      {
+        hnl[i] = 0;
+      } else if (tmp32no1 < 0)
+      {
+        hnl[i] = ONE_Q14;
+      } else
+      {
+        // 1-echoEst/dfa
+        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
+        if (hnl[i] < 0)
+        {
+          hnl[i] = 0;
+        }
+      }
+    }
+    // Count the non-zero coefficients; used by the NLP outlier check below.
+    if (hnl[i])
+    {
+      numPosCoef++;
+    }
+  }
+  // Only in wideband. Prevent the gain in upper band from being larger than
+  // in lower band.
+  if (aecm->mult == 2)
+  {
+    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
+    //               speech distortion in double-talk.
+    for (i = 0; i < PART_LEN1; i++)
+    {
+      hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14);
+    }
+
+    // Average gain over the preferred band [kMinPrefBand, kMaxPrefBand].
+    for (i = kMinPrefBand; i <= kMaxPrefBand; i++)
+    {
+      avgHnl32 += (int32_t)hnl[i];
+    }
+    assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
+    avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
+
+    // Cap every bin from kMaxPrefBand upwards at that average.
+    for (i = kMaxPrefBand; i < PART_LEN1; i++)
+    {
+      if (hnl[i] > (int16_t)avgHnl32)
+      {
+        hnl[i] = (int16_t)avgHnl32;
+      }
+    }
+  }
+
+  // Calculate NLP gain, result is in Q14
+  if (aecm->nlpFlag)
+  {
+    for (i = 0; i < PART_LEN1; i++)
+    {
+      // Truncate values close to zero and one.
+      if (hnl[i] > NLP_COMP_HIGH)
+      {
+        hnl[i] = ONE_Q14;
+      } else if (hnl[i] < NLP_COMP_LOW)
+      {
+        hnl[i] = 0;
+      }
+
+      // Remove outliers
+      if (numPosCoef < 3)
+      {
+        nlpGain = 0;
+      } else
+      {
+        nlpGain = ONE_Q14;
+      }
+
+      // NLP
+      if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14))
+      {
+        hnl[i] = ONE_Q14;
+      } else
+      {
+        hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14);
+      }
+
+      // multiply with Wiener coefficients
+      efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
+                                                                   hnl[i], 14));
+      efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
+                                                                   hnl[i], 14));
+    }
+  }
+  else
+  {
+    // multiply with Wiener coefficients
+    for (i = 0; i < PART_LEN1; i++)
+    {
+      efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
+                                                                   hnl[i], 14));
+      efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
+                                                                   hnl[i], 14));
+    }
+  }
+
+  if (aecm->cngMode == AecmTrue)
+  {
+    ComfortNoise(aecm, ptrDfaClean, efw, hnl);
+  }
+
+  InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
+
+  return 0;
+}
+
+// Tracks a per-bin noise estimate from the near-end magnitude spectrum and
+// adds random-phase noise, scaled by (ONE_Q14 - lambda[i]), to |out|.
+//
+// aecm    [in/out] Instance; noiseEst, noiseEstCtr, noiseEstTooLowCtr,
+//                  noiseEstTooHighCtr and seed are updated
+// dfa     [in]     Near-end magnitude spectrum (PART_LEN1 bins)
+// out     [in/out] Spectrum the noise is added to, with saturation
+// lambda  [in]     Per-bin suppression gains in Q14 (the caller passes hnl[])
+static void ComfortNoise(AecmCore* aecm,
+                         const uint16_t* dfa,
+                         ComplexInt16* out,
+                         const int16_t* lambda) {
+  int16_t i;
+  int16_t tmp16;
+  int32_t tmp32;
+
+  int16_t randW16[PART_LEN];
+  int16_t uReal[PART_LEN1];
+  int16_t uImag[PART_LEN1];
+  int32_t outLShift32;
+  int16_t noiseRShift16[PART_LEN1];
+
+  // Left shift that takes a near-end magnitude into the noise estimate domain.
+  int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
+  int16_t minTrackShift;
+
+  assert(shiftFromNearToNoise >= 0);
+  assert(shiftFromNearToNoise < 16);
+
+  if (aecm->noiseEstCtr < 100)
+  {
+    // Track the minimum more quickly initially.
+    aecm->noiseEstCtr++;
+    minTrackShift = 6;
+  } else
+  {
+    minTrackShift = 9;
+  }
+
+  // Estimate noise power.
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    // Shift to the noise domain.
+    tmp32 = (int32_t)dfa[i];
+    outLShift32 = tmp32 << shiftFromNearToNoise;
+
+    if (outLShift32 < aecm->noiseEst[i])
+    {
+      // Current magnitude is below the estimate: move the estimate down.
+      // Reset "too low" counter
+      aecm->noiseEstTooLowCtr[i] = 0;
+      // Track the minimum.
+      if (aecm->noiseEst[i] < (1 << minTrackShift))
+      {
+        // For small values, decrease noiseEst[i] every
+        // |kNoiseEstIncCount| block. The regular approach below can not
+        // go further down due to truncation.
+        aecm->noiseEstTooHighCtr[i]++;
+        if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount)
+        {
+          aecm->noiseEst[i]--;
+          aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
+        }
+      }
+      else
+      {
+        aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32)
+                              >> minTrackShift);
+      }
+    } else
+    {
+      // Current magnitude is at or above the estimate: move the estimate up.
+      // Reset "too high" counter
+      aecm->noiseEstTooHighCtr[i] = 0;
+      // Ramp slowly upwards until we hit the minimum again.
+      if ((aecm->noiseEst[i] >> 19) > 0)
+      {
+        // Avoid overflow.
+        // Multiplication with 2049 will cause wrap around. Scale
+        // down first and then multiply
+        aecm->noiseEst[i] >>= 11;
+        aecm->noiseEst[i] *= 2049;
+      }
+      else if ((aecm->noiseEst[i] >> 11) > 0)
+      {
+        // Large enough for relative increase
+        // (x * 2049) >> 11 scales by 2049 / 2048.
+        aecm->noiseEst[i] *= 2049;
+        aecm->noiseEst[i] >>= 11;
+      }
+      else
+      {
+        // Make incremental increases based on size every
+        // |kNoiseEstIncCount| block
+        aecm->noiseEstTooLowCtr[i]++;
+        if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount)
+        {
+          aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1;
+          aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
+        }
+      }
+    }
+  }
+
+  // Bring the estimate back to the near-end domain, clamp it to int16_t range
+  // (writing the clamped value back into noiseEst), and weight it by the
+  // complement of the suppression gain.
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise;
+    if (tmp32 > 32767)
+    {
+      tmp32 = 32767;
+      aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;
+    }
+    noiseRShift16[i] = (int16_t)tmp32;
+
+    tmp16 = ONE_Q14 - lambda[i];
+    noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14);
+  }
+
+  // Generate a uniform random array on [0 2^15-1].
+  WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
+
+  // Generate noise according to estimated energy.
+  uReal[0] = 0; // Reject LF noise.
+  uImag[0] = 0;
+  for (i = 1; i < PART_LEN1; i++)
+  {
+    // Get a random index for the cos and sin tables over [0 359].
+    tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15);
+
+    // Tables are in Q13.
+    uReal[i] = (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >>
+        13);
+    uImag[i] = (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >>
+        13);
+  }
+  // The last bin (index PART_LEN) gets no imaginary noise component.
+  uImag[PART_LEN] = 0;
+
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]);
+    out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]);
+  }
+}
+
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
@ -0,0 +1,212 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+
+// TODO(kma): Re-write the corresponding assembly file, the offset
+// generating script and makefile, to replace these C functions.
+
+// Square root of Hanning window in Q14 (16384 == 1.0), 65 entries.
+// Unlike the table in aecm_core_c.c this definition is not static, so it has
+// external linkage and satisfies the extern declaration used there for NEON
+// builds.
+const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
+  0,
+  399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
+  3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
+  6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
+  9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
+  11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
+  13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
+  15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
+  16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
+};
+
+// Stores the sum of the four 32-bit lanes of |v| into |*ptr| (overwriting
+// whatever was there).
+static inline void AddLanes(uint32_t* ptr, uint32x4_t v) {
+#if defined(WEBRTC_ARCH_ARM64)
+  // Single across-vector add.
+  *ptr = vaddvq_u32(v);
+#else
+  // Fold 4 lanes -> 2 lanes -> 1 lane.
+  const uint32x2_t halves = vadd_u32(vget_low_u32(v), vget_high_u32(v));
+  const uint32x2_t folded = vpadd_u32(halves, halves);
+  *ptr = vget_lane_u32(folded, 0);
+#endif
+}
+
+// NEON version of the linear energy calculation.
+//
+// aecm                [in]  Instance providing channelStored / channelAdapt16
+// far_spectrum        [in]  Far-end magnitude spectrum (PART_LEN1 bins)
+// echo_est            [out] channelStored[i] * far_spectrum[i] for every bin
+// far_energy          [out] Sum of far_spectrum[]
+// echo_energy_adapt   [out] Sum of channelAdapt16[i] * far_spectrum[i]
+// echo_energy_stored  [out] Sum of echo_est[]
+//
+// NOTE(review): the three energy outputs are overwritten by AddLanes() rather
+// than accumulated as in the reference C loop quoted below, so both agree only
+// if the caller passes them in as zero -- confirm.
+// NOTE(review): the signed channel values are reinterpreted as unsigned before
+// the widening multiply; presumably they are never negative -- confirm.
+void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
+                                       const uint16_t* far_spectrum,
+                                       int32_t* echo_est,
+                                       uint32_t* far_energy,
+                                       uint32_t* echo_energy_adapt,
+                                       uint32_t* echo_energy_stored) {
+  int16_t* start_stored_p = aecm->channelStored;
+  int16_t* start_adapt_p = aecm->channelAdapt16;
+  int32_t* echo_est_p = echo_est;
+  const int16_t* end_stored_p = aecm->channelStored + PART_LEN;
+  const uint16_t* far_spectrum_p = far_spectrum;
+  int16x8_t store_v, adapt_v;
+  uint16x8_t spectrum_v;
+  uint32x4_t echo_est_v_low, echo_est_v_high;
+  uint32x4_t far_energy_v, echo_stored_v, echo_adapt_v;
+
+  // Per-lane accumulators, reduced with AddLanes() after the loop.
+  far_energy_v = vdupq_n_u32(0);
+  echo_adapt_v = vdupq_n_u32(0);
+  echo_stored_v = vdupq_n_u32(0);
+
+  // Get energy for the delayed far end signal and estimated
+  // echo using both stored and adapted channels.
+  // The C code:
+  //  for (i = 0; i < PART_LEN1; i++) {
+  //      echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+  //                                         far_spectrum[i]);
+  //      (*far_energy) += (uint32_t)(far_spectrum[i]);
+  //      *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i];
+  //      (*echo_energy_stored) += (uint32_t)echo_est[i];
+  //  }
+  // Vector loop: 8 bins per iteration over the first PART_LEN bins.
+  while (start_stored_p < end_stored_p) {
+    spectrum_v = vld1q_u16(far_spectrum_p);
+    adapt_v = vld1q_s16(start_adapt_p);
+    store_v = vld1q_s16(start_stored_p);
+
+    // Widen the 16-bit spectrum values and add them to the far energy.
+    far_energy_v = vaddw_u16(far_energy_v, vget_low_u16(spectrum_v));
+    far_energy_v = vaddw_u16(far_energy_v, vget_high_u16(spectrum_v));
+
+    // 16x16 -> 32 bit products of stored channel and spectrum.
+    echo_est_v_low = vmull_u16(vreinterpret_u16_s16(vget_low_s16(store_v)),
+                               vget_low_u16(spectrum_v));
+    echo_est_v_high = vmull_u16(vreinterpret_u16_s16(vget_high_s16(store_v)),
+                                vget_high_u16(spectrum_v));
+    vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low));
+    vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high));
+
+    echo_stored_v = vaddq_u32(echo_est_v_low, echo_stored_v);
+    echo_stored_v = vaddq_u32(echo_est_v_high, echo_stored_v);
+
+    // Multiply-accumulate adapted channel and spectrum.
+    echo_adapt_v = vmlal_u16(echo_adapt_v,
+                             vreinterpret_u16_s16(vget_low_s16(adapt_v)),
+                             vget_low_u16(spectrum_v));
+    echo_adapt_v = vmlal_u16(echo_adapt_v,
+                             vreinterpret_u16_s16(vget_high_s16(adapt_v)),
+                             vget_high_u16(spectrum_v));
+
+    start_stored_p += 8;
+    start_adapt_p += 8;
+    far_spectrum_p += 8;
+    echo_est_p += 8;
+  }
+
+  AddLanes(far_energy, far_energy_v);
+  AddLanes(echo_energy_stored, echo_stored_v);
+  AddLanes(echo_energy_adapt, echo_adapt_v);
+
+  // Scalar tail: the last bin (index PART_LEN) is not covered by the loop.
+  echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
+                                             far_spectrum[PART_LEN]);
+  *echo_energy_stored += (uint32_t)echo_est[PART_LEN];
+  *far_energy += (uint32_t)far_spectrum[PART_LEN];
+  *echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];
+}
+
+// NEON version: copies the adaptive channel (channelAdapt16) into the stored
+// channel (channelStored) and recomputes the echo estimate from it.
+//
+// aecm          [in/out] Instance; channelStored is overwritten
+// far_spectrum  [in]     Far-end magnitude spectrum (PART_LEN1 bins)
+// echo_est      [out]    channel * far_spectrum per bin (PART_LEN1 values);
+//                        asserted to be 32-byte aligned
+//
+// Scalar equivalent:
+//   memcpy(aecm->channelStored, aecm->channelAdapt16,
+//          sizeof(int16_t) * PART_LEN1);
+//   for (i = 0; i < PART_LEN1; i++)
+//     echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+//                                         far_spectrum[i]);
+void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
+                                         const uint16_t* far_spectrum,
+                                         int32_t* echo_est) {
+  assert((uintptr_t)echo_est % 32 == 0);
+  assert((uintptr_t)(aecm->channelStored) % 16 == 0);
+  assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0);
+
+  int16_t* const adapt = aecm->channelAdapt16;
+  int16_t* const stored = aecm->channelStored;
+  int k;
+
+  // Eight bins per iteration over the first PART_LEN bins.
+  for (k = 0; k < PART_LEN; k += 8) {
+    const uint16x8_t spectrum = vld1q_u16(far_spectrum + k);
+    const int16x8_t channel = vld1q_s16(adapt + k);
+    const uint16x8_t channel_u = vreinterpretq_u16_s16(channel);
+
+    vst1q_s16(stored + k, channel);
+
+    vst1q_s32(echo_est + k,
+              vreinterpretq_s32_u32(vmull_u16(vget_low_u16(spectrum),
+                                              vget_low_u16(channel_u))));
+    vst1q_s32(echo_est + k + 4,
+              vreinterpretq_s32_u32(vmull_u16(vget_high_u16(spectrum),
+                                              vget_high_u16(channel_u))));
+  }
+
+  // Scalar tail for the last bin (index PART_LEN).
+  stored[PART_LEN] = adapt[PART_LEN];
+  echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(stored[PART_LEN],
+                                             far_spectrum[PART_LEN]);
+}
+
+// NEON version: restores the adaptive channel from the stored channel.
+//
+// Scalar equivalent:
+//   for (i = 0; i < PART_LEN1; i++) {
+//     aecm->channelAdapt16[i] = aecm->channelStored[i];
+//     aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;
+//   }
+void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) {
+  assert((uintptr_t)(aecm->channelStored) % 16 == 0);
+  assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0);
+  assert((uintptr_t)(aecm->channelAdapt32) % 32 == 0);
+
+  const int16_t* stored = aecm->channelStored;
+  int16_t* adapt16 = aecm->channelAdapt16;
+  int32_t* adapt32 = aecm->channelAdapt32;
+  int k;
+
+  // Eight coefficients per iteration over the first PART_LEN entries.
+  for (k = 0; k < PART_LEN; k += 8) {
+    const int16x8_t chunk = vld1q_s16(stored + k);
+    vst1q_s16(adapt16 + k, chunk);
+
+    // Widen to 32 bits with the 16-bit value in the upper half.
+    vst1q_s32(adapt32 + k, vshll_n_s16(vget_low_s16(chunk), 16));
+    vst1q_s32(adapt32 + k + 4, vshll_n_s16(vget_high_s16(chunk), 16));
+  }
+
+  // Scalar tail for the last entry (index PART_LEN).
+  adapt16[PART_LEN] = stored[PART_LEN];
+  adapt32[PART_LEN] = (int32_t)stored[PART_LEN] << 16;
+}
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_defines.h
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_defines.h
@ -0,0 +1,87 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
+
+#define AECM_DYNAMIC_Q                 /* Turn on/off dynamic Q-domain. */
+
+/* Algorithm parameters */
+#define FRAME_LEN       80             /* Total frame length in samples, 10 ms. */
+
+#define PART_LEN        64             /* Length of partition. */
+#define PART_LEN_SHIFT  7              /* log2(PART_LEN * 2), i.e. log2(128). */
+
+#define PART_LEN1       (PART_LEN + 1)  /* Unique fft coefficients. */
+#define PART_LEN2       (PART_LEN << 1) /* Length of partition * 2. */
+#define PART_LEN4       (PART_LEN << 2) /* Length of partition * 4. */
+#define FAR_BUF_LEN     PART_LEN4       /* Length of buffers. */
+#define MAX_DELAY       100
+
+/* Counter parameters */
+#define CONV_LEN        512          /* Convergence length used at startup. */
+#define CONV_LEN2       (CONV_LEN << 1) /* Used at startup. */
+
+/* Energy parameters */
+#define MAX_BUF_LEN     64           /* History length of energy signals. */
+#define FAR_ENERGY_MIN  1025         /* Lowest Far energy level: At least 2 */
+                                     /* in energy. */
+#define FAR_ENERGY_DIFF 929          /* Allowed difference between max */
+                                     /* and min. */
+#define ENERGY_DEV_OFFSET       0    /* The energy error offset in Q8. */
+#define ENERGY_DEV_TOL  400          /* The energy estimation tolerance (Q8). */
+#define FAR_ENERGY_VAD_REGION   230  /* Far VAD tolerance region. */
+
+/* Stepsize parameters */
+#define MU_MIN          10          /* Min stepsize 2^-MU_MIN (far end energy */
+                                    /* dependent). */
+#define MU_MAX          1           /* Max stepsize 2^-MU_MAX (far end energy */
+                                    /* dependent). */
+#define MU_DIFF         9           /* MU_MIN - MU_MAX */
+
+/* Channel parameters */
+#define MIN_MSE_COUNT   20 /* Min number of consecutive blocks with enough */
+                           /* far end energy to compare channel estimates. */
+#define MIN_MSE_DIFF    29 /* The ratio between adapted and stored channel to */
+                           /* accept a new storage (0.8 in Q-MSE_RESOLUTION). */
+#define MSE_RESOLUTION  5           /* MSE parameter resolution. */
+#define RESOLUTION_CHANNEL16    12  /* W16 Channel in Q-RESOLUTION_CHANNEL16. */
+#define RESOLUTION_CHANNEL32    28  /* W32 Channel in Q-RESOLUTION_CHANNEL. */
+#define CHANNEL_VAD     16          /* Minimum energy in frequency band */
+                                    /* to update channel. */
+
+/* Suppression gain parameters: SUPGAIN parameters in Q-(RESOLUTION_SUPGAIN). */
+#define RESOLUTION_SUPGAIN      8     /* Channel in Q-(RESOLUTION_SUPGAIN). */
+#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN)  /* Default. */
+#define SUPGAIN_ERROR_PARAM_A   3072  /* Estimation error parameter */
+                                      /* (Maximum gain) (8 in Q8). */
+#define SUPGAIN_ERROR_PARAM_B   1536  /* Estimation error parameter */
+                                      /* (Gain before going down). */
+#define SUPGAIN_ERROR_PARAM_D   SUPGAIN_DEFAULT /* Estimation error parameter */
+                                /* (Should be the same as Default) (1 in Q8). */
+#define SUPGAIN_EPC_DT  200     /* SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL */
+
+/* Defines for "check delay estimation" */
+#define CORR_WIDTH      31      /* Number of samples to correlate over. */
+#define CORR_MAX        16      /* Maximum correlation offset. */
+#define CORR_MAX_BUF    63
+#define CORR_DEV        4
+#define CORR_MAX_LEVEL  20
+#define CORR_MAX_LOW    4
+/* One slot per offset in [-CORR_MAX, CORR_MAX]. The expansion is fully */
+/* parenthesized so that it stays correct inside larger expressions such as */
+/* 2 * CORR_BUF_LEN. */
+#define CORR_BUF_LEN    ((CORR_MAX << 1) + 1)
+/* Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN. */
+
+#define ONE_Q14         (1 << 14)
+
+/* NLP defines */
+#define NLP_COMP_LOW    3277    /* 0.2 in Q14 */
+#define NLP_COMP_HIGH   ONE_Q14 /* 1 in Q14 */
+
+#endif
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c
@ -0,0 +1,702 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+
+#ifdef AEC_DEBUG
+#include <stdio.h>
+#endif
+#include <stdlib.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#define BUF_SIZE_FRAMES 50 // far-end buffer size (frames of FRAME_LEN samples)
+// Maximum length of resampled signal. Must be an integer multiple of frames
+// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN
+// The factor of 2 handles wb, and the + 1 is as a safety margin
+#define MAX_RESAMP_LEN (5 * FRAME_LEN)
+
+static const size_t kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples)
+static const int kSampMsNb = 8; // samples per ms in nb
+// Target suppression levels for nlp modes
+// log{0.001, 0.00001, 0.00000001}
+// NOTE(review): the two comment lines above describe a table that is not
+// defined anywhere in this file; they do not refer to kInitCheck.
+//
+// Marker value written to AecMobile.initFlag by WebRtcAecm_Init(). The other
+// API functions compare initFlag against it to reject uninitialized instances.
+static const int kInitCheck = 42;
+
+// Per-instance state of the AECM wrapper implemented in this file. Instances
+// are allocated by WebRtcAecm_Create() and passed through the API as void*.
+typedef struct
+{
+    // Sample rate in Hz; WebRtcAecm_Init() accepts only 8000 and 16000.
+    int sampFreq;
+    // Not referenced by any function in this file.
+    int scSampFreq;
+    // Far-end fill level (in frames) that ends the start-up phase; computed in
+    // WebRtcAecm_Process().
+    short bufSizeStart;
+    // Delay value maintained by WebRtcAecm_EstBufDelay().
+    int knownDelay;
+
+    // Stores the last frame added to the farend buffer
+    // (one row per FRAME_LEN-sample frame of a WebRtcAecm_Process() call).
+    short farendOld[2][FRAME_LEN];
+    short initFlag; // indicates if AEC has been initialized
+
+    // Variables used for averaging far end buffer size
+    // (start-up phase of WebRtcAecm_Process()).
+    short counter;
+    short sum;
+    short firstVal;
+    short checkBufSizeCtr;
+
+    // Variables used for delay shifts
+    // msInSndCardBuf: clamped caller-reported delay plus 10, stored by
+    // WebRtcAecm_Process().
+    short msInSndCardBuf;
+    // filtDelay: smoothed delay estimate, see WebRtcAecm_EstBufDelay().
+    short filtDelay;
+    // timeForDelayChange: counter that must exceed 25 before knownDelay is
+    // updated.
+    int timeForDelayChange;
+    // ECstartup: non-zero while WebRtcAecm_Process() is in its start-up phase.
+    int ECstartup;
+    // checkBuffSize: non-zero while bufSizeStart is still being determined.
+    int checkBuffSize;
+    // delayChange: set to 1 by WebRtcAecm_Init() and WebRtcAecm_DelayComp();
+    // never read in this file.
+    int delayChange;
+    // lastDelayDiff: previous (filtDelay - knownDelay) difference.
+    short lastDelayDiff;
+
+    // Suppression mode 0..4 as accepted by WebRtcAecm_set_config().
+    int16_t echoMode;
+
+#ifdef AEC_DEBUG
+    // Dump files opened in WebRtcAecm_Create() and closed in WebRtcAecm_Free().
+    FILE *bufFile;
+    FILE *delayFile;
+    FILE *preCompFile;
+    FILE *postCompFile;
+#endif // AEC_DEBUG
+    // Structures
+    // Ring buffer of int16_t far-end samples with room for kBufSizeSamp samples.
+    RingBuffer *farendBuf;
+
+    // AECM_* code recorded by the most recent failing API call.
+    int lastError;
+
+    // Core state created by WebRtcAecm_CreateCore().
+    AecmCore* aecmCore;
+} AecMobile;
+
+// Estimates delay to set the position of the farend buffer read pointer
+// (controlled by knownDelay)
+static int WebRtcAecm_EstBufDelay(AecMobile* aecmInst, short msInSndCardBuf);
+
+// Stuffs the farend buffer if the estimated delay is too large
+static int WebRtcAecm_DelayComp(AecMobile* aecmInst);
+
+// Allocates an AECM instance together with its core state and its far-end
+// ring buffer. The instance still has to be set up with WebRtcAecm_Init()
+// before it can be used.
+//
+// Returns the new instance, or NULL if any allocation fails. On failure
+// everything that had already been allocated is released again.
+void* WebRtcAecm_Create() {
+    AecMobile* aecm = malloc(sizeof(AecMobile));
+
+    WebRtcSpl_Init();
+
+    // Out of memory: there is nothing to clean up and aecm must not be
+    // dereferenced.
+    if (aecm == NULL) {
+        return NULL;
+    }
+
+    // Give both owned pointers a defined value before anything can fail.
+    aecm->aecmCore = NULL;
+    aecm->farendBuf = NULL;
+
+    aecm->aecmCore = WebRtcAecm_CreateCore();
+    if (!aecm->aecmCore) {
+        // Only the instance itself exists at this point. WebRtcAecm_Free() is
+        // not used here because it expects a fully constructed instance (core,
+        // buffer and, in debug builds, open dump files).
+        free(aecm);
+        return NULL;
+    }
+
+    aecm->farendBuf = WebRtc_CreateBuffer(kBufSizeSamp,
+                                          sizeof(int16_t));
+    if (!aecm->farendBuf)
+    {
+        // Release exactly what was created so far: the core and the instance.
+        WebRtcAecm_FreeCore(aecm->aecmCore);
+        free(aecm);
+        return NULL;
+    }
+
+    aecm->initFlag = 0;
+    aecm->lastError = 0;
+
+#ifdef AEC_DEBUG
+    aecm->aecmCore->farFile = fopen("aecFar.pcm","wb");
+    aecm->aecmCore->nearFile = fopen("aecNear.pcm","wb");
+    aecm->aecmCore->outFile = fopen("aecOut.pcm","wb");
+    //aecm->aecmCore->outLpFile = fopen("aecOutLp.pcm","wb");
+
+    aecm->bufFile = fopen("aecBuf.dat", "wb");
+    aecm->delayFile = fopen("aecDelay.dat", "wb");
+    aecm->preCompFile = fopen("preComp.pcm", "wb");
+    aecm->postCompFile = fopen("postComp.pcm", "wb");
+#endif // AEC_DEBUG
+    return aecm;
+}
+
+// Releases an instance obtained from WebRtcAecm_Create(): in debug builds the
+// dump files are closed first, then the core, the far-end buffer and the
+// instance itself are freed. Passing NULL is a no-op.
+void WebRtcAecm_Free(void* aecmInst) {
+    AecMobile* self = aecmInst;
+
+    if (self != NULL) {
+#ifdef AEC_DEBUG
+        AecmCore* core = self->aecmCore;
+
+        fclose(core->farFile);
+        fclose(core->nearFile);
+        fclose(core->outFile);
+        //fclose(core->outLpFile);
+
+        fclose(self->bufFile);
+        fclose(self->delayFile);
+        fclose(self->preCompFile);
+        fclose(self->postCompFile);
+#endif // AEC_DEBUG
+        WebRtcAecm_FreeCore(self->aecmCore);
+        WebRtc_FreeBuffer(self->farendBuf);
+        free(self);
+    }
+}
+
+// Resets an instance created by WebRtcAecm_Create() and prepares it for
+// processing at the given sample rate.
+//
+// aecmInst: instance to initialize.
+// sampFreq: sample rate in Hz; only 8000 and 16000 are accepted.
+//
+// Returns 0 on success and -1 on failure. Unless aecmInst is NULL, the reason
+// for a failure is recorded in lastError.
+int32_t WebRtcAecm_Init(void *aecmInst, int32_t sampFreq)
+{
+    AecMobile* aecm = aecmInst;
+    AecmConfig aecConfig;
+
+    if (aecm == NULL)
+    {
+        return -1;
+    }
+
+    if (sampFreq != 8000 && sampFreq != 16000)
+    {
+        aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    aecm->sampFreq = sampFreq;
+
+    // Initialize AECM core
+    if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1)
+    {
+        aecm->lastError = AECM_UNSPECIFIED_ERROR;
+        return -1;
+    }
+
+    // Initialize farend buffer
+    WebRtc_InitBuffer(aecm->farendBuf);
+
+    // Must be set before WebRtcAecm_set_config() below, which rejects
+    // instances whose initFlag is not kInitCheck.
+    aecm->initFlag = kInitCheck; // indicates that initialization has been done
+
+    aecm->delayChange = 1;
+
+    aecm->sum = 0;
+    aecm->counter = 0;
+    aecm->checkBuffSize = 1;
+    aecm->firstVal = 0;
+
+    aecm->ECstartup = 1;
+    aecm->bufSizeStart = 0;
+    aecm->checkBufSizeCtr = 0;
+    aecm->filtDelay = 0;
+    aecm->timeForDelayChange = 0;
+    aecm->knownDelay = 0;
+    aecm->lastDelayDiff = 0;
+
+    // Clear the whole saved-frame history. sizeof covers both rows
+    // (2 * FRAME_LEN shorts); a fixed count of 160 bytes would leave the
+    // second row, which is read when a 160-sample call runs out of far-end
+    // data, uncleared.
+    memset(aecm->farendOld, 0, sizeof(aecm->farendOld));
+
+    // Default settings.
+    aecConfig.cngMode = AecmTrue;
+    aecConfig.echoMode = 3;
+
+    if (WebRtcAecm_set_config(aecm, aecConfig) == -1)
+    {
+        aecm->lastError = AECM_UNSPECIFIED_ERROR;
+        return -1;
+    }
+
+    return 0;
+}
+
+// Appends one block of far-end samples to the instance's far-end ring buffer.
+//
+// aecmInst:    instance that has been set up with WebRtcAecm_Init().
+// farend:      samples to append.
+// nrOfSamples: number of samples in farend; must be 80 or 160.
+//
+// Returns 0 on success and -1 on failure. Unless aecmInst is NULL, the reason
+// for a failure is recorded in lastError.
+int32_t WebRtcAecm_BufferFarend(void *aecmInst, const int16_t *farend,
+                                size_t nrOfSamples)
+{
+    AecMobile* self = aecmInst;
+
+    if (self == NULL)
+    {
+        return -1;
+    }
+    if (farend == NULL)
+    {
+        self->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+    if (self->initFlag != kInitCheck)
+    {
+        self->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+    if (!(nrOfSamples == 80 || nrOfSamples == 160))
+    {
+        self->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+
+    // Once the start-up phase is over, run the delay compensation before
+    // every write.
+    // TODO: Is this really a good idea?
+    if (self->ECstartup == 0)
+    {
+        WebRtcAecm_DelayComp(self);
+    }
+
+    WebRtc_WriteBuffer(self->farendBuf, farend, nrOfSamples);
+    return 0;
+}
+
+// Processes one block of near-end audio.
+//
+// aecmInst:       instance that has been set up with WebRtcAecm_Init().
+// nearendNoisy:   near-end input; must not be NULL.
+// nearendClean:   optional second near-end input; may be NULL.
+// out:            receives nrOfSamples output samples; must not be NULL.
+// nrOfSamples:    number of samples per buffer; must be 80 or 160.
+// msInSndCardBuf: delay reported by the caller; values outside [0, 500] are
+//                 clamped.
+//
+// While the instance is in its start-up phase (ECstartup != 0) the output is
+// a plain copy of the near-end input and the function only decides when
+// enough far-end data has been buffered. Afterwards each FRAME_LEN-sample
+// frame is passed to WebRtcAecm_ProcessFrame() together with a far-end frame.
+//
+// Returns 0 on success. Returns -1 on invalid arguments (lastError is set
+// unless aecmInst is NULL), when WebRtcAecm_ProcessFrame() fails (lastError is
+// not updated on that path), and also when msInSndCardBuf had to be clamped;
+// in the clamped case the block is still processed and lastError holds
+// AECM_BAD_PARAMETER_WARNING.
+int32_t WebRtcAecm_Process(void *aecmInst, const int16_t *nearendNoisy,
+                           const int16_t *nearendClean, int16_t *out,
+                           size_t nrOfSamples, int16_t msInSndCardBuf)
+{
+  AecMobile* aecm = aecmInst;
+    int32_t retVal = 0;
+    size_t i;
+    short nmbrOfFilledBuffers;
+    size_t nBlocks10ms;
+    size_t nFrames;
+#ifdef AEC_DEBUG
+    short msInAECBuf;
+#endif
+
+    if (aecm == NULL)
+    {
+        return -1;
+    }
+
+    if (nearendNoisy == NULL)
+    {
+        aecm->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+
+    if (out == NULL)
+    {
+        aecm->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+
+    if (aecm->initFlag != kInitCheck)
+    {
+        aecm->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    if (nrOfSamples != 80 && nrOfSamples != 160)
+    {
+        aecm->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+
+    // Out-of-range delays are clamped and flagged through the return value,
+    // but processing continues.
+    if (msInSndCardBuf < 0)
+    {
+        msInSndCardBuf = 0;
+        aecm->lastError = AECM_BAD_PARAMETER_WARNING;
+        retVal = -1;
+    } else if (msInSndCardBuf > 500)
+    {
+        msInSndCardBuf = 500;
+        aecm->lastError = AECM_BAD_PARAMETER_WARNING;
+        retVal = -1;
+    }
+    // NOTE(review): fixed offset of 10 added to the reported delay; the
+    // rationale is not visible in this file.
+    msInSndCardBuf += 10;
+    aecm->msInSndCardBuf = msInSndCardBuf;
+
+    nFrames = nrOfSamples / FRAME_LEN;
+    nBlocks10ms = nFrames / aecm->aecmCore->mult;
+
+    if (aecm->ECstartup)
+    {
+        // Pass the near-end signal through unchanged (clean input if given,
+        // otherwise the noisy one); the copy is skipped for in-place calls.
+        if (nearendClean == NULL)
+        {
+            if (out != nearendNoisy)
+            {
+                memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples);
+            }
+        } else if (out != nearendClean)
+        {
+            memcpy(out, nearendClean, sizeof(short) * nrOfSamples);
+        }
+
+        // Note: the (short) cast applies to the sample count, before the
+        // division by FRAME_LEN.
+        nmbrOfFilledBuffers =
+            (short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
+        // The AECM is in the start up mode
+        // AECM is disabled until the soundcard buffer and farend buffers are OK
+
+        // Mechanism to ensure that the soundcard buffer is reasonably stable.
+        if (aecm->checkBuffSize)
+        {
+            aecm->checkBufSizeCtr++;
+            // Before we fill up the far end buffer we require the amount of data on the
+            // sound card to be stable (+/-8 ms) compared to the first value. This
+            // comparison is made during the following 4 consecutive frames. If it seems
+            // to be stable then we start to fill up the far end buffer.
+            // NOTE(review): the code below uses a tolerance of
+            // max(20% of the current value, kSampMsNb) and requires
+            // counter * nBlocks10ms >= 6, which differs from the figures
+            // quoted in the comment above.
+
+            if (aecm->counter == 0)
+            {
+                aecm->firstVal = aecm->msInSndCardBuf;
+                aecm->sum = 0;
+            }
+
+            if (abs(aecm->firstVal - aecm->msInSndCardBuf)
+                    < WEBRTC_SPL_MAX(0.2 * aecm->msInSndCardBuf, kSampMsNb))
+            {
+                aecm->sum += aecm->msInSndCardBuf;
+                aecm->counter++;
+            } else
+            {
+                aecm->counter = 0;
+            }
+
+            if (aecm->counter * nBlocks10ms >= 6)
+            {
+                // The farend buffer size is determined in blocks of 80 samples
+                // Use 75% of the average value of the soundcard buffer
+                aecm->bufSizeStart
+                        = WEBRTC_SPL_MIN((3 * aecm->sum
+                                        * aecm->aecmCore->mult) / (aecm->counter * 40), BUF_SIZE_FRAMES);
+                // buffersize has now been determined
+                aecm->checkBuffSize = 0;
+            }
+
+            if (aecm->checkBufSizeCtr * nBlocks10ms > 50)
+            {
+                // for really bad sound cards, don't disable echocanceller for more than 0.5 sec
+                aecm->bufSizeStart = WEBRTC_SPL_MIN((3 * aecm->msInSndCardBuf
+                                * aecm->aecmCore->mult) / 40, BUF_SIZE_FRAMES);
+                aecm->checkBuffSize = 0;
+            }
+        }
+
+        // if checkBuffSize changed in the if-statement above
+        if (!aecm->checkBuffSize)
+        {
+            // soundcard buffer is now reasonably stable
+            // When the far end buffer is filled with approximately the same amount of
+            // data as the amount on the sound card we end the start up phase and start
+            // to cancel echoes.
+
+            if (nmbrOfFilledBuffers == aecm->bufSizeStart)
+            {
+                aecm->ECstartup = 0; // Enable the AECM
+            } else if (nmbrOfFilledBuffers > aecm->bufSizeStart)
+            {
+                // More far-end data than wanted: move the read pointer by the
+                // surplus (available samples minus bufSizeStart frames).
+                WebRtc_MoveReadPtr(aecm->farendBuf,
+                                   (int) WebRtc_available_read(aecm->farendBuf)
+                                   - (int) aecm->bufSizeStart * FRAME_LEN);
+                aecm->ECstartup = 0;
+            }
+        }
+
+    } else
+    {
+        // AECM is enabled
+
+        // Note only 1 block supported for nb and 2 blocks for wb
+        for (i = 0; i < nFrames; i++)
+        {
+            int16_t farend[FRAME_LEN];
+            const int16_t* farend_ptr = NULL;
+
+            nmbrOfFilledBuffers =
+                (short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
+
+            // Check that there is data in the far end buffer
+            if (nmbrOfFilledBuffers > 0)
+            {
+                // Get the next 80 samples from the farend buffer
+                WebRtc_ReadBuffer(aecm->farendBuf, (void**) &farend_ptr, farend,
+                                  FRAME_LEN);
+
+                // Always store the last frame for use when we run out of data
+                memcpy(&(aecm->farendOld[i][0]), farend_ptr,
+                       FRAME_LEN * sizeof(short));
+            } else
+            {
+                // We have no data so we use the last played frame
+                memcpy(farend, &(aecm->farendOld[i][0]), FRAME_LEN * sizeof(short));
+                farend_ptr = farend;
+            }
+
+            // Call buffer delay estimator when all data is extracted,
+            // i,e. i = 0 for NB and i = 1 for WB
+            if ((i == 0 && aecm->sampFreq == 8000) || (i == 1 && aecm->sampFreq == 16000))
+            {
+                WebRtcAecm_EstBufDelay(aecm, aecm->msInSndCardBuf);
+            }
+
+            // Call the AECM
+            /*WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearend[FRAME_LEN * i],
+             &out[FRAME_LEN * i], aecm->knownDelay);*/
+            if (WebRtcAecm_ProcessFrame(aecm->aecmCore,
+                                        farend_ptr,
+                                        &nearendNoisy[FRAME_LEN * i],
+                                        (nearendClean
+                                         ? &nearendClean[FRAME_LEN * i]
+                                         : NULL),
+                                        &out[FRAME_LEN * i]) == -1)
+                return -1;
+        }
+    }
+
+#ifdef AEC_DEBUG
+    // Debug dump: far-end buffer fill level in ms and the current known delay.
+    msInAECBuf = (short) WebRtc_available_read(aecm->farendBuf) /
+        (kSampMsNb * aecm->aecmCore->mult);
+    fwrite(&msInAECBuf, 2, 1, aecm->bufFile);
+    fwrite(&(aecm->knownDelay), sizeof(aecm->knownDelay), 1, aecm->delayFile);
+#endif
+
+    return retVal;
+}
+
+// Applies a new configuration to an initialized instance.
+//
+// config.cngMode must be AecmFalse or AecmTrue; config.echoMode must lie in
+// 0..4 and selects how the suppression-gain parameters are scaled relative to
+// their defaults (modes 0..2 scale down by 8, 4 and 2; mode 3 uses the
+// defaults; mode 4 doubles them).
+//
+// Returns 0 on success and -1 on failure. Unless aecmInst is NULL, the reason
+// for a failure is recorded in lastError. Note that cngMode is already stored
+// in the core when echoMode is subsequently rejected.
+int32_t WebRtcAecm_set_config(void *aecmInst, AecmConfig config)
+{
+    AecMobile* self = aecmInst;
+    AecmCore* core;
+    int gain;
+    int errA;
+    int errB;
+    int errD;
+
+    if (self == NULL)
+    {
+        return -1;
+    }
+    if (self->initFlag != kInitCheck)
+    {
+        self->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    if (!(config.cngMode == AecmFalse || config.cngMode == AecmTrue))
+    {
+        self->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    core = self->aecmCore;
+    core->cngMode = config.cngMode;
+
+    if (!(config.echoMode >= 0 && config.echoMode <= 4))
+    {
+        self->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    self->echoMode = config.echoMode;
+
+    // Pick the scaled parameter set for the selected mode.
+    switch (self->echoMode)
+    {
+        case 0:
+            gain = SUPGAIN_DEFAULT >> 3;
+            errA = SUPGAIN_ERROR_PARAM_A >> 3;
+            errB = SUPGAIN_ERROR_PARAM_B >> 3;
+            errD = SUPGAIN_ERROR_PARAM_D >> 3;
+            break;
+        case 1:
+            gain = SUPGAIN_DEFAULT >> 2;
+            errA = SUPGAIN_ERROR_PARAM_A >> 2;
+            errB = SUPGAIN_ERROR_PARAM_B >> 2;
+            errD = SUPGAIN_ERROR_PARAM_D >> 2;
+            break;
+        case 2:
+            gain = SUPGAIN_DEFAULT >> 1;
+            errA = SUPGAIN_ERROR_PARAM_A >> 1;
+            errB = SUPGAIN_ERROR_PARAM_B >> 1;
+            errD = SUPGAIN_ERROR_PARAM_D >> 1;
+            break;
+        case 3:
+            gain = SUPGAIN_DEFAULT;
+            errA = SUPGAIN_ERROR_PARAM_A;
+            errB = SUPGAIN_ERROR_PARAM_B;
+            errD = SUPGAIN_ERROR_PARAM_D;
+            break;
+        default: // echoMode == 4; the range was validated above
+            gain = SUPGAIN_DEFAULT << 1;
+            errA = SUPGAIN_ERROR_PARAM_A << 1;
+            errB = SUPGAIN_ERROR_PARAM_B << 1;
+            errD = SUPGAIN_ERROR_PARAM_D << 1;
+            break;
+    }
+
+    core->supGain = gain;
+    core->supGainOld = gain;
+    core->supGainErrParamA = errA;
+    core->supGainErrParamD = errD;
+    core->supGainErrParamDiffAB = errA - errB;
+    core->supGainErrParamDiffBD = errB - errD;
+
+    return 0;
+}
+
+// Copies the configuration currently in effect (cngMode and echoMode) into
+// *config.
+//
+// Returns 0 on success and -1 on failure. Unless aecmInst is NULL, the reason
+// for a failure is recorded in lastError.
+int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config)
+{
+    AecMobile* self = aecmInst;
+    int32_t status = -1;
+
+    if (self == NULL)
+    {
+        return status;
+    }
+
+    if (config == NULL)
+    {
+        self->lastError = AECM_NULL_POINTER_ERROR;
+    } else if (self->initFlag != kInitCheck)
+    {
+        self->lastError = AECM_UNINITIALIZED_ERROR;
+    } else
+    {
+        config->cngMode = self->aecmCore->cngMode;
+        config->echoMode = self->echoMode;
+        status = 0;
+    }
+
+    return status;
+}
+
+// Loads a caller-supplied echo path into the core of an initialized instance.
+//
+// echo_path:  buffer holding the echo path as int16_t values.
+// size_bytes: size of that buffer; must equal
+//             WebRtcAecm_echo_path_size_bytes().
+//
+// Returns 0 on success and -1 on failure. Unless aecmInst is NULL, the reason
+// for a failure is recorded in lastError.
+int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
+                                const void* echo_path,
+                                size_t size_bytes)
+{
+    AecMobile* self = aecmInst;
+
+    if (self == NULL)
+    {
+        return -1;
+    }
+    if (echo_path == NULL)
+    {
+        self->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+    if (WebRtcAecm_echo_path_size_bytes() != size_bytes)
+    {
+        // Input channel size does not match the size of AECM
+        self->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    if (self->initFlag != kInitCheck)
+    {
+        self->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    WebRtcAecm_InitEchoPathCore(self->aecmCore, (const int16_t*) echo_path);
+    return 0;
+}
+
+// Copies the stored channel (echo path) of an initialized instance into a
+// caller-supplied buffer.
+//
+// echo_path:  destination buffer.
+// size_bytes: size of that buffer; must equal
+//             WebRtcAecm_echo_path_size_bytes().
+//
+// Returns 0 on success and -1 on failure. Unless aecmInst is NULL, the reason
+// for a failure is recorded in lastError.
+int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
+                               void* echo_path,
+                               size_t size_bytes)
+{
+    AecMobile* self = aecmInst;
+
+    if (self == NULL)
+    {
+        return -1;
+    }
+    if (echo_path == NULL)
+    {
+        self->lastError = AECM_NULL_POINTER_ERROR;
+        return -1;
+    }
+    if (WebRtcAecm_echo_path_size_bytes() != size_bytes)
+    {
+        // Input channel size does not match the size of AECM
+        self->lastError = AECM_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    if (self->initFlag != kInitCheck)
+    {
+        self->lastError = AECM_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    memcpy(echo_path, self->aecmCore->channelStored, size_bytes);
+    return 0;
+}
+
+// Size in bytes of an echo path: PART_LEN1 coefficients of type int16_t.
+size_t WebRtcAecm_echo_path_size_bytes()
+{
+    const size_t coefficient_size = sizeof(int16_t);
+
+    return PART_LEN1 * coefficient_size;
+}
+
+int32_t WebRtcAecm_get_error_code(void *aecmInst)
+{
+  AecMobile* aecm = aecmInst;
+
+    if (aecm == NULL)
+    {
+        return -1;
+    }
+
+    return aecm->lastError;
+}
+
+// Updates the delay bookkeeping (filtDelay, timeForDelayChange, lastDelayDiff
+// and, when the estimate has been off for long enough, knownDelay) from the
+// reported sound card delay and the current far-end buffer fill level.
+//
+// aecm:           instance to update.
+// msInSndCardBuf: sound card delay in ms.
+//
+// Always returns 0.
+static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf) {
+    short delayNew, nSampSndCard;
+    short nSampFar = (short) WebRtc_available_read(aecm->farendBuf);
+    short diff;
+
+    // Convert the sound card delay from ms to samples.
+    nSampSndCard = msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
+
+    delayNew = nSampSndCard - nSampFar;
+
+    // If the difference is below one frame, move the far-end read pointer by
+    // one frame and account for that in the estimate.
+    if (delayNew < FRAME_LEN)
+    {
+        WebRtc_MoveReadPtr(aecm->farendBuf, FRAME_LEN);
+        delayNew += FRAME_LEN;
+    }
+
+    // Smooth the estimate (80% previous value, 20% new value), never below 0.
+    aecm->filtDelay = WEBRTC_SPL_MAX(0, (8 * aecm->filtDelay + 2 * delayNew) / 10);
+
+    // Count consecutive calls in which the smoothed delay lies outside the
+    // 96..224 band around knownDelay on the same side; any other case resets
+    // the counter.
+    diff = aecm->filtDelay - aecm->knownDelay;
+    if (diff > 224)
+    {
+        if (aecm->lastDelayDiff < 96)
+        {
+            aecm->timeForDelayChange = 0;
+        } else
+        {
+            aecm->timeForDelayChange++;
+        }
+    } else if (diff < 96 && aecm->knownDelay > 0)
+    {
+        if (aecm->lastDelayDiff > 224)
+        {
+            aecm->timeForDelayChange = 0;
+        } else
+        {
+            aecm->timeForDelayChange++;
+        }
+    } else
+    {
+        aecm->timeForDelayChange = 0;
+    }
+    aecm->lastDelayDiff = diff;
+
+    // Only after more than 25 such calls is knownDelay actually moved.
+    if (aecm->timeForDelayChange > 25)
+    {
+        aecm->knownDelay = WEBRTC_SPL_MAX((int)aecm->filtDelay - 160, 0);
+    }
+    return 0;
+}
+
+// Stuffs the far-end buffer when the sound card holds so much more data than
+// the far-end buffer that the difference exceeds the maximum allowed known
+// delay. Always returns 0.
+static int WebRtcAecm_DelayComp(AecMobile* aecm) {
+    const int farSamples = (int) WebRtc_available_read(aecm->farendBuf);
+    const int stuffLimit = 10 * FRAME_LEN;
+    const int cardSamples =
+        aecm->msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
+
+    if (cardSamples - farSamples > FAR_BUF_LEN - FRAME_LEN * aecm->aecmCore->mult)
+    {
+        // Stuff at least one frame and at most stuffLimit samples.
+        int stuff = (cardSamples >> 1) - farSamples;
+
+        if (stuff < FRAME_LEN)
+        {
+            stuff = FRAME_LEN;
+        }
+        if (stuff > stuffLimit)
+        {
+            stuff = stuffLimit;
+        }
+
+        // A negative offset moves the read pointer backwards.
+        WebRtc_MoveReadPtr(aecm->farendBuf, -stuff);
+        aecm->delayChange = 1; // the delay needs to be updated
+    }
+
+    return 0;
+}
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h
@ -0,0 +1,218 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
+
+#include <stdlib.h>
+
+#include "webrtc/typedefs.h"
+
+enum {
+    AecmFalse = 0,
+    AecmTrue
+};
+
+// Errors
+#define AECM_UNSPECIFIED_ERROR           12000
+#define AECM_UNSUPPORTED_FUNCTION_ERROR  12001
+#define AECM_UNINITIALIZED_ERROR         12002
+#define AECM_NULL_POINTER_ERROR          12003
+#define AECM_BAD_PARAMETER_ERROR         12004
+
+// Warnings
+#define AECM_BAD_PARAMETER_WARNING       12100
+
+// Run-time configuration accepted by WebRtcAecm_set_config() and reported by
+// WebRtcAecm_get_config().
+typedef struct {
+    int16_t cngMode;            // AecmFalse, AecmTrue (default)
+    int16_t echoMode;           // 0, 1, 2, 3 (default), 4
+} AecmConfig;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Allocates the memory needed by the AECM. The memory needs to be
+ * initialized separately using the WebRtcAecm_Init() function.
+ * Returns a pointer to the instance, or NULL on failure.
+ */
+void* WebRtcAecm_Create();
+
+/*
+ * This function releases the memory allocated by WebRtcAecm_Create()
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*    aecmInst            Pointer to the AECM instance
+ */
+void WebRtcAecm_Free(void* aecmInst);
+
+/*
+ * Initializes an AECM instance.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecmInst      Pointer to the AECM instance
+ * int32_t        sampFreq      Sampling frequency of data
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq);
+
+/*
+ * Inserts an 80 or 160 sample block of data into the farend buffer.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecmInst      Pointer to the AECM instance
+ * int16_t*       farend        In buffer containing one frame of
+ *                              farend signal
+ * size_t         nrOfSamples   Number of samples in farend buffer
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAecm_BufferFarend(void* aecmInst,
+                                const int16_t* farend,
+                                size_t nrOfSamples);
+
+/*
+ * Runs the AECM on one 80- or 160-sample block of data.
+ *
+ * Inputs                        Description
+ * -------------------------------------------------------------------
+ * void*          aecmInst       Pointer to the AECM instance
+ * int16_t*       nearendNoisy   In buffer containing one frame of
+ *                               reference nearend+echo signal. If
+ *                               noise reduction is active, provide
+ *                               the noisy signal here.
+ * int16_t*       nearendClean   In buffer containing one frame of
+ *                               nearend+echo signal. If noise
+ *                               reduction is active, provide the
+ *                               clean signal here. Otherwise pass a
+ *                               NULL pointer.
+ * size_t         nrOfSamples    Number of samples in nearend buffer
+ * int16_t        msInSndCardBuf Delay estimate for sound card and
+ *                               system buffers
+ *
+ * Outputs                       Description
+ * -------------------------------------------------------------------
+ * int16_t*       out            Out buffer, one frame of processed nearend
+ * int32_t        return         0: OK
+ *                              -1: error
+ */
+int32_t WebRtcAecm_Process(void* aecmInst,
+                           const int16_t* nearendNoisy,
+                           const int16_t* nearendClean,
+                           int16_t* out,
+                           size_t nrOfSamples,
+                           int16_t msInSndCardBuf);
+
+/*
+ * This function enables the user to set certain parameters on-the-fly
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecmInst      Pointer to the AECM instance
+ * AecmConfig     config        Config instance that contains all
+ *                              properties to be set
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config);
+
+/*
+ * This function enables the user to read back the current configuration
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecmInst      Pointer to the AECM instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * AecmConfig*    config        Pointer to the config instance that
+ *                              all properties will be written to
+ * int32_t        return        0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config);
+
+/*
+ * This function enables the user to set the echo path on-the-fly.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*        aecmInst        Pointer to the AECM instance
+ * void*        echo_path       Pointer to the echo path to be set
+ * size_t       size_bytes      Size in bytes of the echo path
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t      return          0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
+                                const void* echo_path,
+                                size_t size_bytes);
+
+/*
+ * This function enables the user to get the currently used echo path
+ * on-the-fly
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*        aecmInst        Pointer to the AECM instance
+ * void*        echo_path       Pointer to echo path
+ * size_t       size_bytes      Size in bytes of the echo path
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t      return          0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
+                               void* echo_path,
+                               size_t size_bytes);
+
+/*
+ * This function enables the user to get the echo path size in bytes
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * size_t       return          Size in bytes
+ */
+size_t WebRtcAecm_echo_path_size_bytes();
+
+/*
+ * Gets the last error code.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecmInst      Pointer to the AECM instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        12000-12100: error code (-1 if aecmInst is NULL)
+ */
+int32_t WebRtcAecm_get_error_code(void *aecmInst);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h
@ -0,0 +1,86 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
+
+#include <stdio.h>
+
+#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h"
+
+// To enable AEC logging, invoke GYP with -Daec_debug_dump=1.
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+// Dumps wav data to file. Forwards |file|, |data| and |num_samples| unchanged
+// to rtc_WavWriteSamples(). The do/while(0) wrapper makes the expansion behave
+// as a single statement.
+#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \
+  do {                                                   \
+    rtc_WavWriteSamples(file, data, num_samples);        \
+  } while (0)
+
+// (Re)opens a wav file for writing using the specified sample rate.
+// Forwards all five arguments, in the order given, to WebRtcAec_ReopenWav();
+// |wav_file| is passed straight through as that function's last argument.
+#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate,     \
+                                 sample_rate, wav_file)                  \
+  do {                                                                   \
+    WebRtcAec_ReopenWav(name, instance_index, process_rate, sample_rate, \
+                        wav_file);                                       \
+  } while (0)
+
+// Closes a wav file by passing |wav_file| to rtc_WavClose().
+#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \
+  do {                                    \
+    rtc_WavClose(wav_file);               \
+  } while (0)
+
+// Dumps raw data to file: writes one element of |data_size| bytes from |data|
+// to the stdio stream |file|. The fwrite() result is deliberately ignored
+// (best-effort debug output).
+// |file| is evaluated exactly once and the write is skipped when it is NULL,
+// because passing a NULL stream to fwrite() is undefined behaviour.
+#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size)              \
+  do {                                                              \
+    FILE* rtc_aec_debug_raw_file_ = (file);                         \
+    if (rtc_aec_debug_raw_file_ != NULL) {                          \
+      (void) fwrite(data, data_size, 1, rtc_aec_debug_raw_file_);   \
+    }                                                               \
+  } while (0)
+
+// Opens a raw data file for writing. Forwards |name|, |instance_counter| and
+// |file| unchanged to WebRtcAec_RawFileOpen(); this macro takes no sample
+// rate.
+#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \
+  do {                                                       \
+    WebRtcAec_RawFileOpen(name, instance_counter, file);     \
+  } while (0)
+
+// Closes a raw data file with fclose(); the fclose() result is ignored.
+// |file| is evaluated exactly once and nothing is done when it is NULL,
+// because passing a NULL stream to fclose() is undefined behaviour.
+#define RTC_AEC_DEBUG_RAW_CLOSE(file)         \
+  do {                                        \
+    FILE* rtc_aec_debug_raw_file_ = (file);   \
+    if (rtc_aec_debug_raw_file_ != NULL) {    \
+      fclose(rtc_aec_debug_raw_file_);        \
+    }                                         \
+  } while (0)
+
+#else  // WEBRTC_AEC_DEBUG_DUMP
+// No-op variant used when WEBRTC_AEC_DEBUG_DUMP is not defined. Expands to an
+// empty statement; none of the arguments are evaluated.
+#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \
+  do {                                                   \
+  } while (0)
+
+// No-op variant used when WEBRTC_AEC_DEBUG_DUMP is not defined. Expands to an
+// empty statement; none of the arguments are evaluated. The parameter list
+// uses the same names and order as the logging-enabled variant of this macro
+// (|wav_file| last).
+#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate, \
+                                 sample_rate, wav_file)              \
+  do {                                                               \
+  } while (0)
+
+// No-op variant used when WEBRTC_AEC_DEBUG_DUMP is not defined. Expands to an
+// empty statement; |wav_file| is not evaluated.
+#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \
+  do {                                    \
+  } while (0)
+
+// No-op variant used when WEBRTC_AEC_DEBUG_DUMP is not defined. Expands to an
+// empty statement; none of the arguments are evaluated.
+#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \
+  do {                                                 \
+  } while (0)
+
+// No-op variant used when WEBRTC_AEC_DEBUG_DUMP is not defined. Expands to an
+// empty statement; none of the arguments are evaluated. The parameter list
+// uses the same names and order as the logging-enabled variant of this macro
+// (|file| last).
+#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \
+  do {                                                       \
+  } while (0)
+
+// No-op variant used when WEBRTC_AEC_DEBUG_DUMP is not defined. Expands to an
+// empty statement; |file| is not evaluated.
+#define RTC_AEC_DEBUG_RAW_CLOSE(file) \
+  do {                                \
+  } while (0)
+
+#endif  // WEBRTC_AEC_DEBUG_DUMP
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc
@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/stringutils.h"
+#include "webrtc/common_audio/wav_file.h"
+#include "webrtc/typedefs.h"
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+// (Re)opens the wav writer referenced by |wav_file| at |sample_rate|.
+// If a writer is already open at that rate the call returns immediately; a
+// writer open at any other rate is closed before the new one is created.
+// The output file is named "<name><instance_index>-<process_rate>.wav".
+void WebRtcAec_ReopenWav(const char* name,
+                         int instance_index,
+                         int process_rate,
+                         int sample_rate,
+                         rtc_WavWriter** wav_file) {
+  rtc_WavWriter* const current_writer = *wav_file;
+  if (current_writer) {
+    const bool rate_unchanged =
+        rtc_WavSampleRate(current_writer) == sample_rate;
+    if (rate_unchanged)
+      return;
+    rtc_WavClose(current_writer);
+  }
+
+  char wav_path[64];
+  const int chars_written =
+      rtc::sprintfn(wav_path, sizeof(wav_path), "%s%d-%d.wav", name,
+                    instance_index, process_rate);
+
+  // Formatting must not have reported an output error.
+  RTC_DCHECK_GE(chars_written, 0);
+  // The formatted name must have fit into the buffer.
+  RTC_DCHECK_LT(static_cast<size_t>(chars_written), sizeof(wav_path));
+
+  *wav_file = rtc_WavOpen(wav_path, sample_rate, 1);
+}
+
+// Opens "<name>_<instance_index>.dat" in binary write mode and stores the
+// stream in |*file|. The fopen() result is stored as-is, without a check.
+void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file) {
+  char dump_path[64];
+  const int chars_written = rtc::sprintfn(dump_path, sizeof(dump_path),
+                                          "%s_%d.dat", name, instance_index);
+
+  // Formatting must not have reported an output error.
+  RTC_DCHECK_GE(chars_written, 0);
+  // The formatted name must have fit into the buffer.
+  RTC_DCHECK_LT(static_cast<size_t>(chars_written), sizeof(dump_path));
+
+  FILE* const dump_stream = fopen(dump_path, "wb");
+  *file = dump_stream;
+}
+
+#endif  // WEBRTC_AEC_DEBUG_DUMP
--- a/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h
+++ b/third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h
@ -0,0 +1,41 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
+
+#include <stdio.h>
+
+#include "webrtc/common_audio/wav_file.h"
+#include "webrtc/typedefs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+// Opens a new Wav file for writing. If it was already open with a different
+// sample frequency, it closes it first.
+// If |*wav_file| is already open at |sample_rate| the call does nothing.
+// The file name is built from |name|, |instance_index| and |process_rate|;
+// the newly opened writer is stored back into |*wav_file|.
+void WebRtcAec_ReopenWav(const char* name,
+                         int instance_index,
+                         int process_rate,
+                         int sample_rate,
+                         rtc_WavWriter** wav_file);
+
+// Opens dumpfile with instance-specific filename, built from |name| and
+// |instance_index|, in binary write mode. The stream is stored in |*file|
+// exactly as returned by fopen(), without a check.
+void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file);
+
+#endif  // WEBRTC_AEC_DEBUG_DUMP
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
--- a/Show More
+++ b/Show More