Re #1954: Add WebRTC to third party component
* Add build config for GNU build systems git-svn-id: https://svn.pjsip.org/repos/pjproject/trunk@5428 74dad513-b988-da41-8d7b-12977e46ad98
This commit is contained in:
parent
b86d83f88f
commit
3469abaa11
236
aconfigure
236
aconfigure
|
@ -622,6 +622,10 @@ ac_subst_vars='LTLIBOBJS
|
|||
LIBOBJS
|
||||
ac_main_obj
|
||||
ac_host
|
||||
ac_webrtc_ldflags
|
||||
ac_webrtc_cflags
|
||||
ac_webrtc_instset
|
||||
ac_no_webrtc
|
||||
ac_no_yuv
|
||||
opus_present
|
||||
opus_h_present
|
||||
|
@ -642,8 +646,6 @@ libssl_present
|
|||
openssl_h_present
|
||||
ac_ssl_has_aes_gcm
|
||||
ac_no_ssl
|
||||
ac_webrtc_ldflags
|
||||
ac_webrtc_cflags
|
||||
ac_openh264_ldflags
|
||||
ac_openh264_cflags
|
||||
ac_v4l2_ldflags
|
||||
|
@ -682,6 +684,7 @@ ac_pa_cflags
|
|||
ac_external_pa
|
||||
ac_pjmedia_snd
|
||||
ac_pjmedia_resample
|
||||
ac_external_webrtc
|
||||
ac_external_yuv
|
||||
ac_srtp_shutdown_present
|
||||
ac_srtp_deinit_present
|
||||
|
@ -781,6 +784,7 @@ with_external_speex
|
|||
with_external_gsm
|
||||
with_external_srtp
|
||||
with_external_yuv
|
||||
with_external_webrtc
|
||||
enable_resample
|
||||
enable_sound
|
||||
with_external_pa
|
||||
|
@ -806,8 +810,6 @@ enable_ffmpeg
|
|||
enable_v4l2
|
||||
with_openh264
|
||||
enable_openh264
|
||||
with_webrtc
|
||||
enable_webrtc
|
||||
enable_ipp
|
||||
with_ipp
|
||||
with_ipp_samples
|
||||
|
@ -823,6 +825,7 @@ enable_silk
|
|||
with_opus
|
||||
enable_opus
|
||||
enable_libyuv
|
||||
enable_libwebrtc
|
||||
'
|
||||
ac_precious_vars='build_alias
|
||||
host_alias
|
||||
|
@ -1475,7 +1478,6 @@ Optional Features:
|
|||
--disable-ffmpeg Disable ffmpeg (default: not disabled)
|
||||
--disable-v4l2 Disable Video4Linux2 (default: not disabled)
|
||||
--disable-openh264 Disable OpenH264 (default: not disabled)
|
||||
--disable-webrtc Exclude webrtc in the build
|
||||
--enable-ipp Enable Intel IPP support. Specify the Intel IPP
|
||||
package and samples location using IPPROOT and
|
||||
IPPSAMPLES env var or with --with-ipp and
|
||||
|
@ -1492,6 +1494,7 @@ Optional Features:
|
|||
autodetect)
|
||||
|
||||
--disable-libyuv Exclude libyuv in the build
|
||||
--disable-libwebrtc Exclude libwebrtc in the build
|
||||
|
||||
Optional Packages:
|
||||
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
|
||||
|
@ -1516,6 +1519,11 @@ Optional Packages:
|
|||
make sure that libyuv is accessible to use (hint:
|
||||
use CFLAGS and LDFLAGS env var to set the
|
||||
include/lib paths)
|
||||
--with-external-webrtc Use external webrtc development files, not the one
|
||||
in "third_party" directory. When this option is set,
|
||||
make sure that webrtc is accessible to use (hint:
|
||||
use CFLAGS and LDFLAGS env var to set the
|
||||
include/lib paths)
|
||||
--with-external-pa Use external PortAudio development files, not the
|
||||
one in "third_party" directory. When this option is
|
||||
set, make sure that PortAudio is accessible to use
|
||||
|
@ -1524,7 +1532,6 @@ Optional Packages:
|
|||
--with-sdl=DIR Specify alternate libSDL prefix
|
||||
--with-ffmpeg=DIR Specify alternate FFMPEG prefix
|
||||
--with-openh264=DIR Specify alternate OpenH264 prefix
|
||||
--with-webrtc=DIR Specify alternate WebRtc prefix
|
||||
--with-ipp=DIR Specify the Intel IPP location
|
||||
--with-ipp-samples=DIR Specify the Intel IPP samples location
|
||||
--with-ipp-arch=ARCH Specify the Intel IPP ARCH suffix, e.g. "64" or
|
||||
|
@ -6117,6 +6124,45 @@ fi
|
|||
|
||||
|
||||
|
||||
ac_external_webrtc=0
|
||||
|
||||
|
||||
# Check whether --with-external-webrtc was given.
|
||||
if test "${with_external_webrtc+set}" = set; then :
|
||||
withval=$with_external_webrtc;
|
||||
if test "x$with_external_webrtc" != "xno"; then
|
||||
# Test webrtc installation
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if external webrtc devkit is installed" >&5
|
||||
$as_echo_n "checking if external webrtc devkit is installed... " >&6; }
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
#include <webrtc/modules/audio_processing/aec/aec_core.h>
|
||||
#include <webrtc/modules/audio_processing/aec/include/echo_cancellation.h>
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
WebRtcAec_Create();
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_compile "$LINENO"; then :
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes!!" >&5
|
||||
$as_echo "yes!!" >&6; }
|
||||
ac_external_webrtc="1"
|
||||
|
||||
else
|
||||
as_fn_error $? "Unable to use external webrtc. If webrtc development files are not available in the default locations, use CFLAGS and LDFLAGS env var to set the include/lib paths" "$LINENO" 5
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
fi
|
||||
|
||||
|
||||
fi
|
||||
|
||||
|
||||
|
||||
ac_pjmedia_resample=libresample
|
||||
|
||||
# Check whether --enable-resample was given.
|
||||
|
@ -7401,123 +7447,6 @@ fi
|
|||
|
||||
|
||||
|
||||
|
||||
# Check whether --with-webrtc was given.
|
||||
if test "${with_webrtc+set}" = set; then :
|
||||
withval=$with_webrtc;
|
||||
else
|
||||
with_webrtc=no
|
||||
|
||||
fi
|
||||
|
||||
|
||||
if test "x$ac_cross_compile" != "x" -a "x$with_webrtc" = "xno"; then
|
||||
enable_webrtc=no
|
||||
fi
|
||||
|
||||
|
||||
|
||||
# Check whether --enable-webrtc was given.
|
||||
if test "${enable_webrtc+set}" = set; then :
|
||||
enableval=$enable_webrtc; if test "$enable_webrtc" = "no"; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Checking if webrtc is disabled...yes" >&5
|
||||
$as_echo "Checking if webrtc is disabled...yes" >&6; }
|
||||
fi
|
||||
else
|
||||
|
||||
if test "x$with_webrtc" != "xno" -a "x$with_webrtc" != "x"; then
|
||||
WEBRTC_PREFIX=$with_webrtc
|
||||
WEBRTC_CFLAGS="-I$WEBRTC_PREFIX/src"
|
||||
|
||||
case $target in
|
||||
*-apple-darwin_ios*)
|
||||
case $ARCH in
|
||||
*arm*)
|
||||
WEBRTC_CFLAGS="-DPJMEDIA_WEBRTC_AEC_USE_MOBILE=1 $WEBRTC_CFLAGS"
|
||||
WEBRTC_LDFLAGS="-L$WEBRTC_PREFIX/src/out_ios/Release-iphoneos"
|
||||
WEBRTC_LIBS="-laudio_processing_neon -lcommon_audio_neon"
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*mingw* | *cygw* | *win32* | *w32* | *darwin* | *linux* | *android*)
|
||||
WEBRTC_LDFLAGS="-L$WEBRTC_PREFIX/src/out/Release"
|
||||
WEBRTC_LIBS="-laudio_processing_sse2"
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Using webrtc prefix... $with_webrtc" >&5
|
||||
$as_echo "Using webrtc prefix... $with_webrtc" >&6; }
|
||||
else
|
||||
WEBRTC_CFLAGS=""
|
||||
WEBRTC_LDFLAGS=""
|
||||
fi
|
||||
|
||||
WEBRTC_LIBS="$WEBRTC_LIBS -laudio_processing -lcommon_audio -lsystem_wrappers"
|
||||
|
||||
SAVED_LIBS="$LIBS"
|
||||
SAVED_LDFLAGS="$LDFLAGS"
|
||||
SAVED_CFLAGS="$CFLAGS"
|
||||
|
||||
LIBS="$WEBRTC_LIBS $LIBS"
|
||||
LDFLAGS="$WEBRTC_LDFLAGS $LDFLAGS"
|
||||
CFLAGS="$WEBRTC_CFLAGS $CFLAGS"
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for WebRtcAec_Process in -laudio_processing" >&5
|
||||
$as_echo_n "checking for WebRtcAec_Process in -laudio_processing... " >&6; }
|
||||
if ${ac_cv_lib_audio_processing_WebRtcAec_Process+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
ac_check_lib_save_LIBS=$LIBS
|
||||
LIBS="-laudio_processing
|
||||
$LIBS"
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
|
||||
/* Override any GCC internal prototype to avoid an error.
|
||||
Use char because int might match the return type of a GCC
|
||||
builtin and then its argument prototype would still apply. */
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#endif
|
||||
char WebRtcAec_Process ();
|
||||
int
|
||||
main ()
|
||||
{
|
||||
return WebRtcAec_Process ();
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
_ACEOF
|
||||
if ac_fn_c_try_link "$LINENO"; then :
|
||||
ac_cv_lib_audio_processing_WebRtcAec_Process=yes
|
||||
else
|
||||
ac_cv_lib_audio_processing_WebRtcAec_Process=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext \
|
||||
conftest$ac_exeext conftest.$ac_ext
|
||||
LIBS=$ac_check_lib_save_LIBS
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_audio_processing_WebRtcAec_Process" >&5
|
||||
$as_echo "$ac_cv_lib_audio_processing_WebRtcAec_Process" >&6; }
|
||||
if test "x$ac_cv_lib_audio_processing_WebRtcAec_Process" = xyes; then :
|
||||
ac_webrtc_cflags="-DPJMEDIA_HAS_WEBRTC_AEC=1 $WEBRTC_CFLAGS"
|
||||
ac_webrtc_ldflags="$WEBRTC_LDFLAGS $WEBRTC_LIBS"
|
||||
|
||||
else
|
||||
LIBS="$SAVED_LIBS"
|
||||
LDFLAGS="$SAVED_LDFLAGS"
|
||||
CFLAGS="$SAVED_CFLAGS"
|
||||
|
||||
fi
|
||||
|
||||
|
||||
fi
|
||||
|
||||
|
||||
# Check whether --enable-ipp was given.
|
||||
if test "${enable_ipp+set}" = set; then :
|
||||
enableval=$enable_ipp;
|
||||
|
@ -8473,6 +8402,67 @@ fi
|
|||
|
||||
|
||||
|
||||
|
||||
|
||||
# Check whether --enable-libwebrtc was given.
|
||||
if test "${enable_libwebrtc+set}" = set; then :
|
||||
enableval=$enable_libwebrtc; if test "$enable_libwebrtc" = "no"; then
|
||||
ac_no_webrtc=1
|
||||
$as_echo "#define PJMEDIA_HAS_LIBWEBRTC 0" >>confdefs.h
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Checking if libwebrtc is disabled...yes" >&5
|
||||
$as_echo "Checking if libwebrtc is disabled...yes" >&6; }
|
||||
fi
|
||||
else
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Checking if libwebrtc is disabled...no" >&5
|
||||
$as_echo "Checking if libwebrtc is disabled...no" >&6; }
|
||||
case $target in
|
||||
*-apple-darwin_ios*)
|
||||
case $target in
|
||||
*arm*)
|
||||
ac_webrtc_instset=neon
|
||||
;;
|
||||
*)
|
||||
ac_webrtc_instset=sse2
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*android*)
|
||||
case $TARGET_ABI in
|
||||
armeabi-v7a)
|
||||
ac_webrtc_instset=neon
|
||||
ac_webrtc_cflags="-mfloat-abi=softfp -mfpu=neon"
|
||||
;;
|
||||
armeabi)
|
||||
ac_webrtc_instset=neon
|
||||
ac_webrtc_cflags="-mfloat-abi=softfp -mfpu=neon -march=armv7"
|
||||
;;
|
||||
arm64*)
|
||||
ac_webrtc_instset=neon
|
||||
ac_webrtc_cflags="-DWEBRTC_ARCH_ARM64"
|
||||
;;
|
||||
mips*)
|
||||
ac_webrtc_instset=mips
|
||||
;;
|
||||
*)
|
||||
ac_webrtc_instset=sse2
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*mingw* | *cygw* | *win32* | *w32* | *darwin* | *linux*)
|
||||
ac_webrtc_instset=sse2
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
|
||||
fi
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if select() needs correct nfds" >&5
|
||||
$as_echo_n "checking if select() needs correct nfds... " >&6; }
|
||||
case $target in
|
||||
|
|
155
aconfigure.ac
155
aconfigure.ac
|
@ -598,6 +598,28 @@ AC_ARG_WITH(external-yuv,
|
|||
)
|
||||
|
||||
|
||||
dnl # Use external webrtc installation
|
||||
AC_SUBST(ac_external_webrtc,0)
|
||||
AC_ARG_WITH(external-webrtc,
|
||||
AS_HELP_STRING([--with-external-webrtc],
|
||||
[Use external webrtc development files, not the one in "third_party" directory. When this option is set, make sure that webrtc is accessible to use (hint: use CFLAGS and LDFLAGS env var to set the include/lib paths)]),
|
||||
[
|
||||
if test "x$with_external_webrtc" != "xno"; then
|
||||
# Test webrtc installation
|
||||
AC_MSG_CHECKING([if external webrtc devkit is installed])
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <webrtc/modules/audio_processing/aec/aec_core.h>
|
||||
#include <webrtc/modules/audio_processing/aec/include/echo_cancellation.h>
|
||||
]],
|
||||
[WebRtcAec_Create();])],
|
||||
[AC_MSG_RESULT(yes!!)
|
||||
ac_external_webrtc="1"
|
||||
],
|
||||
[AC_MSG_ERROR([Unable to use external webrtc. If webrtc development files are not available in the default locations, use CFLAGS and LDFLAGS env var to set the include/lib paths])])
|
||||
fi
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
dnl # Resample implementation
|
||||
AC_SUBST(ac_pjmedia_resample,libresample)
|
||||
AC_ARG_ENABLE(resample,
|
||||
|
@ -1243,82 +1265,6 @@ AC_ARG_ENABLE(openh264,
|
|||
])
|
||||
|
||||
|
||||
dnl # WebRtc alt prefix
|
||||
AC_ARG_WITH(webrtc,
|
||||
AS_HELP_STRING([--with-webrtc=DIR],
|
||||
[Specify alternate WebRtc prefix]),
|
||||
[],
|
||||
[with_webrtc=no]
|
||||
)
|
||||
|
||||
dnl # Do not use default webrtc installation if we are cross-compiling
|
||||
if test "x$ac_cross_compile" != "x" -a "x$with_webrtc" = "xno"; then
|
||||
enable_webrtc=no
|
||||
fi
|
||||
|
||||
dnl # WebRtc
|
||||
AC_SUBST(ac_webrtc_cflags)
|
||||
AC_SUBST(ac_webrtc_ldflags)
|
||||
AC_ARG_ENABLE(webrtc,
|
||||
AS_HELP_STRING([--disable-webrtc],
|
||||
[Exclude webrtc in the build]),
|
||||
[if test "$enable_webrtc" = "no"; then
|
||||
AC_MSG_RESULT([Checking if webrtc is disabled...yes])
|
||||
fi],
|
||||
[
|
||||
if test "x$with_webrtc" != "xno" -a "x$with_webrtc" != "x"; then
|
||||
WEBRTC_PREFIX=$with_webrtc
|
||||
WEBRTC_CFLAGS="-I$WEBRTC_PREFIX/src"
|
||||
|
||||
case $target in
|
||||
*-apple-darwin_ios*)
|
||||
case $ARCH in
|
||||
*arm*)
|
||||
WEBRTC_CFLAGS="-DPJMEDIA_WEBRTC_AEC_USE_MOBILE=1 $WEBRTC_CFLAGS"
|
||||
WEBRTC_LDFLAGS="-L$WEBRTC_PREFIX/src/out_ios/Release-iphoneos"
|
||||
WEBRTC_LIBS="-laudio_processing_neon -lcommon_audio_neon"
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*mingw* | *cygw* | *win32* | *w32* | *darwin* | *linux* | *android*)
|
||||
WEBRTC_LDFLAGS="-L$WEBRTC_PREFIX/src/out/Release"
|
||||
WEBRTC_LIBS="-laudio_processing_sse2"
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
|
||||
AC_MSG_RESULT([Using webrtc prefix... $with_webrtc])
|
||||
else
|
||||
WEBRTC_CFLAGS=""
|
||||
WEBRTC_LDFLAGS=""
|
||||
fi
|
||||
|
||||
WEBRTC_LIBS="$WEBRTC_LIBS -laudio_processing -lcommon_audio -lsystem_wrappers"
|
||||
|
||||
SAVED_LIBS="$LIBS"
|
||||
SAVED_LDFLAGS="$LDFLAGS"
|
||||
SAVED_CFLAGS="$CFLAGS"
|
||||
|
||||
LIBS="$WEBRTC_LIBS $LIBS"
|
||||
LDFLAGS="$WEBRTC_LDFLAGS $LDFLAGS"
|
||||
CFLAGS="$WEBRTC_CFLAGS $CFLAGS"
|
||||
|
||||
AC_CHECK_LIB(audio_processing,
|
||||
WebRtcAec_Process,
|
||||
[ ac_webrtc_cflags="-DPJMEDIA_HAS_WEBRTC_AEC=1 $WEBRTC_CFLAGS"
|
||||
ac_webrtc_ldflags="$WEBRTC_LDFLAGS $WEBRTC_LIBS"
|
||||
],
|
||||
[ LIBS="$SAVED_LIBS"
|
||||
LDFLAGS="$SAVED_LDFLAGS"
|
||||
CFLAGS="$SAVED_CFLAGS"
|
||||
],
|
||||
[]
|
||||
)
|
||||
])
|
||||
|
||||
dnl ########################################################
|
||||
dnl # Intel IPP support
|
||||
dnl #
|
||||
|
@ -1819,6 +1765,63 @@ AC_ARG_ENABLE(libyuv,
|
|||
AC_MSG_RESULT([Checking if libyuv is disabled...no]))
|
||||
|
||||
|
||||
dnl # Include webrtc
|
||||
AC_SUBST(ac_no_webrtc)
|
||||
AC_SUBST(ac_webrtc_instset)
|
||||
AC_SUBST(ac_webrtc_cflags)
|
||||
AC_SUBST(ac_webrtc_ldflags)
|
||||
AC_ARG_ENABLE(libwebrtc,
|
||||
AS_HELP_STRING([--disable-libwebrtc],
|
||||
[Exclude libwebrtc in the build]),
|
||||
[if test "$enable_libwebrtc" = "no"; then
|
||||
[ac_no_webrtc=1]
|
||||
AC_DEFINE(PJMEDIA_HAS_LIBWEBRTC,0)
|
||||
AC_MSG_RESULT([Checking if libwebrtc is disabled...yes])
|
||||
fi],
|
||||
[
|
||||
AC_MSG_RESULT([Checking if libwebrtc is disabled...no])
|
||||
case $target in
|
||||
*-apple-darwin_ios*)
|
||||
case $target in
|
||||
*arm*)
|
||||
ac_webrtc_instset=neon
|
||||
;;
|
||||
*)
|
||||
ac_webrtc_instset=sse2
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*android*)
|
||||
case $TARGET_ABI in
|
||||
armeabi-v7a)
|
||||
ac_webrtc_instset=neon
|
||||
ac_webrtc_cflags="-mfloat-abi=softfp -mfpu=neon"
|
||||
;;
|
||||
armeabi)
|
||||
ac_webrtc_instset=neon
|
||||
ac_webrtc_cflags="-mfloat-abi=softfp -mfpu=neon -march=armv7"
|
||||
;;
|
||||
arm64*)
|
||||
ac_webrtc_instset=neon
|
||||
ac_webrtc_cflags="-DWEBRTC_ARCH_ARM64"
|
||||
;;
|
||||
mips*)
|
||||
ac_webrtc_instset=mips
|
||||
;;
|
||||
*)
|
||||
ac_webrtc_instset=sse2
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*mingw* | *cygw* | *win32* | *w32* | *darwin* | *linux*)
|
||||
ac_webrtc_instset=sse2
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
])
|
||||
|
||||
|
||||
dnl ##########################################
|
||||
dnl #
|
||||
dnl # MANUAL CONFIG
|
||||
|
|
14
build.mak.in
14
build.mak.in
|
@ -135,6 +135,20 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
ifneq (@ac_no_webrtc@,1)
|
||||
ifeq (@ac_external_webrtc@,1)
|
||||
APP_THIRD_PARTY_EXT += -lwebrtc
|
||||
else
|
||||
APP_THIRD_PARTY_LIB_FILES += $(PJ_DIR)/third_party/lib/libwebrtc-$(LIB_SUFFIX)
|
||||
ifeq ($(PJ_SHARED_LIBRARIES),)
|
||||
APP_THIRD_PARTY_LIBS += -lwebrtc-$(TARGET_NAME)
|
||||
else
|
||||
APP_THIRD_PARTY_LIBS += -lwebrtc
|
||||
APP_THIRD_PARTY_LIB_FILES += $(PJ_DIR)/third_party/lib/libwebrtc.$(SHLIB_SUFFIX).$(PJ_VERSION_MAJOR) $(PJ_DIR)/third_party/lib/libwebrtc.$(SHLIB_SUFFIX)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
# Additional flags
|
||||
@ac_build_mak_vars@
|
||||
|
|
|
@ -149,6 +149,7 @@ else
|
|||
exit 1
|
||||
fi
|
||||
|
||||
export TARGET_ABI="${TARGET_ABI}"
|
||||
export CC="${ANDROID_TC}/bin/${TARGET_HOST}-gcc"
|
||||
export CXX="${ANDROID_TC}/bin/${TARGET_HOST}-g++"
|
||||
export AR="${ANDROID_TC}/bin/${TARGET_HOST}-ar"
|
||||
|
|
|
@ -32,18 +32,14 @@ ANDROID_CFLAGS = @ac_android_cflags@
|
|||
OPENH264_CFLAGS = @ac_openh264_cflags@
|
||||
OPENH264_LDFLAGS = @ac_openh264_ldflags@
|
||||
|
||||
# WebRtc
|
||||
WEBRTC_CFLAGS = @ac_webrtc_cflags@
|
||||
WEBRTC_LDFLAGS = @ac_webrtc_ldflags@
|
||||
|
||||
|
||||
# PJMEDIA features exclusion
|
||||
export CFLAGS += @ac_no_small_filter@ @ac_no_large_filter@ @ac_no_speex_aec@ \
|
||||
$(SDL_CFLAGS) $(FFMPEG_CFLAGS) $(V4L2_CFLAGS) $(QT_CFLAGS) \
|
||||
$(DARWIN_CFLAGS) $(ANDROID_CFLAGS) \
|
||||
$(OPENH264_CFLAGS) $(WEBRTC_CFLAGS)
|
||||
$(OPENH264_CFLAGS)
|
||||
export LDFLAGS += $(SDL_LDFLAGS) $(FFMPEG_LDFLAGS) $(V4L2_LDFLAGS) \
|
||||
$(OPENH264_LDFLAGS) $(WEBRTC_LDFLAGS)
|
||||
$(OPENH264_LDFLAGS)
|
||||
|
||||
# Define the desired sound device backend
|
||||
# Valid values are:
|
||||
|
@ -203,6 +199,22 @@ export CFLAGS += -I$(THIRD_PARTY)/yuv/include
|
|||
endif
|
||||
endif
|
||||
|
||||
#
|
||||
# libwebrtc
|
||||
#
|
||||
# Compile-time switches for the WebRTC echo canceller. The @ac_no_webrtc@,
# @ac_webrtc_instset@ and @ac_external_webrtc@ placeholders are substituted
# by configure.
ifeq (@ac_no_webrtc@,1)
export CFLAGS += -DPJMEDIA_HAS_WEBRTC_AEC=0
else
export CFLAGS += -DPJMEDIA_HAS_WEBRTC_AEC=1

# Select the mobile AEC on ARM builds. configure only ever sets the
# instruction set to neon, sse2 or mips (ARM targets get "neon"), so the
# substituted value is tested directly for "neon". The previous test wrapped
# the value in $(...), which looked up a make variable named after the
# instruction set (always empty), and searched for "arm", which never occurs.
ifneq ($(findstring neon,@ac_webrtc_instset@),)
export CFLAGS += -DPJMEDIA_WEBRTC_AEC_USE_MOBILE=1
endif

# Only the bundled copy needs its include path added here; with
# --with-external-webrtc the user supplies the paths through CFLAGS/LDFLAGS.
ifeq (@ac_external_webrtc@,0)
export CFLAGS += -I$(THIRD_PARTY)/webrtc/src
endif
endif
|
||||
|
||||
|
||||
#
|
||||
# MacOSX specific
|
||||
|
|
|
@ -64,3 +64,43 @@ else
|
|||
DIRS += yuv
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq (@ac_no_webrtc@,1)
|
||||
ifeq (@ac_external_webrtc@,1)
|
||||
# External webrtc
|
||||
else
|
||||
DIRS += webrtc
|
||||
WEBRTC_OTHER_CFLAGS = -fexceptions -DWEBRTC_POSIX=1 @ac_webrtc_cflags@
|
||||
ifneq ($(findstring sse2,@ac_webrtc_instset@),)
|
||||
WEBRTC_SRC = \
|
||||
modules/audio_processing/aec/aec_core_sse2.o \
|
||||
modules/audio_processing/aec/aec_rdft_sse2.o \
|
||||
modules/audio_processing/aecm/aecm_core_c.o \
|
||||
modules/audio_processing/ns/nsx_core_c.o \
|
||||
system_wrappers/source/cpu_features.o
|
||||
else ifneq ($(findstring neon,@ac_webrtc_instset@),)
|
||||
WEBRTC_SRC = \
|
||||
modules/audio_processing/aec/aec_core_neon.o \
|
||||
modules/audio_processing/aec/aec_rdft_neon.o \
|
||||
modules/audio_processing/aecm/aecm_core_c.o \
|
||||
modules/audio_processing/aecm/aecm_core_neon.o \
|
||||
modules/audio_processing/ns/nsx_core_c.o \
|
||||
modules/audio_processing/ns/nsx_core_neon.o \
|
||||
common_audio/signal_processing/cross_correlation_neon.o \
|
||||
common_audio/signal_processing/downsample_fast_neon.o \
|
||||
common_audio/signal_processing/min_max_operations_neon.o
|
||||
WEBRTC_OTHER_CFLAGS += -DWEBRTC_HAS_NEON
|
||||
else ifneq ($(findstring mips,@ac_webrtc_instset@),)
|
||||
WEBRTC_SRC = \
|
||||
modules/audio_processing/aec/aec_core_mips.o \
|
||||
modules/audio_processing/aec/aec_rdft_mips.o \
|
||||
modules/audio_processing/aecm/aecm_core_mips.o \
|
||||
modules/audio_processing/ns/nsx_core_mips.o \
|
||||
common_audio/signal_processing/cross_correlation_mips.o \
|
||||
common_audio/signal_processing/downsample_fast_mips.o \
|
||||
common_audio/signal_processing/min_max_operations_mips.o
|
||||
|
||||
WEBRTC_OTHER_CFLAGS += -DMIPS_FPU_LE
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
include ../../../build.mak
|
||||
include ../../../build/common.mak
|
||||
include ../os-$(OS_NAME).mak
|
||||
|
||||
export LIBDIR := ../../lib
|
||||
|
||||
RULES_MAK := $(PJDIR)/build/rules.mak
|
||||
|
||||
export WEBRTC_LIB := libwebrtc-$(TARGET_NAME)$(LIBEXT)
|
||||
|
||||
ifeq ($(PJ_SHARED_LIBRARIES),)
|
||||
else
|
||||
export WEBRTC_SONAME := libwebrtc.$(SHLIB_SUFFIX)
|
||||
export WEBRTC_SHLIB := $(WEBRTC_SONAME).$(PJ_VERSION_MAJOR)
|
||||
endif
|
||||
|
||||
###############################################################################
|
||||
# Gather all flags.
|
||||
#
|
||||
export _CFLAGS := $(CC_CFLAGS) $(OS_CFLAGS) $(HOST_CFLAGS) $(M_CFLAGS) \
|
||||
$(CFLAGS) $(CC_INC). $(CC_INC)../../webrtc/src \
|
||||
$(CC_INC)../../../pjlib/include
|
||||
export _CXXFLAGS:= $(_CFLAGS) $(CC_CXXFLAGS) $(OS_CXXFLAGS) $(M_CXXFLAGS) \
|
||||
$(HOST_CXXFLAGS) $(CXXFLAGS)
|
||||
export _LDFLAGS := $(CC_LDFLAGS) $(OS_LDFLAGS) $(M_LDFLAGS) $(HOST_LDFLAGS) \
|
||||
$(LDFLAGS)
|
||||
|
||||
export WEBRTC_SRCDIR = ../../webrtc/src/webrtc/
|
||||
export WEBRTC_OBJS = \
|
||||
modules/audio_processing/aec/aec_core.o \
|
||||
modules/audio_processing/aec/aec_rdft.o \
|
||||
modules/audio_processing/aec/aec_resampler.o \
|
||||
modules/audio_processing/aec/echo_cancellation.o \
|
||||
modules/audio_processing/aecm/aecm_core.o \
|
||||
modules/audio_processing/aecm/echo_control_mobile.o \
|
||||
modules/audio_processing/ns/noise_suppression.o \
|
||||
modules/audio_processing/ns/noise_suppression_x.o \
|
||||
modules/audio_processing/ns/ns_core.o \
|
||||
modules/audio_processing/ns/nsx_core.o \
|
||||
modules/audio_processing/utility/delay_estimator_wrapper.o \
|
||||
modules/audio_processing/utility/delay_estimator.o \
|
||||
common_audio/fft4g.o \
|
||||
common_audio/ring_buffer.o \
|
||||
common_audio/signal_processing/complex_bit_reverse.o \
|
||||
common_audio/signal_processing/complex_fft.o \
|
||||
common_audio/signal_processing/copy_set_operations.o \
|
||||
common_audio/signal_processing/cross_correlation.o \
|
||||
common_audio/signal_processing/division_operations.o \
|
||||
common_audio/signal_processing/downsample_fast.o \
|
||||
common_audio/signal_processing/energy.o \
|
||||
common_audio/signal_processing/get_scaling_square.o \
|
||||
common_audio/signal_processing/min_max_operations.o \
|
||||
common_audio/signal_processing/randomization_functions.o \
|
||||
common_audio/signal_processing/real_fft.o \
|
||||
common_audio/signal_processing/spl_init.o \
|
||||
common_audio/signal_processing/spl_sqrt.o \
|
||||
common_audio/signal_processing/spl_sqrt_floor.o \
|
||||
common_audio/signal_processing/vector_scaling_operations.o \
|
||||
$(WEBRTC_SRC)
|
||||
|
||||
|
||||
export WEBRTC_CFLAGS = $(_CFLAGS) $(WEBRTC_OTHER_CFLAGS)
|
||||
export WEBRTC_CXXFLAGS = $(WEBRTC_CFLAGS)
|
||||
|
||||
|
||||
export CC_OUT CC AR RANLIB HOST_MV HOST_RM HOST_RMDIR HOST_MKDIR OBJEXT LD LDOUT
|
||||
###############################################################################
|
||||
# Main entry
|
||||
#
|
||||
# $(TARGET) is defined in os-$(OS_NAME).mak file in current directory.
|
||||
#
|
||||
TARGETS := $(WEBRTC_LIB) $(WEBRTC_SONAME)
|
||||
|
||||
all: $(TARGETS)
|
||||
|
||||
doc:
|
||||
cd .. && doxygen docs/doxygen.cfg
|
||||
|
||||
dep: depend
|
||||
distclean: realclean
|
||||
|
||||
.PHONY: all dep depend clean realclean distclean
|
||||
.PHONY: $(TARGETS)
|
||||
.PHONY: $(WEBRTC_LIB) $(WEBRTC_SONAME)
|
||||
|
||||
libwebrtc: $(WEBRTC_LIB)
|
||||
$(WEBRTC_SONAME): $(WEBRTC_LIB)
|
||||
$(WEBRTC_LIB) $(WEBRTC_SONAME):
|
||||
$(MAKE) -f $(RULES_MAK) APP=WEBRTC app=libwebrtc $(subst /,$(HOST_PSEP),$(LIBDIR)/$@)
|
||||
|
||||
clean print_lib:
|
||||
$(MAKE) -f $(RULES_MAK) APP=WEBRTC app=libwebrtc $@
|
||||
|
||||
realclean:
|
||||
$(subst @@,$(subst /,$(HOST_PSEP),.webrtc-$(TARGET_NAME).depend),$(HOST_RMR))
|
||||
|
||||
$(MAKE) -f $(RULES_MAK) APP=WEBRTC app=libwebrtc $@
|
||||
|
||||
depend:
|
||||
$(MAKE) -f $(RULES_MAK) APP=WEBRTC app=libwebrtc $@
|
|
@ -0,0 +1,2 @@
|
|||
Notes:
|
||||
* Source code for webrtc from https://chromium.googlesource.com/external/webrtc circa Oct 2015.
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_COMMON_AUDIO_FFT4G_H_
#define WEBRTC_COMMON_AUDIO_FFT4G_H_

// size_t is used in the prototype below; include it here so this header can
// be included first in a translation unit.
#include <stddef.h>

#if defined(__cplusplus)
extern "C" {
#endif

// Refer to fft4g.c for documentation.
void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w);

#if defined(__cplusplus)
}
#endif

#endif  // WEBRTC_COMMON_AUDIO_FFT4G_H_
|
|
@ -0,0 +1,247 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// A ring buffer to hold arbitrary data. Provides no thread safety. Unless
|
||||
// otherwise specified, functions return 0 on success and -1 on error.
|
||||
|
||||
#include "webrtc/common_audio/ring_buffer.h"
|
||||
|
||||
#include <stddef.h> // size_t
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Lap relationship between the read and write positions.
// SAME_WRAP: reader and writer are on the same pass over the storage.
// DIFF_WRAP: the writer has wrapped past the end and the reader has not.
enum Wrap {
  SAME_WRAP = 0,
  DIFF_WRAP = 1
};
|
||||
|
||||
struct RingBuffer {
|
||||
size_t read_pos;
|
||||
size_t write_pos;
|
||||
size_t element_count;
|
||||
size_t element_size;
|
||||
enum Wrap rw_wrap;
|
||||
char* data;
|
||||
};
|
||||
|
||||
// Get address of region(s) from which we can read data.
|
||||
// If the region is contiguous, |data_ptr_bytes_2| will be zero.
|
||||
// If non-contiguous, |data_ptr_bytes_2| will be the size in bytes of the second
|
||||
// region. Returns room available to be read or |element_count|, whichever is
|
||||
// smaller.
|
||||
static size_t GetBufferReadRegions(RingBuffer* buf,
|
||||
size_t element_count,
|
||||
void** data_ptr_1,
|
||||
size_t* data_ptr_bytes_1,
|
||||
void** data_ptr_2,
|
||||
size_t* data_ptr_bytes_2) {
|
||||
|
||||
const size_t readable_elements = WebRtc_available_read(buf);
|
||||
const size_t read_elements = (readable_elements < element_count ?
|
||||
readable_elements : element_count);
|
||||
const size_t margin = buf->element_count - buf->read_pos;
|
||||
|
||||
// Check to see if read is not contiguous.
|
||||
if (read_elements > margin) {
|
||||
// Write data in two blocks that wrap the buffer.
|
||||
*data_ptr_1 = buf->data + buf->read_pos * buf->element_size;
|
||||
*data_ptr_bytes_1 = margin * buf->element_size;
|
||||
*data_ptr_2 = buf->data;
|
||||
*data_ptr_bytes_2 = (read_elements - margin) * buf->element_size;
|
||||
} else {
|
||||
*data_ptr_1 = buf->data + buf->read_pos * buf->element_size;
|
||||
*data_ptr_bytes_1 = read_elements * buf->element_size;
|
||||
*data_ptr_2 = NULL;
|
||||
*data_ptr_bytes_2 = 0;
|
||||
}
|
||||
|
||||
return read_elements;
|
||||
}
|
||||
|
||||
RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size) {
|
||||
RingBuffer* self = NULL;
|
||||
if (element_count == 0 || element_size == 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
self = malloc(sizeof(RingBuffer));
|
||||
if (!self) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
self->data = malloc(element_count * element_size);
|
||||
if (!self->data) {
|
||||
free(self);
|
||||
self = NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
self->element_count = element_count;
|
||||
self->element_size = element_size;
|
||||
WebRtc_InitBuffer(self);
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
// Resets |self| to the empty state: both positions return to the start, the
// wrap indicator is cleared and the whole storage is zero-filled.
// |self| is dereferenced without a NULL check.
void WebRtc_InitBuffer(RingBuffer* self) {
  const size_t total_bytes = self->element_count * self->element_size;

  memset(self->data, 0, total_bytes);

  self->rw_wrap = SAME_WRAP;
  self->write_pos = 0;
  self->read_pos = 0;
}
|
||||
|
||||
void WebRtc_FreeBuffer(void* handle) {
|
||||
RingBuffer* self = (RingBuffer*)handle;
|
||||
if (!self) {
|
||||
return;
|
||||
}
|
||||
|
||||
free(self->data);
|
||||
free(self);
|
||||
}
|
||||
|
||||
// Reads up to |element_count| elements and advances the read position.
// |data| must be a caller buffer large enough for the request; it is always
// required, even when |data_ptr| is given.
// - If |data_ptr| is NULL, the elements are copied into |data|.
// - If |data_ptr| is non-NULL and the read is contiguous, nothing is copied
//   and |*data_ptr| points directly into the ring storage.
// - If the read wraps around, the two pieces are joined in |data| and
//   |*data_ptr| (when given) points to |data|.
// Returns the number of elements read; 0 if |self| or |data| is NULL.
size_t WebRtc_ReadBuffer(RingBuffer* self,
                         void** data_ptr,
                         void* data,
                         size_t element_count) {
  void* first = NULL;
  void* second = NULL;
  size_t first_bytes = 0;
  size_t second_bytes = 0;
  size_t count = 0;
  int wrapped = 0;

  if (self == NULL || data == NULL) {
    return 0;
  }

  count = GetBufferReadRegions(self,
                               element_count,
                               &first,
                               &first_bytes,
                               &second,
                               &second_bytes);
  wrapped = (second_bytes > 0);

  // The first region is copied when the read wraps or when the caller did
  // not ask for a direct pointer.
  if (wrapped || data_ptr == NULL) {
    memcpy(data, first, first_bytes);
  }
  if (wrapped) {
    memcpy(((char*) data) + first_bytes, second, second_bytes);
    first = data;
  }

  if (data_ptr != NULL) {
    // |first| == |data| in the case of a wrap.
    *data_ptr = first;
  }

  // Consume what was just read.
  WebRtc_MoveReadPtr(self, (int) count);

  return count;
}
|
||||
|
||||
// Copies up to |element_count| elements from |data| into the buffer, limited
// by the free space, and advances the write position.
// Returns the number of elements written; 0 if |self| or |data| is NULL.
size_t WebRtc_WriteBuffer(RingBuffer* self,
                          const void* data,
                          size_t element_count) {
  const char* src = (const char*) data;
  size_t room = 0;
  size_t total = 0;
  size_t until_end = 0;
  size_t remaining = 0;

  if (self == NULL || data == NULL) {
    return 0;
  }

  room = WebRtc_available_write(self);
  total = (element_count < room ? element_count : room);
  until_end = self->element_count - self->write_pos;
  remaining = total;

  if (total > until_end) {
    // The write runs past the end of the storage: fill the tail first, then
    // continue from the start and note that the writer is now a lap ahead.
    memcpy(self->data + self->write_pos * self->element_size,
           src, until_end * self->element_size);
    src += until_end * self->element_size;
    remaining = total - until_end;
    self->write_pos = 0;
    self->rw_wrap = DIFF_WRAP;
  }

  memcpy(self->data + self->write_pos * self->element_size,
         src, remaining * self->element_size);
  self->write_pos += remaining;

  return total;
}
|
||||
|
||||
// Moves the read position by |element_count| elements. A positive value skips
// forward and is limited to the number of readable elements; a negative value
// steps backward and is limited to the amount of free space.
// Returns the number of elements actually moved (same sign as the request),
// or 0 if |self| is NULL.
int WebRtc_MoveReadPtr(RingBuffer* self, int element_count) {
  // Signed arithmetic is used throughout because the move may be negative.
  int capacity = 0;
  int max_forward = 0;
  int max_backward = 0;
  int moved = element_count;
  int new_pos = 0;

  if (self == NULL) {
    return 0;
  }

  capacity = (int) self->element_count;
  max_backward = (int) WebRtc_available_write(self);
  max_forward = (int) WebRtc_available_read(self);

  if (moved > max_forward) {
    moved = max_forward;
  }
  if (moved < -max_backward) {
    moved = -max_backward;
  }

  new_pos = (int) self->read_pos + moved;

  // The comparison is strict: a position equal to the capacity is left
  // as-is and only wraps on a later move.
  if (new_pos > capacity) {
    // Passed the end going forward: the reader catches up with the writer's
    // lap.
    new_pos -= capacity;
    self->rw_wrap = SAME_WRAP;
  }
  if (new_pos < 0) {
    // Passed the start going backward: the reader drops a lap behind.
    new_pos += capacity;
    self->rw_wrap = DIFF_WRAP;
  }

  self->read_pos = (size_t) new_pos;

  return moved;
}
|
||||
|
||||
size_t WebRtc_available_read(const RingBuffer* self) {
|
||||
if (!self) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (self->rw_wrap == SAME_WRAP) {
|
||||
return self->write_pos - self->read_pos;
|
||||
} else {
|
||||
return self->element_count - self->read_pos + self->write_pos;
|
||||
}
|
||||
}
|
||||
|
||||
size_t WebRtc_available_write(const RingBuffer* self) {
|
||||
if (!self) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return self->element_count - WebRtc_available_read(self);
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// A ring buffer to hold arbitrary data. Provides no thread safety. Unless
|
||||
// otherwise specified, functions return 0 on success and -1 on error.
|
||||
|
||||
#ifndef WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
|
||||
#define WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stddef.h> // size_t
|
||||
|
||||
typedef struct RingBuffer RingBuffer;
|
||||
|
||||
// Creates and initializes the buffer. Returns NULL on failure.
|
||||
RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size);
|
||||
void WebRtc_InitBuffer(RingBuffer* handle);
|
||||
void WebRtc_FreeBuffer(void* handle);
|
||||
|
||||
// Reads data from the buffer. The |data_ptr| will point to the address where
|
||||
// it is located. If all |element_count| data are feasible to read without
|
||||
// buffer wrap around |data_ptr| will point to the location in the buffer.
|
||||
// Otherwise, the data will be copied to |data| (memory allocation done by the
|
||||
// user) and |data_ptr| points to the address of |data|. |data_ptr| is only
|
||||
// guaranteed to be valid until the next call to WebRtc_WriteBuffer().
|
||||
//
|
||||
// To force a copying to |data|, pass a NULL |data_ptr|.
|
||||
//
|
||||
// Returns number of elements read.
|
||||
size_t WebRtc_ReadBuffer(RingBuffer* handle,
|
||||
void** data_ptr,
|
||||
void* data,
|
||||
size_t element_count);
|
||||
|
||||
// Writes |data| to buffer and returns the number of elements written.
|
||||
size_t WebRtc_WriteBuffer(RingBuffer* handle, const void* data,
|
||||
size_t element_count);
|
||||
|
||||
// Moves the buffer read position and returns the number of elements moved.
|
||||
// Positive |element_count| moves the read position towards the write position,
|
||||
// that is, flushing the buffer. Negative |element_count| moves the read
|
||||
// position away from the write position, that is, stuffing the buffer.
|
||||
// Returns number of elements moved.
|
||||
int WebRtc_MoveReadPtr(RingBuffer* handle, int element_count);
|
||||
|
||||
// Returns number of available elements to read.
|
||||
size_t WebRtc_available_read(const RingBuffer* handle);
|
||||
|
||||
// Returns number of available elements for write.
|
||||
size_t WebRtc_available_write(const RingBuffer* handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
|
103
third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c
vendored
Normal file
103
third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c
vendored
Normal file
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_AutoCorrToReflCoef().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
void WebRtcSpl_AutoCorrToReflCoef(const int32_t *R, int use_order, int16_t *K)
|
||||
{
|
||||
int i, n;
|
||||
int16_t tmp;
|
||||
const int32_t *rptr;
|
||||
int32_t L_num, L_den;
|
||||
int16_t *acfptr, *pptr, *wptr, *p1ptr, *w1ptr, ACF[WEBRTC_SPL_MAX_LPC_ORDER],
|
||||
P[WEBRTC_SPL_MAX_LPC_ORDER], W[WEBRTC_SPL_MAX_LPC_ORDER];
|
||||
|
||||
// Initialize loop and pointers.
|
||||
acfptr = ACF;
|
||||
rptr = R;
|
||||
pptr = P;
|
||||
p1ptr = &P[1];
|
||||
w1ptr = &W[1];
|
||||
wptr = w1ptr;
|
||||
|
||||
// First loop; n=0. Determine shifting.
|
||||
tmp = WebRtcSpl_NormW32(*R);
|
||||
*acfptr = (int16_t)((*rptr++ << tmp) >> 16);
|
||||
*pptr++ = *acfptr++;
|
||||
|
||||
// Initialize ACF, P and W.
|
||||
for (i = 1; i <= use_order; i++)
|
||||
{
|
||||
*acfptr = (int16_t)((*rptr++ << tmp) >> 16);
|
||||
*wptr++ = *acfptr;
|
||||
*pptr++ = *acfptr++;
|
||||
}
|
||||
|
||||
// Compute reflection coefficients.
|
||||
for (n = 1; n <= use_order; n++, K++)
|
||||
{
|
||||
tmp = WEBRTC_SPL_ABS_W16(*p1ptr);
|
||||
if (*P < tmp)
|
||||
{
|
||||
for (i = n; i <= use_order; i++)
|
||||
*K++ = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Division: WebRtcSpl_div(tmp, *P)
|
||||
*K = 0;
|
||||
if (tmp != 0)
|
||||
{
|
||||
L_num = tmp;
|
||||
L_den = *P;
|
||||
i = 15;
|
||||
while (i--)
|
||||
{
|
||||
(*K) <<= 1;
|
||||
L_num <<= 1;
|
||||
if (L_num >= L_den)
|
||||
{
|
||||
L_num -= L_den;
|
||||
(*K)++;
|
||||
}
|
||||
}
|
||||
if (*p1ptr > 0)
|
||||
*K = -*K;
|
||||
}
|
||||
|
||||
// Last iteration; don't do Schur recursion.
|
||||
if (n == use_order)
|
||||
return;
|
||||
|
||||
// Schur recursion.
|
||||
pptr = P;
|
||||
wptr = w1ptr;
|
||||
tmp = (int16_t)(((int32_t)*p1ptr * (int32_t)*K + 16384) >> 15);
|
||||
*pptr = WebRtcSpl_AddSatW16(*pptr, tmp);
|
||||
pptr++;
|
||||
for (i = 1; i <= use_order - n; i++)
|
||||
{
|
||||
tmp = (int16_t)(((int32_t)*wptr * (int32_t)*K + 16384) >> 15);
|
||||
*pptr = WebRtcSpl_AddSatW16(*(pptr + 1), tmp);
|
||||
pptr++;
|
||||
tmp = (int16_t)(((int32_t)*pptr * (int32_t)*K + 16384) >> 15);
|
||||
*wptr = WebRtcSpl_AddSatW16(*wptr, tmp);
|
||||
wptr++;
|
||||
}
|
||||
}
|
||||
}
|
65
third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c
vendored
Normal file
65
third_party/webrtc/src/webrtc/common_audio/signal_processing/auto_correlation.c
vendored
Normal file
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
// Computes the autocorrelation of |in_vector| for lags 0..|order| and stores
// the |order| + 1 sums in |result|. Every product is right-shifted by a
// common scaling factor, chosen from the largest absolute sample and the
// vector length; that factor is written to |scale|. Returns |order| + 1.
size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
                                 size_t in_vector_length,
                                 size_t order,
                                 int32_t* result,
                                 int* scale) {
  int16_t peak = 0;
  int shift = 0;
  size_t lag = 0;

  assert(order <= in_vector_length);

  // Largest absolute sample value in the input.
  peak = WebRtcSpl_MaxAbsValueW16(in_vector, in_vector_length);

  // Pick a shift so that (in_vector_length * peak * peak) cannot overflow
  // the accumulator. An all-zero input needs no scaling.
  if (peak != 0) {
    // Bits needed to represent the number of accumulated terms.
    const int length_bits = WebRtcSpl_GetSizeInBits((uint32_t)in_vector_length);
    // Headroom available in the largest possible product.
    const int headroom = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(peak, peak));

    if (headroom <= length_bits) {
      shift = length_bits - headroom;
    }
  }

  // One scaled sum of products per lag.
  for (lag = 0; lag < order + 1; lag++) {
    const int16_t* delayed = in_vector + lag;
    int32_t acc = 0;
    size_t k = 0;

    /* Process four terms per pass while at least four remain. */
    for (; lag + k + 3 < in_vector_length; k += 4) {
      acc += (in_vector[k + 0] * delayed[k + 0]) >> shift;
      acc += (in_vector[k + 1] * delayed[k + 1]) >> shift;
      acc += (in_vector[k + 2] * delayed[k + 2]) >> shift;
      acc += (in_vector[k + 3] * delayed[k + 3]) >> shift;
    }
    /* Remaining zero to three terms. */
    for (; k < in_vector_length - lag; k++) {
      acc += (in_vector[k] * delayed[k]) >> shift;
    }
    result[lag] = acc;
  }

  *scale = shift;
  return order + 1;
}
|
108
third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c
vendored
Normal file
108
third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse.c
vendored
Normal file
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
/* Tables of complex-element indexes that are bit-reversed images of each
 * other and therefore need to be swapped. Entries come in pairs: the even
 * positions index_7[{0, 2, 4, ...}] hold the left side of each swap and the
 * odd positions index_7[{1, 3, 5, ...}] hold the right side. Same for
 * index_8.
 */

/* Indexes for the case of stages == 7. */
static const int16_t index_7[112] = {
  1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104,
  12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52,
  23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98,
  37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70,
  51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69,
  81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125,
  103, 115, 111, 123
};

/* Indexes for the case of stages == 8. */
static const int16_t index_8[240] = {
  1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80,
  11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20,
  40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184,
  30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41,
  148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76,
  51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62,
  124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82,
  75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87,
  234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101,
  166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142,
  115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131,
  193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201,
  149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171,
  213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227,
  203, 211, 207, 243, 215, 235, 223, 251, 239, 247
};

/* Swaps complex elements |a| and |b| of |complex_data|, where element k
 * occupies complex_data[2 * k] (real) and complex_data[2 * k + 1]
 * (imaginary). The two 16-bit halves are moved individually instead of
 * through an int32_t* alias, so the buffer only needs int16_t alignment and
 * no object is accessed through an incompatible pointer type.
 */
static void SwapComplexElements(int16_t* complex_data, int a, int b) {
  const int16_t real = complex_data[2 * a];
  const int16_t imag = complex_data[2 * a + 1];

  complex_data[2 * a] = complex_data[2 * b];
  complex_data[2 * a + 1] = complex_data[2 * b + 1];
  complex_data[2 * b] = real;
  complex_data[2 * b + 1] = imag;
}

/* Reorders the (1 << stages) interleaved complex values in |complex_data|
 * into bit-reversed index order, in place.
 *
 * For stages == 7 and stages == 8 the swap pairs are read from the
 * precomputed tables above, which saves the index computation; any other
 * value of |stages| uses the generic computation of the reversed index.
 */
void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
  if (stages == 7 || stages == 8) {
    int m = 0;
    int length = 112;
    const int16_t* index = index_7;

    if (stages == 8) {
      length = 240;
      index = index_8;
    }

    /* Decimation in time. Swap the elements with bit-reversed indexes. */
    for (m = 0; m < length; m += 2) {
      SwapComplexElements(complex_data, index[m], index[m + 1]);
    }
  } else {
    int m = 0, mr = 0, l = 0;
    int n = 1 << stages;
    int nn = n - 1;

    /* Decimation in time - re-order data */
    for (m = 1; m <= nn; ++m) {
      /* Derive the bit-reversed counterpart |mr| of |m| from the previous
       * |mr|: find the highest power of two |l| not exceeding nn - mr, then
       * keep the bits of |mr| below it and set that bit.
       */
      l = n;
      do {
        l >>= 1;
      } while (l > nn - mr);
      mr = (mr & (l - 1)) + l;

      /* Each pair is swapped once only; self-reversed indexes are skipped. */
      if (mr <= m) {
        continue;
      }

      SwapComplexElements(complex_data, m, mr);
    }
  }
}
|
119
third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S
vendored
Normal file
119
third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_arm.S
vendored
Normal file
|
@ -0,0 +1,119 @@
|
|||
@
|
||||
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
@
|
||||
@ Use of this source code is governed by a BSD-style license
|
||||
@ that can be found in the LICENSE file in the root of the source
|
||||
@ tree. An additional intellectual property rights grant can be found
|
||||
@ in the file PATENTS. All contributing project authors may
|
||||
@ be found in the AUTHORS file in the root of the source tree.
|
||||
@
|
||||
|
||||
@ This file contains the function WebRtcSpl_ComplexBitReverse(), optimized
|
||||
@ for ARMv5 platforms.
|
||||
@ Reference C code is in file complex_bit_reverse.c. Bit-exact.
|
||||
|
||||
#include "webrtc/system_wrappers/interface/asm_defines.h"
|
||||
|
||||
@ WebRtcSpl_ComplexBitReverse: swaps 32-bit words of the complex_data buffer
@ so that they end up in bit-reversed index order.
@ r0 is used as the base address of complex_data and r1 is compared against
@ the stage count. Stage counts 7 and 8 take the table-driven loop at
@ PRE_LOOP_STAGES_7_OR_8; every other value takes the generic loop at
@ LOOP_GENERIC, which mirrors the C reference statement by statement.
GLOBAL_FUNCTION WebRtcSpl_ComplexBitReverse
|
||||
.align 2
|
||||
DEFINE_FUNCTION WebRtcSpl_ComplexBitReverse
|
||||
push {r4-r7}
|
||||
|
||||
@ The adr/mov pairs below do not alter the flags, so each beq still tests
@ the cmp that precedes it.
cmp r1, #7
|
||||
adr r3, index_7 @ Table pointer.
|
||||
mov r4, #112 @ Number of 16-bit table entries (two are consumed per pass).
|
||||
beq PRE_LOOP_STAGES_7_OR_8
|
||||
|
||||
cmp r1, #8
|
||||
adr r3, index_8 @ Table pointer.
|
||||
mov r4, #240 @ Number of 16-bit table entries (two are consumed per pass).
|
||||
beq PRE_LOOP_STAGES_7_OR_8
|
||||
|
||||
@ Generic path: r3 = m, r1 = n, r6 = nn, r4 = mr, r5 = address of element m - 1.
mov r3, #1 @ Initialize m.
|
||||
mov r1, r3, asl r1 @ n = 1 << stages;
|
||||
subs r6, r1, #1 @ nn = n - 1;
|
||||
ble END
|
||||
|
||||
mov r5, r0 @ &complex_data
|
||||
mov r4, #0 @ mr
|
||||
|
||||
LOOP_GENERIC:
|
||||
rsb r12, r4, r6 @ r12 = nn - mr, the bound for the test l > nn - mr.
|
||||
mov r2, r1 @ l = n
|
||||
|
||||
LOOP_SHIFT:
|
||||
asr r2, #1 @ l >>= 1;
|
||||
cmp r2, r12
|
||||
bgt LOOP_SHIFT
|
||||
|
||||
sub r12, r2, #1
|
||||
and r4, r12, r4
|
||||
add r4, r2 @ mr = (mr & (l - 1)) + l;
|
||||
cmp r4, r3 @ mr <= m ?
|
||||
ble UPDATE_REGISTERS
|
||||
|
||||
mov r12, r4, asl #2 @ Byte offset of element mr (4 bytes per element).
|
||||
ldr r7, [r5, #4] @ complex_data[2 * m, 2 * m + 1].
|
||||
@ Offset 4 due to m incrementing from 1.
|
||||
ldr r2, [r0, r12] @ complex_data[2 * mr, 2 * mr + 1].
|
||||
str r7, [r0, r12]
|
||||
str r2, [r5, #4]
|
||||
|
||||
UPDATE_REGISTERS:
|
||||
add r3, r3, #1 @ ++m
|
||||
add r5, #4 @ Advance the element pointer together with m.
|
||||
cmp r3, r1 @ Loop until m reaches n.
|
||||
bne LOOP_GENERIC
|
||||
|
||||
b END
|
||||
|
||||
PRE_LOOP_STAGES_7_OR_8:
|
||||
add r4, r3, r4, asl #1 @ r4 = address one past the last table entry (2 bytes each).
|
||||
|
||||
LOOP_STAGES_7_OR_8:
|
||||
ldrsh r2, [r3], #2 @ index[m]
|
||||
ldrsh r5, [r3], #2 @ index[m + 1]
|
||||
ldr r1, [r0, r2] @ complex_data[index[m], index[m] + 1]
|
||||
ldr r12, [r0, r5] @ complex_data[index[m + 1], index[m + 1] + 1]
|
||||
cmp r3, r4 @ Reached the end of the table? (The str instructions keep the flags.)
|
||||
str r1, [r0, r5]
|
||||
str r12, [r0, r2]
|
||||
bne LOOP_STAGES_7_OR_8
|
||||
|
||||
END:
|
||||
pop {r4-r7}
|
||||
bx lr
|
||||
|
||||
@ The index tables. Note the values are doubles of the actual indexes for 16-bit
|
||||
@ elements, different from the generic C code. It actually provides byte offsets
|
||||
@ for the indexes.
|
||||
|
||||
.align 2
|
||||
index_7: @ Indexes for stages == 7.
|
||||
.short 4, 256, 8, 128, 12, 384, 16, 64, 20, 320, 24, 192, 28, 448, 36, 288
|
||||
.short 40, 160, 44, 416, 48, 96, 52, 352, 56, 224, 60, 480, 68, 272, 72, 144
|
||||
.short 76, 400, 84, 336, 88, 208, 92, 464, 100, 304, 104, 176, 108, 432, 116
|
||||
.short 368, 120, 240, 124, 496, 132, 264, 140, 392, 148, 328, 152, 200, 156
|
||||
.short 456, 164, 296, 172, 424, 180, 360, 184, 232, 188, 488, 196, 280, 204
|
||||
.short 408, 212, 344, 220, 472, 228, 312, 236, 440, 244, 376, 252, 504, 268
|
||||
.short 388, 276, 324, 284, 452, 300, 420, 308, 356, 316, 484, 332, 404, 348
|
||||
.short 468, 364, 436, 380, 500, 412, 460, 444, 492
|
||||
|
||||
index_8: @ Indexes for stages == 8.
|
||||
.short 4, 512, 8, 256, 12, 768, 16, 128, 20, 640, 24, 384, 28, 896, 32, 64
|
||||
.short 36, 576, 40, 320, 44, 832, 48, 192, 52, 704, 56, 448, 60, 960, 68, 544
|
||||
.short 72, 288, 76, 800, 80, 160, 84, 672, 88, 416, 92, 928, 100, 608, 104
|
||||
.short 352, 108, 864, 112, 224, 116, 736, 120, 480, 124, 992, 132, 528, 136
|
||||
.short 272, 140, 784, 148, 656, 152, 400, 156, 912, 164, 592, 168, 336, 172
|
||||
.short 848, 176, 208, 180, 720, 184, 464, 188, 976, 196, 560, 200, 304, 204
|
||||
.short 816, 212, 688, 216, 432, 220, 944, 228, 624, 232, 368, 236, 880, 244
|
||||
.short 752, 248, 496, 252, 1008, 260, 520, 268, 776, 276, 648, 280, 392, 284
|
||||
.short 904, 292, 584, 296, 328, 300, 840, 308, 712, 312, 456, 316, 968, 324
|
||||
.short 552, 332, 808, 340, 680, 344, 424, 348, 936, 356, 616, 364, 872, 372
|
||||
.short 744, 376, 488, 380, 1000, 388, 536, 396, 792, 404, 664, 412, 920, 420
|
||||
.short 600, 428, 856, 436, 728, 440, 472, 444, 984, 452, 568, 460, 824, 468
|
||||
.short 696, 476, 952, 484, 632, 492, 888, 500, 760, 508, 1016, 524, 772, 532
|
||||
.short 644, 540, 900, 548, 580, 556, 836, 564, 708, 572, 964, 588, 804, 596
|
||||
.short 676, 604, 932, 620, 868, 628, 740, 636, 996, 652, 788, 668, 916, 684
|
||||
.short 852, 692, 724, 700, 980, 716, 820, 732, 948, 748, 884, 764, 1012, 796
|
||||
.short 908, 812, 844, 828, 972, 860, 940, 892, 1004, 956, 988
|
176
third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c
vendored
Normal file
176
third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_bit_reverse_mips.c
vendored
Normal file
|
@ -0,0 +1,176 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
/* Swap table for stages == 7. Entries come in pairs; each value is the byte
 * offset into the data buffer (4 bytes per complex element) of one of the two
 * elements to be exchanged. The table is read-only, hence const.
 */
static const int16_t coefTable_7[] = {
    4, 256, 8, 128, 12, 384, 16, 64,
    20, 320, 24, 192, 28, 448, 36, 288,
    40, 160, 44, 416, 48, 96, 52, 352,
    56, 224, 60, 480, 68, 272, 72, 144,
    76, 400, 84, 336, 88, 208, 92, 464,
    100, 304, 104, 176, 108, 432, 116, 368,
    120, 240, 124, 496, 132, 264, 140, 392,
    148, 328, 152, 200, 156, 456, 164, 296,
    172, 424, 180, 360, 184, 232, 188, 488,
    196, 280, 204, 408, 212, 344, 220, 472,
    228, 312, 236, 440, 244, 376, 252, 504,
    268, 388, 276, 324, 284, 452, 300, 420,
    308, 356, 316, 484, 332, 404, 348, 468,
    364, 436, 380, 500, 412, 460, 444, 492
};

/* Swap table for stages == 8, same layout as coefTable_7. */
static const int16_t coefTable_8[] = {
    4, 512, 8, 256, 12, 768, 16, 128,
    20, 640, 24, 384, 28, 896, 32, 64,
    36, 576, 40, 320, 44, 832, 48, 192,
    52, 704, 56, 448, 60, 960, 68, 544,
    72, 288, 76, 800, 80, 160, 84, 672,
    88, 416, 92, 928, 100, 608, 104, 352,
    108, 864, 112, 224, 116, 736, 120, 480,
    124, 992, 132, 528, 136, 272, 140, 784,
    148, 656, 152, 400, 156, 912, 164, 592,
    168, 336, 172, 848, 176, 208, 180, 720,
    184, 464, 188, 976, 196, 560, 200, 304,
    204, 816, 212, 688, 216, 432, 220, 944,
    228, 624, 232, 368, 236, 880, 244, 752,
    248, 496, 252, 1008, 260, 520, 268, 776,
    276, 648, 280, 392, 284, 904, 292, 584,
    296, 328, 300, 840, 308, 712, 312, 456,
    316, 968, 324, 552, 332, 808, 340, 680,
    344, 424, 348, 936, 356, 616, 364, 872,
    372, 744, 376, 488, 380, 1000, 388, 536,
    396, 792, 404, 664, 412, 920, 420, 600,
    428, 856, 436, 728, 440, 472, 444, 984,
    452, 568, 460, 824, 468, 696, 476, 952,
    484, 632, 492, 888, 500, 760, 508, 1016,
    524, 772, 532, 644, 540, 900, 548, 580,
    556, 836, 564, 708, 572, 964, 588, 804,
    596, 676, 604, 932, 620, 868, 628, 740,
    636, 996, 652, 788, 668, 916, 684, 852,
    692, 724, 700, 980, 716, 820, 732, 948,
    748, 884, 764, 1012, 796, 908, 812, 844,
    828, 972, 860, 940, 892, 1004, 956, 988
};

/* Portable bit-reversal reordering for stage counts that have no table.
 * Computes each reversed index on the fly and swaps the real and imaginary
 * 16-bit halves individually, so no particular alignment of |frfi| is needed.
 */
static void BitReverseGeneric(int16_t frfi[], int stages) {
  int m = 0, mr = 0, l = 0;
  int n = 1 << stages;
  int nn = n - 1;

  for (m = 1; m <= nn; ++m) {
    int16_t real, imag;

    /* Derive the bit-reversed counterpart |mr| of |m| from the previous one. */
    l = n;
    do {
      l >>= 1;
    } while (l > nn - mr);
    mr = (mr & (l - 1)) + l;

    /* Swap each pair only once; self-reversed indexes are skipped. */
    if (mr <= m) {
      continue;
    }

    real = frfi[2 * m];
    imag = frfi[2 * m + 1];
    frfi[2 * m] = frfi[2 * mr];
    frfi[2 * m + 1] = frfi[2 * mr + 1];
    frfi[2 * mr] = real;
    frfi[2 * mr + 1] = imag;
  }
}

/* Reorders the (1 << stages) interleaved complex values in |frfi| into
 * bit-reversed index order, in place.
 *
 * stages == 8 and stages == 7 use MIPS assembly loops that walk the swap
 * tables above, exchanging four pairs of 32-bit words per pass with unaligned
 * loads and stores. Any other stage count previously returned without
 * touching the data; it now goes through BitReverseGeneric() so that the
 * result matches the table-free path of the reference C implementation.
 */
void WebRtcSpl_ComplexBitReverse(int16_t frfi[], int stages) {
  int l;
  int16_t tr, ti;
  int32_t tmp1, tmp2, tmp3, tmp4;
  int32_t* ptr_i;
  int32_t* ptr_j;

  if (stages == 8) {
    const int16_t* pcoeftable_8 = coefTable_8;

    /* |l| counts down from 120 swap pairs, four per pass. The table pointer
     * is advanced by 16 bytes (eight entries) in the branch delay slot.
     */
    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
      "addiu %[l], $zero, 120 \n\t"
      "1: \n\t"
      "addiu %[l], %[l], -4 \n\t"
      "lh %[tr], 0(%[pcoeftable_8]) \n\t"
      "lh %[ti], 2(%[pcoeftable_8]) \n\t"
      "lh %[tmp3], 4(%[pcoeftable_8]) \n\t"
      "lh %[tmp4], 6(%[pcoeftable_8]) \n\t"
      "addu %[ptr_i], %[frfi], %[tr] \n\t"
      "addu %[ptr_j], %[frfi], %[ti] \n\t"
      "addu %[tr], %[frfi], %[tmp3] \n\t"
      "addu %[ti], %[frfi], %[tmp4] \n\t"
      "ulw %[tmp1], 0(%[ptr_i]) \n\t"
      "ulw %[tmp2], 0(%[ptr_j]) \n\t"
      "ulw %[tmp3], 0(%[tr]) \n\t"
      "ulw %[tmp4], 0(%[ti]) \n\t"
      "usw %[tmp1], 0(%[ptr_j]) \n\t"
      "usw %[tmp2], 0(%[ptr_i]) \n\t"
      "usw %[tmp4], 0(%[tr]) \n\t"
      "usw %[tmp3], 0(%[ti]) \n\t"
      "lh %[tmp1], 8(%[pcoeftable_8]) \n\t"
      "lh %[tmp2], 10(%[pcoeftable_8]) \n\t"
      "lh %[tr], 12(%[pcoeftable_8]) \n\t"
      "lh %[ti], 14(%[pcoeftable_8]) \n\t"
      "addu %[ptr_i], %[frfi], %[tmp1] \n\t"
      "addu %[ptr_j], %[frfi], %[tmp2] \n\t"
      "addu %[tr], %[frfi], %[tr] \n\t"
      "addu %[ti], %[frfi], %[ti] \n\t"
      "ulw %[tmp1], 0(%[ptr_i]) \n\t"
      "ulw %[tmp2], 0(%[ptr_j]) \n\t"
      "ulw %[tmp3], 0(%[tr]) \n\t"
      "ulw %[tmp4], 0(%[ti]) \n\t"
      "usw %[tmp1], 0(%[ptr_j]) \n\t"
      "usw %[tmp2], 0(%[ptr_i]) \n\t"
      "usw %[tmp4], 0(%[tr]) \n\t"
      "usw %[tmp3], 0(%[ti]) \n\t"
      "bgtz %[l], 1b \n\t"
      " addiu %[pcoeftable_8], %[pcoeftable_8], 16 \n\t"
      ".set pop \n\t"

      : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i),
        [ptr_j] "=&r" (ptr_j), [tr] "=&r" (tr), [l] "=&r" (l),
        [tmp3] "=&r" (tmp3), [pcoeftable_8] "+r" (pcoeftable_8),
        [ti] "=&r" (ti), [tmp4] "=&r" (tmp4)
      : [frfi] "r" (frfi)
      : "memory"
    );
  } else if (stages == 7) {
    const int16_t* pcoeftable_7 = coefTable_7;

    /* Same loop as above with 56 swap pairs. */
    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
      "addiu %[l], $zero, 56 \n\t"
      "1: \n\t"
      "addiu %[l], %[l], -4 \n\t"
      "lh %[tr], 0(%[pcoeftable_7]) \n\t"
      "lh %[ti], 2(%[pcoeftable_7]) \n\t"
      "lh %[tmp3], 4(%[pcoeftable_7]) \n\t"
      "lh %[tmp4], 6(%[pcoeftable_7]) \n\t"
      "addu %[ptr_i], %[frfi], %[tr] \n\t"
      "addu %[ptr_j], %[frfi], %[ti] \n\t"
      "addu %[tr], %[frfi], %[tmp3] \n\t"
      "addu %[ti], %[frfi], %[tmp4] \n\t"
      "ulw %[tmp1], 0(%[ptr_i]) \n\t"
      "ulw %[tmp2], 0(%[ptr_j]) \n\t"
      "ulw %[tmp3], 0(%[tr]) \n\t"
      "ulw %[tmp4], 0(%[ti]) \n\t"
      "usw %[tmp1], 0(%[ptr_j]) \n\t"
      "usw %[tmp2], 0(%[ptr_i]) \n\t"
      "usw %[tmp4], 0(%[tr]) \n\t"
      "usw %[tmp3], 0(%[ti]) \n\t"
      "lh %[tmp1], 8(%[pcoeftable_7]) \n\t"
      "lh %[tmp2], 10(%[pcoeftable_7]) \n\t"
      "lh %[tr], 12(%[pcoeftable_7]) \n\t"
      "lh %[ti], 14(%[pcoeftable_7]) \n\t"
      "addu %[ptr_i], %[frfi], %[tmp1] \n\t"
      "addu %[ptr_j], %[frfi], %[tmp2] \n\t"
      "addu %[tr], %[frfi], %[tr] \n\t"
      "addu %[ti], %[frfi], %[ti] \n\t"
      "ulw %[tmp1], 0(%[ptr_i]) \n\t"
      "ulw %[tmp2], 0(%[ptr_j]) \n\t"
      "ulw %[tmp3], 0(%[tr]) \n\t"
      "ulw %[tmp4], 0(%[ti]) \n\t"
      "usw %[tmp1], 0(%[ptr_j]) \n\t"
      "usw %[tmp2], 0(%[ptr_i]) \n\t"
      "usw %[tmp4], 0(%[tr]) \n\t"
      "usw %[tmp3], 0(%[ti]) \n\t"
      "bgtz %[l], 1b \n\t"
      " addiu %[pcoeftable_7], %[pcoeftable_7], 16 \n\t"
      ".set pop \n\t"

      : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i),
        [ptr_j] "=&r" (ptr_j), [ti] "=&r" (ti), [tr] "=&r" (tr),
        [l] "=&r" (l), [pcoeftable_7] "+r" (pcoeftable_7),
        [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
      : [frfi] "r" (frfi)
      : "memory"
    );
  } else {
    /* No table for this stage count: use the portable computation. */
    BitReverseGeneric(frfi, stages);
  }
}
|
|
@ -0,0 +1,298 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_ComplexFFT().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/complex_fft_tables.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
#define CFFTSFT 14
|
||||
#define CFFTRND 1
|
||||
#define CFFTRND2 16384
|
||||
|
||||
#define CIFFTSFT 14
|
||||
#define CIFFTRND 1
|
||||
|
||||
|
||||
int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode)
|
||||
{
|
||||
int i, j, l, k, istep, n, m;
|
||||
int16_t wr, wi;
|
||||
int32_t tr32, ti32, qr32, qi32;
|
||||
|
||||
/* The 1024-value is a constant given from the size of kSinTable1024[],
|
||||
* and should not be changed depending on the input parameter 'stages'
|
||||
*/
|
||||
n = 1 << stages;
|
||||
if (n > 1024)
|
||||
return -1;
|
||||
|
||||
l = 1;
|
||||
k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
|
||||
depending on the input parameter 'stages' */
|
||||
|
||||
if (mode == 0)
|
||||
{
|
||||
// mode==0: Low-complexity and Low-accuracy mode
|
||||
while (l < n)
|
||||
{
|
||||
istep = l << 1;
|
||||
|
||||
for (m = 0; m < l; ++m)
|
||||
{
|
||||
j = m << k;
|
||||
|
||||
/* The 256-value is a constant given as 1/4 of the size of
|
||||
* kSinTable1024[], and should not be changed depending on the input
|
||||
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
|
||||
*/
|
||||
wr = kSinTable1024[j + 256];
|
||||
wi = -kSinTable1024[j];
|
||||
|
||||
for (i = m; i < n; i += istep)
|
||||
{
|
||||
j = i + l;
|
||||
|
||||
tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
|
||||
|
||||
ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
|
||||
|
||||
qr32 = (int32_t)frfi[2 * i];
|
||||
qi32 = (int32_t)frfi[2 * i + 1];
|
||||
frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1);
|
||||
frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1);
|
||||
frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1);
|
||||
frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1);
|
||||
}
|
||||
}
|
||||
|
||||
--k;
|
||||
l = istep;
|
||||
|
||||
}
|
||||
|
||||
} else
|
||||
{
|
||||
// mode==1: High-complexity and High-accuracy mode
|
||||
while (l < n)
|
||||
{
|
||||
istep = l << 1;
|
||||
|
||||
for (m = 0; m < l; ++m)
|
||||
{
|
||||
j = m << k;
|
||||
|
||||
/* The 256-value is a constant given as 1/4 of the size of
|
||||
* kSinTable1024[], and should not be changed depending on the input
|
||||
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
|
||||
*/
|
||||
wr = kSinTable1024[j + 256];
|
||||
wi = -kSinTable1024[j];
|
||||
|
||||
#ifdef WEBRTC_ARCH_ARM_V7
|
||||
int32_t wri = 0;
|
||||
__asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
|
||||
"r"((int32_t)wr), "r"((int32_t)wi));
|
||||
#endif
|
||||
|
||||
for (i = m; i < n; i += istep)
|
||||
{
|
||||
j = i + l;
|
||||
|
||||
#ifdef WEBRTC_ARCH_ARM_V7
|
||||
register int32_t frfi_r;
|
||||
__asm __volatile(
|
||||
"pkhbt %[frfi_r], %[frfi_even], %[frfi_odd],"
|
||||
" lsl #16\n\t"
|
||||
"smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
|
||||
"smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
|
||||
:[frfi_r]"=&r"(frfi_r),
|
||||
[tr32]"=&r"(tr32),
|
||||
[ti32]"=r"(ti32)
|
||||
:[frfi_even]"r"((int32_t)frfi[2*j]),
|
||||
[frfi_odd]"r"((int32_t)frfi[2*j +1]),
|
||||
[wri]"r"(wri),
|
||||
[cfftrnd]"r"(CFFTRND));
|
||||
#else
|
||||
tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND;
|
||||
|
||||
ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND;
|
||||
#endif
|
||||
|
||||
tr32 >>= 15 - CFFTSFT;
|
||||
ti32 >>= 15 - CFFTSFT;
|
||||
|
||||
qr32 = ((int32_t)frfi[2 * i]) << CFFTSFT;
|
||||
qi32 = ((int32_t)frfi[2 * i + 1]) << CFFTSFT;
|
||||
|
||||
frfi[2 * j] = (int16_t)(
|
||||
(qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT));
|
||||
frfi[2 * j + 1] = (int16_t)(
|
||||
(qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT));
|
||||
frfi[2 * i] = (int16_t)(
|
||||
(qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT));
|
||||
frfi[2 * i + 1] = (int16_t)(
|
||||
(qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT));
|
||||
}
|
||||
}
|
||||
|
||||
--k;
|
||||
l = istep;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode)
|
||||
{
|
||||
size_t i, j, l, istep, n, m;
|
||||
int k, scale, shift;
|
||||
int16_t wr, wi;
|
||||
int32_t tr32, ti32, qr32, qi32;
|
||||
int32_t tmp32, round2;
|
||||
|
||||
/* The 1024-value is a constant given from the size of kSinTable1024[],
|
||||
* and should not be changed depending on the input parameter 'stages'
|
||||
*/
|
||||
n = 1 << stages;
|
||||
if (n > 1024)
|
||||
return -1;
|
||||
|
||||
scale = 0;
|
||||
|
||||
l = 1;
|
||||
k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
|
||||
depending on the input parameter 'stages' */
|
||||
|
||||
while (l < n)
|
||||
{
|
||||
// variable scaling, depending upon data
|
||||
shift = 0;
|
||||
round2 = 8192;
|
||||
|
||||
tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n);
|
||||
if (tmp32 > 13573)
|
||||
{
|
||||
shift++;
|
||||
scale++;
|
||||
round2 <<= 1;
|
||||
}
|
||||
if (tmp32 > 27146)
|
||||
{
|
||||
shift++;
|
||||
scale++;
|
||||
round2 <<= 1;
|
||||
}
|
||||
|
||||
istep = l << 1;
|
||||
|
||||
if (mode == 0)
|
||||
{
|
||||
// mode==0: Low-complexity and Low-accuracy mode
|
||||
for (m = 0; m < l; ++m)
|
||||
{
|
||||
j = m << k;
|
||||
|
||||
/* The 256-value is a constant given as 1/4 of the size of
|
||||
* kSinTable1024[], and should not be changed depending on the input
|
||||
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
|
||||
*/
|
||||
wr = kSinTable1024[j + 256];
|
||||
wi = kSinTable1024[j];
|
||||
|
||||
for (i = m; i < n; i += istep)
|
||||
{
|
||||
j = i + l;
|
||||
|
||||
tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
|
||||
|
||||
ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
|
||||
|
||||
qr32 = (int32_t)frfi[2 * i];
|
||||
qi32 = (int32_t)frfi[2 * i + 1];
|
||||
frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift);
|
||||
frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift);
|
||||
frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift);
|
||||
frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift);
|
||||
}
|
||||
}
|
||||
} else
|
||||
{
|
||||
// mode==1: High-complexity and High-accuracy mode
|
||||
|
||||
for (m = 0; m < l; ++m)
|
||||
{
|
||||
j = m << k;
|
||||
|
||||
/* The 256-value is a constant given as 1/4 of the size of
|
||||
* kSinTable1024[], and should not be changed depending on the input
|
||||
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
|
||||
*/
|
||||
wr = kSinTable1024[j + 256];
|
||||
wi = kSinTable1024[j];
|
||||
|
||||
#ifdef WEBRTC_ARCH_ARM_V7
|
||||
int32_t wri = 0;
|
||||
__asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
|
||||
"r"((int32_t)wr), "r"((int32_t)wi));
|
||||
#endif
|
||||
|
||||
for (i = m; i < n; i += istep)
|
||||
{
|
||||
j = i + l;
|
||||
|
||||
#ifdef WEBRTC_ARCH_ARM_V7
|
||||
register int32_t frfi_r;
|
||||
__asm __volatile(
|
||||
"pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
|
||||
"smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
|
||||
"smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
|
||||
:[frfi_r]"=&r"(frfi_r),
|
||||
[tr32]"=&r"(tr32),
|
||||
[ti32]"=r"(ti32)
|
||||
:[frfi_even]"r"((int32_t)frfi[2*j]),
|
||||
[frfi_odd]"r"((int32_t)frfi[2*j +1]),
|
||||
[wri]"r"(wri),
|
||||
[cifftrnd]"r"(CIFFTRND)
|
||||
);
|
||||
#else
|
||||
|
||||
tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND;
|
||||
|
||||
ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND;
|
||||
#endif
|
||||
tr32 >>= 15 - CIFFTSFT;
|
||||
ti32 >>= 15 - CIFFTSFT;
|
||||
|
||||
qr32 = ((int32_t)frfi[2 * i]) << CIFFTSFT;
|
||||
qi32 = ((int32_t)frfi[2 * i + 1]) << CIFFTSFT;
|
||||
|
||||
frfi[2 * j] = (int16_t)(
|
||||
(qr32 - tr32 + round2) >> (shift + CIFFTSFT));
|
||||
frfi[2 * j + 1] = (int16_t)(
|
||||
(qi32 - ti32 + round2) >> (shift + CIFFTSFT));
|
||||
frfi[2 * i] = (int16_t)(
|
||||
(qr32 + tr32 + round2) >> (shift + CIFFTSFT));
|
||||
frfi[2 * i + 1] = (int16_t)(
|
||||
(qi32 + ti32 + round2) >> (shift + CIFFTSFT));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
--k;
|
||||
l = istep;
|
||||
}
|
||||
return scale;
|
||||
}
|
328
third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c
vendored
Normal file
328
third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_mips.c
vendored
Normal file
|
@ -0,0 +1,328 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/complex_fft_tables.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
#define CFFTSFT 14
|
||||
#define CFFTRND 1
|
||||
#define CFFTRND2 16384
|
||||
|
||||
#define CIFFTSFT 14
|
||||
#define CIFFTRND 1
|
||||
|
||||
int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) {
|
||||
int i = 0;
|
||||
int l = 0;
|
||||
int k = 0;
|
||||
int istep = 0;
|
||||
int n = 0;
|
||||
int m = 0;
|
||||
int32_t wr = 0, wi = 0;
|
||||
int32_t tmp1 = 0;
|
||||
int32_t tmp2 = 0;
|
||||
int32_t tmp3 = 0;
|
||||
int32_t tmp4 = 0;
|
||||
int32_t tmp5 = 0;
|
||||
int32_t tmp6 = 0;
|
||||
int32_t tmp = 0;
|
||||
int16_t* ptr_j = NULL;
|
||||
int16_t* ptr_i = NULL;
|
||||
|
||||
n = 1 << stages;
|
||||
if (n > 1024) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
__asm __volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
|
||||
"addiu %[k], $zero, 10 \n\t"
|
||||
"addiu %[l], $zero, 1 \n\t"
|
||||
"3: \n\t"
|
||||
"sll %[istep], %[l], 1 \n\t"
|
||||
"move %[m], $zero \n\t"
|
||||
"sll %[tmp], %[l], 2 \n\t"
|
||||
"move %[i], $zero \n\t"
|
||||
"2: \n\t"
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
"sllv %[tmp3], %[m], %[k] \n\t"
|
||||
"addiu %[tmp2], %[tmp3], 512 \n\t"
|
||||
"addiu %[m], %[m], 1 \n\t"
|
||||
"lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t"
|
||||
"lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t"
|
||||
#else // #if defined(MIPS_DSP_R1_LE)
|
||||
"sllv %[tmp3], %[m], %[k] \n\t"
|
||||
"addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t"
|
||||
"addiu %[ptr_i], %[ptr_j], 512 \n\t"
|
||||
"addiu %[m], %[m], 1 \n\t"
|
||||
"lh %[wi], 0(%[ptr_j]) \n\t"
|
||||
"lh %[wr], 0(%[ptr_i]) \n\t"
|
||||
#endif // #if defined(MIPS_DSP_R1_LE)
|
||||
"1: \n\t"
|
||||
"sll %[tmp1], %[i], 2 \n\t"
|
||||
"addu %[ptr_i], %[frfi], %[tmp1] \n\t"
|
||||
"addu %[ptr_j], %[ptr_i], %[tmp] \n\t"
|
||||
"lh %[tmp6], 0(%[ptr_i]) \n\t"
|
||||
"lh %[tmp5], 2(%[ptr_i]) \n\t"
|
||||
"lh %[tmp3], 0(%[ptr_j]) \n\t"
|
||||
"lh %[tmp4], 2(%[ptr_j]) \n\t"
|
||||
"addu %[i], %[i], %[istep] \n\t"
|
||||
#if defined(MIPS_DSP_R2_LE)
|
||||
"mult %[wr], %[tmp3] \n\t"
|
||||
"madd %[wi], %[tmp4] \n\t"
|
||||
"mult $ac1, %[wr], %[tmp4] \n\t"
|
||||
"msub $ac1, %[wi], %[tmp3] \n\t"
|
||||
"mflo %[tmp1] \n\t"
|
||||
"mflo %[tmp2], $ac1 \n\t"
|
||||
"sll %[tmp6], %[tmp6], 14 \n\t"
|
||||
"sll %[tmp5], %[tmp5], 14 \n\t"
|
||||
"shra_r.w %[tmp1], %[tmp1], 1 \n\t"
|
||||
"shra_r.w %[tmp2], %[tmp2], 1 \n\t"
|
||||
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
|
||||
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
|
||||
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
|
||||
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
|
||||
"shra_r.w %[tmp1], %[tmp1], 15 \n\t"
|
||||
"shra_r.w %[tmp6], %[tmp6], 15 \n\t"
|
||||
"shra_r.w %[tmp4], %[tmp4], 15 \n\t"
|
||||
"shra_r.w %[tmp5], %[tmp5], 15 \n\t"
|
||||
#else // #if defined(MIPS_DSP_R2_LE)
|
||||
"mul %[tmp2], %[wr], %[tmp4] \n\t"
|
||||
"mul %[tmp1], %[wr], %[tmp3] \n\t"
|
||||
"mul %[tmp4], %[wi], %[tmp4] \n\t"
|
||||
"mul %[tmp3], %[wi], %[tmp3] \n\t"
|
||||
"sll %[tmp6], %[tmp6], 14 \n\t"
|
||||
"sll %[tmp5], %[tmp5], 14 \n\t"
|
||||
"addiu %[tmp6], %[tmp6], 16384 \n\t"
|
||||
"addiu %[tmp5], %[tmp5], 16384 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[tmp4] \n\t"
|
||||
"subu %[tmp2], %[tmp2], %[tmp3] \n\t"
|
||||
"addiu %[tmp1], %[tmp1], 1 \n\t"
|
||||
"addiu %[tmp2], %[tmp2], 1 \n\t"
|
||||
"sra %[tmp1], %[tmp1], 1 \n\t"
|
||||
"sra %[tmp2], %[tmp2], 1 \n\t"
|
||||
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
|
||||
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
|
||||
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
|
||||
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
|
||||
"sra %[tmp4], %[tmp4], 15 \n\t"
|
||||
"sra %[tmp1], %[tmp1], 15 \n\t"
|
||||
"sra %[tmp6], %[tmp6], 15 \n\t"
|
||||
"sra %[tmp5], %[tmp5], 15 \n\t"
|
||||
#endif // #if defined(MIPS_DSP_R2_LE)
|
||||
"sh %[tmp1], 0(%[ptr_i]) \n\t"
|
||||
"sh %[tmp6], 2(%[ptr_i]) \n\t"
|
||||
"sh %[tmp4], 0(%[ptr_j]) \n\t"
|
||||
"blt %[i], %[n], 1b \n\t"
|
||||
" sh %[tmp5], 2(%[ptr_j]) \n\t"
|
||||
"blt %[m], %[l], 2b \n\t"
|
||||
" addu %[i], $zero, %[m] \n\t"
|
||||
"move %[l], %[istep] \n\t"
|
||||
"blt %[l], %[n], 3b \n\t"
|
||||
" addiu %[k], %[k], -1 \n\t"
|
||||
|
||||
".set pop \n\t"
|
||||
|
||||
: [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
|
||||
[tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
|
||||
[ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [wi] "=&r" (wi), [wr] "=&r" (wr),
|
||||
[m] "=&r" (m), [istep] "=&r" (istep), [l] "=&r" (l), [k] "=&r" (k),
|
||||
[ptr_j] "=&r" (ptr_j), [tmp] "=&r" (tmp)
|
||||
: [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
|
||||
: "hi", "lo", "memory"
|
||||
#if defined(MIPS_DSP_R2_LE)
|
||||
, "$ac1hi", "$ac1lo"
|
||||
#endif // #if defined(MIPS_DSP_R2_LE)
|
||||
);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) {
|
||||
int i = 0, l = 0, k = 0;
|
||||
int istep = 0, n = 0, m = 0;
|
||||
int scale = 0, shift = 0;
|
||||
int32_t wr = 0, wi = 0;
|
||||
int32_t tmp1 = 0, tmp2 = 0, tmp3 = 0, tmp4 = 0;
|
||||
int32_t tmp5 = 0, tmp6 = 0, tmp = 0, tempMax = 0, round2 = 0;
|
||||
int16_t* ptr_j = NULL;
|
||||
int16_t* ptr_i = NULL;
|
||||
|
||||
n = 1 << stages;
|
||||
if (n > 1024) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
__asm __volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
|
||||
"addiu %[k], $zero, 10 \n\t"
|
||||
"addiu %[l], $zero, 1 \n\t"
|
||||
"move %[scale], $zero \n\t"
|
||||
"3: \n\t"
|
||||
"addiu %[shift], $zero, 14 \n\t"
|
||||
"addiu %[round2], $zero, 8192 \n\t"
|
||||
"move %[ptr_i], %[frfi] \n\t"
|
||||
"move %[tempMax], $zero \n\t"
|
||||
"addu %[i], %[n], %[n] \n\t"
|
||||
"5: \n\t"
|
||||
"lh %[tmp1], 0(%[ptr_i]) \n\t"
|
||||
"lh %[tmp2], 2(%[ptr_i]) \n\t"
|
||||
"lh %[tmp3], 4(%[ptr_i]) \n\t"
|
||||
"lh %[tmp4], 6(%[ptr_i]) \n\t"
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
"absq_s.w %[tmp1], %[tmp1] \n\t"
|
||||
"absq_s.w %[tmp2], %[tmp2] \n\t"
|
||||
"absq_s.w %[tmp3], %[tmp3] \n\t"
|
||||
"absq_s.w %[tmp4], %[tmp4] \n\t"
|
||||
#else // #if defined(MIPS_DSP_R1_LE)
|
||||
"slt %[tmp5], %[tmp1], $zero \n\t"
|
||||
"subu %[tmp6], $zero, %[tmp1] \n\t"
|
||||
"movn %[tmp1], %[tmp6], %[tmp5] \n\t"
|
||||
"slt %[tmp5], %[tmp2], $zero \n\t"
|
||||
"subu %[tmp6], $zero, %[tmp2] \n\t"
|
||||
"movn %[tmp2], %[tmp6], %[tmp5] \n\t"
|
||||
"slt %[tmp5], %[tmp3], $zero \n\t"
|
||||
"subu %[tmp6], $zero, %[tmp3] \n\t"
|
||||
"movn %[tmp3], %[tmp6], %[tmp5] \n\t"
|
||||
"slt %[tmp5], %[tmp4], $zero \n\t"
|
||||
"subu %[tmp6], $zero, %[tmp4] \n\t"
|
||||
"movn %[tmp4], %[tmp6], %[tmp5] \n\t"
|
||||
#endif // #if defined(MIPS_DSP_R1_LE)
|
||||
"slt %[tmp5], %[tempMax], %[tmp1] \n\t"
|
||||
"movn %[tempMax], %[tmp1], %[tmp5] \n\t"
|
||||
"addiu %[i], %[i], -4 \n\t"
|
||||
"slt %[tmp5], %[tempMax], %[tmp2] \n\t"
|
||||
"movn %[tempMax], %[tmp2], %[tmp5] \n\t"
|
||||
"slt %[tmp5], %[tempMax], %[tmp3] \n\t"
|
||||
"movn %[tempMax], %[tmp3], %[tmp5] \n\t"
|
||||
"slt %[tmp5], %[tempMax], %[tmp4] \n\t"
|
||||
"movn %[tempMax], %[tmp4], %[tmp5] \n\t"
|
||||
"bgtz %[i], 5b \n\t"
|
||||
" addiu %[ptr_i], %[ptr_i], 8 \n\t"
|
||||
"addiu %[tmp1], $zero, 13573 \n\t"
|
||||
"addiu %[tmp2], $zero, 27146 \n\t"
|
||||
#if !defined(MIPS32_R2_LE)
|
||||
"sll %[tempMax], %[tempMax], 16 \n\t"
|
||||
"sra %[tempMax], %[tempMax], 16 \n\t"
|
||||
#else // #if !defined(MIPS32_R2_LE)
|
||||
"seh %[tempMax] \n\t"
|
||||
#endif // #if !defined(MIPS32_R2_LE)
|
||||
"slt %[tmp1], %[tmp1], %[tempMax] \n\t"
|
||||
"slt %[tmp2], %[tmp2], %[tempMax] \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[tmp2] \n\t"
|
||||
"addu %[shift], %[shift], %[tmp1] \n\t"
|
||||
"addu %[scale], %[scale], %[tmp1] \n\t"
|
||||
"sllv %[round2], %[round2], %[tmp1] \n\t"
|
||||
"sll %[istep], %[l], 1 \n\t"
|
||||
"move %[m], $zero \n\t"
|
||||
"sll %[tmp], %[l], 2 \n\t"
|
||||
"2: \n\t"
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
"sllv %[tmp3], %[m], %[k] \n\t"
|
||||
"addiu %[tmp2], %[tmp3], 512 \n\t"
|
||||
"addiu %[m], %[m], 1 \n\t"
|
||||
"lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t"
|
||||
"lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t"
|
||||
#else // #if defined(MIPS_DSP_R1_LE)
|
||||
"sllv %[tmp3], %[m], %[k] \n\t"
|
||||
"addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t"
|
||||
"addiu %[ptr_i], %[ptr_j], 512 \n\t"
|
||||
"addiu %[m], %[m], 1 \n\t"
|
||||
"lh %[wi], 0(%[ptr_j]) \n\t"
|
||||
"lh %[wr], 0(%[ptr_i]) \n\t"
|
||||
#endif // #if defined(MIPS_DSP_R1_LE)
|
||||
"1: \n\t"
|
||||
"sll %[tmp1], %[i], 2 \n\t"
|
||||
"addu %[ptr_i], %[frfi], %[tmp1] \n\t"
|
||||
"addu %[ptr_j], %[ptr_i], %[tmp] \n\t"
|
||||
"lh %[tmp3], 0(%[ptr_j]) \n\t"
|
||||
"lh %[tmp4], 2(%[ptr_j]) \n\t"
|
||||
"lh %[tmp6], 0(%[ptr_i]) \n\t"
|
||||
"lh %[tmp5], 2(%[ptr_i]) \n\t"
|
||||
"addu %[i], %[i], %[istep] \n\t"
|
||||
#if defined(MIPS_DSP_R2_LE)
|
||||
"mult %[wr], %[tmp3] \n\t"
|
||||
"msub %[wi], %[tmp4] \n\t"
|
||||
"mult $ac1, %[wr], %[tmp4] \n\t"
|
||||
"madd $ac1, %[wi], %[tmp3] \n\t"
|
||||
"mflo %[tmp1] \n\t"
|
||||
"mflo %[tmp2], $ac1 \n\t"
|
||||
"sll %[tmp6], %[tmp6], 14 \n\t"
|
||||
"sll %[tmp5], %[tmp5], 14 \n\t"
|
||||
"shra_r.w %[tmp1], %[tmp1], 1 \n\t"
|
||||
"shra_r.w %[tmp2], %[tmp2], 1 \n\t"
|
||||
"addu %[tmp6], %[tmp6], %[round2] \n\t"
|
||||
"addu %[tmp5], %[tmp5], %[round2] \n\t"
|
||||
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
|
||||
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
|
||||
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
|
||||
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
|
||||
"srav %[tmp4], %[tmp4], %[shift] \n\t"
|
||||
"srav %[tmp1], %[tmp1], %[shift] \n\t"
|
||||
"srav %[tmp6], %[tmp6], %[shift] \n\t"
|
||||
"srav %[tmp5], %[tmp5], %[shift] \n\t"
|
||||
#else // #if defined(MIPS_DSP_R2_LE)
|
||||
"mul %[tmp1], %[wr], %[tmp3] \n\t"
|
||||
"mul %[tmp2], %[wr], %[tmp4] \n\t"
|
||||
"mul %[tmp4], %[wi], %[tmp4] \n\t"
|
||||
"mul %[tmp3], %[wi], %[tmp3] \n\t"
|
||||
"sll %[tmp6], %[tmp6], 14 \n\t"
|
||||
"sll %[tmp5], %[tmp5], 14 \n\t"
|
||||
"sub %[tmp1], %[tmp1], %[tmp4] \n\t"
|
||||
"addu %[tmp2], %[tmp2], %[tmp3] \n\t"
|
||||
"addiu %[tmp1], %[tmp1], 1 \n\t"
|
||||
"addiu %[tmp2], %[tmp2], 1 \n\t"
|
||||
"sra %[tmp2], %[tmp2], 1 \n\t"
|
||||
"sra %[tmp1], %[tmp1], 1 \n\t"
|
||||
"addu %[tmp6], %[tmp6], %[round2] \n\t"
|
||||
"addu %[tmp5], %[tmp5], %[round2] \n\t"
|
||||
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
|
||||
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
|
||||
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
|
||||
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
|
||||
"sra %[tmp4], %[tmp4], %[shift] \n\t"
|
||||
"sra %[tmp1], %[tmp1], %[shift] \n\t"
|
||||
"sra %[tmp6], %[tmp6], %[shift] \n\t"
|
||||
"sra %[tmp5], %[tmp5], %[shift] \n\t"
|
||||
#endif // #if defined(MIPS_DSP_R2_LE)
|
||||
"sh %[tmp1], 0(%[ptr_i]) \n\t"
|
||||
"sh %[tmp6], 2(%[ptr_i]) \n\t"
|
||||
"sh %[tmp4], 0(%[ptr_j]) \n\t"
|
||||
"blt %[i], %[n], 1b \n\t"
|
||||
" sh %[tmp5], 2(%[ptr_j]) \n\t"
|
||||
"blt %[m], %[l], 2b \n\t"
|
||||
" addu %[i], $zero, %[m] \n\t"
|
||||
"move %[l], %[istep] \n\t"
|
||||
"blt %[l], %[n], 3b \n\t"
|
||||
" addiu %[k], %[k], -1 \n\t"
|
||||
|
||||
".set pop \n\t"
|
||||
|
||||
: [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
|
||||
[tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
|
||||
[ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [m] "=&r" (m), [tmp] "=&r" (tmp),
|
||||
[istep] "=&r" (istep), [wi] "=&r" (wi), [wr] "=&r" (wr), [l] "=&r" (l),
|
||||
[k] "=&r" (k), [round2] "=&r" (round2), [ptr_j] "=&r" (ptr_j),
|
||||
[shift] "=&r" (shift), [scale] "=&r" (scale), [tempMax] "=&r" (tempMax)
|
||||
: [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
|
||||
: "hi", "lo", "memory"
|
||||
#if defined(MIPS_DSP_R2_LE)
|
||||
, "$ac1hi", "$ac1lo"
|
||||
#endif // #if defined(MIPS_DSP_R2_LE)
|
||||
);
|
||||
|
||||
return scale;
|
||||
|
||||
}
|
148
third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h
vendored
Normal file
148
third_party/webrtc/src/webrtc/common_audio/signal_processing/complex_fft_tables.h
vendored
Normal file
|
@ -0,0 +1,148 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
|
||||
#define WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
static const int16_t kSinTable1024[] = {
|
||||
0, 201, 402, 603, 804, 1005, 1206, 1406,
|
||||
1607, 1808, 2009, 2209, 2410, 2610, 2811, 3011,
|
||||
3211, 3411, 3611, 3811, 4011, 4210, 4409, 4608,
|
||||
4807, 5006, 5205, 5403, 5601, 5799, 5997, 6195,
|
||||
6392, 6589, 6786, 6982, 7179, 7375, 7571, 7766,
|
||||
7961, 8156, 8351, 8545, 8739, 8932, 9126, 9319,
|
||||
9511, 9703, 9895, 10087, 10278, 10469, 10659, 10849,
|
||||
11038, 11227, 11416, 11604, 11792, 11980, 12166, 12353,
|
||||
12539, 12724, 12909, 13094, 13278, 13462, 13645, 13827,
|
||||
14009, 14191, 14372, 14552, 14732, 14911, 15090, 15268,
|
||||
15446, 15623, 15799, 15975, 16150, 16325, 16499, 16672,
|
||||
16845, 17017, 17189, 17360, 17530, 17699, 17868, 18036,
|
||||
18204, 18371, 18537, 18702, 18867, 19031, 19194, 19357,
|
||||
19519, 19680, 19840, 20000, 20159, 20317, 20474, 20631,
|
||||
20787, 20942, 21096, 21249, 21402, 21554, 21705, 21855,
|
||||
22004, 22153, 22301, 22448, 22594, 22739, 22883, 23027,
|
||||
23169, 23311, 23452, 23592, 23731, 23869, 24006, 24143,
|
||||
24278, 24413, 24546, 24679, 24811, 24942, 25072, 25201,
|
||||
25329, 25456, 25582, 25707, 25831, 25954, 26077, 26198,
|
||||
26318, 26437, 26556, 26673, 26789, 26905, 27019, 27132,
|
||||
27244, 27355, 27466, 27575, 27683, 27790, 27896, 28001,
|
||||
28105, 28208, 28309, 28410, 28510, 28608, 28706, 28802,
|
||||
28897, 28992, 29085, 29177, 29268, 29358, 29446, 29534,
|
||||
29621, 29706, 29790, 29873, 29955, 30036, 30116, 30195,
|
||||
30272, 30349, 30424, 30498, 30571, 30643, 30713, 30783,
|
||||
30851, 30918, 30984, 31049, 31113, 31175, 31236, 31297,
|
||||
31356, 31413, 31470, 31525, 31580, 31633, 31684, 31735,
|
||||
31785, 31833, 31880, 31926, 31970, 32014, 32056, 32097,
|
||||
32137, 32176, 32213, 32249, 32284, 32318, 32350, 32382,
|
||||
32412, 32441, 32468, 32495, 32520, 32544, 32567, 32588,
|
||||
32609, 32628, 32646, 32662, 32678, 32692, 32705, 32717,
|
||||
32727, 32736, 32744, 32751, 32757, 32761, 32764, 32766,
|
||||
32767, 32766, 32764, 32761, 32757, 32751, 32744, 32736,
|
||||
32727, 32717, 32705, 32692, 32678, 32662, 32646, 32628,
|
||||
32609, 32588, 32567, 32544, 32520, 32495, 32468, 32441,
|
||||
32412, 32382, 32350, 32318, 32284, 32249, 32213, 32176,
|
||||
32137, 32097, 32056, 32014, 31970, 31926, 31880, 31833,
|
||||
31785, 31735, 31684, 31633, 31580, 31525, 31470, 31413,
|
||||
31356, 31297, 31236, 31175, 31113, 31049, 30984, 30918,
|
||||
30851, 30783, 30713, 30643, 30571, 30498, 30424, 30349,
|
||||
30272, 30195, 30116, 30036, 29955, 29873, 29790, 29706,
|
||||
29621, 29534, 29446, 29358, 29268, 29177, 29085, 28992,
|
||||
28897, 28802, 28706, 28608, 28510, 28410, 28309, 28208,
|
||||
28105, 28001, 27896, 27790, 27683, 27575, 27466, 27355,
|
||||
27244, 27132, 27019, 26905, 26789, 26673, 26556, 26437,
|
||||
26318, 26198, 26077, 25954, 25831, 25707, 25582, 25456,
|
||||
25329, 25201, 25072, 24942, 24811, 24679, 24546, 24413,
|
||||
24278, 24143, 24006, 23869, 23731, 23592, 23452, 23311,
|
||||
23169, 23027, 22883, 22739, 22594, 22448, 22301, 22153,
|
||||
22004, 21855, 21705, 21554, 21402, 21249, 21096, 20942,
|
||||
20787, 20631, 20474, 20317, 20159, 20000, 19840, 19680,
|
||||
19519, 19357, 19194, 19031, 18867, 18702, 18537, 18371,
|
||||
18204, 18036, 17868, 17699, 17530, 17360, 17189, 17017,
|
||||
16845, 16672, 16499, 16325, 16150, 15975, 15799, 15623,
|
||||
15446, 15268, 15090, 14911, 14732, 14552, 14372, 14191,
|
||||
14009, 13827, 13645, 13462, 13278, 13094, 12909, 12724,
|
||||
12539, 12353, 12166, 11980, 11792, 11604, 11416, 11227,
|
||||
11038, 10849, 10659, 10469, 10278, 10087, 9895, 9703,
|
||||
9511, 9319, 9126, 8932, 8739, 8545, 8351, 8156,
|
||||
7961, 7766, 7571, 7375, 7179, 6982, 6786, 6589,
|
||||
6392, 6195, 5997, 5799, 5601, 5403, 5205, 5006,
|
||||
4807, 4608, 4409, 4210, 4011, 3811, 3611, 3411,
|
||||
3211, 3011, 2811, 2610, 2410, 2209, 2009, 1808,
|
||||
1607, 1406, 1206, 1005, 804, 603, 402, 201,
|
||||
0, -201, -402, -603, -804, -1005, -1206, -1406,
|
||||
-1607, -1808, -2009, -2209, -2410, -2610, -2811, -3011,
|
||||
-3211, -3411, -3611, -3811, -4011, -4210, -4409, -4608,
|
||||
-4807, -5006, -5205, -5403, -5601, -5799, -5997, -6195,
|
||||
-6392, -6589, -6786, -6982, -7179, -7375, -7571, -7766,
|
||||
-7961, -8156, -8351, -8545, -8739, -8932, -9126, -9319,
|
||||
-9511, -9703, -9895, -10087, -10278, -10469, -10659, -10849,
|
||||
-11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353,
|
||||
-12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827,
|
||||
-14009, -14191, -14372, -14552, -14732, -14911, -15090, -15268,
|
||||
-15446, -15623, -15799, -15975, -16150, -16325, -16499, -16672,
|
||||
-16845, -17017, -17189, -17360, -17530, -17699, -17868, -18036,
|
||||
-18204, -18371, -18537, -18702, -18867, -19031, -19194, -19357,
|
||||
-19519, -19680, -19840, -20000, -20159, -20317, -20474, -20631,
|
||||
-20787, -20942, -21096, -21249, -21402, -21554, -21705, -21855,
|
||||
-22004, -22153, -22301, -22448, -22594, -22739, -22883, -23027,
|
||||
-23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143,
|
||||
-24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201,
|
||||
-25329, -25456, -25582, -25707, -25831, -25954, -26077, -26198,
|
||||
-26318, -26437, -26556, -26673, -26789, -26905, -27019, -27132,
|
||||
-27244, -27355, -27466, -27575, -27683, -27790, -27896, -28001,
|
||||
-28105, -28208, -28309, -28410, -28510, -28608, -28706, -28802,
|
||||
-28897, -28992, -29085, -29177, -29268, -29358, -29446, -29534,
|
||||
-29621, -29706, -29790, -29873, -29955, -30036, -30116, -30195,
|
||||
-30272, -30349, -30424, -30498, -30571, -30643, -30713, -30783,
|
||||
-30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297,
|
||||
-31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735,
|
||||
-31785, -31833, -31880, -31926, -31970, -32014, -32056, -32097,
|
||||
-32137, -32176, -32213, -32249, -32284, -32318, -32350, -32382,
|
||||
-32412, -32441, -32468, -32495, -32520, -32544, -32567, -32588,
|
||||
-32609, -32628, -32646, -32662, -32678, -32692, -32705, -32717,
|
||||
-32727, -32736, -32744, -32751, -32757, -32761, -32764, -32766,
|
||||
-32767, -32766, -32764, -32761, -32757, -32751, -32744, -32736,
|
||||
-32727, -32717, -32705, -32692, -32678, -32662, -32646, -32628,
|
||||
-32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441,
|
||||
-32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176,
|
||||
-32137, -32097, -32056, -32014, -31970, -31926, -31880, -31833,
|
||||
-31785, -31735, -31684, -31633, -31580, -31525, -31470, -31413,
|
||||
-31356, -31297, -31236, -31175, -31113, -31049, -30984, -30918,
|
||||
-30851, -30783, -30713, -30643, -30571, -30498, -30424, -30349,
|
||||
-30272, -30195, -30116, -30036, -29955, -29873, -29790, -29706,
|
||||
-29621, -29534, -29446, -29358, -29268, -29177, -29085, -28992,
|
||||
-28897, -28802, -28706, -28608, -28510, -28410, -28309, -28208,
|
||||
-28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355,
|
||||
-27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437,
|
||||
-26318, -26198, -26077, -25954, -25831, -25707, -25582, -25456,
|
||||
-25329, -25201, -25072, -24942, -24811, -24679, -24546, -24413,
|
||||
-24278, -24143, -24006, -23869, -23731, -23592, -23452, -23311,
|
||||
-23169, -23027, -22883, -22739, -22594, -22448, -22301, -22153,
|
||||
-22004, -21855, -21705, -21554, -21402, -21249, -21096, -20942,
|
||||
-20787, -20631, -20474, -20317, -20159, -20000, -19840, -19680,
|
||||
-19519, -19357, -19194, -19031, -18867, -18702, -18537, -18371,
|
||||
-18204, -18036, -17868, -17699, -17530, -17360, -17189, -17017,
|
||||
-16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623,
|
||||
-15446, -15268, -15090, -14911, -14732, -14552, -14372, -14191,
|
||||
-14009, -13827, -13645, -13462, -13278, -13094, -12909, -12724,
|
||||
-12539, -12353, -12166, -11980, -11792, -11604, -11416, -11227,
|
||||
-11038, -10849, -10659, -10469, -10278, -10087, -9895, -9703,
|
||||
-9511, -9319, -9126, -8932, -8739, -8545, -8351, -8156,
|
||||
-7961, -7766, -7571, -7375, -7179, -6982, -6786, -6589,
|
||||
-6392, -6195, -5997, -5799, -5601, -5403, -5205, -5006,
|
||||
-4807, -4608, -4409, -4210, -4011, -3811, -3611, -3411,
|
||||
-3211, -3011, -2811, -2610, -2410, -2209, -2009, -1808,
|
||||
-1607, -1406, -1206, -1005, -804, -603, -402, -201
|
||||
};
|
||||
|
||||
#endif // WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
|
82
third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c
vendored
Normal file
82
third_party/webrtc/src/webrtc/common_audio/signal_processing/copy_set_operations.c
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the implementation of functions
|
||||
* WebRtcSpl_MemSetW16()
|
||||
* WebRtcSpl_MemSetW32()
|
||||
* WebRtcSpl_MemCpyReversedOrder()
|
||||
* WebRtcSpl_CopyFromEndW16()
|
||||
* WebRtcSpl_ZerosArrayW16()
|
||||
* WebRtcSpl_ZerosArrayW32()
|
||||
*
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
|
||||
/* Stores set_value into each of the first 'length' elements of ptr. */
void WebRtcSpl_MemSetW16(int16_t *ptr, int16_t set_value, size_t length)
{
    size_t idx;

    for (idx = 0; idx < length; ++idx)
    {
        ptr[idx] = set_value;
    }
}
|
||||
|
||||
/* Stores set_value into each of the first 'length' elements of ptr. */
void WebRtcSpl_MemSetW32(int32_t *ptr, int32_t set_value, size_t length)
{
    size_t idx;

    for (idx = 0; idx < length; ++idx)
    {
        ptr[idx] = set_value;
    }
}
|
||||
|
||||
/*
 * Copies 'length' elements from source to dest in reversed order.
 * source is read forwards from source[0]; dest is written backwards, so
 * dest must point at the LAST element of the destination range:
 * source[k] ends up in *(dest - k).
 */
void WebRtcSpl_MemCpyReversedOrder(int16_t* dest,
                                   int16_t* source,
                                   size_t length)
{
    size_t k;

    for (k = 0; k < length; ++k)
    {
        *(dest - k) = source[k];
    }
}
|
||||
|
||||
/*
 * Copies the trailing 'samples' elements of vector_in (which holds 'length'
 * elements) to the start of vector_out.
 */
void WebRtcSpl_CopyFromEndW16(const int16_t *vector_in,
                              size_t length,
                              size_t samples,
                              int16_t *vector_out)
{
    // First element of the tail that is to be copied.
    const int16_t *tail = vector_in + (length - samples);

    WEBRTC_SPL_MEMCPY_W16(vector_out, tail, samples);
}
|
||||
|
||||
/* Sets the first 'length' elements of vector to zero. */
void WebRtcSpl_ZerosArrayW16(int16_t *vector, size_t length)
{
    const int16_t zero = 0;

    WebRtcSpl_MemSetW16(vector, zero, length);
}
|
||||
|
||||
/* Sets the first 'length' elements of vector to zero. */
void WebRtcSpl_ZerosArrayW32(int32_t *vector, size_t length)
{
    const int32_t zero = 0;

    WebRtcSpl_MemSetW32(vector, zero, length);
}
|
30
third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c
vendored
Normal file
30
third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation.c
vendored
Normal file
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
/*
 * Portable C implementation of WebRtcSpl_CrossCorrelation().
 *
 * Writes dim_cross_correlation values to cross_correlation. Output `lag` is
 * the sum over j in [0, dim_seq) of (seq1[j] * window[j]) >> right_shifts,
 * where window starts at seq2 and is moved by step_seq2 elements (which may
 * be negative) after every output value.
 */
void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
                                 const int16_t* seq1,
                                 const int16_t* seq2,
                                 size_t dim_seq,
                                 size_t dim_cross_correlation,
                                 int right_shifts,
                                 int step_seq2) {
  const int16_t* window = seq2;
  size_t lag;

  for (lag = 0; lag < dim_cross_correlation; ++lag) {
    int32_t acc = 0;
    size_t pos;

    /* Each product is shifted before it is accumulated. */
    for (pos = 0; pos < dim_seq; ++pos) {
      acc += (seq1[pos] * window[pos]) >> right_shifts;
    }

    cross_correlation[lag] = acc;
    window += step_seq2;
  }
}
|
104
third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c
vendored
Normal file
104
third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_mips.c
vendored
Normal file
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
/*
 * MIPS inline-assembly version of WebRtcSpl_CrossCorrelation().
 *
 * For every output, the assembly sums (seq1[j] * seq2[j]) >> right_shifts
 * over the sequence (the shift is applied to each product), stores the
 * 32-bit sum through cross_correlation, and then moves the start of the
 * seq2 window by step_seq2 elements.
 *
 * The code is split into an even-length path (labels 1/2) and an odd-length
 * path (labels 3/4/5); both process two samples per inner iteration.
 *
 * NOTE(review): the outer loops and the even-path inner loop test their
 * counters at the bottom, so they run at least once even when
 * dim_cross_correlation == 0 or (even path) dim_seq == 0 -- confirm that
 * callers never pass zero.
 */
void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
|
||||
const int16_t* seq1,
|
||||
const int16_t* seq2,
|
||||
size_t dim_seq,
|
||||
size_t dim_cross_correlation,
|
||||
int right_shifts,
|
||||
int step_seq2) {
|
||||
|
||||
/* Scratch registers for the assembly; the initial values are overwritten. */
int32_t t0 = 0, t1 = 0, t2 = 0, t3 = 0, sum = 0;
|
||||
/* Running pointers used inside the inner loops. */
int16_t *pseq2 = NULL;
|
||||
int16_t *pseq1 = NULL;
|
||||
/* Start of each sequence for the current output (const is cast away). */
int16_t *pseq1_0 = (int16_t*)&seq1[0];
|
||||
int16_t *pseq2_0 = (int16_t*)&seq2[0];
|
||||
/* Inner-loop counter: number of sample pairs left. */
int k = 0;
|
||||
|
||||
__asm __volatile (
|
||||
".set push \n\t"
|
||||
/* noreorder: the instruction after each branch is its delay slot. */
".set noreorder \n\t"
|
||||
/* step_seq2 *= 2: element step -> byte step for int16_t data. */
"sll %[step_seq2], %[step_seq2], 1 \n\t"
|
||||
/* Odd dim_seq jumps to label 3; even dim_seq falls through to label 1. */
"andi %[t0], %[dim_seq], 1 \n\t"
|
||||
"bgtz %[t0], 3f \n\t"
|
||||
" nop \n\t"
|
||||
/* Label 1: even path, start of one output value. */
"1: \n\t"
|
||||
"move %[pseq1], %[pseq1_0] \n\t"
|
||||
"move %[pseq2], %[pseq2_0] \n\t"
|
||||
/* k = dim_seq / 2 sample pairs. */
"sra %[k], %[dim_seq], 1 \n\t"
|
||||
"addiu %[dim_cc], %[dim_cc], -1 \n\t"
|
||||
/* sum = 0 */
"xor %[sum], %[sum], %[sum] \n\t"
|
||||
/* Label 2: inner loop, two products per iteration. */
"2: \n\t"
|
||||
"lh %[t0], 0(%[pseq1]) \n\t"
|
||||
"lh %[t1], 0(%[pseq2]) \n\t"
|
||||
"lh %[t2], 2(%[pseq1]) \n\t"
|
||||
"lh %[t3], 2(%[pseq2]) \n\t"
|
||||
"mul %[t0], %[t0], %[t1] \n\t"
|
||||
"addiu %[k], %[k], -1 \n\t"
|
||||
"mul %[t2], %[t2], %[t3] \n\t"
|
||||
"addiu %[pseq1], %[pseq1], 4 \n\t"
|
||||
"addiu %[pseq2], %[pseq2], 4 \n\t"
|
||||
/* Each product is shifted right arithmetically before accumulation. */
"srav %[t0], %[t0], %[right_shifts] \n\t"
|
||||
"addu %[sum], %[sum], %[t0] \n\t"
|
||||
"srav %[t2], %[t2], %[right_shifts] \n\t"
|
||||
"bgtz %[k], 2b \n\t"
|
||||
/* Delay slot: add the second product of the pair. */
" addu %[sum], %[sum], %[t2] \n\t"
|
||||
/* Advance the seq2 window and store this output. */
"addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
|
||||
"sw %[sum], 0(%[cc]) \n\t"
|
||||
"bgtz %[dim_cc], 1b \n\t"
|
||||
/* Delay slot: move to the next int32_t output slot. */
" addiu %[cc], %[cc], 4 \n\t"
|
||||
/* Even path done: skip the odd path. */
"b 6f \n\t"
|
||||
" nop \n\t"
|
||||
/* Label 3: odd path, start of one output value. */
"3: \n\t"
|
||||
"move %[pseq1], %[pseq1_0] \n\t"
|
||||
"move %[pseq2], %[pseq2_0] \n\t"
|
||||
"sra %[k], %[dim_seq], 1 \n\t"
|
||||
"addiu %[dim_cc], %[dim_cc], -1 \n\t"
|
||||
/* No full pair (dim_seq == 1): go straight to the single-sample tail. */
"beqz %[k], 5f \n\t"
|
||||
/* Delay slot: sum = 0 (executed whether or not the branch is taken). */
" xor %[sum], %[sum], %[sum] \n\t"
|
||||
/* Label 4: inner loop over sample pairs, same body as label 2. */
"4: \n\t"
|
||||
"lh %[t0], 0(%[pseq1]) \n\t"
|
||||
"lh %[t1], 0(%[pseq2]) \n\t"
|
||||
"lh %[t2], 2(%[pseq1]) \n\t"
|
||||
"lh %[t3], 2(%[pseq2]) \n\t"
|
||||
"mul %[t0], %[t0], %[t1] \n\t"
|
||||
"addiu %[k], %[k], -1 \n\t"
|
||||
"mul %[t2], %[t2], %[t3] \n\t"
|
||||
"addiu %[pseq1], %[pseq1], 4 \n\t"
|
||||
"addiu %[pseq2], %[pseq2], 4 \n\t"
|
||||
"srav %[t0], %[t0], %[right_shifts] \n\t"
|
||||
"addu %[sum], %[sum], %[t0] \n\t"
|
||||
"srav %[t2], %[t2], %[right_shifts] \n\t"
|
||||
"bgtz %[k], 4b \n\t"
|
||||
" addu %[sum], %[sum], %[t2] \n\t"
|
||||
/* Label 5: the one remaining sample of an odd-length sequence. */
"5: \n\t"
|
||||
"lh %[t0], 0(%[pseq1]) \n\t"
|
||||
"lh %[t1], 0(%[pseq2]) \n\t"
|
||||
"mul %[t0], %[t0], %[t1] \n\t"
|
||||
"srav %[t0], %[t0], %[right_shifts] \n\t"
|
||||
"addu %[sum], %[sum], %[t0] \n\t"
|
||||
"addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
|
||||
"sw %[sum], 0(%[cc]) \n\t"
|
||||
"bgtz %[dim_cc], 3b \n\t"
|
||||
" addiu %[cc], %[cc], 4 \n\t"
|
||||
/* Label 6: common exit. */
"6: \n\t"
|
||||
".set pop \n\t"
|
||||
/* Outputs: "+r" operands are read and written, "=&r" are scratch. */
: [step_seq2] "+r" (step_seq2), [t0] "=&r" (t0), [t1] "=&r" (t1),
|
||||
[t2] "=&r" (t2), [t3] "=&r" (t3), [pseq1] "=&r" (pseq1),
|
||||
[pseq2] "=&r" (pseq2), [pseq1_0] "+r" (pseq1_0), [pseq2_0] "+r" (pseq2_0),
|
||||
[k] "=&r" (k), [dim_cc] "+r" (dim_cross_correlation), [sum] "=&r" (sum),
|
||||
[cc] "+r" (cross_correlation)
|
||||
/* Inputs: only read by the assembly. */
: [dim_seq] "r" (dim_seq), [right_shifts] "r" (right_shifts)
|
||||
/* Clobbers: hi/lo are declared for the multiplies; "memory" for the sw stores. */
: "hi", "lo", "memory"
|
||||
);
|
||||
}
|
87
third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
vendored
Normal file
87
third_party/webrtc/src/webrtc/common_audio/signal_processing/cross_correlation_neon.c
vendored
Normal file
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
static inline void DotProductWithScaleNeon(int32_t* cross_correlation,
|
||||
const int16_t* vector1,
|
||||
const int16_t* vector2,
|
||||
size_t length,
|
||||
int scaling) {
|
||||
size_t i = 0;
|
||||
size_t len1 = length >> 3;
|
||||
size_t len2 = length & 7;
|
||||
int64x2_t sum0 = vdupq_n_s64(0);
|
||||
int64x2_t sum1 = vdupq_n_s64(0);
|
||||
|
||||
for (i = len1; i > 0; i -= 1) {
|
||||
int16x8_t seq1_16x8 = vld1q_s16(vector1);
|
||||
int16x8_t seq2_16x8 = vld1q_s16(vector2);
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8),
|
||||
vget_low_s16(seq2_16x8));
|
||||
int32x4_t tmp1 = vmull_high_s16(seq1_16x8, seq2_16x8);
|
||||
#else
|
||||
int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8),
|
||||
vget_low_s16(seq2_16x8));
|
||||
int32x4_t tmp1 = vmull_s16(vget_high_s16(seq1_16x8),
|
||||
vget_high_s16(seq2_16x8));
|
||||
#endif
|
||||
sum0 = vpadalq_s32(sum0, tmp0);
|
||||
sum1 = vpadalq_s32(sum1, tmp1);
|
||||
vector1 += 8;
|
||||
vector2 += 8;
|
||||
}
|
||||
|
||||
// Calculate the rest of the samples.
|
||||
int64_t sum_res = 0;
|
||||
for (i = len2; i > 0; i -= 1) {
|
||||
sum_res += WEBRTC_SPL_MUL_16_16(*vector1, *vector2);
|
||||
vector1++;
|
||||
vector2++;
|
||||
}
|
||||
|
||||
sum0 = vaddq_s64(sum0, sum1);
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
int64_t sum2 = vaddvq_s64(sum0);
|
||||
*cross_correlation = (int32_t)((sum2 + sum_res) >> scaling);
|
||||
#else
|
||||
int64x1_t shift = vdup_n_s64(-scaling);
|
||||
int64x1_t sum2 = vadd_s64(vget_low_s64(sum0), vget_high_s64(sum0));
|
||||
sum2 = vadd_s64(sum2, vdup_n_s64(sum_res));
|
||||
sum2 = vshl_s64(sum2, shift);
|
||||
vst1_lane_s32(cross_correlation, vreinterpret_s32_s64(sum2), 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * NEON version of WebRtcSpl_CrossCorrelation() for ARM32/64 platforms.
 *
 * Produces dim_cross_correlation outputs; output n is the scaled dot product
 * (see DotProductWithScaleNeon) of seq1 with the seq2 window that starts
 * n * step_seq2 elements after seq2.
 *
 * The seq2 window is advanced with signed pointer arithmetic, exactly as the
 * generic C version does. Multiplying the int step by the size_t loop index
 * would convert a negative step to a huge unsigned offset before the pointer
 * addition.
 */
void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
                                    const int16_t* seq1,
                                    const int16_t* seq2,
                                    size_t dim_seq,
                                    size_t dim_cross_correlation,
                                    int right_shifts,
                                    int step_seq2) {
  size_t i = 0;

  for (i = 0; i < dim_cross_correlation; i++) {
    DotProductWithScaleNeon(cross_correlation,
                            seq1,
                            seq2,
                            dim_seq,
                            right_shifts);
    cross_correlation++;
    seq2 += step_seq2;
  }
}
|
138
third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c
vendored
Normal file
138
third_party/webrtc/src/webrtc/common_audio/signal_processing/division_operations.c
vendored
Normal file
|
@ -0,0 +1,138 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains implementations of the divisions
|
||||
* WebRtcSpl_DivU32U16()
|
||||
* WebRtcSpl_DivW32W16()
|
||||
* WebRtcSpl_DivW32W16ResW16()
|
||||
* WebRtcSpl_DivResultInQ31()
|
||||
* WebRtcSpl_DivW32HiLow()
|
||||
*
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// Unsigned division of a 32-bit numerator by a 16-bit denominator.
// Returns 0xFFFFFFFF when den is zero instead of dividing.
uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den)
{
    if (den == 0)
    {
        return (uint32_t)0xFFFFFFFF;
    }

    return (uint32_t)(num / den);
}
|
||||
|
||||
// Signed division of a 32-bit numerator by a 16-bit denominator.
//
// Returns 0x7FFFFFFF when den is zero. The single quotient that does not fit
// in int32_t (-2^31 / -1) is also returned as 0x7FFFFFFF: evaluating it with
// the `/` operator is signed overflow (undefined behaviour; it raises a
// hardware exception on x86).
int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den)
{
    // Guard against division with 0
    if (den == 0)
    {
        return (int32_t)0x7FFFFFFF;
    }

    // Guard against the only overflowing quotient, INT32_MIN / -1.
    if (den == -1 && num == (-0x7FFFFFFF - 1))
    {
        return (int32_t)0x7FFFFFFF;
    }

    return (int32_t)(num / den);
}
|
||||
|
||||
// Signed division of a 32-bit numerator by a 16-bit denominator, with the
// quotient converted to int16_t (no saturation is applied).
//
// Returns 0x7FFF when den is zero. The division is carried out in 64 bits so
// that -2^31 / -1 cannot overflow; every other input produces the same
// quotient as a 32-bit division would.
int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den)
{
    // Guard against division with 0
    if (den == 0)
    {
        return (int16_t)0x7FFF;
    }

    return (int16_t)((int64_t)num / den);
}
|
||||
|
||||
// Bit-by-bit (shift and subtract) division producing 31 quotient bits.
//
// Works on the magnitudes of num and den and negates the result when exactly
// one of them is negative. Returns 0 immediately when num is 0.
int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
{
    int32_t remainder = num;
    int32_t divisor = den;
    int32_t quotient = 0;
    // True when exactly one operand is negative.
    const int negate = (num < 0) != (den < 0);
    int bit;

    if (num == 0)
    {
        return 0;
    }

    if (num < 0)
    {
        remainder = -num;
    }
    if (den < 0)
    {
        divisor = -den;
    }

    // One quotient bit per pass, most significant first.
    for (bit = 0; bit < 31; bit++)
    {
        quotient <<= 1;
        remainder <<= 1;
        if (remainder >= divisor)
        {
            remainder -= divisor;
            quotient++;
        }
    }

    return negate ? -quotient : quotient;
}
|
||||
|
||||
// Divides num by a denominator supplied as a (den_hi, den_low) pair.
//
// The reciprocal of the denominator is estimated from den_hi and refined
// once with  1/den ~= approx * (2.0 - den * approx); num is then multiplied
// by that reciprocal using 16x16-bit partial products. The Q-format notes
// below are carried over from the original author's comments.
int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
{
    // First estimate of 1/den from the high word only (noted as Q14).
    const int16_t approx =
        (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);

    // den * approx
    const int32_t den_approx =
        ((den_hi * approx) << 1) + (((den_low * approx) >> 15) << 1);

    // 2.0 - den * approx (noted as Q30), split into hi and low parts.
    const int32_t correction = (int32_t)0x7fffffffL - den_approx;
    const int16_t corr_hi = (int16_t)(correction >> 16);
    const int16_t corr_low =
        (int16_t)((correction - ((int32_t)corr_hi << 16)) >> 1);

    // Refined 1/den (noted as Q29), split into hi and low parts.
    const int32_t recip =
        ((corr_hi * approx) + ((corr_low * approx) >> 15)) << 1;
    const int16_t recip_hi = (int16_t)(recip >> 16);
    const int16_t recip_low =
        (int16_t)((recip - ((int32_t)recip_hi << 16)) >> 1);

    // num split into hi and low parts.
    const int16_t num_hi = (int16_t)(num >> 16);
    const int16_t num_low =
        (int16_t)((num - ((int32_t)num_hi << 16)) >> 1);

    // num * (1/den) from three partial products (noted as Q28).
    const int32_t quotient_q28 = (num_hi * recip_hi) +
                                 ((num_hi * recip_low) >> 15) +
                                 ((num_low * recip_hi) >> 15);

    // Convert from Q28 to Q31.
    return WEBRTC_SPL_LSHIFT_W32(quotient_q28, 3);
}
|
32
third_party/webrtc/src/webrtc/common_audio/signal_processing/dot_product_with_scale.c
vendored
Normal file
32
third_party/webrtc/src/webrtc/common_audio/signal_processing/dot_product_with_scale.c
vendored
Normal file
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
/*
 * Returns the sum over `length` samples of
 * (vector1[n] * vector2[n]) >> scaling, with the shift applied to each
 * product before it is added to the 32-bit sum.
 */
int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
                                      const int16_t* vector2,
                                      size_t length,
                                      int scaling) {
  int32_t acc = 0;
  size_t pos = 0;
  size_t remaining = length;

  /* Main part: four products per pass. */
  while (remaining >= 4) {
    acc += (vector1[pos] * vector2[pos]) >> scaling;
    acc += (vector1[pos + 1] * vector2[pos + 1]) >> scaling;
    acc += (vector1[pos + 2] * vector2[pos + 2]) >> scaling;
    acc += (vector1[pos + 3] * vector2[pos + 3]) >> scaling;
    pos += 4;
    remaining -= 4;
  }

  /* Up to three samples left over. */
  while (remaining > 0) {
    acc += (vector1[pos] * vector2[pos]) >> scaling;
    pos++;
    remaining--;
  }

  return acc;
}
|
48
third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast.c
vendored
Normal file
48
third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast.c
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// TODO(Bjornv): Change the function parameter order to WebRTC code style.
// C version of WebRtcSpl_DownsampleFast() for generic platforms.
//
// Filters data_in with `coefficients` (Q12) and keeps every `factor`-th
// result, starting at index `delay`. Each output is
//   sat16((2048 + sum_j coefficients[j] * data_in[i - j]) >> 12).
// Returns 0 on success and -1 when data_out_length or coefficients_length is
// zero or data_in is shorter than the last index needed.
int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
                              size_t data_in_length,
                              int16_t* data_out,
                              size_t data_out_length,
                              const int16_t* __restrict coefficients,
                              size_t coefficients_length,
                              int factor,
                              size_t delay) {
  // One past the input index of the last output sample.
  const size_t endpos = delay + factor * (data_out_length - 1) + 1;
  size_t pos;

  if (data_out_length == 0 || coefficients_length == 0 ||
      data_in_length < endpos) {
    return -1;
  }

  for (pos = delay; pos < endpos; pos += factor) {
    int32_t acc = 2048;  // Rounding offset, 0.5 in Q12.
    size_t tap;

    for (tap = 0; tap < coefficients_length; tap++) {
      acc += coefficients[tap] * data_in[pos - tap];  // Q12.
    }

    // Back to Q0, then saturate to 16 bits.
    *data_out = WebRtcSpl_SatW32ToW16(acc >> 12);
    data_out++;
  }

  return 0;
}
|
169
third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_mips.c
vendored
Normal file
169
third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_mips.c
vendored
Normal file
|
@ -0,0 +1,169 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// Version of WebRtcSpl_DownsampleFast() for MIPS platforms.
//
// Filters data_in with `coefficients` (Q12) and writes every `factor`-th
// result, starting at input index `delay`, to data_out as a saturated
// 16-bit value. Returns 0 on success and -1 when data_out_length or
// coefficients_length is zero or data_in is shorter than the last index
// needed.
//
// Two assembly variants exist: one for the MIPS DSP R2 extension
// (MIPS_DSP_R2_LE) and a generic one, whose final saturation uses either a
// DSP R1 instruction (MIPS_DSP_R1_LE) or slt/movn against max_16/min_16.
//
// data_out is incremented inside the assembly, so it is declared as a
// read-write ("+r") operand; an input-only operand must not be modified by
// the assembly.
int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
                                  size_t data_in_length,
                                  int16_t* data_out,
                                  size_t data_out_length,
                                  const int16_t* __restrict coefficients,
                                  size_t coefficients_length,
                                  int factor,
                                  size_t delay) {
  int i;
  int j;
  int k;
  int32_t out_s32 = 0;
  // One past the input index of the last output sample.
  size_t endpos = delay + factor * (data_out_length - 1) + 1;

  int32_t tmp1, tmp2, tmp3, tmp4, factor_2;
  int16_t* p_coefficients;
  int16_t* p_data_in;
  // Input position of the current output / start of the coefficient table.
  int16_t* p_data_in_0 = (int16_t*)&data_in[delay];
  int16_t* p_coefficients_0 = (int16_t*)&coefficients[0];
#if !defined(MIPS_DSP_R1_LE)
  // Saturation limits for the slt/movn clamp in the generic variant.
  int32_t max_16 = 0x7FFF;
  int32_t min_16 = 0xFFFF8000;
#endif  // #if !defined(MIPS_DSP_R1_LE)

  // Return error if any of the running conditions doesn't meet.
  if (data_out_length == 0 || coefficients_length == 0
                           || data_in_length < endpos) {
    return -1;
  }
#if defined(MIPS_DSP_R2_LE)
  // DSP R2 variant: four taps per inner iteration using paired-halfword
  // dot-product accumulation into $ac0; `mult $zero, $zero` clears the
  // accumulator and extr_r.w extracts it shifted right by 12.
  __asm __volatile (
    ".set        push                                                \n\t"
    ".set        noreorder                                           \n\t"
    "subu        %[i],            %[endpos],       %[delay]          \n\t"
    "sll         %[factor_2],     %[factor],       1                 \n\t"
   "1:                                                               \n\t"
    "move        %[p_data_in],    %[p_data_in_0]                     \n\t"
    "mult        $zero,           $zero                              \n\t"
    "move        %[p_coefs],      %[p_coefs_0]                       \n\t"
    "sra         %[j],            %[coef_length],  2                 \n\t"
    "beq         %[j],            $zero,           3f                \n\t"
    " andi       %[k],            %[coef_length],  3                 \n\t"
   "2:                                                               \n\t"
    "lwl         %[tmp1],         1(%[p_data_in])                    \n\t"
    "lwl         %[tmp2],         3(%[p_coefs])                      \n\t"
    "lwl         %[tmp3],         -3(%[p_data_in])                   \n\t"
    "lwl         %[tmp4],         7(%[p_coefs])                      \n\t"
    "lwr         %[tmp1],         -2(%[p_data_in])                   \n\t"
    "lwr         %[tmp2],         0(%[p_coefs])                      \n\t"
    "lwr         %[tmp3],         -6(%[p_data_in])                   \n\t"
    "lwr         %[tmp4],         4(%[p_coefs])                      \n\t"
    "packrl.ph   %[tmp1],         %[tmp1],         %[tmp1]           \n\t"
    "packrl.ph   %[tmp3],         %[tmp3],         %[tmp3]           \n\t"
    "dpa.w.ph    $ac0,            %[tmp1],         %[tmp2]           \n\t"
    "dpa.w.ph    $ac0,            %[tmp3],         %[tmp4]           \n\t"
    "addiu       %[j],            %[j],            -1                \n\t"
    "addiu       %[p_data_in],    %[p_data_in],    -8                \n\t"
    "bgtz        %[j],            2b                                 \n\t"
    " addiu      %[p_coefs],      %[p_coefs],      8                 \n\t"
   "3:                                                               \n\t"
    "beq         %[k],            $zero,           5f                \n\t"
    " nop                                                            \n\t"
   "4:                                                               \n\t"
    "lhu         %[tmp1],         0(%[p_data_in])                    \n\t"
    "lhu         %[tmp2],         0(%[p_coefs])                      \n\t"
    "addiu       %[p_data_in],    %[p_data_in],    -2                \n\t"
    "addiu       %[k],            %[k],            -1                \n\t"
    "dpa.w.ph    $ac0,            %[tmp1],         %[tmp2]           \n\t"
    "bgtz        %[k],            4b                                 \n\t"
    " addiu      %[p_coefs],      %[p_coefs],      2                 \n\t"
   "5:                                                               \n\t"
    "extr_r.w    %[out_s32],      $ac0,            12                \n\t"
    "addu        %[p_data_in_0],  %[p_data_in_0],  %[factor_2]       \n\t"
    "subu        %[i],            %[i],            %[factor]         \n\t"
    "shll_s.w    %[out_s32],      %[out_s32],      16                \n\t"
    "sra         %[out_s32],      %[out_s32],      16                \n\t"
    "sh          %[out_s32],      0(%[data_out])                     \n\t"
    "bgtz        %[i],            1b                                 \n\t"
    " addiu      %[data_out],     %[data_out],     2                 \n\t"
    ".set        pop                                                 \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
      [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in),
      [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
      [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
      [i] "=&r" (i), [k] "=&r" (k), [data_out] "+r" (data_out)
    : [coef_length] "r" (coefficients_length),
      [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
      [delay] "r" (delay), [factor] "r" (factor)
    : "memory", "hi", "lo"
 );
#else  // #if defined(MIPS_DSP_R2_LE)
  // Generic variant: two taps per inner iteration, one optional tail tap,
  // accumulator preloaded with the 2048 rounding offset.
  __asm __volatile (
    ".set        push                                                \n\t"
    ".set        noreorder                                           \n\t"
    "sll         %[factor_2],     %[factor],       1                 \n\t"
    "subu        %[i],            %[endpos],       %[delay]          \n\t"
   "1:                                                               \n\t"
    "move        %[p_data_in],    %[p_data_in_0]                     \n\t"
    "addiu       %[out_s32],      $zero,           2048              \n\t"
    "move        %[p_coefs],      %[p_coefs_0]                       \n\t"
    "sra         %[j],            %[coef_length],  1                 \n\t"
    "beq         %[j],            $zero,           3f                \n\t"
    " andi       %[k],            %[coef_length],  1                 \n\t"
   "2:                                                               \n\t"
    "lh          %[tmp1],         0(%[p_data_in])                    \n\t"
    "lh          %[tmp2],         0(%[p_coefs])                      \n\t"
    "lh          %[tmp3],         -2(%[p_data_in])                   \n\t"
    "lh          %[tmp4],         2(%[p_coefs])                      \n\t"
    "mul         %[tmp1],         %[tmp1],         %[tmp2]           \n\t"
    "addiu       %[p_coefs],      %[p_coefs],      4                 \n\t"
    "mul         %[tmp3],         %[tmp3],         %[tmp4]           \n\t"
    "addiu       %[j],            %[j],            -1                \n\t"
    "addiu       %[p_data_in],    %[p_data_in],    -4                \n\t"
    "addu        %[tmp1],         %[tmp1],         %[tmp3]           \n\t"
    "bgtz        %[j],            2b                                 \n\t"
    " addu       %[out_s32],      %[out_s32],      %[tmp1]           \n\t"
   "3:                                                               \n\t"
    "beq         %[k],            $zero,           4f                \n\t"
    " nop                                                            \n\t"
    "lh          %[tmp1],         0(%[p_data_in])                    \n\t"
    "lh          %[tmp2],         0(%[p_coefs])                      \n\t"
    "mul         %[tmp1],         %[tmp1],         %[tmp2]           \n\t"
    "addu        %[out_s32],      %[out_s32],      %[tmp1]           \n\t"
   "4:                                                               \n\t"
    "sra         %[out_s32],      %[out_s32],      12                \n\t"
    "addu        %[p_data_in_0],  %[p_data_in_0],  %[factor_2]       \n\t"
#if defined(MIPS_DSP_R1_LE)
    "shll_s.w    %[out_s32],      %[out_s32],      16                \n\t"
    "sra         %[out_s32],      %[out_s32],      16                \n\t"
#else  // #if defined(MIPS_DSP_R1_LE)
    "slt         %[tmp1],         %[max_16],       %[out_s32]        \n\t"
    "movn        %[out_s32],      %[max_16],       %[tmp1]           \n\t"
    "slt         %[tmp1],         %[out_s32],      %[min_16]         \n\t"
    "movn        %[out_s32],      %[min_16],       %[tmp1]           \n\t"
#endif  // #if defined(MIPS_DSP_R1_LE)
    "subu        %[i],            %[i],            %[factor]         \n\t"
    "sh          %[out_s32],      0(%[data_out])                     \n\t"
    "bgtz        %[i],            1b                                 \n\t"
    " addiu      %[data_out],     %[data_out],     2                 \n\t"
    ".set        pop                                                 \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
      [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), [k] "=&r" (k),
      [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
      [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
      [i] "=&r" (i), [data_out] "+r" (data_out)
    : [coef_length] "r" (coefficients_length),
      [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
#if !defined(MIPS_DSP_R1_LE)
      [max_16] "r" (max_16), [min_16] "r" (min_16),
#endif  // #if !defined(MIPS_DSP_R1_LE)
      [delay] "r" (delay), [factor] "r" (factor)
    : "memory", "hi", "lo"
  );
#endif  // #if defined(MIPS_DSP_R2_LE)
  return 0;
}
|
217
third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c
vendored
Normal file
217
third_party/webrtc/src/webrtc/common_audio/signal_processing/downsample_fast_neon.c
vendored
Normal file
|
@ -0,0 +1,217 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
// NEON intrinsics version of WebRtcSpl_DownsampleFast()
|
||||
// for ARM 32-bit/64-bit platforms.
|
||||
int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
|
||||
size_t data_in_length,
|
||||
int16_t* data_out,
|
||||
size_t data_out_length,
|
||||
const int16_t* __restrict coefficients,
|
||||
size_t coefficients_length,
|
||||
int factor,
|
||||
size_t delay) {
|
||||
size_t i = 0;
|
||||
size_t j = 0;
|
||||
int32_t out_s32 = 0;
|
||||
size_t endpos = delay + factor * (data_out_length - 1) + 1;
|
||||
size_t res = data_out_length & 0x7;
|
||||
size_t endpos1 = endpos - factor * res;
|
||||
|
||||
// Return error if any of the running conditions doesn't meet.
|
||||
if (data_out_length == 0 || coefficients_length == 0
|
||||
|| data_in_length < endpos) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// First part, unroll the loop 8 times, with 3 subcases
|
||||
// (factor == 2, 4, others).
|
||||
switch (factor) {
|
||||
case 2: {
|
||||
for (i = delay; i < endpos1; i += 16) {
|
||||
// Round value, 0.5 in Q12.
|
||||
int32x4_t out32x4_0 = vdupq_n_s32(2048);
|
||||
int32x4_t out32x4_1 = vdupq_n_s32(2048);
|
||||
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
// Unroll the loop 2 times.
|
||||
for (j = 0; j < coefficients_length - 1; j += 2) {
|
||||
int32x2_t coeff32 = vld1_dup_s32((int32_t*)&coefficients[j]);
|
||||
int16x4_t coeff16x4 = vreinterpret_s16_s32(coeff32);
|
||||
int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j - 1]);
|
||||
|
||||
// Mul and accumulate low 64-bit data.
|
||||
int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
|
||||
int16x4_t in16x4_1 = vget_low_s16(in16x8x2.val[1]);
|
||||
out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 1);
|
||||
out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_1, coeff16x4, 0);
|
||||
|
||||
// Mul and accumulate high 64-bit data.
|
||||
// TODO: vget_high_s16 need extra cost on ARM64. This could be
|
||||
// replaced by vmlal_high_lane_s16. But for the interface of
|
||||
// vmlal_high_lane_s16, there is a bug in gcc 4.9.
|
||||
// This issue need to be tracked in the future.
|
||||
int16x4_t in16x4_2 = vget_high_s16(in16x8x2.val[0]);
|
||||
int16x4_t in16x4_3 = vget_high_s16(in16x8x2.val[1]);
|
||||
out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_2, coeff16x4, 1);
|
||||
out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 0);
|
||||
}
|
||||
|
||||
for (; j < coefficients_length; j++) {
|
||||
int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
|
||||
int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);
|
||||
|
||||
// Mul and accumulate low 64-bit data.
|
||||
int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
|
||||
out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
|
||||
|
||||
// Mul and accumulate high 64-bit data.
|
||||
// TODO: vget_high_s16 need extra cost on ARM64. This could be
|
||||
// replaced by vmlal_high_lane_s16. But for the interface of
|
||||
// vmlal_high_lane_s16, there is a bug in gcc 4.9.
|
||||
// This issue need to be tracked in the future.
|
||||
int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]);
|
||||
out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
|
||||
}
|
||||
#else
|
||||
// On ARMv7, the loop unrolling 2 times results in performance
|
||||
// regression.
|
||||
for (j = 0; j < coefficients_length; j++) {
|
||||
int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
|
||||
int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);
|
||||
|
||||
// Mul and accumulate.
|
||||
int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]);
|
||||
int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]);
|
||||
out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
|
||||
out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Saturate and store the output.
|
||||
int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
|
||||
int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
|
||||
vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
|
||||
data_out += 8;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
for (i = delay; i < endpos1; i += 32) {
|
||||
// Round value, 0.5 in Q12.
|
||||
int32x4_t out32x4_0 = vdupq_n_s32(2048);
|
||||
int32x4_t out32x4_1 = vdupq_n_s32(2048);
|
||||
|
||||
// Unroll the loop 4 times.
|
||||
for (j = 0; j < coefficients_length - 3; j += 4) {
|
||||
int16x4_t coeff16x4 = vld1_s16(&coefficients[j]);
|
||||
int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j - 3]);
|
||||
|
||||
// Mul and accumulate low 64-bit data.
|
||||
int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]);
|
||||
int16x4_t in16x4_2 = vget_low_s16(in16x8x4.val[1]);
|
||||
int16x4_t in16x4_4 = vget_low_s16(in16x8x4.val[2]);
|
||||
int16x4_t in16x4_6 = vget_low_s16(in16x8x4.val[3]);
|
||||
out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 3);
|
||||
out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_2, coeff16x4, 2);
|
||||
out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_4, coeff16x4, 1);
|
||||
out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_6, coeff16x4, 0);
|
||||
|
||||
// Mul and accumulate high 64-bit data.
|
||||
// TODO: vget_high_s16 need extra cost on ARM64. This could be
|
||||
// replaced by vmlal_high_lane_s16. But for the interface of
|
||||
// vmlal_high_lane_s16, there is a bug in gcc 4.9.
|
||||
// This issue need to be tracked in the future.
|
||||
int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]);
|
||||
int16x4_t in16x4_3 = vget_high_s16(in16x8x4.val[1]);
|
||||
int16x4_t in16x4_5 = vget_high_s16(in16x8x4.val[2]);
|
||||
int16x4_t in16x4_7 = vget_high_s16(in16x8x4.val[3]);
|
||||
out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 3);
|
||||
out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 2);
|
||||
out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_5, coeff16x4, 1);
|
||||
out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_7, coeff16x4, 0);
|
||||
}
|
||||
|
||||
for (; j < coefficients_length; j++) {
|
||||
int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
|
||||
int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j]);
|
||||
|
||||
// Mul and accumulate low 64-bit data.
|
||||
int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]);
|
||||
out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
|
||||
|
||||
// Mul and accumulate high 64-bit data.
|
||||
// TODO: vget_high_s16 need extra cost on ARM64. This could be
|
||||
// replaced by vmlal_high_lane_s16. But for the interface of
|
||||
// vmlal_high_lane_s16, there is a bug in gcc 4.9.
|
||||
// This issue need to be tracked in the future.
|
||||
int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]);
|
||||
out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
|
||||
}
|
||||
|
||||
// Saturate and store the output.
|
||||
int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
|
||||
int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
|
||||
vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
|
||||
data_out += 8;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
for (i = delay; i < endpos1; i += factor * 8) {
|
||||
// Round value, 0.5 in Q12.
|
||||
int32x4_t out32x4_0 = vdupq_n_s32(2048);
|
||||
int32x4_t out32x4_1 = vdupq_n_s32(2048);
|
||||
|
||||
for (j = 0; j < coefficients_length; j++) {
|
||||
int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
|
||||
int16x4_t in16x4_0 = vld1_dup_s16(&data_in[i - j]);
|
||||
in16x4_0 = vld1_lane_s16(&data_in[i + factor - j], in16x4_0, 1);
|
||||
in16x4_0 = vld1_lane_s16(&data_in[i + factor * 2 - j], in16x4_0, 2);
|
||||
in16x4_0 = vld1_lane_s16(&data_in[i + factor * 3 - j], in16x4_0, 3);
|
||||
int16x4_t in16x4_1 = vld1_dup_s16(&data_in[i + factor * 4 - j]);
|
||||
in16x4_1 = vld1_lane_s16(&data_in[i + factor * 5 - j], in16x4_1, 1);
|
||||
in16x4_1 = vld1_lane_s16(&data_in[i + factor * 6 - j], in16x4_1, 2);
|
||||
in16x4_1 = vld1_lane_s16(&data_in[i + factor * 7 - j], in16x4_1, 3);
|
||||
|
||||
// Mul and accumulate.
|
||||
out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0);
|
||||
out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0);
|
||||
}
|
||||
|
||||
// Saturate and store the output.
|
||||
int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12);
|
||||
int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12);
|
||||
vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1));
|
||||
data_out += 8;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Second part, do the rest iterations (if any).
|
||||
for (; i < endpos; i += factor) {
|
||||
out_s32 = 2048; // Round value, 0.5 in Q12.
|
||||
|
||||
for (j = 0; j < coefficients_length; j++) {
|
||||
out_s32 = WebRtc_MulAccumW16(coefficients[j], data_in[i - j], out_s32);
|
||||
}
|
||||
|
||||
// Saturate and store the output.
|
||||
out_s32 >>= 12;
|
||||
*data_out++ = WebRtcSpl_SatW32ToW16(out_s32);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_Energy().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// Returns the sum of (vector[n] * vector[n]) >> scaling over vector_length
// samples, where scaling is obtained from WebRtcSpl_GetScalingSquare() and
// is also reported to the caller through *scale_factor.
int32_t WebRtcSpl_Energy(int16_t* vector,
                         size_t vector_length,
                         int* scale_factor)
{
    const int shift =
        WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
    int32_t energy = 0;
    size_t n;

    for (n = 0; n < vector_length; n++)
    {
        energy += (vector[n] * vector[n]) >> shift;
    }

    *scale_factor = shift;
    return energy;
}
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_FilterAR().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
/*
 * Recursive (AR) filter with a split high/low precision output.
 *
 * For every input sample x[i] the routine computes
 *     o = (x[i] << 12) - sum_{j=1..a_length-1} a[j] * y[i - j]
 * using both the high part (|filtered| / |state|) and the low part
 * (|filtered_low| / |state_low|) of earlier outputs. Outputs older than the
 * start of this call are taken from the state buffers, newest sample last.
 *
 * Parameters:
 *   a, a_length          - filter coefficients; a[0] is never read here.
 *   x, x_length          - input samples.
 *   state, state_length  - high part of the previous outputs; updated on
 *                          return.
 *   state_low            - low part of the previous outputs; indexed with
 *                          |state_length|, so it must be at least that long.
 *   state_low_length     - unused.
 *   filtered             - receives x_length rounded 16-bit outputs.
 *   filtered_low         - receives the residual o - (filtered[i] << 12).
 *   filtered_low_length  - unused.
 *
 * Returns x_length.
 */
size_t WebRtcSpl_FilterAR(const int16_t* a,
                          size_t a_length,
                          const int16_t* x,
                          size_t x_length,
                          int16_t* state,
                          size_t state_length,
                          int16_t* state_low,
                          size_t state_low_length,
                          int16_t* filtered,
                          int16_t* filtered_low,
                          size_t filtered_low_length)
{
    int32_t o;
    int32_t oLOW;
    size_t i, j, stop;

    /* Kept in the signature for callers; not needed by the computation. */
    (void)state_low_length;
    (void)filtered_low_length;

    for (i = 0; i < x_length; i++)
    {
        /* Multiply instead of shifting: x[i] may be negative. */
        o = (int32_t)x[i] * 4096;
        oLOW = 0;

        /* Taps whose history is already available in this call's output. */
        stop = (i < a_length) ? i + 1 : a_length;
        for (j = 1; j < stop; j++)
        {
            o -= a[j] * filtered[i - j];
            oLOW -= a[j] * filtered_low[i - j];
        }
        /* Remaining taps reach back into the saved state (newest last). */
        for (j = i + 1; j < a_length; j++)
        {
            o -= a[j] * state[state_length - (j - i)];
            oLOW -= a[j] * state_low[state_length - (j - i)];
        }

        o += (oLOW >> 12);
        filtered[i] = (int16_t)((o + (int32_t)2048) >> 12);
        filtered_low[i] = (int16_t)(o - ((int32_t)filtered[i] * 4096));
    }

    // Save the filter state
    if (x_length >= state_length)
    {
        WebRtcSpl_CopyFromEndW16(filtered, x_length, a_length - 1, state);
        WebRtcSpl_CopyFromEndW16(filtered_low, x_length, a_length - 1, state_low);
    } else
    {
        /* Fewer new samples than state entries: slide the old ones down. */
        for (i = 0; i < state_length - x_length; i++)
        {
            state[i] = state[i + x_length];
            state_low[i] = state_low[i + x_length];
        }
        /* Append the new outputs. The low part belongs in state_low; it used
         * to be written into state, clobbering the value stored just before. */
        for (i = 0; i < x_length; i++)
        {
            state[state_length - x_length + i] = filtered[i];
            state_low[state_length - x_length + i] = filtered_low[i];
        }
    }

    return x_length;
}
|
42
third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12.c
vendored
Normal file
42
third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12.c
vendored
Normal file
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <assert.h>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// TODO(bjornv): Change the return type to report errors.
|
||||
|
||||
/*
 * Generic C version of the Q12 AR filter.
 *
 * For each n in [0, data_length):
 *   data_out[n] = round_q12(sat(coefficients[0] * data_in[n]
 *                 - sum_{j=1..coefficients_length-1}
 *                       coefficients[j] * data_out[n - j]))
 *
 * data_out is read at indices n - j, i.e. before the position being written,
 * so the caller must provide coefficients_length - 1 samples of history in
 * front of data_out.
 */
void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
                               int16_t* data_out,
                               const int16_t* __restrict coefficients,
                               size_t coefficients_length,
                               size_t data_length) {
  size_t n;

  assert(data_length > 0);
  assert(coefficients_length > 1);

  for (n = 0; n < data_length; n++) {
    int32_t feedback = 0;
    int32_t acc;
    size_t tap = coefficients_length;

    /* Walk the feedback taps from the oldest output to the newest. */
    while (--tap > 0) {
      feedback += coefficients[tap] * data_out[n - tap];
    }

    acc = coefficients[0] * data_in[n];
    acc -= feedback;

    /* Clamp so that adding the rounding offset and shifting fits 16 bits. */
    acc = WEBRTC_SPL_SAT(134215679, acc, -134217728);
    data_out[n] = (int16_t)((acc + 2048) >> 12);
  }
}
|
218
third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S
vendored
Normal file
218
third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S
vendored
Normal file
|
@ -0,0 +1,218 @@
|
|||
@
|
||||
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
@
|
||||
@ Use of this source code is governed by a BSD-style license
|
||||
@ that can be found in the LICENSE file in the root of the source
|
||||
@ tree. An additional intellectual property rights grant can be found
|
||||
@ in the file PATENTS. All contributing project authors may
|
||||
@ be found in the AUTHORS file in the root of the source tree.
|
||||
@
|
||||
|
||||
@ This file contains the function WebRtcSpl_FilterARFastQ12(), optimized for
|
||||
@ ARMv7 platform. The description header can be found in
|
||||
@ signal_processing_library.h
|
||||
@
|
||||
@ Output is bit-exact with the generic C code as in filter_ar_fast_q12.c, and
|
||||
@ the reference C code at end of this file.
|
||||
|
||||
@ Assumptions:
|
||||
@ (1) data_length > 0
|
||||
@ (2) coefficients_length > 1
|
||||
|
||||
@ Register usage:
|
||||
@
|
||||
@ r0: &data_in[i]
|
||||
@ r1: &data_out[i], for result output
|
||||
@ r2: &coefficients[0]
|
||||
@ r3: coefficients_length
|
||||
@ r4: Iteration counter for the outer loop.
|
||||
@ r5: data_out[j] as multiplication inputs
|
||||
@ r6: Calculated value for output data_out[]; iteration counter for inner loop
|
||||
@ r7: Partial sum of a filtering multiplication results
|
||||
@ r8: Partial sum of a filtering multiplication results
|
||||
@ r9: &data_out[], for filtering input; data_in[i]
|
||||
@ r10: coefficients[j]
|
||||
@ r11: Scratch
|
||||
@ r12: &coefficients[j]
|
||||
|
||||
#include "webrtc/system_wrappers/interface/asm_defines.h"
|
||||
|
||||
@ WebRtcSpl_FilterARFastQ12 entry point.
@ r0-r3 carry the first four arguments (data_in, data_out, coefficients,
@ coefficients_length); data_length is read from the stack. LOOP_LENGTH
@ produces two outputs per pass (sum1 in r7, sum2 in r8); a remaining single
@ sample is handled at ODD_LENGTH.
GLOBAL_FUNCTION WebRtcSpl_FilterARFastQ12
|
||||
.align 2
|
||||
DEFINE_FUNCTION WebRtcSpl_FilterARFastQ12
|
||||
push {r4-r11}
|
||||
|
||||
@ The push above stored 8 registers (32 bytes), so [sp, #32] is the first
@ stack-passed argument.
@ NOTE(review): ldrsh loads a sign-extended 16-bit halfword, while the
@ reference C prototype at the end of this file declares data_length as
@ size_t; a length that does not fit in 16 bits would be misread. Confirm
@ whether a full-word ldr is intended.
ldrsh r12, [sp, #32] @ data_length
|
||||
subs r4, r12, #1
|
||||
beq ODD_LENGTH @ jump if data_length == 1
|
||||
|
||||
LOOP_LENGTH:
|
||||
add r12, r2, r3, lsl #1
|
||||
sub r12, #4 @ &coefficients[coefficients_length - 2]
|
||||
sub r9, r1, r3, lsl #1
|
||||
add r9, #2 @ &data_out[i - coefficients_length + 1]
|
||||
ldr r5, [r9], #4 @ data_out[i - coefficients_length + {1,2}]
|
||||
|
||||
mov r7, #0 @ sum1
|
||||
mov r8, #0 @ sum2
|
||||
subs r6, r3, #3 @ Iteration counter for inner loop.
|
||||
beq ODD_A_LENGTH @ branch if coefficients_length == 3
|
||||
blt POST_LOOP_A_LENGTH @ branch if coefficients_length == 2
|
||||
|
||||
@ Each pass consumes two coefficients (one 32-bit load) and updates both sums.
LOOP_A_LENGTH:
|
||||
ldr r10, [r12], #-4 @ coefficients[j - 1], coefficients[j]
|
||||
subs r6, #2
|
||||
smlatt r8, r10, r5, r8 @ sum2 += coefficients[j] * data_out[i - j + 1];
|
||||
smlatb r7, r10, r5, r7 @ sum1 += coefficients[j] * data_out[i - j];
|
||||
smlabt r7, r10, r5, r7 @ coefficients[j - 1] * data_out[i - j + 1];
|
||||
ldr r5, [r9], #4 @ data_out[i - j + 2], data_out[i - j + 3]
|
||||
smlabb r8, r10, r5, r8 @ coefficients[j - 1] * data_out[i - j + 2];
|
||||
bgt LOOP_A_LENGTH
|
||||
blt POST_LOOP_A_LENGTH
|
||||
|
||||
ODD_A_LENGTH:
|
||||
ldrsh r10, [r12, #2] @ Filter coefficients coefficients[2]
|
||||
sub r12, #2 @ &coefficients[0]
|
||||
smlabb r7, r10, r5, r7 @ sum1 += coefficients[2] * data_out[i - 2];
|
||||
smlabt r8, r10, r5, r8 @ sum2 += coefficients[2] * data_out[i - 1];
|
||||
ldr r5, [r9, #-2] @ data_out[i - 1], data_out[i]
|
||||
|
||||
POST_LOOP_A_LENGTH:
|
||||
ldr r10, [r12] @ coefficients[0], coefficients[1]
|
||||
smlatb r7, r10, r5, r7 @ sum1 += coefficients[1] * data_out[i - 1];
|
||||
|
||||
ldr r9, [r0], #4 @ data_in[i], data_in[i + 1]
|
||||
smulbb r6, r10, r9 @ output1 = coefficients[0] * data_in[i];
|
||||
sub r6, r7 @ output1 -= sum1;
|
||||
|
||||
@ Round and saturate output1: r11 = bits [27:12] sign-extended, r7 =
@ saturate16(output1 >> 12). The Q12 rounding offset (2048) is added only when
@ the two agree, i.e. when the shifted value already fits in 16 bits.
sbfx r11, r6, #12, #16
|
||||
ssat r7, #16, r6, asr #12
|
||||
cmp r7, r11
|
||||
addeq r6, r6, #2048
|
||||
ssat r6, #16, r6, asr #12
|
||||
strh r6, [r1], #2 @ Store data_out[i]
|
||||
|
||||
smlatb r8, r10, r6, r8 @ sum2 += coefficients[1] * data_out[i];
|
||||
smulbt r6, r10, r9 @ output2 = coefficients[0] * data_in[i + 1];
|
||||
sub r6, r8 @ output2 -= sum2;
|
||||
|
||||
@ Same round-and-saturate sequence as above, applied to output2.
sbfx r11, r6, #12, #16
|
||||
ssat r7, #16, r6, asr #12
|
||||
cmp r7, r11
|
||||
addeq r6, r6, #2048
|
||||
ssat r6, #16, r6, asr #12
|
||||
strh r6, [r1], #2 @ Store data_out[i + 1]
|
||||
|
||||
subs r4, #2
|
||||
bgt LOOP_LENGTH
|
||||
blt END @ For even data_length, it's done. Jump to END.
|
||||
|
||||
@ Process i = data_length -1, for the case of an odd length.
|
||||
ODD_LENGTH:
|
||||
add r12, r2, r3, lsl #1
|
||||
sub r12, #4 @ &coefficients[coefficients_length - 2]
|
||||
sub r9, r1, r3, lsl #1
|
||||
add r9, #2 @ &data_out[i - coefficients_length + 1]
|
||||
@ r7 and r8 both hold partial sums of sum1 here; both are subtracted from
@ output1 after the loop.
mov r7, #0 @ sum1
|
||||
mov r8, #0 @ sum1
|
||||
subs r6, r3, #2 @ inner loop counter
|
||||
beq EVEN_A_LENGTH @ branch if coefficients_length == 2
|
||||
|
||||
LOOP2_A_LENGTH:
|
||||
ldr r10, [r12], #-4 @ coefficients[j - 1], coefficients[j]
|
||||
ldr r5, [r9], #4 @ data_out[i - j], data_out[i - j + 1]
|
||||
subs r6, #2
|
||||
smlatb r7, r10, r5, r7 @ sum1 += coefficients[j] * data_out[i - j];
|
||||
smlabt r8, r10, r5, r8 @ coefficients[j - 1] * data_out[i - j + 1];
|
||||
bgt LOOP2_A_LENGTH
|
||||
addlt r12, #2
|
||||
blt POST_LOOP2_A_LENGTH
|
||||
|
||||
EVEN_A_LENGTH:
|
||||
ldrsh r10, [r12, #2] @ Filter coefficients coefficients[1]
|
||||
ldrsh r5, [r9] @ data_out[i - 1]
|
||||
smlabb r7, r10, r5, r7 @ sum1 += coefficients[1] * data_out[i - 1];
|
||||
|
||||
POST_LOOP2_A_LENGTH:
|
||||
ldrsh r10, [r12] @ Filter coefficients coefficients[0]
|
||||
ldrsh r9, [r0] @ data_in[i]
|
||||
smulbb r6, r10, r9 @ output1 = coefficients[0] * data_in[i];
|
||||
sub r6, r7 @ output1 -= sum1;
|
||||
sub r6, r8 @ output1 -= sum1;
|
||||
sbfx r8, r6, #12, #16
|
||||
ssat r7, #16, r6, asr #12
|
||||
cmp r7, r8
|
||||
addeq r6, r6, #2048
|
||||
ssat r6, #16, r6, asr #12
|
||||
strh r6, [r1] @ Store the data_out[i]
|
||||
|
||||
END:
|
||||
pop {r4-r11}
|
||||
bx lr
|
||||
|
||||
@Reference C code:
|
||||
@
|
||||
@void WebRtcSpl_FilterARFastQ12(int16_t* data_in,
|
||||
@ int16_t* data_out,
|
||||
@ int16_t* __restrict coefficients,
|
||||
@ size_t coefficients_length,
|
||||
@ size_t data_length) {
|
||||
@ size_t i = 0;
|
||||
@ size_t j = 0;
|
||||
@
|
||||
@ assert(data_length > 0);
|
||||
@ assert(coefficients_length > 1);
|
||||
@
|
||||
@ for (i = 0; i < data_length - 1; i += 2) {
|
||||
@ int32_t output1 = 0;
|
||||
@ int32_t sum1 = 0;
|
||||
@ int32_t output2 = 0;
|
||||
@ int32_t sum2 = 0;
|
||||
@
|
||||
@ for (j = coefficients_length - 1; j > 2; j -= 2) {
|
||||
@ sum1 += coefficients[j] * data_out[i - j];
|
||||
@ sum1 += coefficients[j - 1] * data_out[i - j + 1];
|
||||
@ sum2 += coefficients[j] * data_out[i - j + 1];
|
||||
@ sum2 += coefficients[j - 1] * data_out[i - j + 2];
|
||||
@ }
|
||||
@
|
||||
@ if (j == 2) {
|
||||
@ sum1 += coefficients[2] * data_out[i - 2];
|
||||
@ sum2 += coefficients[2] * data_out[i - 1];
|
||||
@ }
|
||||
@
|
||||
@ sum1 += coefficients[1] * data_out[i - 1];
|
||||
@ output1 = coefficients[0] * data_in[i];
|
||||
@ output1 -= sum1;
|
||||
@ // Saturate and store the output.
|
||||
@ output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728);
|
||||
@ data_out[i] = (int16_t)((output1 + 2048) >> 12);
|
||||
@
|
||||
@ sum2 += coefficients[1] * data_out[i];
|
||||
@ output2 = coefficients[0] * data_in[i + 1];
|
||||
@ output2 -= sum2;
|
||||
@ // Saturate and store the output.
|
||||
@ output2 = WEBRTC_SPL_SAT(134215679, output2, -134217728);
|
||||
@ data_out[i + 1] = (int16_t)((output2 + 2048) >> 12);
|
||||
@ }
|
||||
@
|
||||
@ if (i == data_length - 1) {
|
||||
@ int32_t output1 = 0;
|
||||
@ int32_t sum1 = 0;
|
||||
@
|
||||
@ for (j = coefficients_length - 1; j > 1; j -= 2) {
|
||||
@ sum1 += coefficients[j] * data_out[i - j];
|
||||
@ sum1 += coefficients[j - 1] * data_out[i - j + 1];
|
||||
@ }
|
||||
@
|
||||
@ if (j == 1) {
|
||||
@ sum1 += coefficients[1] * data_out[i - 1];
|
||||
@ }
|
||||
@
|
||||
@ output1 = coefficients[0] * data_in[i];
|
||||
@ output1 -= sum1;
|
||||
@ // Saturate and store the output.
|
||||
@ output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728);
|
||||
@ data_out[i] = (int16_t)((output1 + 2048) >> 12);
|
||||
@ }
|
||||
@}
|
140
third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c
vendored
Normal file
140
third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c
vendored
Normal file
|
@ -0,0 +1,140 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <assert.h>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// MIPS inline-assembly version of WebRtcSpl_FilterARFastQ12().
//
// For every input sample the hi/lo accumulator is seeded with
// coefficients[0] * data_in[n] (mult) and the feedback products
// coefficients[j] * data_out[n - j] are subtracted from it (msub), two taps
// per inner-loop pass. The result is rounded by 2048, shifted right by 12 and
// limited to the int16_t range before being stored to data_out.
//
// Two copies of the outer loop exist: label 1 is used when
// coefficients_length - 1 is even, label 3 when it is odd (one extra tap is
// handled after the paired loop).
//
// data_out is read at negative offsets, so the caller must supply
// coefficients_length - 1 samples of history in front of it.
void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
|
||||
int16_t* data_out,
|
||||
const int16_t* __restrict coefficients,
|
||||
size_t coefficients_length,
|
||||
size_t data_length) {
|
||||
// Scratch values for loaded samples / coefficients.
int r0, r1, r2, r3;
|
||||
// coef0 = coefficients[0]; offset = byte offset of the last coefficient.
int coef0, offset;
|
||||
// i = remaining samples, j = remaining taps, k = parity of the tap count.
int i, j, k;
|
||||
// Addresses are kept in plain ints (32-bit pointers assumed -- TODO confirm).
int coefptr, outptr, tmpout, inptr;
|
||||
#if !defined(MIPS_DSP_R1_LE)
|
||||
// Saturation bounds for the non-DSP path; 0xFFFF8000 is meant as -32768.
int max16 = 0x7FFF;
|
||||
int min16 = 0xFFFF8000;
|
||||
#endif // #if !defined(MIPS_DSP_R1_LE)
|
||||
|
||||
assert(data_length > 0);
|
||||
assert(coefficients_length > 1);
|
||||
|
||||
// Lines whose instruction text starts with a space sit in a branch delay slot
// (.set noreorder is in effect).
__asm __volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"addiu %[i], %[data_length], 0 \n\t"
|
||||
"lh %[coef0], 0(%[coefficients]) \n\t"
|
||||
"addiu %[j], %[coefficients_length], -1 \n\t"
|
||||
"andi %[k], %[j], 1 \n\t"
|
||||
"sll %[offset], %[j], 1 \n\t"
|
||||
"subu %[outptr], %[data_out], %[offset] \n\t"
|
||||
"addiu %[inptr], %[data_in], 0 \n\t"
|
||||
"bgtz %[k], 3f \n\t"
|
||||
" addu %[coefptr], %[coefficients], %[offset] \n\t"
|
||||
// ---- Outer loop, even number of feedback taps ----
"1: \n\t"
|
||||
"lh %[r0], 0(%[inptr]) \n\t"
|
||||
"addiu %[i], %[i], -1 \n\t"
|
||||
"addiu %[tmpout], %[outptr], 0 \n\t"
|
||||
"mult %[r0], %[coef0] \n\t"
|
||||
// Inner loop: two taps per pass, oldest output first.
"2: \n\t"
|
||||
"lh %[r0], 0(%[tmpout]) \n\t"
|
||||
"lh %[r1], 0(%[coefptr]) \n\t"
|
||||
"lh %[r2], 2(%[tmpout]) \n\t"
|
||||
"lh %[r3], -2(%[coefptr]) \n\t"
|
||||
"addiu %[tmpout], %[tmpout], 4 \n\t"
|
||||
"msub %[r0], %[r1] \n\t"
|
||||
"msub %[r2], %[r3] \n\t"
|
||||
"addiu %[j], %[j], -2 \n\t"
|
||||
"bgtz %[j], 2b \n\t"
|
||||
" addiu %[coefptr], %[coefptr], -4 \n\t"
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
"extr_r.w %[r0], $ac0, 12 \n\t"
|
||||
#else // #if defined(MIPS_DSP_R1_LE)
|
||||
"mflo %[r0] \n\t"
|
||||
#endif // #if defined(MIPS_DSP_R1_LE)
|
||||
// Re-arm the coefficient pointer and tap counter for the next sample.
"addu %[coefptr], %[coefficients], %[offset] \n\t"
|
||||
"addiu %[inptr], %[inptr], 2 \n\t"
|
||||
"addiu %[j], %[coefficients_length], -1 \n\t"
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
"shll_s.w %[r0], %[r0], 16 \n\t"
|
||||
"sra %[r0], %[r0], 16 \n\t"
|
||||
#else // #if defined(MIPS_DSP_R1_LE)
|
||||
"addiu %[r0], %[r0], 2048 \n\t"
|
||||
"sra %[r0], %[r0], 12 \n\t"
|
||||
"slt %[r1], %[max16], %[r0] \n\t"
|
||||
"movn %[r0], %[max16], %[r1] \n\t"
|
||||
"slt %[r1], %[r0], %[min16] \n\t"
|
||||
"movn %[r0], %[min16], %[r1] \n\t"
|
||||
#endif // #if defined(MIPS_DSP_R1_LE)
|
||||
"sh %[r0], 0(%[tmpout]) \n\t"
|
||||
"bgtz %[i], 1b \n\t"
|
||||
" addiu %[outptr], %[outptr], 2 \n\t"
|
||||
"b 5f \n\t"
|
||||
" nop \n\t"
|
||||
// ---- Outer loop, odd number of feedback taps ----
// NOTE(review): by inspection, `bgtz j` after `j -= 2` is still taken when an
// odd j reaches 1, which would run one more pair-iteration than the single
// tail tap and the `2(tmpout)` store below appear to expect. Verify on target
// against the generic C implementation before relying on odd tap counts.
"3: \n\t"
|
||||
"lh %[r0], 0(%[inptr]) \n\t"
|
||||
"addiu %[i], %[i], -1 \n\t"
|
||||
"addiu %[tmpout], %[outptr], 0 \n\t"
|
||||
"mult %[r0], %[coef0] \n\t"
|
||||
"4: \n\t"
|
||||
"lh %[r0], 0(%[tmpout]) \n\t"
|
||||
"lh %[r1], 0(%[coefptr]) \n\t"
|
||||
"lh %[r2], 2(%[tmpout]) \n\t"
|
||||
"lh %[r3], -2(%[coefptr]) \n\t"
|
||||
"addiu %[tmpout], %[tmpout], 4 \n\t"
|
||||
"msub %[r0], %[r1] \n\t"
|
||||
"msub %[r2], %[r3] \n\t"
|
||||
"addiu %[j], %[j], -2 \n\t"
|
||||
"bgtz %[j], 4b \n\t"
|
||||
" addiu %[coefptr], %[coefptr], -4 \n\t"
|
||||
// Single tail tap after the paired loop.
"lh %[r0], 0(%[tmpout]) \n\t"
|
||||
"lh %[r1], 0(%[coefptr]) \n\t"
|
||||
"msub %[r0], %[r1] \n\t"
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
"extr_r.w %[r0], $ac0, 12 \n\t"
|
||||
#else // #if defined(MIPS_DSP_R1_LE)
|
||||
"mflo %[r0] \n\t"
|
||||
#endif // #if defined(MIPS_DSP_R1_LE)
|
||||
"addu %[coefptr], %[coefficients], %[offset] \n\t"
|
||||
"addiu %[inptr], %[inptr], 2 \n\t"
|
||||
"addiu %[j], %[coefficients_length], -1 \n\t"
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
"shll_s.w %[r0], %[r0], 16 \n\t"
|
||||
"sra %[r0], %[r0], 16 \n\t"
|
||||
#else // #if defined(MIPS_DSP_R1_LE)
|
||||
"addiu %[r0], %[r0], 2048 \n\t"
|
||||
"sra %[r0], %[r0], 12 \n\t"
|
||||
"slt %[r1], %[max16], %[r0] \n\t"
|
||||
"movn %[r0], %[max16], %[r1] \n\t"
|
||||
"slt %[r1], %[r0], %[min16] \n\t"
|
||||
"movn %[r0], %[min16], %[r1] \n\t"
|
||||
#endif // #if defined(MIPS_DSP_R1_LE)
|
||||
"sh %[r0], 2(%[tmpout]) \n\t"
|
||||
"bgtz %[i], 3b \n\t"
|
||||
" addiu %[outptr], %[outptr], 2 \n\t"
|
||||
"5: \n\t"
|
||||
".set pop \n\t"
|
||||
// Outputs are early-clobber scratch registers; inputs are read-only.
: [i] "=&r" (i), [j] "=&r" (j), [k] "=&r" (k), [r0] "=&r" (r0),
|
||||
[r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
|
||||
[coef0] "=&r" (coef0), [offset] "=&r" (offset),
|
||||
[outptr] "=&r" (outptr), [inptr] "=&r" (inptr),
|
||||
[coefptr] "=&r" (coefptr), [tmpout] "=&r" (tmpout)
|
||||
: [coefficients] "r" (coefficients), [data_length] "r" (data_length),
|
||||
[coefficients_length] "r" (coefficients_length),
|
||||
#if !defined(MIPS_DSP_R1_LE)
|
||||
[max16] "r" (max16), [min16] "r" (min16),
|
||||
#endif
|
||||
[data_out] "r" (data_out), [data_in] "r" (data_in)
|
||||
: "hi", "lo", "memory"
|
||||
);
|
||||
}
|
||||
|
45
third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ma_fast_q12.c
vendored
Normal file
45
third_party/webrtc/src/webrtc/common_audio/signal_processing/filter_ma_fast_q12.c
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_FilterMAFastQ12().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
/*
 * Moving-average (FIR) filter with Q12 coefficients.
 *
 * For each n in [0, length):
 *   out_ptr[n] = round_q12(sat(sum_{j=0..B_length-1} B[j] * in_ptr[n - j]))
 *
 * in_ptr is read at indices n - j, so the caller must provide
 * B_length - 1 samples of history in front of in_ptr.
 */
void WebRtcSpl_FilterMAFastQ12(const int16_t* in_ptr,
                               int16_t* out_ptr,
                               const int16_t* B,
                               size_t B_length,
                               size_t length)
{
    size_t n;

    for (n = 0; n < length; n++)
    {
        int32_t acc = 0;
        size_t tap;

        for (tap = 0; tap < B_length; tap++)
        {
            acc += B[tap] * in_ptr[n - tap];
        }

        /*
         * 2^27 = 134217728 corresponds to 32768 in Q12. Clamp the sum so that
         * adding the rounding offset and shifting stays within 16 bits.
         */
        acc = WEBRTC_SPL_SAT((int32_t)134215679, acc, (int32_t)-134217728);

        out_ptr[n] = (int16_t)((acc + (int32_t)2048) >> 12);
    }
}
|
77
third_party/webrtc/src/webrtc/common_audio/signal_processing/get_hanning_window.c
vendored
Normal file
77
third_party/webrtc/src/webrtc/common_audio/signal_processing/get_hanning_window.c
vendored
Normal file
|
@ -0,0 +1,77 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_GetHanningWindow().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// Rising half of a Hanning window, 256 entries, peaking at 16384.
static const int16_t kHanningTable[] = {
    1,      2,      6,     10,     15,     22,     30,     39,
   50,     62,     75,     89,    104,    121,    138,    157,
  178,    199,    222,    246,    271,    297,    324,    353,
  383,    413,    446,    479,    513,    549,    586,    624,
  663,    703,    744,    787,    830,    875,    920,    967,
 1015,   1064,   1114,   1165,   1218,   1271,   1325,   1381,
 1437,   1494,   1553,   1612,   1673,   1734,   1796,   1859,
 1924,   1989,   2055,   2122,   2190,   2259,   2329,   2399,
 2471,   2543,   2617,   2691,   2765,   2841,   2918,   2995,
 3073,   3152,   3232,   3312,   3393,   3475,   3558,   3641,
 3725,   3809,   3895,   3980,   4067,   4154,   4242,   4330,
 4419,   4509,   4599,   4689,   4781,   4872,   4964,   5057,
 5150,   5244,   5338,   5432,   5527,   5622,   5718,   5814,
 5910,   6007,   6104,   6202,   6299,   6397,   6495,   6594,
 6693,   6791,   6891,   6990,   7090,   7189,   7289,   7389,
 7489,   7589,   7690,   7790,   7890,   7991,   8091,   8192,
 8293,   8393,   8494,   8594,   8694,   8795,   8895,   8995,
 9095,   9195,   9294,   9394,   9493,   9593,   9691,   9790,
 9889,   9987,  10085,  10182,  10280,  10377,  10474,  10570,
10666,  10762,  10857,  10952,  11046,  11140,  11234,  11327,
11420,  11512,  11603,  11695,  11785,  11875,  11965,  12054,
12142,  12230,  12317,  12404,  12489,  12575,  12659,  12743,
12826,  12909,  12991,  13072,  13152,  13232,  13311,  13389,
13466,  13543,  13619,  13693,  13767,  13841,  13913,  13985,
14055,  14125,  14194,  14262,  14329,  14395,  14460,  14525,
14588,  14650,  14711,  14772,  14831,  14890,  14947,  15003,
15059,  15113,  15166,  15219,  15270,  15320,  15369,  15417,
15464,  15509,  15554,  15597,  15640,  15681,  15721,  15760,
15798,  15835,  15871,  15905,  15938,  15971,  16001,  16031,
16060,  16087,  16113,  16138,  16162,  16185,  16206,  16227,
16246,  16263,  16280,  16295,  16309,  16322,  16334,  16345,
16354,  16362,  16369,  16374,  16378,  16382,  16383,  16384
};

/*
 * Fills v[0 .. size-1] with values read from kHanningTable.
 *
 * A fixed-point position is advanced by a constant step for every output
 * sample and its bits above bit 22 select the table entry. The step is
 * WebRtcSpl_DivW32W16(0x40000000, size) -- presumably 2^30 / size, which
 * would spread the 256 table entries evenly over |size| samples (helper is
 * defined elsewhere; confirm).
 */
void WebRtcSpl_GetHanningWindow(int16_t *v, size_t size)
{
    const int32_t step =
        WebRtcSpl_DivW32W16((int32_t)0x40000000, (int16_t)size);
    /* Start slightly below zero; the starting offset depends on the size. */
    int32_t position =
        (size < 513) ? (int32_t)-0x200000 : (int32_t)-0x100000;
    size_t k;

    for (k = 0; k < size; k++)
    {
        position += step;
        v[k] = kHanningTable[position >> 22];
    }
}
|
46
third_party/webrtc/src/webrtc/common_audio/signal_processing/get_scaling_square.c
vendored
Normal file
46
third_party/webrtc/src/webrtc/common_audio/signal_processing/get_scaling_square.c
vendored
Normal file
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_GetScalingSquare().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
/*
 * Returns a right-shift count to apply to squared samples of |in_vector|
 * when |times| of them are to be accumulated (see WebRtcSpl_Energy()).
 *
 * The result is derived from the largest sample magnitude in the vector and
 * from WebRtcSpl_GetSizeInBits(times); both helpers used here are defined
 * elsewhere -- presumably "bits needed to represent" and "normalisation
 * shift of a 32-bit value" respectively (confirm against their headers).
 *
 * Returns 0 when every sample is zero.
 *
 * The magnitudes are tracked in 32 bits: |-32768| does not fit in int16_t,
 * and narrowing it made such a sample invisible to the peak search.
 */
int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
                                   size_t in_vector_length,
                                   size_t times)
{
    int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times);
    size_t i;
    int32_t smax = -1;  /* Stays -1 for an empty vector, as before. */
    int32_t sabs;
    int16_t t;

    for (i = 0; i < in_vector_length; i++)
    {
        sabs = in_vector[i];
        if (sabs < 0)
        {
            sabs = -sabs;  /* At most 32768; safe in int32_t. */
        }
        if (sabs > smax)
        {
            smax = sabs;
        }
    }
    t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));

    if (smax == 0)
    {
        return 0; // Since norm(0) returns 0
    }
    return (t > nbits) ? 0 : nbits - t;
}
|
90
third_party/webrtc/src/webrtc/common_audio/signal_processing/ilbc_specific_functions.c
vendored
Normal file
90
third_party/webrtc/src/webrtc/common_audio/signal_processing/ilbc_specific_functions.c
vendored
Normal file
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains implementations of the iLBC specific functions
|
||||
* WebRtcSpl_ReverseOrderMultArrayElements()
|
||||
* WebRtcSpl_ElementwiseVectorMult()
|
||||
* WebRtcSpl_AddVectorsAndShift()
|
||||
* WebRtcSpl_AddAffineVectorToVector()
|
||||
* WebRtcSpl_AffineTransformVector()
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
/*
 * Multiplies |in| element-wise with a window that is read backwards:
 *   out[k] = (in[k] * win[-k]) >> right_shifts,  k = 0 .. vector_length-1
 *
 * |win| must therefore point at the LAST window element to be used.
 */
void WebRtcSpl_ReverseOrderMultArrayElements(int16_t *out, const int16_t *in,
                                             const int16_t *win,
                                             size_t vector_length,
                                             int16_t right_shifts)
{
    size_t k;

    for (k = 0; k < vector_length; k++)
    {
        const int product = in[k] * *(win - k);
        out[k] = (int16_t)(product >> right_shifts);
    }
}
|
||||
|
||||
/*
 * Element-wise product of two vectors with a common right shift:
 *   out[k] = (in[k] * win[k]) >> right_shifts,  k = 0 .. vector_length-1
 */
void WebRtcSpl_ElementwiseVectorMult(int16_t *out, const int16_t *in,
                                     const int16_t *win, size_t vector_length,
                                     int16_t right_shifts)
{
    size_t k;

    for (k = 0; k < vector_length; k++)
    {
        const int product = in[k] * win[k];
        out[k] = (int16_t)(product >> right_shifts);
    }
}
|
||||
|
||||
/*
 * Adds two vectors element-wise and right-shifts each sum:
 *   out[k] = (in1[k] + in2[k]) >> right_shifts,  k = 0 .. vector_length-1
 */
void WebRtcSpl_AddVectorsAndShift(int16_t *out, const int16_t *in1,
                                  const int16_t *in2, size_t vector_length,
                                  int16_t right_shifts)
{
    size_t k;

    for (k = 0; k < vector_length; k++)
    {
        const int sum = in1[k] + in2[k];
        out[k] = (int16_t)(sum >> right_shifts);
    }
}
|
||||
|
||||
/*
 * Accumulates an affine transform of |in| onto |out|:
 *   out[k] += (int16_t)((in[k] * gain + add_constant) >> right_shifts)
 * for k = 0 .. vector_length-1.
 */
void WebRtcSpl_AddAffineVectorToVector(int16_t *out, int16_t *in,
                                       int16_t gain, int32_t add_constant,
                                       int16_t right_shifts,
                                       size_t vector_length)
{
    int16_t *dst = out;
    const int16_t *src = in;
    size_t remaining;

    for (remaining = vector_length; remaining > 0; remaining--)
    {
        *dst += (int16_t)((*src * gain + add_constant) >> right_shifts);
        dst++;
        src++;
    }
}
|
||||
|
||||
/*
 * Applies an affine transform to every element of |in|:
 *   out[k] = (int16_t)((in[k] * gain + add_constant) >> right_shifts)
 * for k = 0 .. vector_length-1.
 */
void WebRtcSpl_AffineTransformVector(int16_t *out, int16_t *in,
                                     int16_t gain, int32_t add_constant,
                                     int16_t right_shifts, size_t vector_length)
{
    int16_t *dst = out;
    const int16_t *src = in;
    size_t remaining;

    for (remaining = vector_length; remaining > 0; remaining--)
    {
        *dst = (int16_t)((*src * gain + add_constant) >> right_shifts);
        dst++;
        src++;
    }
}
|
97
third_party/webrtc/src/webrtc/common_audio/signal_processing/include/real_fft.h
vendored
Normal file
97
third_party/webrtc/src/webrtc/common_audio/signal_processing/include/real_fft.h
vendored
Normal file
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
|
||||
#define WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
// For ComplexFFT(), the maximum fft order is 10;
|
||||
// for OpenMax FFT in ARM, it is 12;
|
||||
// WebRTC APM uses orders of only 7 and 8.
|
||||
enum {kMaxFFTOrder = 10};
|
||||
|
||||
struct RealFFT;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct RealFFT* WebRtcSpl_CreateRealFFT(int order);
|
||||
void WebRtcSpl_FreeRealFFT(struct RealFFT* self);
|
||||
|
||||
// Compute an FFT for a real-valued signal of length of 2^order,
|
||||
// where 1 < order <= kMaxFFTOrder. Transform length is determined by the
|
||||
// specification structure, which must be initialized prior to calling the FFT
|
||||
// function with WebRtcSpl_CreateRealFFT().
|
||||
// The relationship between the input and output sequences can
|
||||
// be expressed in terms of the DFT, i.e.:
|
||||
// x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
|
||||
// n=0,1,2,...N-1
|
||||
// N=2^order.
|
||||
// The conjugate-symmetric output sequence is represented using a CCS vector,
|
||||
// which is of length N+2, and is organized as follows:
|
||||
// Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1
|
||||
// Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0
|
||||
// where R[n] and I[n], respectively, denote the real and imaginary components
|
||||
// for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length.
|
||||
// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
|
||||
// the foldover frequency.
|
||||
//
|
||||
// Input Arguments:
|
||||
// self - pointer to preallocated and initialized FFT specification structure.
|
||||
// real_data_in - the input signal. For an ARM Neon platform, it must be
|
||||
// aligned on a 32-byte boundary.
|
||||
//
|
||||
// Output Arguments:
|
||||
// complex_data_out - the output complex signal with (2^order + 2) 16-bit
|
||||
// elements. For an ARM Neon platform, it must be different
|
||||
// from real_data_in, and aligned on a 32-byte boundary.
|
||||
//
|
||||
// Return Value:
|
||||
// 0 - FFT calculation is successful.
|
||||
// -1 - Error with bad arguments (NULL pointers).
|
||||
int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
|
||||
const int16_t* real_data_in,
|
||||
int16_t* complex_data_out);
|
||||
|
||||
// Compute the inverse FFT for a conjugate-symmetric input sequence of length of
|
||||
// 2^order, where 1 < order <= kMaxFFTOrder. Transform length is determined by
|
||||
// the specification structure, which must be initialized prior to calling the
|
||||
// FFT function with WebRtcSpl_CreateRealFFT().
|
||||
// For a transform of length M, the input sequence is represented using a packed
|
||||
// CCS vector of length M+2, which is explained in the comments for
|
||||
// WebRtcSpl_RealForwardFFT above.
|
||||
//
|
||||
// Input Arguments:
|
||||
// self - pointer to preallocated and initialized FFT specification structure.
|
||||
// complex_data_in - the input complex signal with (2^order + 2) 16-bit
|
||||
// elements. For an ARM Neon platform, it must be aligned on
|
||||
// a 32-byte boundary.
|
||||
//
|
||||
// Output Arguments:
|
||||
// real_data_out - the output real signal. For an ARM Neon platform, it must
|
||||
// be different to complex_data_in, and aligned on a 32-byte
|
||||
// boundary.
|
||||
//
|
||||
// Return Value:
|
||||
// 0 or a positive number - a value that the elements in the |real_data_out|
|
||||
// should be shifted left with in order to get
|
||||
// correct physical values.
|
||||
// -1 - Error with bad arguments (NULL pointers).
|
||||
int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
|
||||
const int16_t* complex_data_in,
|
||||
int16_t* real_data_out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
|
1645
third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h
vendored
Normal file
1645
third_party/webrtc/src/webrtc/common_audio/signal_processing/include/signal_processing_library.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
173
third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h
vendored
Normal file
173
third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl.h
vendored
Normal file
|
@ -0,0 +1,173 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
// This header file includes the inline functions in
|
||||
// the fix point signal processing library.
|
||||
|
||||
#ifndef WEBRTC_SPL_SPL_INL_H_
|
||||
#define WEBRTC_SPL_SPL_INL_H_
|
||||
|
||||
#ifdef WEBRTC_ARCH_ARM_V7
|
||||
#include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h"
|
||||
#else
|
||||
|
||||
#if defined(MIPS32_LE)
|
||||
#include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h"
|
||||
#endif
|
||||
|
||||
#if !defined(MIPS_DSP_R1_LE)
|
||||
// Clamps a 32-bit value into the int16_t range.
//
// Returns 32767 for anything above 32767, -32768 for anything below -32768,
// and the value itself (narrowed to 16 bits) otherwise.
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
  if (value32 > 32767) {
    return 32767;
  }
  if (value32 < -32768) {
    return -32768;
  }
  // In range: the narrowing conversion is value-preserving.
  return (int16_t) value32;
}
|
||||
|
||||
// Saturating 32-bit addition.
//
// Returns l_var1 + l_var2, clamped to 0x7FFFFFFF on positive overflow and to
// 0x80000000 on negative overflow.
static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
  // Add in unsigned arithmetic: it wraps modulo 2^32, whereas overflowing a
  // signed addition is undefined behavior (and this function is called
  // precisely for the inputs that overflow).
  const int32_t l_sum = (int32_t)((uint32_t)l_var1 + (uint32_t)l_var2);

  // Overflow is only possible when both operands have the same sign, and it
  // happened iff the wrapped sum ended up with the opposite sign.
  if (((l_var1 < 0) == (l_var2 < 0)) && ((l_var1 < 0) != (l_sum < 0))) {
    // A wrapped sum that looks negative came from two positive operands.
    return (l_sum < 0) ? (int32_t)0x7FFFFFFF : (int32_t)0x80000000;
  }

  return l_sum;
}
|
||||
|
||||
// Saturating 32-bit subtraction.
//
// Returns l_var1 - l_var2, clamped to 0x7FFFFFFF on positive overflow and to
// 0x80000000 on negative overflow.
static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
  // Subtract in unsigned arithmetic: it wraps modulo 2^32, whereas
  // overflowing a signed subtraction is undefined behavior.
  const int32_t l_diff = (int32_t)((uint32_t)l_var1 - (uint32_t)l_var2);

  // Overflow is only possible when the operands have different signs, and it
  // happened iff the wrapped difference does not carry the sign of l_var1.
  if (((l_var1 < 0) != (l_var2 < 0)) && ((l_var1 < 0) != (l_diff < 0))) {
    // A wrapped difference that looks negative came from a non-negative
    // minuend, i.e. the true result was too large.
    return (l_diff < 0) ? (int32_t)0x7FFFFFFF : (int32_t)0x80000000;
  }

  return l_diff;
}
|
||||
|
||||
// Saturating 16-bit addition: the sum is formed in 32 bits (where it cannot
// overflow) and then clamped by WebRtcSpl_SatW32ToW16().
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
  const int32_t wide_sum = (int32_t) a + (int32_t) b;

  return WebRtcSpl_SatW32ToW16(wide_sum);
}
|
||||
|
||||
// Saturating 16-bit subtraction: the difference is formed in 32 bits (where
// it cannot overflow) and then clamped by WebRtcSpl_SatW32ToW16().
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
  const int32_t wide_diff = (int32_t) var1 - (int32_t) var2;

  return WebRtcSpl_SatW32ToW16(wide_diff);
}
|
||||
#endif // #if !defined(MIPS_DSP_R1_LE)
|
||||
|
||||
#if !defined(MIPS32_LE)
|
||||
// Returns the number of bits needed to represent |n|: the 1-based position of
// its highest set bit, or 0 when n == 0.
//
// Binary search: at each step, if the upper part of the remaining value is
// non-zero, account for the lower part's width and drop it.
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
  uint32_t rest = n;  // Invariant: rest == n >> bits.
  int16_t bits = 0;

  if (rest & 0xFFFF0000) {
    bits = 16;
    rest >>= 16;
  }
  if (rest & 0x0000FF00) {
    bits += 8;
    rest >>= 8;
  }
  if (rest & 0x000000F0) {
    bits += 4;
    rest >>= 4;
  }
  if (rest & 0x0000000C) {
    bits += 2;
    rest >>= 2;
  }
  if (rest & 0x00000002) {
    bits += 1;
    rest >>= 1;
  }
  if (rest & 0x00000001) {
    bits += 1;
  }

  return bits;
}
|
||||
|
||||
// Returns how many bits the signed 32-bit value |a| can be shifted left
// without changing its sign bit (i.e. the count of redundant sign bits).
// Returns 0 for a == 0.
//
// Negative inputs are bit-inverted first so that only leading zeros have to
// be counted; the search then looks for the highest set bit below bit 31.
static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
  uint32_t v;  // Invariant below: v == (non-negative a) << zeros.
  int16_t zeros = 0;

  if (a == 0) {
    return 0;
  }
  v = (uint32_t)((a < 0) ? ~a : a);

  if ((v & 0xFFFF8000) == 0) {
    zeros = 16;
    v <<= 16;
  }
  if ((v & 0xFF800000) == 0) {
    zeros += 8;
    v <<= 8;
  }
  if ((v & 0xF8000000) == 0) {
    zeros += 4;
    v <<= 4;
  }
  if ((v & 0xE0000000) == 0) {
    zeros += 2;
    v <<= 2;
  }
  if ((v & 0xC0000000) == 0) {
    zeros += 1;
  }

  return zeros;
}
|
||||
|
||||
// Returns the number of leading zero bits of the unsigned 32-bit value |a|,
// i.e. how far it can be shifted left before its top bit is set.
// Returns 0 for a == 0.
static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
  uint32_t v = a;  // Invariant below: v == a << zeros.
  int16_t zeros = 0;

  if (a == 0) {
    return 0;
  }

  if ((v & 0xFFFF0000) == 0) {
    zeros = 16;
    v <<= 16;
  }
  if ((v & 0xFF000000) == 0) {
    zeros += 8;
    v <<= 8;
  }
  if ((v & 0xF0000000) == 0) {
    zeros += 4;
    v <<= 4;
  }
  if ((v & 0xC0000000) == 0) {
    zeros += 2;
    v <<= 2;
  }
  if ((v & 0x80000000) == 0) {
    zeros += 1;
  }

  return zeros;
}
|
||||
|
||||
// Returns how many bits the signed 16-bit value |a| can be shifted left
// without changing its sign bit (i.e. the count of redundant sign bits).
// Returns 0 for a == 0.
//
// Negative inputs are bit-inverted first so that only leading zeros have to
// be counted; the search then looks for the highest set bit below bit 15.
static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
  int v;  // Invariant below: v == (non-negative a) << zeros, v <= 0x7FFF.
  int16_t zeros = 0;

  if (a == 0) {
    return 0;
  }
  v = (a < 0) ? ~a : a;

  if ((v & 0xFF80) == 0) {
    zeros = 8;
    v <<= 8;
  }
  if ((v & 0xF800) == 0) {
    zeros += 4;
    v <<= 4;
  }
  if ((v & 0xE000) == 0) {
    zeros += 2;
    v <<= 2;
  }
  if ((v & 0xC000) == 0) {
    zeros += 1;
  }

  return zeros;
}
|
||||
|
||||
// Multiply-accumulate: returns a * b + c, with the 16x16-bit product formed
// in (at least) 32-bit int arithmetic.
static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
  const int32_t product = a * b;

  return product + c;
}
|
||||
#endif // #if !defined(MIPS32_LE)
|
||||
|
||||
#endif // WEBRTC_ARCH_ARM_V7
|
||||
|
||||
#endif // WEBRTC_SPL_SPL_INL_H_
|
136
third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h
vendored
Normal file
136
third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_armv7.h
vendored
Normal file
|
@ -0,0 +1,136 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/* This header file includes the inline functions for ARM processors in
|
||||
* the fix point signal processing library.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_SPL_SPL_INL_ARMV7_H_
|
||||
#define WEBRTC_SPL_SPL_INL_ARMV7_H_
|
||||
|
||||
/* TODO(kma): Replace some assembly code with GCC intrinsics
|
||||
* (e.g. __builtin_clz).
|
||||
*/
|
||||
|
||||
/* This function produces result that is not bit exact with that by the generic
|
||||
* C version in some cases, although the former is at least as accurate as the
|
||||
* latter.
|
||||
*/
|
||||
/* ARMv7 version: issues a single `smulwb` instruction with |b| as the first
 * source operand and |a| as the second, and returns the register it writes.
 * Going by the function name this is (a * b) >> 16; presumably `smulwb`
 * multiplies the 32-bit word by the bottom halfword and keeps the top 32 bits
 * of the product -- confirm against the ARM architecture manual.
 */
static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a, int32_t b) {
|
||||
int32_t tmp = 0;
|
||||
__asm __volatile ("smulwb %0, %1, %2":"=r"(tmp):"r"(b), "r"(a));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
/* ARMv7 version: issues a single `smulbb` instruction on |a| and |b| and
 * returns the register it writes. Going by the function name this is the
 * signed 16x16 -> 32-bit product a * b.
 */
static __inline int32_t WEBRTC_SPL_MUL_16_16(int16_t a, int16_t b) {
|
||||
int32_t tmp = 0;
|
||||
__asm __volatile ("smulbb %0, %1, %2":"=r"(tmp):"r"(a), "r"(b));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
// TODO(kma): add unit test.
|
||||
/* ARMv7 version: issues a single `smlabb` instruction with operands |a|, |b|
 * and accumulator |c|, and returns the register it writes. The generic C
 * implementation of the same function computes a * b + c.
 */
static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
|
||||
int32_t tmp = 0;
|
||||
__asm __volatile ("smlabb %0, %1, %2, %3":"=r"(tmp):"r"(a), "r"(b), "r"(c));
|
||||
return tmp;
|
||||
}
|
||||
|
||||
/* ARMv7 version of the saturating 16-bit add: issues a single `qadd16`
 * instruction on |a| and |b| and returns the low 16 bits of its 32-bit
 * result. NOTE(review): `qadd16` presumably operates on both halfwords of the
 * registers; only the bottom halfword is used here -- confirm.
 */
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
|
||||
int32_t s_sum = 0;
|
||||
|
||||
__asm __volatile ("qadd16 %0, %1, %2":"=r"(s_sum):"r"(a), "r"(b));
|
||||
|
||||
return (int16_t) s_sum;
|
||||
}
|
||||
|
||||
/* ARMv7 version of the saturating 32-bit add: issues a single `qadd`
 * instruction on |l_var1| and |l_var2| and returns the register it writes.
 */
static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
|
||||
int32_t l_sum = 0;
|
||||
|
||||
__asm __volatile ("qadd %0, %1, %2":"=r"(l_sum):"r"(l_var1), "r"(l_var2));
|
||||
|
||||
return l_sum;
|
||||
}
|
||||
|
||||
/* ARMv7 version of the saturating 32-bit subtract: issues a single `qsub`
 * instruction with |l_var1| as the first source operand and |l_var2| as the
 * second, and returns the register it writes.
 */
static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
|
||||
int32_t l_sub = 0;
|
||||
|
||||
__asm __volatile ("qsub %0, %1, %2":"=r"(l_sub):"r"(l_var1), "r"(l_var2));
|
||||
|
||||
return l_sub;
|
||||
}
|
||||
|
||||
/* ARMv7 version of the saturating 16-bit subtract: issues a single `qsub16`
 * instruction with |var1| as the first source operand and |var2| as the
 * second, and returns the low 16 bits of its 32-bit result.
 */
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
|
||||
int32_t s_sub = 0;
|
||||
|
||||
__asm __volatile ("qsub16 %0, %1, %2":"=r"(s_sub):"r"(var1), "r"(var2));
|
||||
|
||||
return (int16_t)s_sub;
|
||||
}
|
||||
|
||||
/* ARMv7 version: returns 32 minus the result of `clz` (count leading zeros)
 * applied to |n|, i.e. the number of bits needed to represent |n|.
 */
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
|
||||
int32_t tmp = 0;
|
||||
|
||||
__asm __volatile ("clz %0, %1":"=r"(tmp):"r"(n));
|
||||
|
||||
return (int16_t)(32 - tmp);
|
||||
}
|
||||
|
||||
/* ARMv7 version: returns 0 for a == 0. Otherwise a negative |a| is
 * bit-inverted (XOR with all ones), `clz` counts the leading zeros of the
 * resulting value, and that count minus one is returned.
 */
static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
|
||||
int32_t tmp = 0;
|
||||
|
||||
if (a == 0) {
|
||||
return 0;
|
||||
}
|
||||
else if (a < 0) {
|
||||
a ^= 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
__asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a));
|
||||
|
||||
return (int16_t)(tmp - 1);
|
||||
}
|
||||
|
||||
/* ARMv7 version: returns 0 for a == 0, otherwise the result of `clz` (count
 * leading zeros) applied to |a|.
 */
static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
|
||||
int tmp = 0;
|
||||
|
||||
if (a == 0) return 0;
|
||||
|
||||
__asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a));
|
||||
|
||||
return (int16_t)tmp;
|
||||
}
|
||||
|
||||
/* ARMv7 version: returns 0 for a == 0. Otherwise |a| is widened to 32 bits,
 * bit-inverted if negative, and `clz` counts the leading zeros of that 32-bit
 * value; 17 is subtracted (16 for the widening plus one for the sign bit).
 */
static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
|
||||
int32_t tmp = 0;
|
||||
int32_t a_32 = a;
|
||||
|
||||
if (a_32 == 0) {
|
||||
return 0;
|
||||
}
|
||||
else if (a_32 < 0) {
|
||||
a_32 ^= 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
__asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a_32));
|
||||
|
||||
return (int16_t)(tmp - 17);
|
||||
}
|
||||
|
||||
// TODO(kma): add unit test.
|
||||
/* ARMv7 version: issues `ssat` with a saturation width of 16 bits on
 * |value32| and returns the result narrowed to int16_t.
 */
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
|
||||
int32_t out = 0;
|
||||
|
||||
__asm __volatile ("ssat %0, #16, %1" : "=r"(out) : "r"(value32));
|
||||
|
||||
return (int16_t)out;
|
||||
}
|
||||
|
||||
#endif // WEBRTC_SPL_SPL_INL_ARMV7_H_
|
225
third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h
vendored
Normal file
225
third_party/webrtc/src/webrtc/common_audio/signal_processing/include/spl_inl_mips.h
vendored
Normal file
|
@ -0,0 +1,225 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
// This header file includes the inline functions in
|
||||
// the fix point signal processing library.
|
||||
|
||||
#ifndef WEBRTC_SPL_SPL_INL_MIPS_H_
|
||||
#define WEBRTC_SPL_SPL_INL_MIPS_H_
|
||||
|
||||
// MIPS version of the 16x16 -> 32-bit multiply. Note the parameters are
// declared int32_t here; only their low 16 bits are used.
//
// The low halfword of each input is sign-extended into a scratch register
// (a1, b1): with `seh` when MIPS32_R2_LE is defined, otherwise with a shift
// left by 16 followed by an arithmetic shift right by 16. The scratch values
// are then multiplied with `mul`. a1/b1 are early-clobber outputs and hi/lo
// are declared clobbered.
static __inline int32_t WEBRTC_SPL_MUL_16_16(int32_t a,
|
||||
int32_t b) {
|
||||
int32_t value32 = 0;
|
||||
int32_t a1 = 0, b1 = 0;
|
||||
|
||||
__asm __volatile(
|
||||
#if defined(MIPS32_R2_LE)
|
||||
"seh %[a1], %[a] \n\t"
|
||||
"seh %[b1], %[b] \n\t"
|
||||
#else
|
||||
"sll %[a1], %[a], 16 \n\t"
|
||||
"sll %[b1], %[b], 16 \n\t"
|
||||
"sra %[a1], %[a1], 16 \n\t"
|
||||
"sra %[b1], %[b1], 16 \n\t"
|
||||
#endif
|
||||
"mul %[value32], %[a1], %[b1] \n\t"
|
||||
: [value32] "=r" (value32), [a1] "=&r" (a1), [b1] "=&r" (b1)
|
||||
: [a] "r" (a), [b] "r" (b)
|
||||
: "hi", "lo"
|
||||
);
|
||||
return value32;
|
||||
}
|
||||
|
||||
// MIPS version of the 16x32-bit multiply with a 16-bit right shift.
//
// Steps performed by the assembly, in order:
//   a1      = sign-extended low 16 bits of a
//   b2      = (b & 0xFFFF) >> 1      (low half of b, halved)
//   b1      = b >> 16                (high half of b, arithmetic shift)
//   value32 = a1 * b1
//   b2      = (a1 * b2 + 0x4000) >> 15
//   value32 = value32 + b2
// All four outputs are early-clobber; hi/lo are declared clobbered.
static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a,
|
||||
int32_t b) {
|
||||
int32_t value32 = 0, b1 = 0, b2 = 0;
|
||||
int32_t a1 = 0;
|
||||
|
||||
__asm __volatile(
|
||||
#if defined(MIPS32_R2_LE)
|
||||
"seh %[a1], %[a] \n\t"
|
||||
#else
|
||||
"sll %[a1], %[a], 16 \n\t"
|
||||
"sra %[a1], %[a1], 16 \n\t"
|
||||
#endif
|
||||
"andi %[b2], %[b], 0xFFFF \n\t"
|
||||
"sra %[b1], %[b], 16 \n\t"
|
||||
"sra %[b2], %[b2], 1 \n\t"
|
||||
"mul %[value32], %[a1], %[b1] \n\t"
|
||||
"mul %[b2], %[a1], %[b2] \n\t"
|
||||
"addiu %[b2], %[b2], 0x4000 \n\t"
|
||||
"sra %[b2], %[b2], 15 \n\t"
|
||||
"addu %[value32], %[value32], %[b2] \n\t"
|
||||
: [value32] "=&r" (value32), [b1] "=&r" (b1), [b2] "=&r" (b2),
|
||||
[a1] "=&r" (a1)
|
||||
: [a] "r" (a), [b] "r" (b)
|
||||
: "hi", "lo"
|
||||
);
|
||||
return value32;
|
||||
}
|
||||
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
// MIPS DSP R1 version: modifies |value32| in place (read-write "+r" operand)
// with `shll_s.w` by 16 followed by an arithmetic shift right by 16, then
// returns it narrowed to int16_t. Presumably `shll_s.w` is the saturating
// left shift, which is what makes the round trip clamp to 16 bits -- confirm
// against the MIPS DSP ASE manual.
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
|
||||
__asm __volatile(
|
||||
"shll_s.w %[value32], %[value32], 16 \n\t"
|
||||
"sra %[value32], %[value32], 16 \n\t"
|
||||
: [value32] "+r" (value32)
|
||||
:
|
||||
);
|
||||
int16_t out16 = (int16_t)value32;
|
||||
return out16;
|
||||
}
|
||||
|
||||
// MIPS DSP R1 version of the saturating 16-bit add: issues a single
// `addq_s.ph` instruction on |a| and |b| and returns the low 16 bits of its
// 32-bit result.
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
|
||||
int32_t value32 = 0;
|
||||
|
||||
__asm __volatile(
|
||||
"addq_s.ph %[value32], %[a], %[b] \n\t"
|
||||
: [value32] "=r" (value32)
|
||||
: [a] "r" (a), [b] "r" (b)
|
||||
);
|
||||
return (int16_t)value32;
|
||||
}
|
||||
|
||||
// MIPS DSP R1 version of the saturating 32-bit add: issues a single
// `addq_s.w` instruction on |l_var1| and |l_var2| and returns the register
// it writes.
static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) {
|
||||
int32_t l_sum;
|
||||
|
||||
__asm __volatile(
|
||||
"addq_s.w %[l_sum], %[l_var1], %[l_var2] \n\t"
|
||||
: [l_sum] "=r" (l_sum)
|
||||
: [l_var1] "r" (l_var1), [l_var2] "r" (l_var2)
|
||||
);
|
||||
|
||||
return l_sum;
|
||||
}
|
||||
|
||||
// MIPS DSP R1 version of the saturating 16-bit subtract: issues a single
// `subq_s.ph` instruction with |var1| as the first source operand and |var2|
// as the second, and returns the low 16 bits of its 32-bit result.
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
|
||||
int32_t value32;
|
||||
|
||||
__asm __volatile(
|
||||
"subq_s.ph %[value32], %[var1], %[var2] \n\t"
|
||||
: [value32] "=r" (value32)
|
||||
: [var1] "r" (var1), [var2] "r" (var2)
|
||||
);
|
||||
|
||||
return (int16_t)value32;
|
||||
}
|
||||
|
||||
// MIPS DSP R1 version of the saturating 32-bit subtract: issues a single
// `subq_s.w` instruction with |l_var1| as the first source operand and
// |l_var2| as the second, and returns the register it writes.
static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) {
|
||||
int32_t l_diff;
|
||||
|
||||
__asm __volatile(
|
||||
"subq_s.w %[l_diff], %[l_var1], %[l_var2] \n\t"
|
||||
: [l_diff] "=r" (l_diff)
|
||||
: [l_var1] "r" (l_var1), [l_var2] "r" (l_var2)
|
||||
);
|
||||
|
||||
return l_diff;
|
||||
}
|
||||
#endif
|
||||
|
||||
// MIPS version: counts the leading zeros of |n| with `clz` and subtracts that
// count from 32 (held in the input register i32), giving the number of bits
// needed to represent |n|.
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
|
||||
int bits = 0;
|
||||
int i32 = 32;
|
||||
|
||||
__asm __volatile(
|
||||
"clz %[bits], %[n] \n\t"
|
||||
"subu %[bits], %[i32], %[bits] \n\t"
|
||||
: [bits] "=&r" (bits)
|
||||
: [n] "r" (n), [i32] "r" (i32)
|
||||
);
|
||||
|
||||
return (int16_t)bits;
|
||||
}
|
||||
|
||||
// MIPS version of the 32-bit sign-bit normalization count.
//
// The block runs under `.set noreorder`, so (assuming the usual MIPS branch
// delay slot) the instruction written after each branch executes with it:
//   - zeros = a >> 31 (all ones for negative a, zero otherwise) is always
//     computed, in the delay slot of `bnez`.
//   - a == 0: falls through to `b 2f`, whose delay slot sets zeros = 0.
//   - a != 0: zeros = clz(a ^ zeros) - 1, i.e. negative inputs are
//     bit-inverted before the leading zeros are counted.
static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
|
||||
int zeros = 0;
|
||||
|
||||
__asm __volatile(
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"bnez %[a], 1f \n\t"
|
||||
" sra %[zeros], %[a], 31 \n\t"
|
||||
"b 2f \n\t"
|
||||
" move %[zeros], $zero \n\t"
|
||||
"1: \n\t"
|
||||
"xor %[zeros], %[a], %[zeros] \n\t"
|
||||
"clz %[zeros], %[zeros] \n\t"
|
||||
"addiu %[zeros], %[zeros], -1 \n\t"
|
||||
"2: \n\t"
|
||||
".set pop \n\t"
|
||||
: [zeros]"=&r"(zeros)
|
||||
: [a] "r" (a)
|
||||
);
|
||||
|
||||
return (int16_t)zeros;
|
||||
}
|
||||
|
||||
// MIPS version: counts the leading zeros of |a| with `clz` and masks the
// count with 0x1f. The mask turns a count of 32 into 0, which matches the
// generic C implementation's return value of 0 for a == 0 (presumably `clz`
// yields 32 for a zero input -- confirm).
static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
|
||||
int zeros = 0;
|
||||
|
||||
__asm __volatile(
|
||||
"clz %[zeros], %[a] \n\t"
|
||||
: [zeros] "=r" (zeros)
|
||||
: [a] "r" (a)
|
||||
);
|
||||
|
||||
return (int16_t)(zeros & 0x1f);
|
||||
}
|
||||
|
||||
// MIPS version of the 16-bit sign-bit normalization count.
//
// |a| is first moved into the upper halfword of a 32-bit value (a0 = a << 16)
// so that the same sequence as the 32-bit variant can be used:
//   - zeros = a0 >> 31 is always computed (delay slot of `bnez`, assuming the
//     usual MIPS branch delay slot under `.set noreorder`).
//   - a0 == 0: zeros is reset to 0 in the delay slot of `b 2f`.
//   - a0 != 0: zeros = clz(a0 ^ zeros) - 1.
// NOTE(review): `a << 16` left-shifts a negative int when a < 0, which ISO C
// leaves undefined; the code relies on the compiler's two's-complement
// behavior.
static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
|
||||
int zeros = 0;
|
||||
int a0 = a << 16;
|
||||
|
||||
__asm __volatile(
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"bnez %[a0], 1f \n\t"
|
||||
" sra %[zeros], %[a0], 31 \n\t"
|
||||
"b 2f \n\t"
|
||||
" move %[zeros], $zero \n\t"
|
||||
"1: \n\t"
|
||||
"xor %[zeros], %[a0], %[zeros] \n\t"
|
||||
"clz %[zeros], %[zeros] \n\t"
|
||||
"addiu %[zeros], %[zeros], -1 \n\t"
|
||||
"2: \n\t"
|
||||
".set pop \n\t"
|
||||
: [zeros]"=&r"(zeros)
|
||||
: [a0] "r" (a0)
|
||||
);
|
||||
|
||||
return (int16_t)zeros;
|
||||
}
|
||||
|
||||
static __inline int32_t WebRtc_MulAccumW16(int16_t a,
|
||||
int16_t b,
|
||||
int32_t c) {
|
||||
int32_t res = 0, c1 = 0;
|
||||
__asm __volatile(
|
||||
#if defined(MIPS32_R2_LE)
|
||||
"seh %[a], %[a] \n\t"
|
||||
"seh %[b], %[b] \n\t"
|
||||
#else
|
||||
"sll %[a], %[a], 16 \n\t"
|
||||
"sll %[b], %[b], 16 \n\t"
|
||||
"sra %[a], %[a], 16 \n\t"
|
||||
"sra %[b], %[b], 16 \n\t"
|
||||
#endif
|
||||
"mul %[res], %[a], %[b] \n\t"
|
||||
"addu %[c1], %[c], %[res] \n\t"
|
||||
: [c1] "=r" (c1), [res] "=&r" (res)
|
||||
: [a] "r" (a), [b] "r" (b), [c] "r" (c)
|
||||
: "hi", "lo"
|
||||
);
|
||||
return (c1);
|
||||
}
|
||||
|
||||
#endif // WEBRTC_SPL_SPL_INL_MIPS_H_
|
246
third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c
vendored
Normal file
246
third_party/webrtc/src/webrtc/common_audio/signal_processing/levinson_durbin.c
vendored
Normal file
|
@ -0,0 +1,246 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_LevinsonDurbin().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
#define SPL_LEVINSON_MAXORDER 20
|
||||
|
||||
int16_t WebRtcSpl_LevinsonDurbin(const int32_t* R, int16_t* A, int16_t* K,
|
||||
size_t order)
|
||||
{
|
||||
size_t i, j;
|
||||
// Auto-correlation coefficients in high precision
|
||||
int16_t R_hi[SPL_LEVINSON_MAXORDER + 1], R_low[SPL_LEVINSON_MAXORDER + 1];
|
||||
// LPC coefficients in high precision
|
||||
int16_t A_hi[SPL_LEVINSON_MAXORDER + 1], A_low[SPL_LEVINSON_MAXORDER + 1];
|
||||
// LPC coefficients for next iteration
|
||||
int16_t A_upd_hi[SPL_LEVINSON_MAXORDER + 1], A_upd_low[SPL_LEVINSON_MAXORDER + 1];
|
||||
// Reflection coefficient in high precision
|
||||
int16_t K_hi, K_low;
|
||||
// Prediction gain Alpha in high precision and with scale factor
|
||||
int16_t Alpha_hi, Alpha_low, Alpha_exp;
|
||||
int16_t tmp_hi, tmp_low;
|
||||
int32_t temp1W32, temp2W32, temp3W32;
|
||||
int16_t norm;
|
||||
|
||||
// Normalize the autocorrelation R[0]...R[order+1]
|
||||
|
||||
norm = WebRtcSpl_NormW32(R[0]);
|
||||
|
||||
for (i = 0; i <= order; ++i)
|
||||
{
|
||||
temp1W32 = WEBRTC_SPL_LSHIFT_W32(R[i], norm);
|
||||
// Put R in hi and low format
|
||||
R_hi[i] = (int16_t)(temp1W32 >> 16);
|
||||
R_low[i] = (int16_t)((temp1W32 - ((int32_t)R_hi[i] << 16)) >> 1);
|
||||
}
|
||||
|
||||
// K = A[1] = -R[1] / R[0]
|
||||
|
||||
temp2W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)R_hi[1],16)
|
||||
+ WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[1],1); // R[1] in Q31
|
||||
temp3W32 = WEBRTC_SPL_ABS_W32(temp2W32); // abs R[1]
|
||||
temp1W32 = WebRtcSpl_DivW32HiLow(temp3W32, R_hi[0], R_low[0]); // abs(R[1])/R[0] in Q31
|
||||
// Put back the sign on R[1]
|
||||
if (temp2W32 > 0)
|
||||
{
|
||||
temp1W32 = -temp1W32;
|
||||
}
|
||||
|
||||
// Put K in hi and low format
|
||||
K_hi = (int16_t)(temp1W32 >> 16);
|
||||
K_low = (int16_t)((temp1W32 - ((int32_t)K_hi << 16)) >> 1);
|
||||
|
||||
// Store first reflection coefficient
|
||||
K[0] = K_hi;
|
||||
|
||||
temp1W32 >>= 4; // A[1] in Q27.
|
||||
|
||||
// Put A[1] in hi and low format
|
||||
A_hi[1] = (int16_t)(temp1W32 >> 16);
|
||||
A_low[1] = (int16_t)((temp1W32 - ((int32_t)A_hi[1] << 16)) >> 1);
|
||||
|
||||
// Alpha = R[0] * (1-K^2)
|
||||
|
||||
temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) << 1; // = k^2 in Q31
|
||||
|
||||
temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
|
||||
temp1W32 = (int32_t)0x7fffffffL - temp1W32; // temp1W32 = (1 - K[0]*K[0]) in Q31
|
||||
|
||||
// Store temp1W32 = 1 - K[0]*K[0] on hi and low format
|
||||
tmp_hi = (int16_t)(temp1W32 >> 16);
|
||||
tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
|
||||
|
||||
// Calculate Alpha in Q31
|
||||
temp1W32 = (R_hi[0] * tmp_hi + (R_hi[0] * tmp_low >> 15) +
|
||||
(R_low[0] * tmp_hi >> 15)) << 1;
|
||||
|
||||
// Normalize Alpha and put it in hi and low format
|
||||
|
||||
Alpha_exp = WebRtcSpl_NormW32(temp1W32);
|
||||
temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, Alpha_exp);
|
||||
Alpha_hi = (int16_t)(temp1W32 >> 16);
|
||||
Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
|
||||
|
||||
// Perform the iterative calculations in the Levinson-Durbin algorithm
|
||||
|
||||
for (i = 2; i <= order; i++)
|
||||
{
|
||||
/* ----
|
||||
temp1W32 = R[i] + > R[j]*A[i-j]
|
||||
/
|
||||
----
|
||||
j=1..i-1
|
||||
*/
|
||||
|
||||
temp1W32 = 0;
|
||||
|
||||
for (j = 1; j < i; j++)
|
||||
{
|
||||
// temp1W32 is in Q31
|
||||
temp1W32 += (R_hi[j] * A_hi[i - j] << 1) +
|
||||
(((R_hi[j] * A_low[i - j] >> 15) +
|
||||
(R_low[j] * A_hi[i - j] >> 15)) << 1);
|
||||
}
|
||||
|
||||
temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, 4);
|
||||
temp1W32 += (WEBRTC_SPL_LSHIFT_W32((int32_t)R_hi[i], 16)
|
||||
+ WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[i], 1));
|
||||
|
||||
// K = -temp1W32 / Alpha
|
||||
temp2W32 = WEBRTC_SPL_ABS_W32(temp1W32); // abs(temp1W32)
|
||||
temp3W32 = WebRtcSpl_DivW32HiLow(temp2W32, Alpha_hi, Alpha_low); // abs(temp1W32)/Alpha
|
||||
|
||||
// Put the sign of temp1W32 back again
|
||||
if (temp1W32 > 0)
|
||||
{
|
||||
temp3W32 = -temp3W32;
|
||||
}
|
||||
|
||||
// Use the Alpha shifts from earlier to de-normalize
|
||||
norm = WebRtcSpl_NormW32(temp3W32);
|
||||
if ((Alpha_exp <= norm) || (temp3W32 == 0))
|
||||
{
|
||||
temp3W32 = WEBRTC_SPL_LSHIFT_W32(temp3W32, Alpha_exp);
|
||||
} else
|
||||
{
|
||||
if (temp3W32 > 0)
|
||||
{
|
||||
temp3W32 = (int32_t)0x7fffffffL;
|
||||
} else
|
||||
{
|
||||
temp3W32 = (int32_t)0x80000000L;
|
||||
}
|
||||
}
|
||||
|
||||
// Put K on hi and low format
|
||||
K_hi = (int16_t)(temp3W32 >> 16);
|
||||
K_low = (int16_t)((temp3W32 - ((int32_t)K_hi << 16)) >> 1);
|
||||
|
||||
// Store Reflection coefficient in Q15
|
||||
K[i - 1] = K_hi;
|
||||
|
||||
// Test for unstable filter.
|
||||
// If unstable return 0 and let the user decide what to do in that case
|
||||
|
||||
if ((int32_t)WEBRTC_SPL_ABS_W16(K_hi) > (int32_t)32750)
|
||||
{
|
||||
return 0; // Unstable filter
|
||||
}
|
||||
|
||||
/*
|
||||
Compute updated LPC coefficient: Anew[i]
|
||||
Anew[j]= A[j] + K*A[i-j] for j=1..i-1
|
||||
Anew[i]= K
|
||||
*/
|
||||
|
||||
for (j = 1; j < i; j++)
|
||||
{
|
||||
// temp1W32 = A[j] in Q27
|
||||
temp1W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)A_hi[j],16)
|
||||
+ WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[j],1);
|
||||
|
||||
// temp1W32 += K*A[i-j] in Q27
|
||||
temp1W32 += (K_hi * A_hi[i - j] + (K_hi * A_low[i - j] >> 15) +
|
||||
(K_low * A_hi[i - j] >> 15)) << 1;
|
||||
|
||||
// Put Anew in hi and low format
|
||||
A_upd_hi[j] = (int16_t)(temp1W32 >> 16);
|
||||
A_upd_low[j] = (int16_t)(
|
||||
(temp1W32 - ((int32_t)A_upd_hi[j] << 16)) >> 1);
|
||||
}
|
||||
|
||||
// temp3W32 = K in Q27 (Convert from Q31 to Q27)
|
||||
temp3W32 >>= 4;
|
||||
|
||||
// Store Anew in hi and low format
|
||||
A_upd_hi[i] = (int16_t)(temp3W32 >> 16);
|
||||
A_upd_low[i] = (int16_t)(
|
||||
(temp3W32 - ((int32_t)A_upd_hi[i] << 16)) >> 1);
|
||||
|
||||
// Alpha = Alpha * (1-K^2)
|
||||
|
||||
temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) << 1; // K*K in Q31
|
||||
|
||||
temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
|
||||
temp1W32 = (int32_t)0x7fffffffL - temp1W32; // 1 - K*K in Q31
|
||||
|
||||
// Convert 1- K^2 in hi and low format
|
||||
tmp_hi = (int16_t)(temp1W32 >> 16);
|
||||
tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
|
||||
|
||||
// Calculate Alpha = Alpha * (1-K^2) in Q31
|
||||
temp1W32 = (Alpha_hi * tmp_hi + (Alpha_hi * tmp_low >> 15) +
|
||||
(Alpha_low * tmp_hi >> 15)) << 1;
|
||||
|
||||
// Normalize Alpha and store it on hi and low format
|
||||
|
||||
norm = WebRtcSpl_NormW32(temp1W32);
|
||||
temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, norm);
|
||||
|
||||
Alpha_hi = (int16_t)(temp1W32 >> 16);
|
||||
Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
|
||||
|
||||
// Update the total normalization of Alpha
|
||||
Alpha_exp = Alpha_exp + norm;
|
||||
|
||||
// Update A[]
|
||||
|
||||
for (j = 1; j <= i; j++)
|
||||
{
|
||||
A_hi[j] = A_upd_hi[j];
|
||||
A_low[j] = A_upd_low[j];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Set A[0] to 1.0 and store the A[i] i=1...order in Q12
|
||||
(Convert from Q27 and use rounding)
|
||||
*/
|
||||
|
||||
A[0] = 4096;
|
||||
|
||||
for (i = 1; i <= order; i++)
|
||||
{
|
||||
// temp1W32 in Q27
|
||||
temp1W32 = WEBRTC_SPL_LSHIFT_W32((int32_t)A_hi[i], 16)
|
||||
+ WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[i], 1);
|
||||
// Round and store upper word
|
||||
A[i] = (int16_t)(((temp1W32 << 1) + 32768) >> 16);
|
||||
}
|
||||
return 1; // Stable filters
|
||||
}
|
56
third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c
vendored
Normal file
56
third_party/webrtc/src/webrtc/common_audio/signal_processing/lpc_to_refl_coef.c
vendored
Normal file
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_LpcToReflCoef().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
#define SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER 50
|
||||
|
||||
void WebRtcSpl_LpcToReflCoef(int16_t* a16, int use_order, int16_t* k16)
|
||||
{
|
||||
int m, k;
|
||||
int32_t tmp32[SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER];
|
||||
int32_t tmp_inv_denom32;
|
||||
int16_t tmp_inv_denom16;
|
||||
|
||||
k16[use_order - 1] = a16[use_order] << 3; // Q12<<3 => Q15
|
||||
for (m = use_order - 1; m > 0; m--)
|
||||
{
|
||||
// (1 - k^2) in Q30
|
||||
tmp_inv_denom32 = 1073741823 - k16[m] * k16[m];
|
||||
// (1 - k^2) in Q15
|
||||
tmp_inv_denom16 = (int16_t)(tmp_inv_denom32 >> 15);
|
||||
|
||||
for (k = 1; k <= m; k++)
|
||||
{
|
||||
// tmp[k] = (a[k] - RC[m] * a[m-k+1]) / (1.0 - RC[m]*RC[m]);
|
||||
|
||||
// [Q12<<16 - (Q15*Q12)<<1] = [Q28 - Q28] = Q28
|
||||
tmp32[k] = (a16[k] << 16) - (k16[m] * a16[m - k + 1] << 1);
|
||||
|
||||
tmp32[k] = WebRtcSpl_DivW32W16(tmp32[k], tmp_inv_denom16); //Q28/Q15 = Q13
|
||||
}
|
||||
|
||||
for (k = 1; k < m; k++)
|
||||
{
|
||||
a16[k] = (int16_t)(tmp32[k] >> 1); // Q13>>1 => Q12
|
||||
}
|
||||
|
||||
tmp32[m] = WEBRTC_SPL_SAT(8191, tmp32[m], -8191);
|
||||
k16[m - 1] = (int16_t)WEBRTC_SPL_LSHIFT_W32(tmp32[m], 2); //Q13<<2 => Q15
|
||||
}
|
||||
return;
|
||||
}
|
224
third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c
vendored
Normal file
224
third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations.c
vendored
Normal file
|
@ -0,0 +1,224 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains the implementation of functions
|
||||
* WebRtcSpl_MaxAbsValueW16C()
|
||||
* WebRtcSpl_MaxAbsValueW32C()
|
||||
* WebRtcSpl_MaxValueW16C()
|
||||
* WebRtcSpl_MaxValueW32C()
|
||||
* WebRtcSpl_MinValueW16C()
|
||||
* WebRtcSpl_MinValueW32C()
|
||||
* WebRtcSpl_MaxAbsIndexW16()
|
||||
* WebRtcSpl_MaxIndexW16()
|
||||
* WebRtcSpl_MaxIndexW32()
|
||||
* WebRtcSpl_MinIndexW16()
|
||||
* WebRtcSpl_MinIndexW32()
|
||||
*
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
|
||||
// WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
|
||||
// TODO(kma): Move the next six functions into min_max_operations_c.c.
|
||||
|
||||
// Maximum absolute value of word16 vector. C version for generic platforms.
|
||||
int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) {
|
||||
size_t i = 0;
|
||||
int absolute = 0, maximum = 0;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
absolute = abs((int)vector[i]);
|
||||
|
||||
if (absolute > maximum) {
|
||||
maximum = absolute;
|
||||
}
|
||||
}
|
||||
|
||||
// Guard the case for abs(-32768).
|
||||
if (maximum > WEBRTC_SPL_WORD16_MAX) {
|
||||
maximum = WEBRTC_SPL_WORD16_MAX;
|
||||
}
|
||||
|
||||
return (int16_t)maximum;
|
||||
}
|
||||
|
||||
// Maximum absolute value of word32 vector. C version for generic platforms.
|
||||
int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) {
|
||||
// Use uint32_t for the local variables, to accommodate the return value
|
||||
// of abs(0x80000000), which is 0x80000000.
|
||||
|
||||
uint32_t absolute = 0, maximum = 0;
|
||||
size_t i = 0;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
absolute = abs((int)vector[i]);
|
||||
if (absolute > maximum) {
|
||||
maximum = absolute;
|
||||
}
|
||||
}
|
||||
|
||||
maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
|
||||
|
||||
return (int32_t)maximum;
|
||||
}
|
||||
|
||||
// Maximum value of word16 vector. C version for generic platforms.
|
||||
int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) {
|
||||
int16_t maximum = WEBRTC_SPL_WORD16_MIN;
|
||||
size_t i = 0;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
if (vector[i] > maximum)
|
||||
maximum = vector[i];
|
||||
}
|
||||
return maximum;
|
||||
}
|
||||
|
||||
// Maximum value of word32 vector. C version for generic platforms.
|
||||
int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) {
|
||||
int32_t maximum = WEBRTC_SPL_WORD32_MIN;
|
||||
size_t i = 0;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
if (vector[i] > maximum)
|
||||
maximum = vector[i];
|
||||
}
|
||||
return maximum;
|
||||
}
|
||||
|
||||
// Minimum value of word16 vector. C version for generic platforms.
|
||||
int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) {
|
||||
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
|
||||
size_t i = 0;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
if (vector[i] < minimum)
|
||||
minimum = vector[i];
|
||||
}
|
||||
return minimum;
|
||||
}
|
||||
|
||||
// Minimum value of word32 vector. C version for generic platforms.
|
||||
int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) {
|
||||
int32_t minimum = WEBRTC_SPL_WORD32_MAX;
|
||||
size_t i = 0;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
if (vector[i] < minimum)
|
||||
minimum = vector[i];
|
||||
}
|
||||
return minimum;
|
||||
}
|
||||
|
||||
// Index of maximum absolute value in a word16 vector.
//
// Returns the position of the first element whose magnitude is the largest
// among the first |length| elements. If every element is zero the result is
// 0. |length| must be greater than zero (asserted).
size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) {
  // Magnitudes are held as int to accommodate the value of abs(-32768).
  int peak = 0;
  size_t peak_pos = 0;
  size_t pos;

  assert(length > 0);

  for (pos = 0; pos < length; ++pos) {
    const int magnitude = abs((int)vector[pos]);

    // Only a strictly larger magnitude replaces the current peak, so on ties
    // the earliest position is kept.
    if (magnitude <= peak) {
      continue;
    }
    peak = magnitude;
    peak_pos = pos;
  }

  return peak_pos;
}
|
||||
|
||||
// Index of maximum value in a word16 vector.
|
||||
size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) {
|
||||
size_t i = 0, index = 0;
|
||||
int16_t maximum = WEBRTC_SPL_WORD16_MIN;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
if (vector[i] > maximum) {
|
||||
maximum = vector[i];
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
// Index of maximum value in a word32 vector.
|
||||
size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) {
|
||||
size_t i = 0, index = 0;
|
||||
int32_t maximum = WEBRTC_SPL_WORD32_MIN;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
if (vector[i] > maximum) {
|
||||
maximum = vector[i];
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
// Index of minimum value in a word16 vector.
|
||||
size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) {
|
||||
size_t i = 0, index = 0;
|
||||
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
if (vector[i] < minimum) {
|
||||
minimum = vector[i];
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
// Index of minimum value in a word32 vector.
|
||||
size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) {
|
||||
size_t i = 0, index = 0;
|
||||
int32_t minimum = WEBRTC_SPL_WORD32_MAX;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
if (vector[i] < minimum) {
|
||||
minimum = vector[i];
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
376
third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c
vendored
Normal file
376
third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_mips.c
vendored
Normal file
|
@ -0,0 +1,376 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
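* This file contains the MIPS implementations of the functions
* WebRtcSpl_MaxAbsValueW16_mips(), WebRtcSpl_MaxAbsValueW32_mips(),
* WebRtcSpl_MaxValueW16_mips(), WebRtcSpl_MaxValueW32_mips(),
* WebRtcSpl_MinValueW16_mips() and WebRtcSpl_MinValueW32_mips().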
|
||||
*
|
||||
* The description header can be found in signal_processing_library.h.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// Maximum absolute value of word16 vector.
|
||||
int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) {
|
||||
int32_t totMax = 0;
|
||||
int32_t tmp32_0, tmp32_1, tmp32_2, tmp32_3;
|
||||
size_t i, loop_size;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
#if defined(MIPS_DSP_R1)
|
||||
const int32_t* tmpvec32 = (int32_t*)vector;
|
||||
loop_size = length >> 4;
|
||||
|
||||
for (i = 0; i < loop_size; i++) {
|
||||
__asm__ volatile (
|
||||
"lw %[tmp32_0], 0(%[tmpvec32]) \n\t"
|
||||
"lw %[tmp32_1], 4(%[tmpvec32]) \n\t"
|
||||
"lw %[tmp32_2], 8(%[tmpvec32]) \n\t"
|
||||
"lw %[tmp32_3], 12(%[tmpvec32]) \n\t"
|
||||
|
||||
"absq_s.ph %[tmp32_0], %[tmp32_0] \n\t"
|
||||
"absq_s.ph %[tmp32_1], %[tmp32_1] \n\t"
|
||||
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
|
||||
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
|
||||
|
||||
"lw %[tmp32_0], 16(%[tmpvec32]) \n\t"
|
||||
"absq_s.ph %[tmp32_2], %[tmp32_2] \n\t"
|
||||
"cmp.lt.ph %[totMax], %[tmp32_1] \n\t"
|
||||
"pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t"
|
||||
|
||||
"lw %[tmp32_1], 20(%[tmpvec32]) \n\t"
|
||||
"absq_s.ph %[tmp32_3], %[tmp32_3] \n\t"
|
||||
"cmp.lt.ph %[totMax], %[tmp32_2] \n\t"
|
||||
"pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t"
|
||||
|
||||
"lw %[tmp32_2], 24(%[tmpvec32]) \n\t"
|
||||
"cmp.lt.ph %[totMax], %[tmp32_3] \n\t"
|
||||
"pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t"
|
||||
|
||||
"lw %[tmp32_3], 28(%[tmpvec32]) \n\t"
|
||||
"absq_s.ph %[tmp32_0], %[tmp32_0] \n\t"
|
||||
"absq_s.ph %[tmp32_1], %[tmp32_1] \n\t"
|
||||
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
|
||||
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
|
||||
|
||||
"absq_s.ph %[tmp32_2], %[tmp32_2] \n\t"
|
||||
"cmp.lt.ph %[totMax], %[tmp32_1] \n\t"
|
||||
"pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t"
|
||||
"absq_s.ph %[tmp32_3], %[tmp32_3] \n\t"
|
||||
"cmp.lt.ph %[totMax], %[tmp32_2] \n\t"
|
||||
"pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t"
|
||||
|
||||
"cmp.lt.ph %[totMax], %[tmp32_3] \n\t"
|
||||
"pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t"
|
||||
|
||||
"addiu %[tmpvec32], %[tmpvec32], 32 \n\t"
|
||||
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
|
||||
[tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
|
||||
[totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32)
|
||||
:
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
__asm__ volatile (
|
||||
"rotr %[tmp32_0], %[totMax], 16 \n\t"
|
||||
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
|
||||
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
|
||||
"packrl.ph %[totMax], $0, %[totMax] \n\t"
|
||||
: [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax)
|
||||
:
|
||||
);
|
||||
loop_size = length & 0xf;
|
||||
for (i = 0; i < loop_size; i++) {
|
||||
__asm__ volatile (
|
||||
"lh %[tmp32_0], 0(%[tmpvec32]) \n\t"
|
||||
"addiu %[tmpvec32], %[tmpvec32], 2 \n\t"
|
||||
"absq_s.w %[tmp32_0], %[tmp32_0] \n\t"
|
||||
"slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t"
|
||||
"movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t"
|
||||
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
|
||||
[tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax)
|
||||
:
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
#else // #if defined(MIPS_DSP_R1)
|
||||
int32_t v16MaxMax = WEBRTC_SPL_WORD16_MAX;
|
||||
int32_t r, r1, r2, r3;
|
||||
const int16_t* tmpvector = vector;
|
||||
loop_size = length >> 4;
|
||||
for (i = 0; i < loop_size; i++) {
|
||||
__asm__ volatile (
|
||||
"lh %[tmp32_0], 0(%[tmpvector]) \n\t"
|
||||
"lh %[tmp32_1], 2(%[tmpvector]) \n\t"
|
||||
"lh %[tmp32_2], 4(%[tmpvector]) \n\t"
|
||||
"lh %[tmp32_3], 6(%[tmpvector]) \n\t"
|
||||
|
||||
"abs %[tmp32_0], %[tmp32_0] \n\t"
|
||||
"abs %[tmp32_1], %[tmp32_1] \n\t"
|
||||
"abs %[tmp32_2], %[tmp32_2] \n\t"
|
||||
"abs %[tmp32_3], %[tmp32_3] \n\t"
|
||||
|
||||
"slt %[r], %[totMax], %[tmp32_0] \n\t"
|
||||
"movn %[totMax], %[tmp32_0], %[r] \n\t"
|
||||
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
|
||||
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
|
||||
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
|
||||
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
|
||||
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
|
||||
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
|
||||
|
||||
"lh %[tmp32_0], 8(%[tmpvector]) \n\t"
|
||||
"lh %[tmp32_1], 10(%[tmpvector]) \n\t"
|
||||
"lh %[tmp32_2], 12(%[tmpvector]) \n\t"
|
||||
"lh %[tmp32_3], 14(%[tmpvector]) \n\t"
|
||||
|
||||
"abs %[tmp32_0], %[tmp32_0] \n\t"
|
||||
"abs %[tmp32_1], %[tmp32_1] \n\t"
|
||||
"abs %[tmp32_2], %[tmp32_2] \n\t"
|
||||
"abs %[tmp32_3], %[tmp32_3] \n\t"
|
||||
|
||||
"slt %[r], %[totMax], %[tmp32_0] \n\t"
|
||||
"movn %[totMax], %[tmp32_0], %[r] \n\t"
|
||||
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
|
||||
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
|
||||
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
|
||||
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
|
||||
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
|
||||
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
|
||||
|
||||
"lh %[tmp32_0], 16(%[tmpvector]) \n\t"
|
||||
"lh %[tmp32_1], 18(%[tmpvector]) \n\t"
|
||||
"lh %[tmp32_2], 20(%[tmpvector]) \n\t"
|
||||
"lh %[tmp32_3], 22(%[tmpvector]) \n\t"
|
||||
|
||||
"abs %[tmp32_0], %[tmp32_0] \n\t"
|
||||
"abs %[tmp32_1], %[tmp32_1] \n\t"
|
||||
"abs %[tmp32_2], %[tmp32_2] \n\t"
|
||||
"abs %[tmp32_3], %[tmp32_3] \n\t"
|
||||
|
||||
"slt %[r], %[totMax], %[tmp32_0] \n\t"
|
||||
"movn %[totMax], %[tmp32_0], %[r] \n\t"
|
||||
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
|
||||
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
|
||||
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
|
||||
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
|
||||
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
|
||||
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
|
||||
|
||||
"lh %[tmp32_0], 24(%[tmpvector]) \n\t"
|
||||
"lh %[tmp32_1], 26(%[tmpvector]) \n\t"
|
||||
"lh %[tmp32_2], 28(%[tmpvector]) \n\t"
|
||||
"lh %[tmp32_3], 30(%[tmpvector]) \n\t"
|
||||
|
||||
"abs %[tmp32_0], %[tmp32_0] \n\t"
|
||||
"abs %[tmp32_1], %[tmp32_1] \n\t"
|
||||
"abs %[tmp32_2], %[tmp32_2] \n\t"
|
||||
"abs %[tmp32_3], %[tmp32_3] \n\t"
|
||||
|
||||
"slt %[r], %[totMax], %[tmp32_0] \n\t"
|
||||
"movn %[totMax], %[tmp32_0], %[r] \n\t"
|
||||
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
|
||||
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
|
||||
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
|
||||
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
|
||||
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
|
||||
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
|
||||
|
||||
"addiu %[tmpvector], %[tmpvector], 32 \n\t"
|
||||
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
|
||||
[tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
|
||||
[totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector),
|
||||
[r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3)
|
||||
:
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
loop_size = length & 0xf;
|
||||
for (i = 0; i < loop_size; i++) {
|
||||
__asm__ volatile (
|
||||
"lh %[tmp32_0], 0(%[tmpvector]) \n\t"
|
||||
"addiu %[tmpvector], %[tmpvector], 2 \n\t"
|
||||
"abs %[tmp32_0], %[tmp32_0] \n\t"
|
||||
"slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t"
|
||||
"movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t"
|
||||
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
|
||||
[tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax)
|
||||
:
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
__asm__ volatile (
|
||||
"slt %[r], %[v16MaxMax], %[totMax] \n\t"
|
||||
"movn %[totMax], %[v16MaxMax], %[r] \n\t"
|
||||
: [totMax] "+r" (totMax), [r] "=&r" (r)
|
||||
: [v16MaxMax] "r" (v16MaxMax)
|
||||
);
|
||||
#endif // #if defined(MIPS_DSP_R1)
|
||||
return (int16_t)totMax;
|
||||
}
|
||||
|
||||
#if defined(MIPS_DSP_R1_LE)
// Maximum absolute value of word32 vector. Version for MIPS platform.
//
// Returns the largest |vector[i]| over the first |length| elements, capped at
// 0x7fffffff. |length| must be greater than zero (asserted).
int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length) {
  // The running maximum is kept in unsigned locals, mirroring the C version;
  // the final slt/movn pair caps it at max_value.
  uint32_t absolute = 0, maximum = 0;
  int tmp1 = 0, max_value = 0x7fffffff;

  assert(length > 0);

  // The loop advances |vector| and counts |length| down inside the asm, so
  // both are declared as read-write ("+r") operands; the compiler assumes
  // input-only operands keep their value.
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"

    "1: \n\t"
    "lw %[absolute], 0(%[vector]) \n\t"
    "absq_s.w %[absolute], %[absolute] \n\t"
    "addiu %[length], %[length], -1 \n\t"
    "slt %[tmp1], %[maximum], %[absolute] \n\t"
    "movn %[maximum], %[absolute], %[tmp1] \n\t"
    "bgtz %[length], 1b \n\t"
    // Branch delay slot: the pointer advance runs on every iteration.
    " addiu %[vector], %[vector], 4 \n\t"
    "slt %[tmp1], %[max_value], %[maximum] \n\t"
    "movn %[maximum], %[max_value], %[tmp1] \n\t"

    ".set pop \n\t"

    : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute),
      [vector] "+r" (vector), [length] "+r" (length)
    : [max_value] "r" (max_value)
    : "memory"
  );

  return (int32_t)maximum;
}
#endif  // #if defined(MIPS_DSP_R1_LE)
|
||||
|
||||
// Maximum value of word16 vector. Version for MIPS platform.
|
||||
int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length) {
|
||||
int16_t maximum = WEBRTC_SPL_WORD16_MIN;
|
||||
int tmp1;
|
||||
int16_t value;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
__asm__ volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
"lh %[value], 0(%[vector]) \n\t"
|
||||
"addiu %[length], %[length], -1 \n\t"
|
||||
"slt %[tmp1], %[maximum], %[value] \n\t"
|
||||
"movn %[maximum], %[value], %[tmp1] \n\t"
|
||||
"bgtz %[length], 1b \n\t"
|
||||
" addiu %[vector], %[vector], 2 \n\t"
|
||||
".set pop \n\t"
|
||||
|
||||
: [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value)
|
||||
: [vector] "r" (vector), [length] "r" (length)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return maximum;
|
||||
}
|
||||
|
||||
// Maximum value of word32 vector. Version for MIPS platform.
|
||||
int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length) {
|
||||
int32_t maximum = WEBRTC_SPL_WORD32_MIN;
|
||||
int tmp1, value;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
__asm__ volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
"lw %[value], 0(%[vector]) \n\t"
|
||||
"addiu %[length], %[length], -1 \n\t"
|
||||
"slt %[tmp1], %[maximum], %[value] \n\t"
|
||||
"movn %[maximum], %[value], %[tmp1] \n\t"
|
||||
"bgtz %[length], 1b \n\t"
|
||||
" addiu %[vector], %[vector], 4 \n\t"
|
||||
|
||||
".set pop \n\t"
|
||||
|
||||
: [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value)
|
||||
: [vector] "r" (vector), [length] "r" (length)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return maximum;
|
||||
}
|
||||
|
||||
// Minimum value of word16 vector. Version for MIPS platform.
|
||||
int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length) {
|
||||
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
|
||||
int tmp1;
|
||||
int16_t value;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
__asm__ volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
"lh %[value], 0(%[vector]) \n\t"
|
||||
"addiu %[length], %[length], -1 \n\t"
|
||||
"slt %[tmp1], %[value], %[minimum] \n\t"
|
||||
"movn %[minimum], %[value], %[tmp1] \n\t"
|
||||
"bgtz %[length], 1b \n\t"
|
||||
" addiu %[vector], %[vector], 2 \n\t"
|
||||
|
||||
".set pop \n\t"
|
||||
|
||||
: [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value)
|
||||
: [vector] "r" (vector), [length] "r" (length)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return minimum;
|
||||
}
|
||||
|
||||
// Minimum value of word32 vector. Version for MIPS platform.
|
||||
int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length) {
|
||||
int32_t minimum = WEBRTC_SPL_WORD32_MAX;
|
||||
int tmp1, value;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
__asm__ volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
"lw %[value], 0(%[vector]) \n\t"
|
||||
"addiu %[length], %[length], -1 \n\t"
|
||||
"slt %[tmp1], %[value], %[minimum] \n\t"
|
||||
"movn %[minimum], %[value], %[tmp1] \n\t"
|
||||
"bgtz %[length], 1b \n\t"
|
||||
" addiu %[vector], %[vector], 4 \n\t"
|
||||
|
||||
".set pop \n\t"
|
||||
|
||||
: [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value)
|
||||
: [vector] "r" (vector), [length] "r" (length)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return minimum;
|
||||
}
|
283
third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c
vendored
Normal file
283
third_party/webrtc/src/webrtc/common_audio/signal_processing/min_max_operations_neon.c
vendored
Normal file
|
@ -0,0 +1,283 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// Maximum absolute value of word16 vector. C version for generic platforms.
|
||||
int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) {
|
||||
int absolute = 0, maximum = 0;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
const int16_t* p_start = vector;
|
||||
size_t rest = length & 7;
|
||||
const int16_t* p_end = vector + length - rest;
|
||||
|
||||
int16x8_t v;
|
||||
uint16x8_t max_qv;
|
||||
max_qv = vdupq_n_u16(0);
|
||||
|
||||
while (p_start < p_end) {
|
||||
v = vld1q_s16(p_start);
|
||||
// Note vabs doesn't change the value of -32768.
|
||||
v = vabsq_s16(v);
|
||||
// Use u16 so we don't lose the value -32768.
|
||||
max_qv = vmaxq_u16(max_qv, vreinterpretq_u16_s16(v));
|
||||
p_start += 8;
|
||||
}
|
||||
|
||||
#ifdef WEBRTC_ARCH_ARM64
|
||||
maximum = (int)vmaxvq_u16(max_qv);
|
||||
#else
|
||||
uint16x4_t max_dv;
|
||||
max_dv = vmax_u16(vget_low_u16(max_qv), vget_high_u16(max_qv));
|
||||
max_dv = vpmax_u16(max_dv, max_dv);
|
||||
max_dv = vpmax_u16(max_dv, max_dv);
|
||||
|
||||
maximum = (int)vget_lane_u16(max_dv, 0);
|
||||
#endif
|
||||
|
||||
p_end = vector + length;
|
||||
while (p_start < p_end) {
|
||||
absolute = abs((int)(*p_start));
|
||||
|
||||
if (absolute > maximum) {
|
||||
maximum = absolute;
|
||||
}
|
||||
p_start++;
|
||||
}
|
||||
|
||||
// Guard the case for abs(-32768).
|
||||
if (maximum > WEBRTC_SPL_WORD16_MAX) {
|
||||
maximum = WEBRTC_SPL_WORD16_MAX;
|
||||
}
|
||||
|
||||
return (int16_t)maximum;
|
||||
}
|
||||
|
||||
// Maximum absolute value of word32 vector. NEON intrinsics version for
|
||||
// ARM 32-bit/64-bit platforms.
|
||||
int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) {
|
||||
// Use uint32_t for the local variables, to accommodate the return value
|
||||
// of abs(0x80000000), which is 0x80000000.
|
||||
|
||||
uint32_t absolute = 0, maximum = 0;
|
||||
size_t i = 0;
|
||||
size_t residual = length & 0x7;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
const int32_t* p_start = vector;
|
||||
uint32x4_t max32x4_0 = vdupq_n_u32(0);
|
||||
uint32x4_t max32x4_1 = vdupq_n_u32(0);
|
||||
|
||||
// First part, unroll the loop 8 times.
|
||||
for (i = 0; i < length - residual; i += 8) {
|
||||
int32x4_t in32x4_0 = vld1q_s32(p_start);
|
||||
p_start += 4;
|
||||
int32x4_t in32x4_1 = vld1q_s32(p_start);
|
||||
p_start += 4;
|
||||
in32x4_0 = vabsq_s32(in32x4_0);
|
||||
in32x4_1 = vabsq_s32(in32x4_1);
|
||||
// vabs doesn't change the value of 0x80000000.
|
||||
// Use u32 so we don't lose the value 0x80000000.
|
||||
max32x4_0 = vmaxq_u32(max32x4_0, vreinterpretq_u32_s32(in32x4_0));
|
||||
max32x4_1 = vmaxq_u32(max32x4_1, vreinterpretq_u32_s32(in32x4_1));
|
||||
}
|
||||
|
||||
uint32x4_t max32x4 = vmaxq_u32(max32x4_0, max32x4_1);
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
maximum = vmaxvq_u32(max32x4);
|
||||
#else
|
||||
uint32x2_t max32x2 = vmax_u32(vget_low_u32(max32x4), vget_high_u32(max32x4));
|
||||
max32x2 = vpmax_u32(max32x2, max32x2);
|
||||
|
||||
maximum = vget_lane_u32(max32x2, 0);
|
||||
#endif
|
||||
|
||||
// Second part, do the remaining iterations (if any).
|
||||
for (i = residual; i > 0; i--) {
|
||||
absolute = abs((int)(*p_start));
|
||||
if (absolute > maximum) {
|
||||
maximum = absolute;
|
||||
}
|
||||
p_start++;
|
||||
}
|
||||
|
||||
// Guard against the case for 0x80000000.
|
||||
maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
|
||||
|
||||
return (int32_t)maximum;
|
||||
}
|
||||
|
||||
// Maximum value of word16 vector. NEON intrinsics version for
|
||||
// ARM 32-bit/64-bit platforms.
|
||||
int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) {
|
||||
int16_t maximum = WEBRTC_SPL_WORD16_MIN;
|
||||
size_t i = 0;
|
||||
size_t residual = length & 0x7;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
const int16_t* p_start = vector;
|
||||
int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN);
|
||||
|
||||
// First part, unroll the loop 8 times.
|
||||
for (i = 0; i < length - residual; i += 8) {
|
||||
int16x8_t in16x8 = vld1q_s16(p_start);
|
||||
max16x8 = vmaxq_s16(max16x8, in16x8);
|
||||
p_start += 8;
|
||||
}
|
||||
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
maximum = vmaxvq_s16(max16x8);
|
||||
#else
|
||||
int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8));
|
||||
max16x4 = vpmax_s16(max16x4, max16x4);
|
||||
max16x4 = vpmax_s16(max16x4, max16x4);
|
||||
|
||||
maximum = vget_lane_s16(max16x4, 0);
|
||||
#endif
|
||||
|
||||
// Second part, do the remaining iterations (if any).
|
||||
for (i = residual; i > 0; i--) {
|
||||
if (*p_start > maximum)
|
||||
maximum = *p_start;
|
||||
p_start++;
|
||||
}
|
||||
return maximum;
|
||||
}
|
||||
|
||||
// Maximum value of word32 vector. NEON intrinsics version for
|
||||
// ARM 32-bit/64-bit platforms.
|
||||
int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) {
|
||||
int32_t maximum = WEBRTC_SPL_WORD32_MIN;
|
||||
size_t i = 0;
|
||||
size_t residual = length & 0x7;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
const int32_t* p_start = vector;
|
||||
int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN);
|
||||
int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN);
|
||||
|
||||
// First part, unroll the loop 8 times.
|
||||
for (i = 0; i < length - residual; i += 8) {
|
||||
int32x4_t in32x4_0 = vld1q_s32(p_start);
|
||||
p_start += 4;
|
||||
int32x4_t in32x4_1 = vld1q_s32(p_start);
|
||||
p_start += 4;
|
||||
max32x4_0 = vmaxq_s32(max32x4_0, in32x4_0);
|
||||
max32x4_1 = vmaxq_s32(max32x4_1, in32x4_1);
|
||||
}
|
||||
|
||||
int32x4_t max32x4 = vmaxq_s32(max32x4_0, max32x4_1);
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
maximum = vmaxvq_s32(max32x4);
|
||||
#else
|
||||
int32x2_t max32x2 = vmax_s32(vget_low_s32(max32x4), vget_high_s32(max32x4));
|
||||
max32x2 = vpmax_s32(max32x2, max32x2);
|
||||
|
||||
maximum = vget_lane_s32(max32x2, 0);
|
||||
#endif
|
||||
|
||||
// Second part, do the remaining iterations (if any).
|
||||
for (i = residual; i > 0; i--) {
|
||||
if (*p_start > maximum)
|
||||
maximum = *p_start;
|
||||
p_start++;
|
||||
}
|
||||
return maximum;
|
||||
}
|
||||
|
||||
// Minimum value of word16 vector. NEON intrinsics version for
|
||||
// ARM 32-bit/64-bit platforms.
|
||||
int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) {
|
||||
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
|
||||
size_t i = 0;
|
||||
size_t residual = length & 0x7;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
const int16_t* p_start = vector;
|
||||
int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX);
|
||||
|
||||
// First part, unroll the loop 8 times.
|
||||
for (i = 0; i < length - residual; i += 8) {
|
||||
int16x8_t in16x8 = vld1q_s16(p_start);
|
||||
min16x8 = vminq_s16(min16x8, in16x8);
|
||||
p_start += 8;
|
||||
}
|
||||
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
minimum = vminvq_s16(min16x8);
|
||||
#else
|
||||
int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8));
|
||||
min16x4 = vpmin_s16(min16x4, min16x4);
|
||||
min16x4 = vpmin_s16(min16x4, min16x4);
|
||||
|
||||
minimum = vget_lane_s16(min16x4, 0);
|
||||
#endif
|
||||
|
||||
// Second part, do the remaining iterations (if any).
|
||||
for (i = residual; i > 0; i--) {
|
||||
if (*p_start < minimum)
|
||||
minimum = *p_start;
|
||||
p_start++;
|
||||
}
|
||||
return minimum;
|
||||
}
|
||||
|
||||
// Minimum value of word32 vector. NEON intrinsics version for
|
||||
// ARM 32-bit/64-bit platforms.
|
||||
int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) {
|
||||
int32_t minimum = WEBRTC_SPL_WORD32_MAX;
|
||||
size_t i = 0;
|
||||
size_t residual = length & 0x7;
|
||||
|
||||
assert(length > 0);
|
||||
|
||||
const int32_t* p_start = vector;
|
||||
int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX);
|
||||
int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX);
|
||||
|
||||
// First part, unroll the loop 8 times.
|
||||
for (i = 0; i < length - residual; i += 8) {
|
||||
int32x4_t in32x4_0 = vld1q_s32(p_start);
|
||||
p_start += 4;
|
||||
int32x4_t in32x4_1 = vld1q_s32(p_start);
|
||||
p_start += 4;
|
||||
min32x4_0 = vminq_s32(min32x4_0, in32x4_0);
|
||||
min32x4_1 = vminq_s32(min32x4_1, in32x4_1);
|
||||
}
|
||||
|
||||
int32x4_t min32x4 = vminq_s32(min32x4_0, min32x4_1);
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
minimum = vminvq_s32(min32x4);
|
||||
#else
|
||||
int32x2_t min32x2 = vmin_s32(vget_low_s32(min32x4), vget_high_s32(min32x4));
|
||||
min32x2 = vpmin_s32(min32x2, min32x2);
|
||||
|
||||
minimum = vget_lane_s32(min32x2, 0);
|
||||
#endif
|
||||
|
||||
// Second part, do the remaining iterations (if any).
|
||||
for (i = residual; i > 0; i--) {
|
||||
if (*p_start < minimum)
|
||||
minimum = *p_start;
|
||||
p_start++;
|
||||
}
|
||||
return minimum;
|
||||
}
|
||||
|
115
third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c
vendored
Normal file
115
third_party/webrtc/src/webrtc/common_audio/signal_processing/randomization_functions.c
vendored
Normal file
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains implementations of the randomization functions
|
||||
* WebRtcSpl_RandU()
|
||||
* WebRtcSpl_RandN()
|
||||
* WebRtcSpl_RandUArray()
|
||||
*
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
static const uint32_t kMaxSeedUsed = 0x80000000;
|
||||
|
||||
static const int16_t kRandNTable[] = {
|
||||
9178, -7260, 40, 10189, 4894, -3531, -13779, 14764,
|
||||
-4008, -8884, -8990, 1008, 7368, 5184, 3251, -5817,
|
||||
-9786, 5963, 1770, 8066, -7135, 10772, -2298, 1361,
|
||||
6484, 2241, -8633, 792, 199, -3344, 6553, -10079,
|
||||
-15040, 95, 11608, -12469, 14161, -4176, 2476, 6403,
|
||||
13685, -16005, 6646, 2239, 10916, -3004, -602, -3141,
|
||||
2142, 14144, -5829, 5305, 8209, 4713, 2697, -5112,
|
||||
16092, -1210, -2891, -6631, -5360, -11878, -6781, -2739,
|
||||
-6392, 536, 10923, 10872, 5059, -4748, -7770, 5477,
|
||||
38, -1025, -2892, 1638, 6304, 14375, -11028, 1553,
|
||||
-1565, 10762, -393, 4040, 5257, 12310, 6554, -4799,
|
||||
4899, -6354, 1603, -1048, -2220, 8247, -186, -8944,
|
||||
-12004, 2332, 4801, -4933, 6371, 131, 8614, -5927,
|
||||
-8287, -22760, 4033, -15162, 3385, 3246, 3153, -5250,
|
||||
3766, 784, 6494, -62, 3531, -1582, 15572, 662,
|
||||
-3952, -330, -3196, 669, 7236, -2678, -6569, 23319,
|
||||
-8645, -741, 14830, -15976, 4903, 315, -11342, 10311,
|
||||
1858, -7777, 2145, 5436, 5677, -113, -10033, 826,
|
||||
-1353, 17210, 7768, 986, -1471, 8291, -4982, 8207,
|
||||
-14911, -6255, -2449, -11881, -7059, -11703, -4338, 8025,
|
||||
7538, -2823, -12490, 9470, -1613, -2529, -10092, -7807,
|
||||
9480, 6970, -12844, 5123, 3532, 4816, 4803, -8455,
|
||||
-5045, 14032, -4378, -1643, 5756, -11041, -2732, -16618,
|
||||
-6430, -18375, -3320, 6098, 5131, -4269, -8840, 2482,
|
||||
-7048, 1547, -21890, -6505, -7414, -424, -11722, 7955,
|
||||
1653, -17299, 1823, 473, -9232, 3337, 1111, 873,
|
||||
4018, -8982, 9889, 3531, -11763, -3799, 7373, -4539,
|
||||
3231, 7054, -8537, 7616, 6244, 16635, 447, -2915,
|
||||
13967, 705, -2669, -1520, -1771, -16188, 5956, 5117,
|
||||
6371, -9936, -1448, 2480, 5128, 7550, -8130, 5236,
|
||||
8213, -6443, 7707, -1950, -13811, 7218, 7031, -3883,
|
||||
67, 5731, -2874, 13480, -3743, 9298, -3280, 3552,
|
||||
-4425, -18, -3785, -9988, -5357, 5477, -11794, 2117,
|
||||
1416, -9935, 3376, 802, -5079, -8243, 12652, 66,
|
||||
3653, -2368, 6781, -21895, -7227, 2487, 7839, -385,
|
||||
6646, -7016, -4658, 5531, -1705, 834, 129, 3694,
|
||||
-1343, 2238, -22640, -6417, -11139, 11301, -2945, -3494,
|
||||
-5626, 185, -3615, -2041, -7972, -3106, -60, -23497,
|
||||
-1566, 17064, 3519, 2518, 304, -6805, -10269, 2105,
|
||||
1936, -426, -736, -8122, -1467, 4238, -6939, -13309,
|
||||
360, 7402, -7970, 12576, 3287, 12194, -6289, -16006,
|
||||
9171, 4042, -9193, 9123, -2512, 6388, -4734, -8739,
|
||||
1028, -5406, -1696, 5889, -666, -4736, 4971, 3565,
|
||||
9362, -6292, 3876, -3652, -19666, 7523, -4061, 391,
|
||||
-11773, 7502, -3763, 4929, -9478, 13278, 2805, 4496,
|
||||
7814, 16419, 12455, -14773, 2127, -2746, 3763, 4847,
|
||||
3698, 6978, 4751, -6957, -3581, -45, 6252, 1513,
|
||||
-4797, -7925, 11270, 16188, -2359, -5269, 9376, -10777,
|
||||
7262, 20031, -6515, -2208, -5353, 8085, -1341, -1303,
|
||||
7333, 5576, 3625, 5763, -7931, 9833, -3371, -10305,
|
||||
6534, -13539, -9971, 997, 8464, -4064, -1495, 1857,
|
||||
13624, 5458, 9490, -11086, -4524, 12022, -550, -198,
|
||||
408, -8455, -7068, 10289, 9712, -3366, 9028, -7621,
|
||||
-5243, 2362, 6909, 4672, -4933, -1799, 4709, -4563,
|
||||
-62, -566, 1624, -7010, 14730, -17791, -3697, -2344,
|
||||
-1741, 7099, -9509, -6855, -1989, 3495, -2289, 2031,
|
||||
12784, 891, 14189, -3963, -5683, 421, -12575, 1724,
|
||||
-12682, -5970, -8169, 3143, -1824, -5488, -5130, 8536,
|
||||
12799, 794, 5738, 3459, -11689, -258, -3738, -3775,
|
||||
-8742, 2333, 8312, -9383, 10331, 13119, 8398, 10644,
|
||||
-19433, -6446, -16277, -11793, 16284, 9345, 15222, 15834,
|
||||
2009, -7349, 130, -14547, 338, -5998, 3337, 21492,
|
||||
2406, 7703, -951, 11196, -564, 3406, 2217, 4806,
|
||||
2374, -5797, 11839, 8940, -11874, 18213, 2855, 10492
|
||||
};
|
||||
|
||||
static uint32_t IncreaseSeed(uint32_t* seed) {
|
||||
seed[0] = (seed[0] * ((int32_t)69069) + 1) & (kMaxSeedUsed - 1);
|
||||
return seed[0];
|
||||
}
|
||||
|
||||
// Steps the generator state in |*seed| and returns the bits above bit 15 of
// the new state as a 16-bit value.
int16_t WebRtcSpl_RandU(uint32_t* seed) {
  const uint32_t state = IncreaseSeed(seed);

  return (int16_t)(state >> 16);
}
|
||||
|
||||
int16_t WebRtcSpl_RandN(uint32_t* seed) {
|
||||
return kRandNTable[IncreaseSeed(seed) >> 23];
|
||||
}
|
||||
|
||||
// Creates an array of uniformly distributed variables.
//
// Fills vector[0 .. vector_length - 1] with successive WebRtcSpl_RandU()
// values, advancing |*seed| once per element, and returns |vector_length|.
// Nothing is written when |vector_length| is zero or negative.
int16_t WebRtcSpl_RandUArray(int16_t* vector,
                             int16_t vector_length,
                             uint32_t* seed) {
  int filled = 0;

  while (filled < vector_length) {
    vector[filled] = WebRtcSpl_RandU(seed);
    ++filled;
  }
  return vector_length;
}
|
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/real_fft.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// State behind a real-FFT handle. Only the order is stored; the transform
// functions in this file compute their length as (1 << order).
struct RealFFT {
  int order;  // Set by WebRtcSpl_CreateRealFFT().
};
|
||||
|
||||
struct RealFFT* WebRtcSpl_CreateRealFFT(int order) {
|
||||
struct RealFFT* self = NULL;
|
||||
|
||||
if (order > kMaxFFTOrder || order < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
self = malloc(sizeof(struct RealFFT));
|
||||
if (self == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
self->order = order;
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
// Releases a handle obtained from WebRtcSpl_CreateRealFFT().
// Passing NULL is allowed: free(NULL) performs no action.
void WebRtcSpl_FreeRealFFT(struct RealFFT* self) {
  free(self);
}
|
||||
|
||||
// The C version FFT functions (i.e. WebRtcSpl_RealForwardFFT and
|
||||
// WebRtcSpl_RealInverseFFT) are real-valued FFT wrappers for complex-valued
|
||||
// FFT implementation in SPL.
|
||||
|
||||
int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
|
||||
const int16_t* real_data_in,
|
||||
int16_t* complex_data_out) {
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
int result = 0;
|
||||
int n = 1 << self->order;
|
||||
// The complex-value FFT implementation needs a buffer to hold 2^order
|
||||
// 16-bit COMPLEX numbers, for both time and frequency data.
|
||||
int16_t complex_buffer[2 << kMaxFFTOrder];
|
||||
|
||||
// Insert zeros to the imaginary parts for complex forward FFT input.
|
||||
for (i = 0, j = 0; i < n; i += 1, j += 2) {
|
||||
complex_buffer[j] = real_data_in[i];
|
||||
complex_buffer[j + 1] = 0;
|
||||
};
|
||||
|
||||
WebRtcSpl_ComplexBitReverse(complex_buffer, self->order);
|
||||
result = WebRtcSpl_ComplexFFT(complex_buffer, self->order, 1);
|
||||
|
||||
// For real FFT output, use only the first N + 2 elements from
|
||||
// complex forward FFT.
|
||||
memcpy(complex_data_out, complex_buffer, sizeof(int16_t) * (n + 2));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
|
||||
const int16_t* complex_data_in,
|
||||
int16_t* real_data_out) {
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
int result = 0;
|
||||
int n = 1 << self->order;
|
||||
// Create the buffer specific to complex-valued FFT implementation.
|
||||
int16_t complex_buffer[2 << kMaxFFTOrder];
|
||||
|
||||
// For n-point FFT, first copy the first n + 2 elements into complex
|
||||
// FFT, then construct the remaining n - 2 elements by real FFT's
|
||||
// conjugate-symmetric properties.
|
||||
memcpy(complex_buffer, complex_data_in, sizeof(int16_t) * (n + 2));
|
||||
for (i = n + 2; i < 2 * n; i += 2) {
|
||||
complex_buffer[i] = complex_data_in[2 * n - i];
|
||||
complex_buffer[i + 1] = -complex_data_in[2 * n - i + 1];
|
||||
}
|
||||
|
||||
WebRtcSpl_ComplexBitReverse(complex_buffer, self->order);
|
||||
result = WebRtcSpl_ComplexIFFT(complex_buffer, self->order, 1);
|
||||
|
||||
// Strip out the imaginary parts of the complex inverse FFT output.
|
||||
for (i = 0, j = 0; i < n; i += 1, j += 2) {
|
||||
real_data_out[i] = complex_buffer[j];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
108
third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc
vendored
Normal file
108
third_party/webrtc/src/webrtc/common_audio/signal_processing/real_fft_unittest.cc
vendored
Normal file
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/real_fft.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/test/testsupport/gtest_disable.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// FFT order.
|
||||
const int kOrder = 5;
|
||||
// Lengths for real FFT's time and frequency bufffers.
|
||||
// For N-point FFT, the length requirements from API are N and N+2 respectively.
|
||||
const int kTimeDataLength = 1 << kOrder;
|
||||
const int kFreqDataLength = (1 << kOrder) + 2;
|
||||
// For complex FFT's time and freq buffer. The implementation requires
|
||||
// 2*N 16-bit words.
|
||||
const int kComplexFftDataLength = 2 << kOrder;
|
||||
// Reference data for time signal.
|
||||
const int16_t kRefData[kTimeDataLength] = {
|
||||
11739, 6848, -8688, 31980, -30295, 25242, 27085, 19410,
|
||||
-26299, 15607, -10791, 11778, -23819, 14498, -25772, 10076,
|
||||
1173, 6848, -8688, 31980, -30295, 2522, 27085, 19410,
|
||||
-2629, 5607, -3, 1178, -23819, 1498, -25772, 10076
|
||||
};
|
||||
|
||||
class RealFFTTest : public ::testing::Test {
|
||||
protected:
|
||||
RealFFTTest() {
|
||||
WebRtcSpl_Init();
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(RealFFTTest, CreateFailsOnBadInput) {
|
||||
RealFFT* fft = WebRtcSpl_CreateRealFFT(11);
|
||||
EXPECT_TRUE(fft == NULL);
|
||||
fft = WebRtcSpl_CreateRealFFT(-1);
|
||||
EXPECT_TRUE(fft == NULL);
|
||||
}
|
||||
|
||||
TEST_F(RealFFTTest, RealAndComplexMatch) {
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
int16_t real_fft_time[kTimeDataLength] = {0};
|
||||
int16_t real_fft_freq[kFreqDataLength] = {0};
|
||||
// One common buffer for complex FFT's time and frequency data.
|
||||
int16_t complex_fft_buff[kComplexFftDataLength] = {0};
|
||||
|
||||
// Prepare the inputs to forward FFT's.
|
||||
memcpy(real_fft_time, kRefData, sizeof(kRefData));
|
||||
for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
|
||||
complex_fft_buff[j] = kRefData[i];
|
||||
complex_fft_buff[j + 1] = 0; // Insert zero's to imaginary parts.
|
||||
};
|
||||
|
||||
// Create and run real forward FFT.
|
||||
RealFFT* fft = WebRtcSpl_CreateRealFFT(kOrder);
|
||||
EXPECT_TRUE(fft != NULL);
|
||||
EXPECT_EQ(0, WebRtcSpl_RealForwardFFT(fft, real_fft_time, real_fft_freq));
|
||||
|
||||
// Run complex forward FFT.
|
||||
WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
|
||||
EXPECT_EQ(0, WebRtcSpl_ComplexFFT(complex_fft_buff, kOrder, 1));
|
||||
|
||||
// Verify the results between complex and real forward FFT.
|
||||
for (i = 0; i < kFreqDataLength; i++) {
|
||||
EXPECT_EQ(real_fft_freq[i], complex_fft_buff[i]);
|
||||
}
|
||||
|
||||
// Prepare the inputs to inverse real FFT.
|
||||
// We use whatever data in complex_fft_buff[] since we don't care
|
||||
// about data contents. Only kFreqDataLength 16-bit words are copied
|
||||
// from complex_fft_buff to real_fft_freq since remaining words (2nd half)
|
||||
// are conjugate-symmetric to the first half in theory.
|
||||
memcpy(real_fft_freq, complex_fft_buff, sizeof(real_fft_freq));
|
||||
|
||||
// Run real inverse FFT.
|
||||
int real_scale = WebRtcSpl_RealInverseFFT(fft, real_fft_freq, real_fft_time);
|
||||
EXPECT_GE(real_scale, 0);
|
||||
|
||||
// Run complex inverse FFT.
|
||||
WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
|
||||
int complex_scale = WebRtcSpl_ComplexIFFT(complex_fft_buff, kOrder, 1);
|
||||
|
||||
// Verify the results between complex and real inverse FFT.
|
||||
// They are not bit-exact, since complex IFFT doesn't produce
|
||||
// exactly conjugate-symmetric data (between first and second half).
|
||||
EXPECT_EQ(real_scale, complex_scale);
|
||||
for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
|
||||
EXPECT_LE(abs(real_fft_time[i] - complex_fft_buff[j]), 1);
|
||||
}
|
||||
|
||||
WebRtcSpl_FreeRealFFT(fft);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace webrtc
|
59
third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c
vendored
Normal file
59
third_party/webrtc/src/webrtc/common_audio/signal_processing/refl_coef_to_lpc.c
vendored
Normal file
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_ReflCoefToLpc().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
void WebRtcSpl_ReflCoefToLpc(const int16_t *k, int use_order, int16_t *a)
|
||||
{
|
||||
int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1];
|
||||
int16_t *aptr, *aptr2, *anyptr;
|
||||
const int16_t *kptr;
|
||||
int m, i;
|
||||
|
||||
kptr = k;
|
||||
*a = 4096; // i.e., (Word16_MAX >> 3)+1.
|
||||
*any = *a;
|
||||
a[1] = *k >> 3;
|
||||
|
||||
for (m = 1; m < use_order; m++)
|
||||
{
|
||||
kptr++;
|
||||
aptr = a;
|
||||
aptr++;
|
||||
aptr2 = &a[m];
|
||||
anyptr = any;
|
||||
anyptr++;
|
||||
|
||||
any[m + 1] = *kptr >> 3;
|
||||
for (i = 0; i < m; i++)
|
||||
{
|
||||
*anyptr = *aptr + (int16_t)((*aptr2 * *kptr) >> 15);
|
||||
anyptr++;
|
||||
aptr++;
|
||||
aptr2--;
|
||||
}
|
||||
|
||||
aptr = a;
|
||||
anyptr = any;
|
||||
for (i = 0; i < (m + 2); i++)
|
||||
{
|
||||
*aptr = *anyptr;
|
||||
aptr++;
|
||||
anyptr++;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,505 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the resampling functions for 22 kHz.
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
|
||||
|
||||
// Declaration of internally used functions
|
||||
static void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, int16_t *Out,
|
||||
int32_t K);
|
||||
|
||||
void WebRtcSpl_32khzTo22khzIntToInt(const int32_t *In, int32_t *Out,
|
||||
int32_t K);
|
||||
|
||||
// Interpolation coefficients: five rows of nine taps each. Row r is the
// coefficient set handed to the dot-product helpers by the 32 -> 22 kHz
// resampling routines in this file.
static const int16_t kCoefficients32To22[5][9] = {
    { 127, -712,  2359, -6333, 23456, 16775, -3695,  945, -154},
    { -39,  230,  -830,  2785, 32366, -2324,   760, -218,   38},
    { 117, -663,  2222, -6133, 26634, 13070, -3174,  831, -137},
    { -77,  457, -1677,  5958, 31175, -4136,  1405, -408,   71},
    {  98, -560,  1900, -5406, 29240,  9423, -2480,  663, -110}
};
|
||||
|
||||
//////////////////////
|
||||
// 22 kHz -> 16 kHz //
|
||||
//////////////////////
|
||||
|
||||
// number of subblocks; options: 1, 2, 4, 5, 10
|
||||
#define SUB_BLOCKS_22_16 5
|
||||
|
||||
// 22 -> 16 resampler
|
||||
void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, int16_t* out,
|
||||
WebRtcSpl_State22khzTo16khz* state, int32_t* tmpmem)
|
||||
{
|
||||
int k;
|
||||
|
||||
// process two blocks of 10/SUB_BLOCKS_22_16 ms (to reduce temp buffer size)
|
||||
for (k = 0; k < SUB_BLOCKS_22_16; k++)
|
||||
{
|
||||
///// 22 --> 44 /////
|
||||
// int16_t in[220/SUB_BLOCKS_22_16]
|
||||
// int32_t out[440/SUB_BLOCKS_22_16]
|
||||
/////
|
||||
WebRtcSpl_UpBy2ShortToInt(in, 220 / SUB_BLOCKS_22_16, tmpmem + 16, state->S_22_44);
|
||||
|
||||
///// 44 --> 32 /////
|
||||
// int32_t in[440/SUB_BLOCKS_22_16]
|
||||
// int32_t out[320/SUB_BLOCKS_22_16]
|
||||
/////
|
||||
// copy state to and from input array
|
||||
tmpmem[8] = state->S_44_32[0];
|
||||
tmpmem[9] = state->S_44_32[1];
|
||||
tmpmem[10] = state->S_44_32[2];
|
||||
tmpmem[11] = state->S_44_32[3];
|
||||
tmpmem[12] = state->S_44_32[4];
|
||||
tmpmem[13] = state->S_44_32[5];
|
||||
tmpmem[14] = state->S_44_32[6];
|
||||
tmpmem[15] = state->S_44_32[7];
|
||||
state->S_44_32[0] = tmpmem[440 / SUB_BLOCKS_22_16 + 8];
|
||||
state->S_44_32[1] = tmpmem[440 / SUB_BLOCKS_22_16 + 9];
|
||||
state->S_44_32[2] = tmpmem[440 / SUB_BLOCKS_22_16 + 10];
|
||||
state->S_44_32[3] = tmpmem[440 / SUB_BLOCKS_22_16 + 11];
|
||||
state->S_44_32[4] = tmpmem[440 / SUB_BLOCKS_22_16 + 12];
|
||||
state->S_44_32[5] = tmpmem[440 / SUB_BLOCKS_22_16 + 13];
|
||||
state->S_44_32[6] = tmpmem[440 / SUB_BLOCKS_22_16 + 14];
|
||||
state->S_44_32[7] = tmpmem[440 / SUB_BLOCKS_22_16 + 15];
|
||||
|
||||
WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 40 / SUB_BLOCKS_22_16);
|
||||
|
||||
///// 32 --> 16 /////
|
||||
// int32_t in[320/SUB_BLOCKS_22_16]
|
||||
// int32_t out[160/SUB_BLOCKS_22_16]
|
||||
/////
|
||||
WebRtcSpl_DownBy2IntToShort(tmpmem, 320 / SUB_BLOCKS_22_16, out, state->S_32_16);
|
||||
|
||||
// move input/output pointers 10/SUB_BLOCKS_22_16 ms seconds ahead
|
||||
in += 220 / SUB_BLOCKS_22_16;
|
||||
out += 160 / SUB_BLOCKS_22_16;
|
||||
}
|
||||
}
|
||||
|
||||
// initialize state of 22 -> 16 resampler
|
||||
// Clears the first 8 entries of each filter-state array of the 22 -> 16
// resampler (S_22_44, S_44_32 and S_32_16).
void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state)
{
    int idx = 8;

    while (idx-- > 0)
    {
        state->S_22_44[idx] = 0;
        state->S_44_32[idx] = 0;
        state->S_32_16[idx] = 0;
    }
}
|
||||
|
||||
//////////////////////
|
||||
// 16 kHz -> 22 kHz //
|
||||
//////////////////////
|
||||
|
||||
// number of subblocks; options: 1, 2, 4, 5, 10
|
||||
#define SUB_BLOCKS_16_22 4
|
||||
|
||||
// 16 -> 22 resampler
|
||||
void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, int16_t* out,
|
||||
WebRtcSpl_State16khzTo22khz* state, int32_t* tmpmem)
|
||||
{
|
||||
int k;
|
||||
|
||||
// process two blocks of 10/SUB_BLOCKS_16_22 ms (to reduce temp buffer size)
|
||||
for (k = 0; k < SUB_BLOCKS_16_22; k++)
|
||||
{
|
||||
///// 16 --> 32 /////
|
||||
// int16_t in[160/SUB_BLOCKS_16_22]
|
||||
// int32_t out[320/SUB_BLOCKS_16_22]
|
||||
/////
|
||||
WebRtcSpl_UpBy2ShortToInt(in, 160 / SUB_BLOCKS_16_22, tmpmem + 8, state->S_16_32);
|
||||
|
||||
///// 32 --> 22 /////
|
||||
// int32_t in[320/SUB_BLOCKS_16_22]
|
||||
// int32_t out[220/SUB_BLOCKS_16_22]
|
||||
/////
|
||||
// copy state to and from input array
|
||||
tmpmem[0] = state->S_32_22[0];
|
||||
tmpmem[1] = state->S_32_22[1];
|
||||
tmpmem[2] = state->S_32_22[2];
|
||||
tmpmem[3] = state->S_32_22[3];
|
||||
tmpmem[4] = state->S_32_22[4];
|
||||
tmpmem[5] = state->S_32_22[5];
|
||||
tmpmem[6] = state->S_32_22[6];
|
||||
tmpmem[7] = state->S_32_22[7];
|
||||
state->S_32_22[0] = tmpmem[320 / SUB_BLOCKS_16_22];
|
||||
state->S_32_22[1] = tmpmem[320 / SUB_BLOCKS_16_22 + 1];
|
||||
state->S_32_22[2] = tmpmem[320 / SUB_BLOCKS_16_22 + 2];
|
||||
state->S_32_22[3] = tmpmem[320 / SUB_BLOCKS_16_22 + 3];
|
||||
state->S_32_22[4] = tmpmem[320 / SUB_BLOCKS_16_22 + 4];
|
||||
state->S_32_22[5] = tmpmem[320 / SUB_BLOCKS_16_22 + 5];
|
||||
state->S_32_22[6] = tmpmem[320 / SUB_BLOCKS_16_22 + 6];
|
||||
state->S_32_22[7] = tmpmem[320 / SUB_BLOCKS_16_22 + 7];
|
||||
|
||||
WebRtcSpl_32khzTo22khzIntToShort(tmpmem, out, 20 / SUB_BLOCKS_16_22);
|
||||
|
||||
// move input/output pointers 10/SUB_BLOCKS_16_22 ms seconds ahead
|
||||
in += 160 / SUB_BLOCKS_16_22;
|
||||
out += 220 / SUB_BLOCKS_16_22;
|
||||
}
|
||||
}
|
||||
|
||||
// initialize state of 16 -> 22 resampler
|
||||
// Clears the first 8 entries of each filter-state array of the 16 -> 22
// resampler (S_16_32 and S_32_22).
void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state)
{
    int idx = 8;

    while (idx-- > 0)
    {
        state->S_16_32[idx] = 0;
        state->S_32_22[idx] = 0;
    }
}
|
||||
|
||||
//////////////////////
|
||||
// 22 kHz -> 8 kHz //
|
||||
//////////////////////
|
||||
|
||||
// number of subblocks; options: 1, 2, 5, 10
|
||||
#define SUB_BLOCKS_22_8 2
|
||||
|
||||
// 22 -> 8 resampler
|
||||
void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out,
|
||||
WebRtcSpl_State22khzTo8khz* state, int32_t* tmpmem)
|
||||
{
|
||||
int k;
|
||||
|
||||
// process two blocks of 10/SUB_BLOCKS_22_8 ms (to reduce temp buffer size)
|
||||
for (k = 0; k < SUB_BLOCKS_22_8; k++)
|
||||
{
|
||||
///// 22 --> 22 lowpass /////
|
||||
// int16_t in[220/SUB_BLOCKS_22_8]
|
||||
// int32_t out[220/SUB_BLOCKS_22_8]
|
||||
/////
|
||||
WebRtcSpl_LPBy2ShortToInt(in, 220 / SUB_BLOCKS_22_8, tmpmem + 16, state->S_22_22);
|
||||
|
||||
///// 22 --> 16 /////
|
||||
// int32_t in[220/SUB_BLOCKS_22_8]
|
||||
// int32_t out[160/SUB_BLOCKS_22_8]
|
||||
/////
|
||||
// copy state to and from input array
|
||||
tmpmem[8] = state->S_22_16[0];
|
||||
tmpmem[9] = state->S_22_16[1];
|
||||
tmpmem[10] = state->S_22_16[2];
|
||||
tmpmem[11] = state->S_22_16[3];
|
||||
tmpmem[12] = state->S_22_16[4];
|
||||
tmpmem[13] = state->S_22_16[5];
|
||||
tmpmem[14] = state->S_22_16[6];
|
||||
tmpmem[15] = state->S_22_16[7];
|
||||
state->S_22_16[0] = tmpmem[220 / SUB_BLOCKS_22_8 + 8];
|
||||
state->S_22_16[1] = tmpmem[220 / SUB_BLOCKS_22_8 + 9];
|
||||
state->S_22_16[2] = tmpmem[220 / SUB_BLOCKS_22_8 + 10];
|
||||
state->S_22_16[3] = tmpmem[220 / SUB_BLOCKS_22_8 + 11];
|
||||
state->S_22_16[4] = tmpmem[220 / SUB_BLOCKS_22_8 + 12];
|
||||
state->S_22_16[5] = tmpmem[220 / SUB_BLOCKS_22_8 + 13];
|
||||
state->S_22_16[6] = tmpmem[220 / SUB_BLOCKS_22_8 + 14];
|
||||
state->S_22_16[7] = tmpmem[220 / SUB_BLOCKS_22_8 + 15];
|
||||
|
||||
WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 20 / SUB_BLOCKS_22_8);
|
||||
|
||||
///// 16 --> 8 /////
|
||||
// int32_t in[160/SUB_BLOCKS_22_8]
|
||||
// int32_t out[80/SUB_BLOCKS_22_8]
|
||||
/////
|
||||
WebRtcSpl_DownBy2IntToShort(tmpmem, 160 / SUB_BLOCKS_22_8, out, state->S_16_8);
|
||||
|
||||
// move input/output pointers 10/SUB_BLOCKS_22_8 ms seconds ahead
|
||||
in += 220 / SUB_BLOCKS_22_8;
|
||||
out += 80 / SUB_BLOCKS_22_8;
|
||||
}
|
||||
}
|
||||
|
||||
// initialize state of 22 -> 8 resampler
|
||||
// Clears the filter state of the 22 -> 8 resampler: the first 16 entries of
// S_22_22 and the first 8 entries of S_22_16 and S_16_8.
void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state)
{
    int idx;

    for (idx = 0; idx < 16; idx++)
    {
        state->S_22_22[idx] = 0;
    }
    for (idx = 0; idx < 8; idx++)
    {
        state->S_22_16[idx] = 0;
        state->S_16_8[idx] = 0;
    }
}
|
||||
|
||||
//////////////////////
|
||||
// 8 kHz -> 22 kHz //
|
||||
//////////////////////
|
||||
|
||||
// number of subblocks; options: 1, 2, 5, 10
|
||||
#define SUB_BLOCKS_8_22 2
|
||||
|
||||
// 8 -> 22 resampler
|
||||
void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out,
|
||||
WebRtcSpl_State8khzTo22khz* state, int32_t* tmpmem)
|
||||
{
|
||||
int k;
|
||||
|
||||
// process two blocks of 10/SUB_BLOCKS_8_22 ms (to reduce temp buffer size)
|
||||
for (k = 0; k < SUB_BLOCKS_8_22; k++)
|
||||
{
|
||||
///// 8 --> 16 /////
|
||||
// int16_t in[80/SUB_BLOCKS_8_22]
|
||||
// int32_t out[160/SUB_BLOCKS_8_22]
|
||||
/////
|
||||
WebRtcSpl_UpBy2ShortToInt(in, 80 / SUB_BLOCKS_8_22, tmpmem + 18, state->S_8_16);
|
||||
|
||||
///// 16 --> 11 /////
|
||||
// int32_t in[160/SUB_BLOCKS_8_22]
|
||||
// int32_t out[110/SUB_BLOCKS_8_22]
|
||||
/////
|
||||
// copy state to and from input array
|
||||
tmpmem[10] = state->S_16_11[0];
|
||||
tmpmem[11] = state->S_16_11[1];
|
||||
tmpmem[12] = state->S_16_11[2];
|
||||
tmpmem[13] = state->S_16_11[3];
|
||||
tmpmem[14] = state->S_16_11[4];
|
||||
tmpmem[15] = state->S_16_11[5];
|
||||
tmpmem[16] = state->S_16_11[6];
|
||||
tmpmem[17] = state->S_16_11[7];
|
||||
state->S_16_11[0] = tmpmem[160 / SUB_BLOCKS_8_22 + 10];
|
||||
state->S_16_11[1] = tmpmem[160 / SUB_BLOCKS_8_22 + 11];
|
||||
state->S_16_11[2] = tmpmem[160 / SUB_BLOCKS_8_22 + 12];
|
||||
state->S_16_11[3] = tmpmem[160 / SUB_BLOCKS_8_22 + 13];
|
||||
state->S_16_11[4] = tmpmem[160 / SUB_BLOCKS_8_22 + 14];
|
||||
state->S_16_11[5] = tmpmem[160 / SUB_BLOCKS_8_22 + 15];
|
||||
state->S_16_11[6] = tmpmem[160 / SUB_BLOCKS_8_22 + 16];
|
||||
state->S_16_11[7] = tmpmem[160 / SUB_BLOCKS_8_22 + 17];
|
||||
|
||||
WebRtcSpl_32khzTo22khzIntToInt(tmpmem + 10, tmpmem, 10 / SUB_BLOCKS_8_22);
|
||||
|
||||
///// 11 --> 22 /////
|
||||
// int32_t in[110/SUB_BLOCKS_8_22]
|
||||
// int16_t out[220/SUB_BLOCKS_8_22]
|
||||
/////
|
||||
WebRtcSpl_UpBy2IntToShort(tmpmem, 110 / SUB_BLOCKS_8_22, out, state->S_11_22);
|
||||
|
||||
// move input/output pointers 10/SUB_BLOCKS_8_22 ms seconds ahead
|
||||
in += 80 / SUB_BLOCKS_8_22;
|
||||
out += 220 / SUB_BLOCKS_8_22;
|
||||
}
|
||||
}
|
||||
|
||||
// initialize state of 8 -> 22 resampler
|
||||
// Clears the first 8 entries of each filter-state array of the 8 -> 22
// resampler (S_8_16, S_16_11 and S_11_22).
void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state)
{
    int idx = 8;

    while (idx-- > 0)
    {
        state->S_8_16[idx] = 0;
        state->S_16_11[idx] = 0;
        state->S_11_22[idx] = 0;
    }
}
|
||||
|
||||
// compute two inner-products and store them to output array
|
||||
// Computes two 9-tap inner products that share one coefficient set:
//   *out1 = 16384 + sum(coef_ptr[i] * in1[i]),   i = 0..8  (in1 read forwards)
//   *out2 = 16384 + sum(coef_ptr[i] * in2[-i]),  i = 0..8  (in2 read backwards)
// The results are stored unscaled and unsaturated.
static void WebRtcSpl_DotProdIntToInt(const int32_t* in1, const int32_t* in2,
                                      const int16_t* coef_ptr, int32_t* out1,
                                      int32_t* out2)
{
    int32_t acc_fwd = 16384;
    int32_t acc_bwd = 16384;
    int tap;

    for (tap = 0; tap < 8; tap++)
    {
        const int16_t weight = coef_ptr[tap];

        acc_fwd += weight * in1[tap];
        acc_bwd += weight * in2[-tap];
    }

    // The last tap is folded directly into the stores, *out1 first.
    {
        const int16_t weight = coef_ptr[8];

        *out1 = acc_fwd + weight * in1[8];
        *out2 = acc_bwd + weight * in2[-8];
    }
}
|
||||
|
||||
// compute two inner-products and store them to output array
|
||||
// Computes two 9-tap inner products that share one coefficient set, in1 being
// read forwards (in1[0..8]) and in2 backwards (in2[0], in2[-1], ..., in2[-8]).
// Each sum starts from a rounding offset of 16384, is shifted right by 15 and
// clamped to [-32768, 32767] before being stored as int16_t.
static void WebRtcSpl_DotProdIntToShort(const int32_t* in1, const int32_t* in2,
                                        const int16_t* coef_ptr, int16_t* out1,
                                        int16_t* out2)
{
    int32_t acc_fwd = 16384;
    int32_t acc_bwd = 16384;
    int tap;

    for (tap = 0; tap < 9; tap++)
    {
        const int16_t weight = coef_ptr[tap];

        acc_fwd += weight * in1[tap];
        acc_bwd += weight * in2[-tap];
    }

    // scale down, round and saturate
    acc_fwd >>= 15;
    if (acc_fwd > 32767)
    {
        acc_fwd = 32767;
    }
    if (acc_fwd < -32768)
    {
        acc_fwd = -32768;
    }

    acc_bwd >>= 15;
    if (acc_bwd > 32767)
    {
        acc_bwd = 32767;
    }
    if (acc_bwd < -32768)
    {
        acc_bwd = -32768;
    }

    *out1 = (int16_t)acc_fwd;
    *out2 = (int16_t)acc_bwd;
}
|
||||
|
||||
// Resampling ratio: 11/16
|
||||
// input: int32_t (normalized, not saturated) :: size 16 * K
|
||||
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 11 * K
|
||||
// K: Number of blocks
|
||||
|
||||
void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In,
|
||||
int32_t* Out,
|
||||
int32_t K)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Filter operation:
|
||||
//
|
||||
// Perform resampling (16 input samples -> 11 output samples);
|
||||
// process in sub blocks of size 16 samples.
|
||||
int32_t m;
|
||||
|
||||
for (m = 0; m < K; m++)
|
||||
{
|
||||
// first output sample
|
||||
Out[0] = ((int32_t)In[3] << 15) + (1 << 14);
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_DotProdIntToInt(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]);
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_DotProdIntToInt(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]);
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_DotProdIntToInt(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]);
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_DotProdIntToInt(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]);
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_DotProdIntToInt(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]);
|
||||
|
||||
// update pointers
|
||||
In += 16;
|
||||
Out += 11;
|
||||
}
|
||||
}
|
||||
|
||||
// Resampling ratio: 11/16
|
||||
// input: int32_t (normalized, not saturated) :: size 16 * K
|
||||
// output: int16_t (saturated) :: size 11 * K
|
||||
// K: Number of blocks
|
||||
|
||||
void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In,
|
||||
int16_t *Out,
|
||||
int32_t K)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Filter operation:
|
||||
//
|
||||
// Perform resampling (16 input samples -> 11 output samples);
|
||||
// process in sub blocks of size 16 samples.
|
||||
int32_t tmp;
|
||||
int32_t m;
|
||||
|
||||
for (m = 0; m < K; m++)
|
||||
{
|
||||
// first output sample
|
||||
tmp = In[3];
|
||||
if (tmp > (int32_t)0x00007FFF)
|
||||
tmp = 0x00007FFF;
|
||||
if (tmp < (int32_t)0xFFFF8000)
|
||||
tmp = 0xFFFF8000;
|
||||
Out[0] = (int16_t)tmp;
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_DotProdIntToShort(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]);
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_DotProdIntToShort(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]);
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_DotProdIntToShort(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]);
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_DotProdIntToShort(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]);
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_DotProdIntToShort(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]);
|
||||
|
||||
// update pointers
|
||||
In += 16;
|
||||
Out += 11;
|
||||
}
|
||||
}
|
186
third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c
vendored
Normal file
186
third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_48khz.c
vendored
Normal file
|
@ -0,0 +1,186 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains resampling functions between 48 kHz and nb/wb.
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
|
||||
|
||||
////////////////////////////
|
||||
///// 48 kHz -> 16 kHz /////
|
||||
////////////////////////////
|
||||
|
||||
// 48 -> 16 resampler
|
||||
void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
|
||||
WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem)
|
||||
{
|
||||
///// 48 --> 48(LP) /////
|
||||
// int16_t in[480]
|
||||
// int32_t out[480]
|
||||
/////
|
||||
WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48);
|
||||
|
||||
///// 48 --> 32 /////
|
||||
// int32_t in[480]
|
||||
// int32_t out[320]
|
||||
/////
|
||||
// copy state to and from input array
|
||||
memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t));
|
||||
memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t));
|
||||
WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160);
|
||||
|
||||
///// 32 --> 16 /////
|
||||
// int32_t in[320]
|
||||
// int16_t out[160]
|
||||
/////
|
||||
WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
|
||||
}
|
||||
|
||||
// initialize state of 48 -> 16 resampler
|
||||
// Zeroes the filter state of the 48 -> 16 resampler: 16 words of S_48_48 and
// 8 words each of S_48_32 and S_32_16.
void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state)
{
    const size_t word = sizeof(int32_t);

    memset(state->S_48_48, 0, 16 * word);
    memset(state->S_48_32, 0, 8 * word);
    memset(state->S_32_16, 0, 8 * word);
}
|
||||
|
||||
////////////////////////////
|
||||
///// 16 kHz -> 48 kHz /////
|
||||
////////////////////////////
|
||||
|
||||
// 16 -> 48 resampler
|
||||
void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
|
||||
WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem)
|
||||
{
|
||||
///// 16 --> 32 /////
|
||||
// int16_t in[160]
|
||||
// int32_t out[320]
|
||||
/////
|
||||
WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32);
|
||||
|
||||
///// 32 --> 24 /////
|
||||
// int32_t in[320]
|
||||
// int32_t out[240]
|
||||
// copy state to and from input array
|
||||
/////
|
||||
memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t));
|
||||
memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t));
|
||||
WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80);
|
||||
|
||||
///// 24 --> 48 /////
|
||||
// int32_t in[240]
|
||||
// int16_t out[480]
|
||||
/////
|
||||
WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
|
||||
}
|
||||
|
||||
// initialize state of 16 -> 48 resampler
|
||||
// Zeroes the filter state of the 16 -> 48 resampler: 8 words each of S_16_32,
// S_32_24 and S_24_48.
void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state)
{
    const size_t state_bytes = 8 * sizeof(int32_t);

    memset(state->S_16_32, 0, state_bytes);
    memset(state->S_32_24, 0, state_bytes);
    memset(state->S_24_48, 0, state_bytes);
}
|
||||
|
||||
////////////////////////////
|
||||
///// 48 kHz -> 8 kHz /////
|
||||
////////////////////////////
|
||||
|
||||
// 48 -> 8 resampler
|
||||
void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
|
||||
WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem)
|
||||
{
|
||||
///// 48 --> 24 /////
|
||||
// int16_t in[480]
|
||||
// int32_t out[240]
|
||||
/////
|
||||
WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24);
|
||||
|
||||
///// 24 --> 24(LP) /////
|
||||
// int32_t in[240]
|
||||
// int32_t out[240]
|
||||
/////
|
||||
WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24);
|
||||
|
||||
///// 24 --> 16 /////
|
||||
// int32_t in[240]
|
||||
// int32_t out[160]
|
||||
/////
|
||||
// copy state to and from input array
|
||||
memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t));
|
||||
memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t));
|
||||
WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80);
|
||||
|
||||
///// 16 --> 8 /////
|
||||
// int32_t in[160]
|
||||
// int16_t out[80]
|
||||
/////
|
||||
WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
|
||||
}
|
||||
|
||||
// initialize state of 48 -> 8 resampler
|
||||
// Zeroes all four filter-state arrays of a 48 kHz -> 8 kHz resampler:
// 16 words for the 24 -> 24 lowpass stage and 8 words for each other stage.
void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state)
{
    enum { kLowpassStateLen = 16, kStageStateLen = 8 };

    memset(state->S_48_24, 0, sizeof(int32_t) * kStageStateLen);
    memset(state->S_24_24, 0, sizeof(int32_t) * kLowpassStateLen);
    memset(state->S_24_16, 0, sizeof(int32_t) * kStageStateLen);
    memset(state->S_16_8, 0, sizeof(int32_t) * kStageStateLen);
}
|
||||
|
||||
////////////////////////////
|
||||
///// 8 kHz -> 48 kHz /////
|
||||
////////////////////////////
|
||||
|
||||
// 8 -> 48 resampler
|
||||
void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
|
||||
WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem)
|
||||
{
|
||||
///// 8 --> 16 /////
|
||||
// int16_t in[80]
|
||||
// int32_t out[160]
|
||||
/////
|
||||
WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16);
|
||||
|
||||
///// 16 --> 12 /////
|
||||
// int32_t in[160]
|
||||
// int32_t out[120]
|
||||
/////
|
||||
// copy state to and from input array
|
||||
memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t));
|
||||
memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t));
|
||||
WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40);
|
||||
|
||||
///// 12 --> 24 /////
|
||||
// int32_t in[120]
|
||||
// int16_t out[240]
|
||||
/////
|
||||
WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24);
|
||||
|
||||
///// 24 --> 48 /////
|
||||
// int32_t in[240]
|
||||
// int16_t out[480]
|
||||
/////
|
||||
WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
|
||||
}
|
||||
|
||||
// initialize state of 8 -> 48 resampler
|
||||
// Zeroes the four 8-word filter-state arrays of an 8 kHz -> 48 kHz resampler.
void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state)
{
    enum { kStageStateLen = 8 };

    memset(state->S_8_16, 0, sizeof(int32_t) * kStageStateLen);
    memset(state->S_16_12, 0, sizeof(int32_t) * kStageStateLen);
    memset(state->S_12_24, 0, sizeof(int32_t) * kStageStateLen);
    memset(state->S_24_48, 0, sizeof(int32_t) * kStageStateLen);
}
|
183
third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c
vendored
Normal file
183
third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2.c
vendored
Normal file
|
@ -0,0 +1,183 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the resampling by two functions.
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
#ifdef WEBRTC_ARCH_ARM_V7

// Allpass filter coefficients for the ARMv7 build. The entries that the
// resamplers in this file pass to MUL_ACCUM_2 are stored pre-shifted left
// by 15 bits; the entries passed to MUL_ACCUM_1 are stored unshifted.
static const uint32_t kResampleAllpass1[3] = {3284, 24441, 49528 << 15};
static const uint32_t kResampleAllpass2[3] =
  {12199, 37471 << 15, 60255 << 15};

// Multiply two 32-bit values and accumulate to another input value,
// using the smlawb instruction.
// Return: state + ((diff * tbl_value) >> 16)
static __inline int32_t MUL_ACCUM_1(int32_t tbl_value,
                                    int32_t diff,
                                    int32_t state) {
  int32_t acc;
  __asm __volatile ("smlawb %0, %1, %2, %3"
                    : "=r"(acc)
                    : "r"(diff), "r"(tbl_value), "r"(state));
  return acc;
}

// Multiply two 32-bit values and accumulate to another input value,
// using the smmla instruction.
// Return: state + (((diff << 1) * tbl_value) >> 32)
//
// This variant exists for the cases where the input value range rules out
// smlawb (as used in MUL_ACCUM_1); smmla still saves some cycles there.
static __inline int32_t MUL_ACCUM_2(int32_t tbl_value,
                                    int32_t diff,
                                    int32_t state) {
  int32_t acc;
  __asm __volatile ("smmla %0, %1, %2, %3"
                    : "=r"(acc)
                    : "r"(diff << 1), "r"(tbl_value), "r"(state));
  return acc;
}

#else

// Allpass filter coefficients for the generic build.
static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528};
static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};

// Multiply a 32-bit value with a 16-bit value and accumulate to another input.
// Both variants map to the same generic macro here.
#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)

#endif  // WEBRTC_ARCH_ARM_V7
|
||||
|
||||
|
||||
// decimator
|
||||
#if !defined(MIPS32_LE)
|
||||
void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len,
|
||||
int16_t* out, int32_t* filtState) {
|
||||
int32_t tmp1, tmp2, diff, in32, out32;
|
||||
size_t i;
|
||||
|
||||
register int32_t state0 = filtState[0];
|
||||
register int32_t state1 = filtState[1];
|
||||
register int32_t state2 = filtState[2];
|
||||
register int32_t state3 = filtState[3];
|
||||
register int32_t state4 = filtState[4];
|
||||
register int32_t state5 = filtState[5];
|
||||
register int32_t state6 = filtState[6];
|
||||
register int32_t state7 = filtState[7];
|
||||
|
||||
for (i = (len >> 1); i > 0; i--) {
|
||||
// lower allpass filter
|
||||
in32 = (int32_t)(*in++) << 10;
|
||||
diff = in32 - state1;
|
||||
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
|
||||
state0 = in32;
|
||||
diff = tmp1 - state2;
|
||||
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
|
||||
state1 = tmp1;
|
||||
diff = tmp2 - state3;
|
||||
state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
|
||||
state2 = tmp2;
|
||||
|
||||
// upper allpass filter
|
||||
in32 = (int32_t)(*in++) << 10;
|
||||
diff = in32 - state5;
|
||||
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
|
||||
state4 = in32;
|
||||
diff = tmp1 - state6;
|
||||
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
|
||||
state5 = tmp1;
|
||||
diff = tmp2 - state7;
|
||||
state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
|
||||
state6 = tmp2;
|
||||
|
||||
// add two allpass outputs, divide by two and round
|
||||
out32 = (state3 + state7 + 1024) >> 11;
|
||||
|
||||
// limit amplitude to prevent wrap-around, and write to output array
|
||||
*out++ = WebRtcSpl_SatW32ToW16(out32);
|
||||
}
|
||||
|
||||
filtState[0] = state0;
|
||||
filtState[1] = state1;
|
||||
filtState[2] = state2;
|
||||
filtState[3] = state3;
|
||||
filtState[4] = state4;
|
||||
filtState[5] = state5;
|
||||
filtState[6] = state6;
|
||||
filtState[7] = state7;
|
||||
}
|
||||
#endif // #if defined(MIPS32_LE)
|
||||
|
||||
|
||||
void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len,
|
||||
int16_t* out, int32_t* filtState) {
|
||||
int32_t tmp1, tmp2, diff, in32, out32;
|
||||
size_t i;
|
||||
|
||||
register int32_t state0 = filtState[0];
|
||||
register int32_t state1 = filtState[1];
|
||||
register int32_t state2 = filtState[2];
|
||||
register int32_t state3 = filtState[3];
|
||||
register int32_t state4 = filtState[4];
|
||||
register int32_t state5 = filtState[5];
|
||||
register int32_t state6 = filtState[6];
|
||||
register int32_t state7 = filtState[7];
|
||||
|
||||
for (i = len; i > 0; i--) {
|
||||
// lower allpass filter
|
||||
in32 = (int32_t)(*in++) << 10;
|
||||
diff = in32 - state1;
|
||||
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state0);
|
||||
state0 = in32;
|
||||
diff = tmp1 - state2;
|
||||
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state1);
|
||||
state1 = tmp1;
|
||||
diff = tmp2 - state3;
|
||||
state3 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state2);
|
||||
state2 = tmp2;
|
||||
|
||||
// round; limit amplitude to prevent wrap-around; write to output array
|
||||
out32 = (state3 + 512) >> 10;
|
||||
*out++ = WebRtcSpl_SatW32ToW16(out32);
|
||||
|
||||
// upper allpass filter
|
||||
diff = in32 - state5;
|
||||
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state4);
|
||||
state4 = in32;
|
||||
diff = tmp1 - state6;
|
||||
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state5);
|
||||
state5 = tmp1;
|
||||
diff = tmp2 - state7;
|
||||
state7 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state6);
|
||||
state6 = tmp2;
|
||||
|
||||
// round; limit amplitude to prevent wrap-around; write to output array
|
||||
out32 = (state7 + 512) >> 10;
|
||||
*out++ = WebRtcSpl_SatW32ToW16(out32);
|
||||
}
|
||||
|
||||
filtState[0] = state0;
|
||||
filtState[1] = state1;
|
||||
filtState[2] = state2;
|
||||
filtState[3] = state3;
|
||||
filtState[4] = state4;
|
||||
filtState[5] = state5;
|
||||
filtState[6] = state6;
|
||||
filtState[7] = state7;
|
||||
}
|
679
third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c
vendored
Normal file
679
third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.c
vendored
Normal file
|
@ -0,0 +1,679 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the implementations of some internal resampling functions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
|
||||
|
||||
// allpass filter coefficients.
|
||||
// Row 0 is used by the cascades the code below calls "upper", row 1 by the
// ones it calls "lower". Each coefficient multiplies a difference that has
// already been shifted right by 14 bits.
static const int16_t kResampleAllpass[2][3] = {
    {821, 6110, 12382},
    {3050, 9368, 15063}
};

// Converts a 16-bit sample to the internal fixed-point format: the value
// times 2^15, plus an offset of 16384. A multiplication is used instead of
// "<< 15" because left-shifting a negative value is undefined behaviour in C.
static int32_t ResampleBy2FromInt16(int16_t sample)
{
    return (int32_t)sample * (1 << 15) + (1 << 14);
}

// Clamps a 32-bit value to the int16_t range.
static int16_t ResampleBy2Saturate(int32_t value)
{
    if (value > 32767)
        return 32767;
    if (value < -32768)
        return -32768;
    return (int16_t)value;
}

// Runs one sample through a cascade of three allpass sections.
//   x    : input sample
//   s    : the four state words of this cascade, updated in place
//   coef : the three section coefficients
// Returns the cascade output, which is also stored in s[3].
static int32_t ResampleBy2AllpassCascade(int32_t x, int32_t *s,
                                         const int16_t *coef)
{
    int32_t diff, stage1, stage2;

    // first section: scale the difference down with rounding
    diff = x - s[1];
    diff = (diff + (1 << 13)) >> 14;
    stage1 = s[0] + diff * coef[0];
    s[0] = x;

    // second section: scale down without rounding, +1 for negative results
    diff = stage1 - s[2];
    diff = diff >> 14;
    if (diff < 0)
        diff += 1;
    stage2 = s[1] + diff * coef[1];
    s[1] = stage1;

    // third section: same scaling as the second
    diff = stage2 - s[3];
    diff = diff >> 14;
    if (diff < 0)
        diff += 1;
    s[3] = s[2] + diff * coef[2];
    s[2] = stage2;

    return s[3];
}

//
// decimator
// input:  int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN!
// output: int16_t (saturated) (of length len/2)
// state:  filter state array; length = 8
//
// Exactly len/2 output samples are written, also when len/2 is odd.
void WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out,
                                 int32_t *state)
{
    const int32_t half = len >> 1;
    int32_t i;

    // lower allpass filter (even-indexed input); result halved, kept in place
    for (i = 0; i < half; i++)
    {
        in[2 * i] =
            ResampleBy2AllpassCascade(in[2 * i], state, kResampleAllpass[1]) >> 1;
    }

    // upper allpass filter (odd-indexed input); result halved, kept in place
    for (i = 0; i < half; i++)
    {
        in[2 * i + 1] =
            ResampleBy2AllpassCascade(in[2 * i + 1], state + 4,
                                      kResampleAllpass[0]) >> 1;
    }

    // combine the two allpass outputs, scale down and saturate
    for (i = 0; i < half; i++)
    {
        out[i] = ResampleBy2Saturate((in[2 * i] + in[2 * i + 1]) >> 15);
    }
}

//
// decimator
// input:  int16_t
// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2)
// state:  filter state array; length = 8
void WebRtcSpl_DownBy2ShortToInt(const int16_t *in,
                                 int32_t len,
                                 int32_t *out,
                                 int32_t *state)
{
    const int32_t half = len >> 1;
    int32_t i;

    // lower allpass filter (even-indexed input); store half of its output
    for (i = 0; i < half; i++)
    {
        out[i] = ResampleBy2AllpassCascade(ResampleBy2FromInt16(in[2 * i]),
                                           state, kResampleAllpass[1]) >> 1;
    }

    // upper allpass filter (odd-indexed input); add half of its output
    for (i = 0; i < half; i++)
    {
        out[i] += ResampleBy2AllpassCascade(ResampleBy2FromInt16(in[2 * i + 1]),
                                            state + 4, kResampleAllpass[0]) >> 1;
    }
}

//
// interpolator
// input:  int16_t
// output: int32_t (normalized, not saturated) (of length len*2)
// state:  filter state array; length = 8
void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out,
                               int32_t *state)
{
    int32_t i;

    // upper allpass filter: writes out[0], out[2], ...
    for (i = 0; i < len; i++)
    {
        out[2 * i] = ResampleBy2AllpassCascade(ResampleBy2FromInt16(in[i]),
                                               state + 4, kResampleAllpass[0]) >> 15;
    }

    // lower allpass filter: writes out[1], out[3], ...
    for (i = 0; i < len; i++)
    {
        out[2 * i + 1] = ResampleBy2AllpassCascade(ResampleBy2FromInt16(in[i]),
                                                   state, kResampleAllpass[1]) >> 15;
    }
}

//
// interpolator
// input:  int32_t (shifted 15 positions to the left, + offset 16384)
// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2)
// state:  filter state array; length = 8
void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out,
                             int32_t *state)
{
    int32_t i;

    // upper allpass filter: writes out[0], out[2], ...
    for (i = 0; i < len; i++)
    {
        out[2 * i] = ResampleBy2AllpassCascade(in[i], state + 4,
                                               kResampleAllpass[0]);
    }

    // lower allpass filter: writes out[1], out[3], ...
    for (i = 0; i < len; i++)
    {
        out[2 * i + 1] = ResampleBy2AllpassCascade(in[i], state,
                                                   kResampleAllpass[1]);
    }
}

//
// interpolator
// input:  int32_t (shifted 15 positions to the left, + offset 16384)
// output: int16_t (saturated) (of length len*2)
// state:  filter state array; length = 8
void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out,
                               int32_t *state)
{
    int32_t i;

    // upper allpass filter: writes out[0], out[2], ...
    for (i = 0; i < len; i++)
    {
        out[2 * i] = ResampleBy2Saturate(
            ResampleBy2AllpassCascade(in[i], state + 4, kResampleAllpass[0]) >> 15);
    }

    // lower allpass filter: writes out[1], out[3], ...
    for (i = 0; i < len; i++)
    {
        out[2 * i + 1] = ResampleBy2Saturate(
            ResampleBy2AllpassCascade(in[i], state, kResampleAllpass[1]) >> 15);
    }
}

// lowpass filter
// input:  int16_t
// output: int32_t (normalized, not saturated)
// state:  filter state array; length = 16 (state[0..15] are used)
void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
                               int32_t* state)
{
    const int32_t half = len >> 1;
    int32_t delayed, i;

    // lower allpass filter: odd-indexed input, delayed by one pair, ->
    // even-indexed output. The delay element starts from state[12], which the
    // last loop below leaves holding the most recent odd-indexed input.
    delayed = state[12];
    for (i = 0; i < half; i++)
    {
        out[2 * i] =
            ResampleBy2AllpassCascade(delayed, state, kResampleAllpass[1]) >> 1;
        delayed = ResampleBy2FromInt16(in[2 * i + 1]);
    }

    // upper allpass filter: even-indexed input -> even-indexed output;
    // average the two allpass outputs and scale down
    for (i = 0; i < half; i++)
    {
        const int32_t upper =
            ResampleBy2AllpassCascade(ResampleBy2FromInt16(in[2 * i]),
                                      state + 4, kResampleAllpass[0]);
        out[2 * i] = (out[2 * i] + (upper >> 1)) >> 15;
    }

    // lower allpass filter: even-indexed input -> odd-indexed output
    for (i = 0; i < half; i++)
    {
        out[2 * i + 1] =
            ResampleBy2AllpassCascade(ResampleBy2FromInt16(in[2 * i]),
                                      state + 8, kResampleAllpass[1]) >> 1;
    }

    // upper allpass filter: odd-indexed input -> odd-indexed output;
    // average the two allpass outputs and scale down
    for (i = 0; i < half; i++)
    {
        const int32_t upper =
            ResampleBy2AllpassCascade(ResampleBy2FromInt16(in[2 * i + 1]),
                                      state + 12, kResampleAllpass[0]);
        out[2 * i + 1] = (out[2 * i + 1] + (upper >> 1)) >> 15;
    }
}

// lowpass filter
// input:  int32_t (shifted 15 positions to the left, + offset 16384)
// output: int32_t (normalized, not saturated)
// state:  filter state array; length = 16 (state[0..15] are used)
void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
                             int32_t* state)
{
    const int32_t half = len >> 1;
    int32_t delayed, i;

    // lower allpass filter: odd-indexed input, delayed by one pair, ->
    // even-indexed output. The delay element starts from state[12], which the
    // last loop below leaves holding the most recent odd-indexed input.
    delayed = state[12];
    for (i = 0; i < half; i++)
    {
        out[2 * i] =
            ResampleBy2AllpassCascade(delayed, state, kResampleAllpass[1]) >> 1;
        delayed = in[2 * i + 1];
    }

    // upper allpass filter: even-indexed input -> even-indexed output;
    // average the two allpass outputs and scale down
    for (i = 0; i < half; i++)
    {
        const int32_t upper =
            ResampleBy2AllpassCascade(in[2 * i], state + 4, kResampleAllpass[0]);
        out[2 * i] = (out[2 * i] + (upper >> 1)) >> 15;
    }

    // lower allpass filter: even-indexed input -> odd-indexed output
    for (i = 0; i < half; i++)
    {
        out[2 * i + 1] =
            ResampleBy2AllpassCascade(in[2 * i], state + 8,
                                      kResampleAllpass[1]) >> 1;
    }

    // upper allpass filter: odd-indexed input -> odd-indexed output;
    // average the two allpass outputs and scale down
    for (i = 0; i < half; i++)
    {
        const int32_t upper =
            ResampleBy2AllpassCascade(in[2 * i + 1], state + 12,
                                      kResampleAllpass[0]);
        out[2 * i + 1] = (out[2 * i + 1] + (upper >> 1)) >> 15;
    }
}
|
47
third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h
vendored
Normal file
47
third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_internal.h
vendored
Normal file
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This header file contains some internal resampling functions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_
#define WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_

#include "webrtc/typedefs.h"

/*
 * Functions for internal use in the other resample functions.
 * They are implemented in resample_by_2_internal.c.
 */

/* Decimators by two. */
void WebRtcSpl_DownBy2IntToShort(int32_t *in,
                                 int32_t len,
                                 int16_t *out,
                                 int32_t *state);

void WebRtcSpl_DownBy2ShortToInt(const int16_t *in,
                                 int32_t len,
                                 int32_t *out,
                                 int32_t *state);

/* Interpolators by two. */
void WebRtcSpl_UpBy2ShortToInt(const int16_t *in,
                               int32_t len,
                               int32_t *out,
                               int32_t *state);

void WebRtcSpl_UpBy2IntToInt(const int32_t *in,
                             int32_t len,
                             int32_t *out,
                             int32_t *state);

void WebRtcSpl_UpBy2IntToShort(const int32_t *in,
                               int32_t len,
                               int16_t *out,
                               int32_t *state);

/* Lowpass filters (no rate change). */
void WebRtcSpl_LPBy2ShortToInt(const int16_t *in,
                               int32_t len,
                               int32_t *out,
                               int32_t *state);

void WebRtcSpl_LPBy2IntToInt(const int32_t *in,
                             int32_t len,
                             int32_t *out,
                             int32_t *state);

#endif  // WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_
|
290
third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c
vendored
Normal file
290
third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_by_2_mips.c
vendored
Normal file
|
@ -0,0 +1,290 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the resampling by two functions.
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(MIPS32_LE)
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// allpass filter coefficients.
static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528};
static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};

// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)

// decimator
//
// Reads input samples in pairs and writes one saturated 16-bit output sample
// per pair, i.e. (len >> 1) outputs in total.
//
//   in        - input samples; 2 * (len >> 1) of them are consumed.
//   len       - number of input samples.
//   out       - receives (len >> 1) output samples.
//   filtState - eight 32-bit state words; read on entry and written back
//               on exit (indices 0..7).
//
// Each pair is run through two three-stage allpass sections (the first sample
// with kResampleAllpass2 / state0..3, the second with kResampleAllpass1 /
// state4..7); the two section outputs are summed, rounded and shifted right
// by 11 before saturation.
void WebRtcSpl_DownsampleBy2(const int16_t* in,
                             size_t len,
                             int16_t* out,
                             int32_t* filtState) {
  int32_t out32;
  size_t i, len1;

  register int32_t state0 = filtState[0];
  register int32_t state1 = filtState[1];
  register int32_t state2 = filtState[2];
  register int32_t state3 = filtState[3];
  register int32_t state4 = filtState[4];
  register int32_t state5 = filtState[5];
  register int32_t state6 = filtState[6];
  register int32_t state7 = filtState[7];

#if defined(MIPS_DSP_R2_LE)
  int32_t k1Res0, k1Res1, k1Res2, k2Res0, k2Res1, k2Res2;

  // Same values as kResampleAllpass1[] / kResampleAllpass2[] above, held in
  // plain variables so they can be passed to the asm as register operands.
  k1Res0= 3284;
  k1Res1= 24441;
  k1Res2= 49528;
  k2Res0= 12199;
  k2Res1= 37471;
  k2Res2= 60255;
  len1 = (len >> 1);

  const int32_t* inw = (int32_t*)in;
  int32_t tmp11, tmp12, tmp21, tmp22;
  int32_t in322, in321;
  int32_t diff1, diff2;
  for (i = len1; i > 0; i--) {
    // Load two consecutive 16-bit samples, scale them by << 10, advance the
    // input pointer by 4 bytes and form the first-stage differences.
    __asm__ volatile (
      "lh %[in321], 0(%[inw]) \n\t"
      "lh %[in322], 2(%[inw]) \n\t"

      "sll %[in321], %[in321], 10 \n\t"
      "sll %[in322], %[in322], 10 \n\t"

      "addiu %[inw], %[inw], 4 \n\t"

      "subu %[diff1], %[in321], %[state1] \n\t"
      "subu %[diff2], %[in322], %[state5] \n\t"

      : [in322] "=&r" (in322), [in321] "=&r" (in321),
        [diff1] "=&r" (diff1), [diff2] "=r" (diff2), [inw] "+r" (inw)
      : [state1] "r" (state1), [state5] "r" (state5)
      : "memory"
    );

    // First and second allpass stages of both sections, computed in parallel
    // on accumulators $ac0 and $ac1 (product >> 16, then add previous state).
    // NOTE(review): diff1/diff2 are declared as input-only operands here but
    // are written by the two "subu" instructions below; GCC's extended-asm
    // rules say inputs must not be modified. Left untouched - confirm on
    // target before changing the constraints to "+r".
    __asm__ volatile (
      "mult $ac0, %[diff1], %[k2Res0] \n\t"
      "mult $ac1, %[diff2], %[k1Res0] \n\t"

      "extr.w %[tmp11], $ac0, 16 \n\t"
      "extr.w %[tmp12], $ac1, 16 \n\t"

      "addu %[tmp11], %[state0], %[tmp11] \n\t"
      "addu %[tmp12], %[state4], %[tmp12] \n\t"

      "addiu %[state0], %[in321], 0 \n\t"
      "addiu %[state4], %[in322], 0 \n\t"

      "subu %[diff1], %[tmp11], %[state2] \n\t"
      "subu %[diff2], %[tmp12], %[state6] \n\t"

      "mult $ac0, %[diff1], %[k2Res1] \n\t"
      "mult $ac1, %[diff2], %[k1Res1] \n\t"

      "extr.w %[tmp21], $ac0, 16 \n\t"
      "extr.w %[tmp22], $ac1, 16 \n\t"

      "addu %[tmp21], %[state1], %[tmp21] \n\t"
      "addu %[tmp22], %[state5], %[tmp22] \n\t"

      "addiu %[state1], %[tmp11], 0 \n\t"
      "addiu %[state5], %[tmp12], 0 \n\t"
      : [tmp22] "=r" (tmp22), [tmp21] "=&r" (tmp21),
        [tmp11] "=&r" (tmp11), [state0] "+r" (state0),
        [state1] "+r" (state1),
        [state2] "+r" (state2),
        [state4] "+r" (state4), [tmp12] "=&r" (tmp12),
        [state6] "+r" (state6), [state5] "+r" (state5)
      : [k1Res1] "r" (k1Res1), [k2Res1] "r" (k2Res1), [k2Res0] "r" (k2Res0),
        [diff2] "r" (diff2), [diff1] "r" (diff1), [in322] "r" (in322),
        [in321] "r" (in321), [k1Res0] "r" (k1Res0)
      : "hi", "lo", "$ac1hi", "$ac1lo"
    );

    // Third allpass stage of both sections, then combine the two outputs.
    __asm__ volatile (
      "subu %[diff1], %[tmp21], %[state3] \n\t"
      "subu %[diff2], %[tmp22], %[state7] \n\t"

      "mult $ac0, %[diff1], %[k2Res2] \n\t"
      "mult $ac1, %[diff2], %[k1Res2] \n\t"
      "extr.w %[state3], $ac0, 16 \n\t"
      "extr.w %[state7], $ac1, 16 \n\t"
      "addu %[state3], %[state2], %[state3] \n\t"
      "addu %[state7], %[state6], %[state7] \n\t"

      "addiu %[state2], %[tmp21], 0 \n\t"
      "addiu %[state6], %[tmp22], 0 \n\t"

      // add two allpass outputs, divide by two and round
      "addu %[out32], %[state3], %[state7] \n\t"
      "addiu %[out32], %[out32], 1024 \n\t"
      "sra %[out32], %[out32], 11 \n\t"
      : [state3] "+r" (state3), [state6] "+r" (state6),
        [state2] "+r" (state2), [diff2] "=&r" (diff2),
        [out32] "=r" (out32), [diff1] "=&r" (diff1), [state7] "+r" (state7)
      : [tmp22] "r" (tmp22), [tmp21] "r" (tmp21),
        [k1Res2] "r" (k1Res2), [k2Res2] "r" (k2Res2)
      : "hi", "lo", "$ac1hi", "$ac1lo"
    );

    // limit amplitude to prevent wrap-around, and write to output array
    *out++ = WebRtcSpl_SatW32ToW16(out32);
  }
#else  // #if defined(MIPS_DSP_R2_LE)
  int32_t tmp1, tmp2, diff;
  int32_t in32;

  // One iteration per output sample. The previous version was unrolled four
  // times and iterated (len >> 1) / 4 times, which silently dropped up to
  // three trailing outputs whenever len / 2 was not a multiple of four (the
  // DSP-R2 path above has always produced all len >> 1 outputs). Results are
  // unchanged for lengths that were handled completely before.
  len1 = (len >> 1);
  for (i = len1; i > 0; i--) {
    // first sample of the pair: allpass section using kResampleAllpass2.
    // The scaling is written as a multiplication because left-shifting a
    // negative sample is undefined behaviour in C.
    in32 = (int32_t)(*in++) * (1 << 10);
    diff = in32 - state1;
    tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
    state0 = in32;
    diff = tmp1 - state2;
    tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
    state1 = tmp1;
    diff = tmp2 - state3;
    state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
    state2 = tmp2;

    // second sample of the pair: allpass section using kResampleAllpass1.
    in32 = (int32_t)(*in++) * (1 << 10);
    diff = in32 - state5;
    tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
    state4 = in32;
    diff = tmp1 - state6;
    tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
    state5 = tmp1;
    diff = tmp2 - state7;
    state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
    state6 = tmp2;

    // add two allpass outputs, divide by two and round
    out32 = (state3 + state7 + 1024) >> 11;

    // limit amplitude to prevent wrap-around, and write to output array
    *out++ = WebRtcSpl_SatW32ToW16(out32);
  }
#endif  // #if defined(MIPS_DSP_R2_LE)

  // Write the eight updated state words back to filtState[0..7].
  __asm__ volatile (
    "sw %[state0], 0(%[filtState]) \n\t"
    "sw %[state1], 4(%[filtState]) \n\t"
    "sw %[state2], 8(%[filtState]) \n\t"
    "sw %[state3], 12(%[filtState]) \n\t"
    "sw %[state4], 16(%[filtState]) \n\t"
    "sw %[state5], 20(%[filtState]) \n\t"
    "sw %[state6], 24(%[filtState]) \n\t"
    "sw %[state7], 28(%[filtState]) \n\t"
    :
    : [state0] "r" (state0), [state1] "r" (state1), [state2] "r" (state2),
      [state3] "r" (state3), [state4] "r" (state4), [state5] "r" (state5),
      [state6] "r" (state6), [state7] "r" (state7), [filtState] "r" (filtState)
    : "memory"
  );
}
|
||||
|
||||
#endif // #if defined(MIPS32_LE)
|
239
third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c
vendored
Normal file
239
third_party/webrtc/src/webrtc/common_audio/signal_processing/resample_fractional.c
vendored
Normal file
|
@ -0,0 +1,239 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the resampling functions between 48, 44, 32 and 24 kHz.
|
||||
* The description headers can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// interpolation coefficients
static const int16_t kCoefficients48To32[2][8] = {
    {778, -2050, 1087, 23285, 12903, -3783, 441, 222},
    {222, 441, -3783, 12903, 23285, 1087, -2050, 778}
};

static const int16_t kCoefficients32To24[3][8] = {
    {767, -2362, 2434, 24406, 10620, -3838, 721, 90},
    {386, -381, -2646, 19062, 19062, -2646, -381, 386},
    {90, 721, -3838, 10620, 24406, 2434, -2362, 767}
};

static const int16_t kCoefficients44To32[4][9] = {
    {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138},
    {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91},
    {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53},
    {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126}
};

// Resampling ratio: 2/3
// input:  int32_t (normalized, not saturated) :: size 3 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K
//      K: number of blocks
//
// Each block consumes 3 input samples and produces 2 output samples. Every
// output is an 8-tap dot product started from the rounding offset 1 << 14;
// the second output of a block reads up to In[8] relative to the block start,
// so the filter looks 6 samples past the 3 samples the block consumes.
void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
{
    const int32_t *src = In;
    int32_t *dst = Out;
    size_t remaining;

    for (remaining = K; remaining > 0; remaining--) {
        int32_t acc;
        int tap;

        // first output: taps applied to src[0..7]
        acc = 1 << 14;
        for (tap = 0; tap < 8; tap++) {
            acc += kCoefficients48To32[0][tap] * src[tap];
        }
        dst[0] = acc;

        // second output: taps applied to src[1..8]
        acc = 1 << 14;
        for (tap = 0; tap < 8; tap++) {
            acc += kCoefficients48To32[1][tap] * src[tap + 1];
        }
        dst[1] = acc;

        // advance to the next block
        src += 3;
        dst += 2;
    }
}
|
||||
|
||||
// Resampling ratio: 3/4
|
||||
// input: int32_t (normalized, not saturated) :: size 4 * K
|
||||
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K
|
||||
// K: number of blocks
|
||||
|
||||
void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Filter operation:
|
||||
//
|
||||
// Perform resampling (4 input samples -> 3 output samples);
|
||||
// process in sub blocks of size 4 samples.
|
||||
size_t m;
|
||||
int32_t tmp;
|
||||
|
||||
for (m = 0; m < K; m++)
|
||||
{
|
||||
tmp = 1 << 14;
|
||||
tmp += kCoefficients32To24[0][0] * In[0];
|
||||
tmp += kCoefficients32To24[0][1] * In[1];
|
||||
tmp += kCoefficients32To24[0][2] * In[2];
|
||||
tmp += kCoefficients32To24[0][3] * In[3];
|
||||
tmp += kCoefficients32To24[0][4] * In[4];
|
||||
tmp += kCoefficients32To24[0][5] * In[5];
|
||||
tmp += kCoefficients32To24[0][6] * In[6];
|
||||
tmp += kCoefficients32To24[0][7] * In[7];
|
||||
Out[0] = tmp;
|
||||
|
||||
tmp = 1 << 14;
|
||||
tmp += kCoefficients32To24[1][0] * In[1];
|
||||
tmp += kCoefficients32To24[1][1] * In[2];
|
||||
tmp += kCoefficients32To24[1][2] * In[3];
|
||||
tmp += kCoefficients32To24[1][3] * In[4];
|
||||
tmp += kCoefficients32To24[1][4] * In[5];
|
||||
tmp += kCoefficients32To24[1][5] * In[6];
|
||||
tmp += kCoefficients32To24[1][6] * In[7];
|
||||
tmp += kCoefficients32To24[1][7] * In[8];
|
||||
Out[1] = tmp;
|
||||
|
||||
tmp = 1 << 14;
|
||||
tmp += kCoefficients32To24[2][0] * In[2];
|
||||
tmp += kCoefficients32To24[2][1] * In[3];
|
||||
tmp += kCoefficients32To24[2][2] * In[4];
|
||||
tmp += kCoefficients32To24[2][3] * In[5];
|
||||
tmp += kCoefficients32To24[2][4] * In[6];
|
||||
tmp += kCoefficients32To24[2][5] * In[7];
|
||||
tmp += kCoefficients32To24[2][6] * In[8];
|
||||
tmp += kCoefficients32To24[2][7] * In[9];
|
||||
Out[2] = tmp;
|
||||
|
||||
// update pointers
|
||||
In += 4;
|
||||
Out += 3;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// fractional resampling filters
|
||||
// Fout = 11/16 * Fin
|
||||
// Fout = 8/11 * Fin
|
||||
//
|
||||
|
||||
// Compute two 9-tap inner products that share one coefficient row and store
// them to the output locations.
//
//   in1      - read forwards:  in1[0], in1[1], ..., in1[8]
//   in2      - read backwards: in2[0], in2[-1], ..., in2[-8]
//   coef_ptr - nine 16-bit coefficients
//   out1     - receives 16384 + sum(coef_ptr[k] * in1[k])
//   out2     - receives 16384 + sum(coef_ptr[k] * in2[-k])
static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2,
                                       const int16_t *coef_ptr, int32_t *out1,
                                       int32_t *out2)
{
    int32_t forward_sum = 16384;
    int32_t backward_sum = 16384;
    int16_t weight;
    int k;

    // taps 0..7 are accumulated locally
    for (k = 0; k < 8; k++) {
        weight = coef_ptr[k];
        forward_sum += weight * in1[k];
        backward_sum += weight * in2[-k];
    }

    // the ninth tap is folded into the stores, *out1 first and then *out2
    weight = coef_ptr[8];
    *out1 = forward_sum + weight * in1[8];
    *out2 = backward_sum + weight * in2[-8];
}
|
||||
|
||||
// Resampling ratio: 8/11
|
||||
// input: int32_t (normalized, not saturated) :: size 11 * K
|
||||
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K
|
||||
// K: number of blocks
|
||||
|
||||
void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Filter operation:
|
||||
//
|
||||
// Perform resampling (11 input samples -> 8 output samples);
|
||||
// process in sub blocks of size 11 samples.
|
||||
int32_t tmp;
|
||||
size_t m;
|
||||
|
||||
for (m = 0; m < K; m++)
|
||||
{
|
||||
tmp = 1 << 14;
|
||||
|
||||
// first output sample
|
||||
Out[0] = ((int32_t)In[3] << 15) + tmp;
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
tmp += kCoefficients44To32[3][0] * In[5];
|
||||
tmp += kCoefficients44To32[3][1] * In[6];
|
||||
tmp += kCoefficients44To32[3][2] * In[7];
|
||||
tmp += kCoefficients44To32[3][3] * In[8];
|
||||
tmp += kCoefficients44To32[3][4] * In[9];
|
||||
tmp += kCoefficients44To32[3][5] * In[10];
|
||||
tmp += kCoefficients44To32[3][6] * In[11];
|
||||
tmp += kCoefficients44To32[3][7] * In[12];
|
||||
tmp += kCoefficients44To32[3][8] * In[13];
|
||||
Out[4] = tmp;
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]);
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]);
|
||||
|
||||
// sum and accumulate filter coefficients and input samples
|
||||
WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]);
|
||||
|
||||
// update pointers
|
||||
In += 11;
|
||||
Out += 8;
|
||||
}
|
||||
}
|
579
third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc
vendored
Normal file
579
third_party/webrtc/src/webrtc/common_audio/signal_processing/signal_processing_unittest.cc
vendored
Normal file
|
@ -0,0 +1,579 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
static const size_t kVector16Size = 9;
|
||||
static const int16_t vector16[kVector16Size] = {1, -15511, 4323, 1963,
|
||||
WEBRTC_SPL_WORD16_MAX, 0, WEBRTC_SPL_WORD16_MIN + 5, -3333, 345};
|
||||
|
||||
class SplTest : public testing::Test {
|
||||
protected:
|
||||
SplTest() {
|
||||
WebRtcSpl_Init();
|
||||
}
|
||||
virtual ~SplTest() {
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(SplTest, MacroTest) {
  // Operands for the macros under test. `lhs` and `rhs` are reassigned
  // part-way through, so the order of the checks below matters.
  int lower = 10;
  int upper = 21;
  int lhs = -3;
  int rhs = WEBRTC_SPL_WORD32_MAX;

  // Min / max.
  EXPECT_EQ(10, WEBRTC_SPL_MIN(lower, upper));
  EXPECT_EQ(21, WEBRTC_SPL_MAX(lower, upper));

  // Absolute value.
  EXPECT_EQ(3, WEBRTC_SPL_ABS_W16(lhs));
  EXPECT_EQ(3, WEBRTC_SPL_ABS_W32(lhs));

  // Multiplications with lhs == -3.
  EXPECT_EQ(-63, WEBRTC_SPL_MUL(lhs, upper));
  EXPECT_EQ(-2147483645, WEBRTC_SPL_MUL(lhs, rhs));
  EXPECT_EQ(2147483651u, WEBRTC_SPL_UMUL(lhs, rhs));
  rhs = WEBRTC_SPL_WORD16_MAX >> 1;
  EXPECT_EQ(4294918147u, WEBRTC_SPL_UMUL_32_16(lhs, rhs));
  EXPECT_EQ(-49149, WEBRTC_SPL_MUL_16_U16(lhs, rhs));

  // From here on: lhs == WEBRTC_SPL_WORD16_MAX >> 1, rhs == -3.
  lhs = rhs;
  rhs = -3;

  EXPECT_EQ(-1, WEBRTC_SPL_MUL_16_32_RSFT16(lhs, rhs));
  EXPECT_EQ(-1, WEBRTC_SPL_MUL_16_32_RSFT15(lhs, rhs));
  EXPECT_EQ(-3, WEBRTC_SPL_MUL_16_32_RSFT14(lhs, rhs));
  EXPECT_EQ(-24, WEBRTC_SPL_MUL_16_32_RSFT11(lhs, rhs));

  EXPECT_EQ(-12288, WEBRTC_SPL_MUL_16_16_RSFT(lhs, rhs, 2));
  EXPECT_EQ(-12287, WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(lhs, rhs, 2));

  // Saturation gives 21 with the bounds passed in either order.
  EXPECT_EQ(21, WEBRTC_SPL_SAT(lhs, lower, upper));
  EXPECT_EQ(21, WEBRTC_SPL_SAT(lhs, upper, lower));

  // Shifting with negative numbers allowed
  int left_shift = 1;  // Workaround compiler warning using variable here.
  // Positive means left shift
  EXPECT_EQ(32766, WEBRTC_SPL_SHIFT_W32(lhs, left_shift));

  // Shifting with negative numbers not allowed
  // We cannot do casting here due to signed/unsigned problem
  EXPECT_EQ(32766, WEBRTC_SPL_LSHIFT_W32(lhs, 1));

  EXPECT_EQ(8191u, WEBRTC_SPL_RSHIFT_U32(lhs, 1));

  EXPECT_EQ(1470, WEBRTC_SPL_RAND(lower));

  // 16x16 and 16x32 products, including the extreme operand values.
  EXPECT_EQ(-49149, WEBRTC_SPL_MUL_16_16(lhs, rhs));
  EXPECT_EQ(1073676289,
            WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX, WEBRTC_SPL_WORD16_MAX));
  EXPECT_EQ(1073709055,
            WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MAX,
                                        WEBRTC_SPL_WORD32_MAX));
  EXPECT_EQ(1073741824,
            WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN,
                                        WEBRTC_SPL_WORD32_MIN));
  // The expected value differs by one when WEBRTC_ARCH_ARM_V7 is defined.
#ifdef WEBRTC_ARCH_ARM_V7
  EXPECT_EQ(-1073741824,
            WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN,
                                        WEBRTC_SPL_WORD32_MAX));
#else
  EXPECT_EQ(-1073741823,
            WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN,
                                        WEBRTC_SPL_WORD32_MAX));
#endif
}
|
||||
|
||||
TEST_F(SplTest, InlineTest) {
  // Inputs for the inline helpers; the 32-bit pair is overwritten with
  // boundary bit patterns for the saturation checks at the end.
  int16_t pos16 = 121;
  int16_t neg16 = -17;
  int32_t pos32 = 111121;
  int32_t neg32 = -1711;

  EXPECT_EQ(17, WebRtcSpl_GetSizeInBits(pos32));

  // 32-bit signed normalization.
  EXPECT_EQ(0, WebRtcSpl_NormW32(0));
  EXPECT_EQ(31, WebRtcSpl_NormW32(-1));
  EXPECT_EQ(0, WebRtcSpl_NormW32(WEBRTC_SPL_WORD32_MIN));
  EXPECT_EQ(14, WebRtcSpl_NormW32(pos32));

  // 16-bit signed normalization, including every single-bit value and its
  // negation.
  EXPECT_EQ(0, WebRtcSpl_NormW16(0));
  EXPECT_EQ(15, WebRtcSpl_NormW16(-1));
  EXPECT_EQ(0, WebRtcSpl_NormW16(WEBRTC_SPL_WORD16_MIN));
  EXPECT_EQ(4, WebRtcSpl_NormW16(neg32));
  for (int bit = 0; bit < 15; ++bit) {
    int16_t value = 1 << bit;
    EXPECT_EQ(14 - bit, WebRtcSpl_NormW16(value));
    EXPECT_EQ(15 - bit, WebRtcSpl_NormW16(-value));
  }

  // 32-bit unsigned normalization.
  EXPECT_EQ(0, WebRtcSpl_NormU32(0u));
  EXPECT_EQ(0, WebRtcSpl_NormU32(0xffffffff));
  EXPECT_EQ(15, WebRtcSpl_NormU32(static_cast<uint32_t>(pos32)));

  // Saturating add / subtract on values well inside the range.
  EXPECT_EQ(104, WebRtcSpl_AddSatW16(pos16, neg16));
  EXPECT_EQ(138, WebRtcSpl_SubSatW16(pos16, neg16));

  EXPECT_EQ(109410, WebRtcSpl_AddSatW32(pos32, neg32));
  EXPECT_EQ(112832, WebRtcSpl_SubSatW32(pos32, neg32));

  // Saturating add / subtract with 0x80000000 and 0x7fffffff operands.
  pos32 = 0x80000000;
  neg32 = 0x80000000;
  // Cast to signed int to avoid compiler complaint on gtest.h.
  EXPECT_EQ(static_cast<int>(0x80000000), WebRtcSpl_AddSatW32(pos32, neg32));

  pos32 = 0x7fffffff;
  neg32 = 0x7fffffff;
  EXPECT_EQ(0x7fffffff, WebRtcSpl_AddSatW32(pos32, neg32));

  pos32 = 0;
  neg32 = 0x80000000;
  EXPECT_EQ(0x7fffffff, WebRtcSpl_SubSatW32(pos32, neg32));

  pos32 = 0x7fffffff;
  neg32 = 0x80000000;
  EXPECT_EQ(0x7fffffff, WebRtcSpl_SubSatW32(pos32, neg32));

  pos32 = 0x80000000;
  neg32 = 0x7fffffff;
  EXPECT_EQ(static_cast<int>(0x80000000), WebRtcSpl_SubSatW32(pos32, neg32));
}
|
||||
|
||||
TEST_F(SplTest, MathOperationsTest) {
  // Square-root helpers on one large positive value.
  int radicand = 1134567892;
  EXPECT_EQ(33700, WebRtcSpl_Sqrt(radicand));
  EXPECT_EQ(33683, WebRtcSpl_SqrtFloor(radicand));

  // Division helpers: 117 against -5 (signed) and 5 (unsigned).
  int32_t numerator = 117;
  int32_t signed_divisor = -5;
  uint16_t unsigned_divisor = 5;

  EXPECT_EQ(-91772805, WebRtcSpl_DivResultInQ31(signed_divisor, numerator));
  EXPECT_EQ(-23, WebRtcSpl_DivW32W16ResW16(
                     numerator, static_cast<int16_t>(signed_divisor)));
  EXPECT_EQ(-23, WebRtcSpl_DivW32W16(numerator,
                                     static_cast<int16_t>(signed_divisor)));
  EXPECT_EQ(23u, WebRtcSpl_DivU32U16(numerator, unsigned_divisor));
  EXPECT_EQ(0, WebRtcSpl_DivW32HiLow(128, 0, 256));
}
|
||||
|
||||
TEST_F(SplTest, BasicArrayOperationsTest) {
  const size_t kVectorSize = 4;
  int reference[] = {4, 12, 133, 1100};
  int16_t vec16[kVectorSize];
  int32_t vec32[kVectorSize];

  int16_t scratch16[kVectorSize];
  int32_t scratch32[kVectorSize];

  // Fill and clear, 16-bit.
  WebRtcSpl_MemSetW16(vec16, 3, kVectorSize);
  for (size_t i = 0; i < kVectorSize; ++i) {
    EXPECT_EQ(3, vec16[i]);
  }
  WebRtcSpl_ZerosArrayW16(vec16, kVectorSize);
  for (size_t i = 0; i < kVectorSize; ++i) {
    EXPECT_EQ(0, vec16[i]);
  }

  // Fill and clear, 32-bit.
  WebRtcSpl_MemSetW32(vec32, 3, kVectorSize);
  for (size_t i = 0; i < kVectorSize; ++i) {
    EXPECT_EQ(3, vec32[i]);
  }
  WebRtcSpl_ZerosArrayW32(vec32, kVectorSize);
  for (size_t i = 0; i < kVectorSize; ++i) {
    EXPECT_EQ(0, vec32[i]);
  }

  // Scratch buffers become 0, 1, 2, 3 and are then copied into vec16.
  for (size_t i = 0; i < kVectorSize; ++i) {
    scratch16[i] = (int16_t)i;
    scratch32[i] = (int32_t)i;
  }
  WEBRTC_SPL_MEMCPY_W16(vec16, scratch16, kVectorSize);
  for (size_t i = 0; i < kVectorSize; ++i) {
    EXPECT_EQ(vec16[i], scratch16[i]);
  }
  // The equivalent 32-bit copy check is disabled:
  //  WEBRTC_SPL_MEMCPY_W32(b32, bTmp32, kVectorSize);
  //  for (int kk = 0; kk < kVectorSize; ++kk) {
  //    EXPECT_EQ(b32[kk], bTmp32[kk]);
  //  }

  // Copy the last two elements of vec16 to the front of scratch16.
  WebRtcSpl_CopyFromEndW16(vec16, kVectorSize, 2, scratch16);
  for (size_t i = 0; i < 2; ++i) {
    EXPECT_EQ(static_cast<int16_t>(i+2), scratch16[i]);
  }

  // Load the reference values, then check the three right-shift-by-one
  // variants against reference[i] >> 1.
  for (size_t i = 0; i < kVectorSize; ++i) {
    vec32[i] = reference[i];
    vec16[i] = (int16_t)reference[i];
  }
  WebRtcSpl_VectorBitShiftW32ToW16(scratch16, kVectorSize, vec32, 1);
  for (size_t i = 0; i < kVectorSize; ++i) {
    EXPECT_EQ((reference[i]>>1), scratch16[i]);
  }
  WebRtcSpl_VectorBitShiftW16(scratch16, kVectorSize, vec16, 1);
  for (size_t i = 0; i < kVectorSize; ++i) {
    EXPECT_EQ((reference[i]>>1), scratch16[i]);
  }
  WebRtcSpl_VectorBitShiftW32(scratch32, kVectorSize, vec32, 1);
  for (size_t i = 0; i < kVectorSize; ++i) {
    EXPECT_EQ((reference[i]>>1), scratch32[i]);
  }

  // Reversed copy: the destination pointer is the LAST element of scratch16.
  WebRtcSpl_MemCpyReversedOrder(&scratch16[3], vec16, kVectorSize);
  for (size_t i = 0; i < kVectorSize; ++i) {
    EXPECT_EQ(vec16[3-i], scratch16[i]);
  }
}
|
||||
|
||||
TEST_F(SplTest, MinMaxOperationsTest) {
  const size_t kVectorSize = 17;
  const size_t kLastIndex = kVectorSize - 1;

  // Vectors to test the cases where minimum values have to be caught
  // outside of the unrolled loops in ARM-Neon.
  // (The extreme value sits in the last of the 17 elements.)
  int16_t samples16[kVectorSize] = {
      -1, 7485, 0, 3333,
      -18283, 0, 12334, -29871, 988, -3333,
      345, -456, 222, 999, 888, 8774, WEBRTC_SPL_WORD16_MIN};
  int32_t samples32[kVectorSize] = {
      -1, 0, 283211, 3333,
      8712345, 0, -3333, 89345, -374585456, 222, 999, 122345334,
      -12389756, -987329871, 888, -2, WEBRTC_SPL_WORD32_MIN};

  EXPECT_EQ(WEBRTC_SPL_WORD16_MIN,
            WebRtcSpl_MinValueW16(samples16, kVectorSize));
  EXPECT_EQ(WEBRTC_SPL_WORD32_MIN,
            WebRtcSpl_MinValueW32(samples32, kVectorSize));
  EXPECT_EQ(kLastIndex, WebRtcSpl_MinIndexW16(samples16, kVectorSize));
  EXPECT_EQ(kLastIndex, WebRtcSpl_MinIndexW32(samples32, kVectorSize));

  // Test the cases where maximum values have to be caught
  // outside of the unrolled loops in ARM-Neon.
  samples16[kVectorSize - 1] = WEBRTC_SPL_WORD16_MAX;
  samples32[kVectorSize - 1] = WEBRTC_SPL_WORD32_MAX;

  EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
            WebRtcSpl_MaxAbsValueW16(samples16, kVectorSize));
  EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
            WebRtcSpl_MaxValueW16(samples16, kVectorSize));
  EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
            WebRtcSpl_MaxAbsValueW32(samples32, kVectorSize));
  EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
            WebRtcSpl_MaxValueW32(samples32, kVectorSize));
  EXPECT_EQ(kLastIndex, WebRtcSpl_MaxAbsIndexW16(samples16, kVectorSize));
  EXPECT_EQ(kLastIndex, WebRtcSpl_MaxIndexW16(samples16, kVectorSize));
  EXPECT_EQ(kLastIndex, WebRtcSpl_MaxIndexW32(samples32, kVectorSize));

  // Test the cases where multiple maximum and minimum values are present.
  // The index checks below expect the first occurrence to be reported.
  samples16[1] = WEBRTC_SPL_WORD16_MAX;
  samples16[6] = WEBRTC_SPL_WORD16_MIN;
  samples16[11] = WEBRTC_SPL_WORD16_MIN;
  samples32[1] = WEBRTC_SPL_WORD32_MAX;
  samples32[6] = WEBRTC_SPL_WORD32_MIN;
  samples32[11] = WEBRTC_SPL_WORD32_MIN;

  EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
            WebRtcSpl_MaxAbsValueW16(samples16, kVectorSize));
  EXPECT_EQ(WEBRTC_SPL_WORD16_MAX,
            WebRtcSpl_MaxValueW16(samples16, kVectorSize));
  EXPECT_EQ(WEBRTC_SPL_WORD16_MIN,
            WebRtcSpl_MinValueW16(samples16, kVectorSize));
  EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
            WebRtcSpl_MaxAbsValueW32(samples32, kVectorSize));
  EXPECT_EQ(WEBRTC_SPL_WORD32_MAX,
            WebRtcSpl_MaxValueW32(samples32, kVectorSize));
  EXPECT_EQ(WEBRTC_SPL_WORD32_MIN,
            WebRtcSpl_MinValueW32(samples32, kVectorSize));
  EXPECT_EQ(6u, WebRtcSpl_MaxAbsIndexW16(samples16, kVectorSize));
  EXPECT_EQ(1u, WebRtcSpl_MaxIndexW16(samples16, kVectorSize));
  EXPECT_EQ(1u, WebRtcSpl_MaxIndexW32(samples32, kVectorSize));
  EXPECT_EQ(6u, WebRtcSpl_MinIndexW16(samples16, kVectorSize));
  EXPECT_EQ(6u, WebRtcSpl_MinIndexW32(samples32, kVectorSize));
}
|
||||
|
||||
// Runs the element-wise vector helpers on a small fixed input and compares
// every output element with the same arithmetic written out in plain C++.
TEST_F(SplTest, VectorOperationsTest) {
  const size_t kVectorSize = 4;
  int input[] = {4, 12, 133, 1100};
  int16_t first[kVectorSize];
  int16_t second[kVectorSize];
  int16_t result[kVectorSize];

  // Both working vectors start out as copies of |input|.
  for (size_t i = 0; i < kVectorSize; ++i) {
    first[i] = input[i];
    second[i] = input[i];
  }

  // result = (second * 3 + 7) >> 2
  WebRtcSpl_AffineTransformVector(result, second, 3, 7, 2, kVectorSize);
  for (size_t i = 0; i < kVectorSize; ++i) {
    const int expected = (input[i] * 3 + 7) >> 2;
    EXPECT_EQ(expected, result[i]);
  }

  // result = (second * 3 + second * 2 + rounding) >> 2
  WebRtcSpl_ScaleAndAddVectorsWithRound(second, 3, second, 2, 2, result,
                                        kVectorSize);
  for (size_t i = 0; i < kVectorSize; ++i) {
    const int expected = (input[i] * 3 + input[i] * 2 + 2) >> 2;
    EXPECT_EQ(expected, result[i]);
  }

  // The affine transform of |second| is added on top of the previous result.
  WebRtcSpl_AddAffineVectorToVector(result, second, 3, 7, 2, kVectorSize);
  for (size_t i = 0; i < kVectorSize; ++i) {
    const int previous = (input[i] * 3 + input[i] * 2 + 2) >> 2;
    const int added = (second[i] * 3 + 7) >> 2;
    EXPECT_EQ(previous + added, result[i]);
  }

  // Plain and saturating scaling agree for these small values.
  WebRtcSpl_ScaleVector(second, result, 13, kVectorSize, 2);
  for (size_t i = 0; i < kVectorSize; ++i) {
    const int expected = (second[i] * 13) >> 2;
    EXPECT_EQ(expected, result[i]);
  }
  WebRtcSpl_ScaleVectorWithSat(second, result, 13, kVectorSize, 2);
  for (size_t i = 0; i < kVectorSize; ++i) {
    const int expected = (second[i] * 13) >> 2;
    EXPECT_EQ(expected, result[i]);
  }

  // result = (first * 13 >> 2) + (second * 7 >> 2)
  WebRtcSpl_ScaleAndAddVectors(first, 13, 2, second, 7, 2, result,
                               kVectorSize);
  for (size_t i = 0; i < kVectorSize; ++i) {
    const int scaled_first = (first[i] * 13) >> 2;
    const int scaled_second = (second[i] * 7) >> 2;
    EXPECT_EQ(scaled_first + scaled_second, result[i]);
  }

  // (first + second) >> 2 equals input >> 1 because both hold |input|.
  WebRtcSpl_AddVectorsAndShift(result, first, second, kVectorSize, 2);
  for (size_t i = 0; i < kVectorSize; ++i) {
    EXPECT_EQ(input[i] >> 1, result[i]);
  }

  // |second| is walked backwards starting from its last element.
  WebRtcSpl_ReverseOrderMultArrayElements(result, first, &second[3],
                                          kVectorSize, 2);
  for (size_t i = 0; i < kVectorSize; ++i) {
    const int expected = (first[i] * second[3 - i]) >> 2;
    EXPECT_EQ(expected, result[i]);
  }

  WebRtcSpl_ElementwiseVectorMult(result, first, second, kVectorSize, 6);
  for (size_t i = 0; i < kVectorSize; ++i) {
    const int expected = (first[i] * second[i]) >> 6;
    EXPECT_EQ(expected, result[i]);
  }

  // All but the last (largest) input element give the saturated maximum.
  WebRtcSpl_SqrtOfOneMinusXSquared(second, kVectorSize, result);
  for (size_t i = 0; i < kVectorSize - 1; ++i) {
    EXPECT_EQ(32767, result[i]);
  }
  EXPECT_EQ(32749, result[kVectorSize - 1]);

  EXPECT_EQ(0, WebRtcSpl_GetScalingSquare(second, kVectorSize, 1));
}
|
||||
|
||||
// Checks the Levinson-Durbin return code for an unstable and a stable
// autocorrelation sequence, and the coefficients produced for the stable one.
TEST_F(SplTest, EstimatorsTest) {
  const size_t kOrder = 2;
  const int32_t kUnstableInput[] = { 4, 12, 133, 1100 };
  const int32_t kStableInput[] = { 1100, 133, 12, 4 };
  const int16_t kExpectedLpc[] = { 4096, -497, 15, 0 };
  const int16_t kExpectedRefl[] = { -3962, 123, 0, 0 };

  int16_t lpc_out[kOrder + 2] = { 0 };
  int16_t refl_out[kOrder + 2] = { 0 };

  // The unstable call runs first; the stable call then overwrites its output.
  EXPECT_EQ(0, WebRtcSpl_LevinsonDurbin(kUnstableInput, lpc_out, refl_out,
                                        kOrder));
  EXPECT_EQ(1, WebRtcSpl_LevinsonDurbin(kStableInput, lpc_out, refl_out,
                                        kOrder));

  for (size_t n = 0; n < kOrder + 2; ++n) {
    EXPECT_EQ(kExpectedLpc[n], lpc_out[n]);
    EXPECT_EQ(kExpectedRefl[n], refl_out[n]);
  }
}
|
||||
|
||||
// Smoke test for the MA/AR filter routines: one sample is pushed through the
// fast Q12 filters, and the full AR filter must report kVectorSize samples.
TEST_F(SplTest, FilterTest) {
  const size_t kVectorSize = 4;
  const size_t kFilterOrder = 3;
  int16_t ar_coefs[] = {1, 2, 33, 100};
  int16_t ar_coefs5[] = {1, 2, 33, 100, -5};
  int16_t ma_coefs[] = {4, 12, 133, 110};
  int16_t input[kVectorSize];
  int16_t output[kVectorSize];
  int16_t output_low[kVectorSize];
  int16_t state[kVectorSize];
  int16_t state_low[kVectorSize];

  WebRtcSpl_ZerosArrayW16(state, kVectorSize);
  WebRtcSpl_ZerosArrayW16(state_low, kVectorSize);

  for (size_t n = 0; n < kVectorSize; ++n) {
    input[n] = ar_coefs[n];
    output[n] = 0;
  }

  // MA filter: the input buffer carries |kFilterOrder| state samples in
  // front of the single sample that is actually filtered.
  WebRtcSpl_FilterMAFastQ12(&input[kFilterOrder], output, ma_coefs,
                            kFilterOrder + 1, 1);
  EXPECT_EQ(0, output[0]);

  // AR filter: here the output buffer carries |kFilterOrder| state samples
  // in front of the single sample that is produced.
  WebRtcSpl_FilterARFastQ12(input, &output[kFilterOrder], ar_coefs,
                            kFilterOrder + 1, 1);
  EXPECT_EQ(0, output[kFilterOrder]);

  EXPECT_EQ(kVectorSize,
            WebRtcSpl_FilterAR(ar_coefs5, 5,
                               input, kVectorSize,
                               state, kVectorSize,
                               state_low, kVectorSize,
                               output, output_low,
                               kVectorSize));
}
|
||||
|
||||
// Pins the pseudo-random sequence generated from a fixed seed. The calls
// share one seed variable, so their order is part of the expectation.
TEST_F(SplTest, RandTest) {
  const int kVectorSize = 4;
  int16_t expected_uniform[] = {3653, 12446, 8525, 30691};
  int16_t generated[kVectorSize];
  uint32_t seed = 100000;

  EXPECT_EQ(7086, WebRtcSpl_RandU(&seed));
  EXPECT_EQ(31565, WebRtcSpl_RandU(&seed));
  EXPECT_EQ(-9786, WebRtcSpl_RandN(&seed));

  EXPECT_EQ(kVectorSize, WebRtcSpl_RandUArray(generated, kVectorSize, &seed));
  for (int n = 0; n < kVectorSize; ++n) {
    EXPECT_EQ(expected_uniform[n], generated[n]);
  }
}
|
||||
|
||||
// Dot product of the shared test vector with itself, right-shifted by 2.
TEST_F(SplTest, DotProductWithScaleTest) {
  const int kScaling = 2;
  EXPECT_EQ(605362796,
            WebRtcSpl_DotProductWithScale(vector16, vector16, kVector16Size,
                                          kScaling));
}
|
||||
|
||||
// Cross-correlates the shared test vector with a local sequence and checks
// the first three lags against the reference for the implementation in use.
TEST_F(SplTest, CrossCorrelationTest) {
  // Note the relation between the function arguments specified by the API.
  const size_t kCrossCorrelationDimension = 3;
  const int kShift = 2;
  const int kStep = 1;
  const size_t kSeqDimension = 6;

  const int16_t kVector16[kVector16Size] = {
      1, 4323, 1963, WEBRTC_SPL_WORD16_MAX, WEBRTC_SPL_WORD16_MIN + 5,
      -3333, -876, 8483, 142};
  int32_t correlation[kCrossCorrelationDimension] = {0};

  WebRtcSpl_CrossCorrelation(correlation, vector16, kVector16, kSeqDimension,
                             kCrossCorrelationDimension, kShift, kStep);

  // WebRtcSpl_CrossCorrelationC() and WebRtcSpl_CrossCorrelationNeon()
  // are not bit-exact, so the reference depends on which one is installed.
  const int32_t kExpected[kCrossCorrelationDimension] =
      {-266947903, -15579555, -171282001};
  const int32_t* reference = kExpected;
#if !defined(MIPS32_LE)
  const int32_t kExpectedNeon[kCrossCorrelationDimension] =
      {-266947901, -15579553, -171281999};
  if (WebRtcSpl_CrossCorrelation != WebRtcSpl_CrossCorrelationC) {
    reference = kExpectedNeon;
  }
#endif

  for (size_t lag = 0; lag < kCrossCorrelationDimension; ++lag) {
    EXPECT_EQ(reference[lag], correlation[lag]);
  }
}
|
||||
|
||||
// Auto-correlates the shared test vector over all lags and checks the
// returned count, the reported scaling and every correlation value.
TEST_F(SplTest, AutoCorrelationTest) {
  const int32_t kExpected[kVector16Size] = {
      302681398, 14223410, -121705063, -85221647, -17104971,
      61806945, 6644603, -669329, 43};
  int32_t autocorr[kVector16Size];
  int scaling = 0;

  EXPECT_EQ(kVector16Size,
            WebRtcSpl_AutoCorrelation(vector16, kVector16Size,
                                      kVector16Size - 1, autocorr, &scaling));
  EXPECT_EQ(3, scaling);

  for (size_t lag = 0; lag < kVector16Size; ++lag) {
    EXPECT_EQ(kExpected[lag], autocorr[lag]);
  }
}
|
||||
|
||||
// Checks the 4-point Hanning window table and the energy of a small vector.
// The reflection-coefficient checks are still disabled (see the TODO below).
TEST_F(SplTest, SignalProcessingTest) {
  const size_t kVectorSize = 4;
  int samples[] = {1, 2, 33, 100};
  const int16_t kHanning[4] = { 2399, 8192, 13985, 16384 };
  int16_t work[kVectorSize];
  int16_t window[kVectorSize];
  int energy_scale = 0;

  for (size_t n = 0; n < kVectorSize; ++n) {
    work[n] = samples[n];
  }

  // TODO(bjornv): Activate the Reflection Coefficient tests when refactoring.
  //  WebRtcSpl_ReflCoefToLpc(b16, kVectorSize, bTmp16);
  ////  for (int kk = 0; kk < kVectorSize; ++kk) {
  ////    EXPECT_EQ(aTmp16[kk], bTmp16[kk]);
  ////  }
  //  WebRtcSpl_LpcToReflCoef(bTmp16, kVectorSize, b16);
  ////  for (int kk = 0; kk < kVectorSize; ++kk) {
  ////    EXPECT_EQ(a16[kk], b16[kk]);
  ////  }
  //  WebRtcSpl_AutoCorrToReflCoef(b32, kVectorSize, bTmp16);
  ////  for (int kk = 0; kk < kVectorSize; ++kk) {
  ////    EXPECT_EQ(aTmp16[kk], bTmp16[kk]);
  ////  }

  WebRtcSpl_GetHanningWindow(window, kVectorSize);
  for (size_t n = 0; n < kVectorSize; ++n) {
    EXPECT_EQ(kHanning[n], window[n]);
  }

  // Reload the input before measuring its energy.
  for (size_t n = 0; n < kVectorSize; ++n) {
    work[n] = samples[n];
  }
  EXPECT_EQ(11094, WebRtcSpl_Energy(work, kVectorSize, &energy_scale));
  EXPECT_EQ(0, energy_scale);
}
|
||||
|
||||
// Smoke test for the complex FFT helpers: the forward and inverse transforms
// must report success (0) on an 8-point complex buffer (stages == 3, i.e.
// 16 interleaved int16_t values), and the bit-reversal must run on it.
//
// TODO: the transformed data itself is not verified. The previous version
// carried commented-out comparisons against an array |A| that does not
// exist, plus an empty loop around one of them; both were dead code and
// have been removed. Add real reference vectors to check the output.
TEST_F(SplTest, FFTTest) {
  int16_t B[] = {1, 2, 33, 100,
                 2, 3, 34, 101,
                 3, 4, 35, 102,
                 4, 5, 36, 103};

  EXPECT_EQ(0, WebRtcSpl_ComplexFFT(B, 3, 1));
  EXPECT_EQ(0, WebRtcSpl_ComplexIFFT(B, 3, 1));
  WebRtcSpl_ComplexBitReverse(B, 3);
}
|
||||
|
||||
// Resamples a full-scale step (minimum followed by maximum) from 48 kHz to
// 32 kHz and checks both the 32-bit output and its 16-bit saturated version.
TEST_F(SplTest, Resample48WithSaturationTest) {
  // 3 * kBlockSize input samples are resampled to 2 * kBlockSize outputs.
  const size_t kBlockSize = 16;

  // Saturated input: 24 minimum values followed by 31 maximum values.
  // The array holds 3 * kBlockSize + 7 = 55 entries in total.
  const int32_t kVectorSaturated[3 * kBlockSize + 7] = {
      -32768, -32768, -32768, -32768, -32768, -32768,
      -32768, -32768, -32768, -32768, -32768, -32768,
      -32768, -32768, -32768, -32768, -32768, -32768,
      -32768, -32768, -32768, -32768, -32768, -32768,
      32767, 32767, 32767, 32767, 32767, 32767,
      32767, 32767, 32767, 32767, 32767, 32767,
      32767, 32767, 32767, 32767, 32767, 32767,
      32767, 32767, 32767, 32767, 32767, 32767,
      32767, 32767, 32767, 32767, 32767, 32767,
      32767
  };

  // Expected 32-bit outputs before (1) and after (2) the step.
  const int32_t kRefValue32kHz1 = -1077493760;
  const int32_t kRefValue32kHz2 = 1077493645;

  // After the saturating bit shift the 16-bit outputs sit at the limits.
  const int16_t kRefValue16kHz1 = -32768;
  const int16_t kRefValue16kHz2 = 32767;

  // Output positions in [kLagBegin, kLagEnd) are not compared, to account
  // for the filter lag around the step.
  const size_t kLagBegin = 12;
  const size_t kLagEnd = 16;

  int32_t out_vector[2 * kBlockSize];
  int16_t out_vector_w16[2 * kBlockSize];

  WebRtcSpl_Resample48khzTo32khz(kVectorSaturated, out_vector, kBlockSize);
  WebRtcSpl_VectorBitShiftW32ToW16(out_vector_w16, 2 * kBlockSize, out_vector,
                                   15);

  for (size_t n = 0; n < 2 * kBlockSize; ++n) {
    if (n < kLagBegin) {
      EXPECT_EQ(kRefValue32kHz1, out_vector[n]);
      EXPECT_EQ(kRefValue16kHz1, out_vector_w16[n]);
    } else if (n >= kLagEnd) {
      EXPECT_EQ(kRefValue32kHz2, out_vector[n]);
      EXPECT_EQ(kRefValue16kHz2, out_vector_w16[n]);
    }
  }
}
|
|
@ -0,0 +1,140 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/* The global function contained in this file initializes SPL function
|
||||
* pointers to the generic C, ARM NEON or MIPS implementations.
|
||||
*
|
||||
* Some code came from common/rtcd.c in the WebM project.
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
|
||||
|
||||
/* Declare function pointers. */
|
||||
/* Definitions of the run-time dispatch pointers for the SPL routines that
 * have more than one implementation. They carry no initializer here; each
 * one is assigned by InitPointersToC(), InitPointersToNeon() or
 * InitPointersToMIPS() when WebRtcSpl_Init() runs InitFunctionPointers().
 */
MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
|
||||
MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
|
||||
MaxValueW16 WebRtcSpl_MaxValueW16;
|
||||
MaxValueW32 WebRtcSpl_MaxValueW32;
|
||||
MinValueW16 WebRtcSpl_MinValueW16;
|
||||
MinValueW32 WebRtcSpl_MinValueW32;
|
||||
CrossCorrelation WebRtcSpl_CrossCorrelation;
|
||||
DownsampleFast WebRtcSpl_DownsampleFast;
|
||||
ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
|
||||
|
||||
#if (defined(WEBRTC_DETECT_NEON) || !defined(WEBRTC_HAS_NEON)) && \
|
||||
!defined(MIPS32_LE)
|
||||
/* Initialize function pointers to the generic C version. */
|
||||
static void InitPointersToC() {
|
||||
WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
|
||||
WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
|
||||
WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
|
||||
WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
|
||||
WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;
|
||||
WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;
|
||||
WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
|
||||
WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
|
||||
/* Initialize function pointers to the Neon version. */
|
||||
static void InitPointersToNeon() {
|
||||
WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
|
||||
WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;
|
||||
WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
|
||||
WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
|
||||
WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
|
||||
WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;
|
||||
WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon;
|
||||
WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(MIPS32_LE)
|
||||
/* Initialize function pointers to the MIPS version. */
|
||||
static void InitPointersToMIPS() {
|
||||
WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips;
|
||||
WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips;
|
||||
WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips;
|
||||
WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips;
|
||||
WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips;
|
||||
WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips;
|
||||
WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips;
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRound_mips;
|
||||
#else
|
||||
WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRound =
|
||||
WebRtcSpl_ScaleAndAddVectorsWithRoundC;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Selects the implementation set for this build and installs it into the
 * SPL function pointers. With WEBRTC_DETECT_NEON the choice between NEON
 * and generic C is made at run time from the reported CPU features; in all
 * other configurations it is fixed at compile time.
 */
static void InitFunctionPointers(void) {
#if defined(WEBRTC_DETECT_NEON)
  if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) == 0) {
    /* No NEON reported by the CPU: use the portable code. */
    InitPointersToC();
  } else {
    InitPointersToNeon();
  }
#elif defined(WEBRTC_HAS_NEON)
  InitPointersToNeon();
#elif defined(MIPS32_LE)
  InitPointersToMIPS();
#else
  InitPointersToC();
#endif  /* WEBRTC_DETECT_NEON */
}
|
||||
|
||||
#if defined(WEBRTC_POSIX)
|
||||
#include <pthread.h>
|
||||
|
||||
/* Runs |func| through pthread_once() with a control object private to this
 * function, so |func| is executed at most once per process through here.
 */
static void once(void (*func)(void)) {
  static pthread_once_t once_control = PTHREAD_ONCE_INIT;

  (void)pthread_once(&once_control, func);
}
|
||||
|
||||
#elif defined(_WIN32)
|
||||
#include <windows.h>
|
||||
|
||||
static void once(void (*func)(void)) {
|
||||
/* Didn't use InitializeCriticalSection() since there's no race-free context
|
||||
* in which to execute it.
|
||||
*
|
||||
* TODO(kma): Change to different implementation (e.g.
|
||||
* InterlockedCompareExchangePointer) to avoid issues similar to
|
||||
* http://code.google.com/p/webm/issues/detail?id=467.
|
||||
*/
|
||||
static CRITICAL_SECTION lock = {(void *)((size_t)-1), -1, 0, 0, 0, 0};
|
||||
static int done = 0;
|
||||
|
||||
EnterCriticalSection(&lock);
|
||||
if (!done) {
|
||||
func();
|
||||
done = 1;
|
||||
}
|
||||
LeaveCriticalSection(&lock);
|
||||
}
|
||||
|
||||
/* There's no fallback version as an #else block here to ensure thread safety.
|
||||
* In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build
|
||||
* system should pick it up.
|
||||
*/
|
||||
#endif /* WEBRTC_POSIX */
|
||||
|
||||
void WebRtcSpl_Init() {
|
||||
once(InitFunctionPointers);
|
||||
}
|
|
@ -0,0 +1,184 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_Sqrt().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
int32_t WebRtcSpl_SqrtLocal(int32_t in);
|
||||
|
||||
/*
 * Fixed-point polynomial used by WebRtcSpl_Sqrt().
 *
 * With x_half = (in/2 - 2^30) / 2^31, the function evaluates
 *
 *   t = 1 + x_half - 0.5*x_half^2 + 0.5*x_half^3
 *         - 0.625*x_half^4 + 0.875*x_half^5
 *
 * and returns t scaled by 2^31, plus a rounding bit of 2^15. Only the upper
 * 16 bits of each intermediate power are carried forward, so the result is
 * an approximation. The terms are accumulated in the order
 * 1, x_half, x_half^2, x_half^4, x_half^5, x_half^3.
 */
int32_t WebRtcSpl_SqrtLocal(int32_t in)
{
    int16_t xh;      /* x_half as a 16-bit fraction */
    int16_t hi;      /* upper 16 bits of the power currently in use */
    int32_t sum;     /* running value of the polynomial */
    int32_t sq;      /* x_half^2, always non-negative */
    int32_t term;    /* scratch for the individual powers */

    /* sum = in/2 - 0.5; its upper half is x_half. */
    sum = in / 2;
    sum = sum - ((int32_t)0x40000000);
    xh = (int16_t)(sum >> 16);

    /* Add 1.0 as two steps of 0.5, since 1.0 itself is not representable. */
    sum = sum + ((int32_t)0x40000000);
    sum = sum + ((int32_t)0x40000000);

    /* Second-order term: sum -= 0.5 * x_half^2. */
    sq = ((int32_t)xh) * ((int32_t)xh) * 2;
    term = -sq;
    sum = sum + (term >> 1);

    /* Fourth-order term: sum -= 0.625 * x_half^4. */
    term >>= 16;
    term = term * term * 2;
    hi = (int16_t)(term >> 16);
    sum += -20480 * hi * 2;

    /* Fifth-order term: sum += 0.875 * x_half^5. */
    term = xh * hi * 2;
    hi = (int16_t)(term >> 16);
    sum += 28672 * hi * 2;

    /* Third-order term: sum += 0.5 * x_half^3. */
    hi = (int16_t)(sq >> 16);
    term = xh * hi * 2;
    sum = sum + (term >> 1);

    /* Rounding bit. */
    sum = sum + ((int32_t)32768);

    return sum;
}
|
||||
|
||||
int32_t WebRtcSpl_Sqrt(int32_t value)
|
||||
{
|
||||
/*
|
||||
Algorithm:
|
||||
|
||||
Six term Taylor Series is used here to compute the square root of a number
|
||||
y^0.5 = (1+x)^0.5 where x = y-1
|
||||
= 1+(x/2)-0.5*((x/2)^2+0.5*((x/2)^3-0.625*((x/2)^4+0.875*((x/2)^5)
|
||||
0.5 <= x < 1
|
||||
|
||||
Example of how the algorithm works, with ut=sqrt(in), and
|
||||
with in=73632 and ut=271 (even shift value case):
|
||||
|
||||
in=73632
|
||||
y= in/131072
|
||||
x=y-1
|
||||
t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
|
||||
ut=t*(1/sqrt(2))*512
|
||||
|
||||
or:
|
||||
|
||||
in=73632
|
||||
in2=73632*2^14
|
||||
y= in2/2^31
|
||||
x=y-1
|
||||
t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
|
||||
ut=t*(1/sqrt(2))
|
||||
ut2=ut*2^9
|
||||
|
||||
which gives:
|
||||
|
||||
in = 73632
|
||||
in2 = 1206386688
|
||||
y = 0.56176757812500
|
||||
x = -0.43823242187500
|
||||
t = 0.74973506527313
|
||||
ut = 0.53014274874797
|
||||
ut2 = 2.714330873589594e+002
|
||||
|
||||
or:
|
||||
|
||||
in=73632
|
||||
in2=73632*2^14
|
||||
y=in2/2
|
||||
x=y-2^30
|
||||
x_half=x/2^31
|
||||
t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
|
||||
+ 0.875*((x_half)^5)
|
||||
ut=t*(1/sqrt(2))
|
||||
ut2=ut*2^9
|
||||
|
||||
which gives:
|
||||
|
||||
in = 73632
|
||||
in2 = 1206386688
|
||||
y = 603193344
|
||||
x = -470548480
|
||||
x_half = -0.21911621093750
|
||||
t = 0.74973506527313
|
||||
ut = 0.53014274874797
|
||||
ut2 = 2.714330873589594e+002
|
||||
|
||||
*/
|
||||
|
||||
int16_t x_norm, nshift, t16, sh;
|
||||
int32_t A;
|
||||
|
||||
int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82)
|
||||
|
||||
A = value;
|
||||
|
||||
if (A == 0)
|
||||
return (int32_t)0; // sqrt(0) = 0
|
||||
|
||||
sh = WebRtcSpl_NormW32(A); // # shifts to normalize A
|
||||
A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A
|
||||
if (A < (WEBRTC_SPL_WORD32_MAX - 32767))
|
||||
{
|
||||
A = A + ((int32_t)32768); // Round off bit
|
||||
} else
|
||||
{
|
||||
A = WEBRTC_SPL_WORD32_MAX;
|
||||
}
|
||||
|
||||
x_norm = (int16_t)(A >> 16); // x_norm = AH
|
||||
|
||||
nshift = (sh / 2);
|
||||
assert(nshift >= 0);
|
||||
|
||||
A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16);
|
||||
A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16)
|
||||
A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A)
|
||||
|
||||
if (2 * nshift == sh) {
|
||||
// Even shift value case
|
||||
|
||||
t16 = (int16_t)(A >> 16); // t16 = AH
|
||||
|
||||
A = k_sqrt_2 * t16 * 2; // A = 1/sqrt(2)*t16
|
||||
A = A + ((int32_t)32768); // Round off
|
||||
A = A & ((int32_t)0x7fff0000); // Round off
|
||||
|
||||
A >>= 15; // A = A>>16
|
||||
|
||||
} else
|
||||
{
|
||||
A >>= 16; // A = A>>16
|
||||
}
|
||||
|
||||
A = A & ((int32_t)0x0000ffff);
|
||||
A >>= nshift; // De-normalize the result.
|
||||
|
||||
return A;
|
||||
}
|
77
third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c
vendored
Normal file
77
third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor.c
vendored
Normal file
|
@ -0,0 +1,77 @@
|
|||
/*
|
||||
* Written by Wilco Dijkstra, 1996. The following email exchange establishes the
|
||||
* license.
|
||||
*
|
||||
* From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
|
||||
* Date: Fri, Jun 24, 2011 at 3:20 AM
|
||||
* Subject: Re: sqrt routine
|
||||
* To: Kevin Ma <kma@google.com>
|
||||
* Hi Kevin,
|
||||
* Thanks for asking. Those routines are public domain (originally posted to
|
||||
* comp.sys.arm a long time ago), so you can use them freely for any purpose.
|
||||
* Cheers,
|
||||
* Wilco
|
||||
*
|
||||
* ----- Original Message -----
|
||||
* From: "Kevin Ma" <kma@google.com>
|
||||
* To: <Wilco.Dijkstra@ntlworld.com>
|
||||
* Sent: Thursday, June 23, 2011 11:44 PM
|
||||
* Subject: Fwd: sqrt routine
|
||||
* Hi Wilco,
|
||||
* I saw your sqrt routine from several web sites, including
|
||||
* http://www.finesse.demon.co.uk/steven/sqrt.html.
|
||||
* Just wonder if there's any copyright information with your Successive
|
||||
* approximation routines, or if I can freely use it for any purpose.
|
||||
* Thanks.
|
||||
* Kevin
|
||||
*/
|
||||
|
||||
// Minor modifications in code style for WebRTC, 2012.
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
/*
|
||||
* Algorithm:
|
||||
* Successive approximation of the equation (root + delta) ^ 2 = N
|
||||
* until delta < 1. If delta < 1 we have the integer part of SQRT (N).
|
||||
* Use delta = 2^i for i = 15 .. 0.
|
||||
*
|
||||
* Output precision is 16 bits. Note for large input values (close to
|
||||
* 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
|
||||
* contains the MSB information (a non-sign value). Do with caution
|
||||
* if you need to cast the output to int16_t type.
|
||||
*
|
||||
* If the input value is negative, it returns 0.
|
||||
*/
|
||||
|
||||
/*
 * Returns the integer part of sqrt(value), or 0 when value is negative.
 *
 * The 16 result bits are decided from bit 15 down to bit 0. |root| holds
 * twice the partial result r, so for bit position b the quantity
 *
 *   trial = (root + 2^b) << b = (r + 2^b)^2 - r^2
 *
 * is exactly what must still fit into the remainder |value| for bit b to
 * belong to the result. A negative |value| never reaches any (positive)
 * trial, so |root| stays 0.
 *
 * This loop replaces the former WEBRTC_SPL_SQRT_ITER(N) statement macro,
 * which was not wrapped in do { } while (0), implicitly used the caller's
 * locals and stayed defined for the rest of the translation unit. The
 * sequence of operations and the results are unchanged.
 */
int32_t WebRtcSpl_SqrtFloor(int32_t value)
{
    int32_t root = 0;
    int bit;

    for (bit = 15; bit >= 0; --bit) {
        const int32_t trial = (root + ((int32_t)1 << bit)) << bit;

        if (value >= trial) {
            value -= trial;
            root |= (int32_t)2 << bit;
        }
    }

    /* |root| is twice the result. */
    return root >> 1;
}
|
110
third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S
vendored
Normal file
110
third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_arm.S
vendored
Normal file
|
@ -0,0 +1,110 @@
|
|||
@
|
||||
@ Written by Wilco Dijkstra, 1996. The following email exchange establishes the
|
||||
@ license.
|
||||
@
|
||||
@ From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
|
||||
@ Date: Fri, Jun 24, 2011 at 3:20 AM
|
||||
@ Subject: Re: sqrt routine
|
||||
@ To: Kevin Ma <kma@google.com>
|
||||
@ Hi Kevin,
|
||||
@ Thanks for asking. Those routines are public domain (originally posted to
|
||||
@ comp.sys.arm a long time ago), so you can use them freely for any purpose.
|
||||
@ Cheers,
|
||||
@ Wilco
|
||||
@
|
||||
@ ----- Original Message -----
|
||||
@ From: "Kevin Ma" <kma@google.com>
|
||||
@ To: <Wilco.Dijkstra@ntlworld.com>
|
||||
@ Sent: Thursday, June 23, 2011 11:44 PM
|
||||
@ Subject: Fwd: sqrt routine
|
||||
@ Hi Wilco,
|
||||
@ I saw your sqrt routine from several web sites, including
|
||||
@ http://www.finesse.demon.co.uk/steven/sqrt.html.
|
||||
@ Just wonder if there's any copyright information with your Successive
|
||||
@ approximation routines, or if I can freely use it for any purpose.
|
||||
@ Thanks.
|
||||
@ Kevin
|
||||
|
||||
@ Minor modifications in code style for WebRTC, 2012.
|
||||
@ Output is bit-exact with the reference C code in spl_sqrt_floor.c.
|
||||
|
||||
@ Input : r0 32 bit unsigned integer
|
||||
@ Output: r0 = INT (SQRT (r0)), precision is 16 bits
|
||||
@ Registers touched: r1, r2
|
||||
|
||||
#include "webrtc/system_wrappers/interface/asm_defines.h"
|
||||
|
||||
@ WebRtcSpl_SqrtFloor: argument and result are both in r0; r1 and r2 are
@ used as scratch. The body is 16 copies of the same three-instruction step,
@ differing only in the rotate amount (2 * i for i = 0 .. 15).
GLOBAL_FUNCTION WebRtcSpl_SqrtFloor
|
||||
.align 2
|
||||
DEFINE_FUNCTION WebRtcSpl_SqrtFloor
|
||||
@ r1 = 3 << 30 is the constant added by every adc below.
mov r1, #3 << 30
|
||||
@ r2 = 1 << 30 is the first value compared against the input.
mov r2, #1 << 30
|
||||
|
||||
@ unroll for i = 0 .. 15
|
||||
|
||||
@ One step: cmp sets the carry flag when r0 is unsigned higher-or-same than
@ r2 rotated right by 2 * i; subhs then subtracts that rotated value from r0
@ only in that case and leaves the flags untouched; adc finally forms
@ r2 = r1 + (r2 << 1) + carry, so the outcome of the compare enters r2.
cmp r0, r2, ror #2 * 0
|
||||
subhs r0, r0, r2, ror #2 * 0
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 1
|
||||
subhs r0, r0, r2, ror #2 * 1
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 2
|
||||
subhs r0, r0, r2, ror #2 * 2
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 3
|
||||
subhs r0, r0, r2, ror #2 * 3
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 4
|
||||
subhs r0, r0, r2, ror #2 * 4
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 5
|
||||
subhs r0, r0, r2, ror #2 * 5
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 6
|
||||
subhs r0, r0, r2, ror #2 * 6
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 7
|
||||
subhs r0, r0, r2, ror #2 * 7
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 8
|
||||
subhs r0, r0, r2, ror #2 * 8
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 9
|
||||
subhs r0, r0, r2, ror #2 * 9
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 10
|
||||
subhs r0, r0, r2, ror #2 * 10
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 11
|
||||
subhs r0, r0, r2, ror #2 * 11
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 12
|
||||
subhs r0, r0, r2, ror #2 * 12
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 13
|
||||
subhs r0, r0, r2, ror #2 * 13
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 14
|
||||
subhs r0, r0, r2, ror #2 * 14
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
cmp r0, r2, ror #2 * 15
|
||||
subhs r0, r0, r2, ror #2 * 15
|
||||
adc r2, r1, r2, lsl #1
|
||||
|
||||
@ Result: r2 with its two top bits cleared is copied into r0, then return.
bic r0, r2, #3 << 30 @ for rounding add: cmp r0, r2 adc r2, #1
|
||||
bx lr
|
207
third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c
vendored
Normal file
207
third_party/webrtc/src/webrtc/common_audio/signal_processing/spl_sqrt_floor_mips.c
vendored
Normal file
|
@ -0,0 +1,207 @@
|
|||
/*
|
||||
* Written by Wilco Dijkstra, 1996. The following email exchange establishes the
|
||||
* license.
|
||||
*
|
||||
* From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
|
||||
* Date: Fri, Jun 24, 2011 at 3:20 AM
|
||||
* Subject: Re: sqrt routine
|
||||
* To: Kevin Ma <kma@google.com>
|
||||
* Hi Kevin,
|
||||
* Thanks for asking. Those routines are public domain (originally posted to
|
||||
* comp.sys.arm a long time ago), so you can use them freely for any purpose.
|
||||
* Cheers,
|
||||
* Wilco
|
||||
*
|
||||
* ----- Original Message -----
|
||||
* From: "Kevin Ma" <kma@google.com>
|
||||
* To: <Wilco.Dijkstra@ntlworld.com>
|
||||
* Sent: Thursday, June 23, 2011 11:44 PM
|
||||
* Subject: Fwd: sqrt routine
|
||||
* Hi Wilco,
|
||||
* I saw your sqrt routine from several web sites, including
|
||||
* http://www.finesse.demon.co.uk/steven/sqrt.html.
|
||||
* Just wonder if there's any copyright information with your Successive
|
||||
* approximation routines, or if I can freely use it for any purpose.
|
||||
* Thanks.
|
||||
* Kevin
|
||||
*/
|
||||
|
||||
// Minor modifications in code style for WebRTC, 2012.
|
||||
// Code optimizations for MIPS, 2013.
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
/*
|
||||
* Algorithm:
|
||||
* Successive approximation of the equation (root + delta) ^ 2 = N
|
||||
* until delta < 1. If delta < 1 we have the integer part of SQRT (N).
|
||||
* Use delta = 2^i for i = 15 .. 0.
|
||||
*
|
||||
* Output precision is 16 bits. Note for large input values (close to
|
||||
* 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
|
||||
* contains the MSB information (a non-sign value). Do with caution
|
||||
* if you need to cast the output to int16_t type.
|
||||
*
|
||||
* If the input value is negative, it returns 0.
|
||||
*/
|
||||
|
||||
|
||||
int32_t WebRtcSpl_SqrtFloor(int32_t value)
|
||||
{
|
||||
int32_t root = 0, tmp1, tmp2, tmp3, tmp4;
|
||||
|
||||
__asm __volatile(
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
|
||||
"lui %[tmp1], 0x4000 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"sub %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"lui %[tmp1], 0x1 \n\t"
|
||||
"or %[tmp4], %[root], %[tmp1] \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x4000 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 14 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x8000 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x2000 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 13 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x4000 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x1000 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 12 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x2000 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x800 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 11 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x1000 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x400 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 10 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x800 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x200 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 9 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x400 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x100 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 8 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x200 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x80 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 7 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x100 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x40 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 6 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x80 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x20 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 5 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x40 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x10 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 4 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x20 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x8 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 3 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x10 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x4 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 2 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x8 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x2 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"sll %[tmp1], 1 \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"subu %[tmp3], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x4 \n\t"
|
||||
"movz %[value], %[tmp3], %[tmp2] \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
"addiu %[tmp1], $0, 0x1 \n\t"
|
||||
"addu %[tmp1], %[tmp1], %[root] \n\t"
|
||||
"slt %[tmp2], %[value], %[tmp1] \n\t"
|
||||
"ori %[tmp4], %[root], 0x2 \n\t"
|
||||
"movz %[root], %[tmp4], %[tmp2] \n\t"
|
||||
|
||||
".set pop \n\t"
|
||||
|
||||
: [root] "+r" (root), [value] "+r" (value),
|
||||
[tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
|
||||
[tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
|
||||
:
|
||||
);
|
||||
|
||||
return root >> 1;
|
||||
}
|
||||
|
208
third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c
vendored
Normal file
208
third_party/webrtc/src/webrtc/common_audio/signal_processing/splitting_filter.c
vendored
Normal file
|
@ -0,0 +1,208 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains the splitting filter functions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
// Upper bound on the number of samples in one low-band or high-band frame
// (10 ms at 64 kHz).
enum { kMaxBandFrameLength = 320 };

// Coefficients (Q16) of the two all-pass branches of the QMF filter bank.
static const uint16_t WebRtcSpl_kAllPassFilter1[3] = {
    6418, 36982, 57261
};
static const uint16_t WebRtcSpl_kAllPassFilter2[3] = {
    21333, 49062, 63010
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcSpl_AllPassQMF(...)
|
||||
//
|
||||
// Allpass filter used by the analysis and synthesis parts of the QMF filter.
|
||||
//
|
||||
// Input:
|
||||
// - in_data : Input data sequence (Q10)
|
||||
// - data_length : Length of data sequence (>2)
|
||||
// - filter_coefficients : Filter coefficients (length 3, Q16)
|
||||
//
|
||||
// Input & Output:
|
||||
// - filter_state : Filter state (length 6, Q10).
|
||||
//
|
||||
// Output:
|
||||
// - out_data : Output data sequence (Q10), length equal to
|
||||
// |data_length|
|
||||
//
|
||||
|
||||
// Runs a single first-order all-pass section over |length| samples:
//
//   dst[n] = src[n-1] + coefficient * (src[n] - dst[n-1])
//
// |state[0]| supplies src[-1] and |state[1]| supplies dst[-1] for the first
// sample. Both are overwritten with the last input and output sample before
// returning, so the next call continues where this one stopped.
static void AllPassSection(const int32_t* src, size_t length, int32_t* dst,
                           uint16_t coefficient, int32_t* state)
{
    size_t n;
    int32_t delta;

    // First sample: the history comes from the stored state.
    delta = WebRtcSpl_SubSatW32(src[0], state[1]);
    dst[0] = WEBRTC_SPL_SCALEDIFF32(coefficient, delta, state[0]);

    // Remaining samples: the history is the previous element of each vector.
    for (n = 1; n < length; n++)
    {
        delta = WebRtcSpl_SubSatW32(src[n], dst[n - 1]);
        dst[n] = WEBRTC_SPL_SCALEDIFF32(coefficient, delta, src[n - 1]);
    }

    state[0] = src[length - 1];
    state[1] = dst[length - 1];
}

void WebRtcSpl_AllPassQMF(int32_t* in_data, size_t data_length,
                          int32_t* out_data, const uint16_t* filter_coefficients,
                          int32_t* filter_state)
{
    // The input is filtered by a cascade of three first-order all-pass
    // sections:
    //
    //         a_3 + q^-1    a_2 + q^-1    a_1 + q^-1
    // y[n] =  -----------   -----------   -----------   x[n]
    //         1 + a_3q^-1   1 + a_2q^-1   1 + a_1q^-1
    //
    // |filter_coefficients| holds a_1, a_2, a_3. |filter_state| holds, for
    // each section in turn, the previous input sample followed by the previous
    // output sample (six values in total).
    //
    // To avoid a scratch buffer the sections ping-pong between the two
    // vectors: in_data -> out_data -> in_data -> out_data. As a consequence
    // |in_data| is overwritten with the output of the second section.

    // Section 1 (a_1): x -> y_1, stored in out_data.
    AllPassSection(in_data, data_length, out_data,
                   filter_coefficients[0], &filter_state[0]);

    // Section 2 (a_2): y_1 -> y_2, stored back into in_data.
    AllPassSection(out_data, data_length, in_data,
                   filter_coefficients[1], &filter_state[2]);

    // Section 3 (a_3): y_2 -> y, the final output in out_data.
    AllPassSection(in_data, data_length, out_data,
                   filter_coefficients[2], &filter_state[4]);
}
|
||||
|
||||
void WebRtcSpl_AnalysisQMF(const int16_t* in_data, size_t in_data_length,
|
||||
int16_t* low_band, int16_t* high_band,
|
||||
int32_t* filter_state1, int32_t* filter_state2)
|
||||
{
|
||||
size_t i;
|
||||
int16_t k;
|
||||
int32_t tmp;
|
||||
int32_t half_in1[kMaxBandFrameLength];
|
||||
int32_t half_in2[kMaxBandFrameLength];
|
||||
int32_t filter1[kMaxBandFrameLength];
|
||||
int32_t filter2[kMaxBandFrameLength];
|
||||
const size_t band_length = in_data_length / 2;
|
||||
assert(in_data_length % 2 == 0);
|
||||
assert(band_length <= kMaxBandFrameLength);
|
||||
|
||||
// Split even and odd samples. Also shift them to Q10.
|
||||
for (i = 0, k = 0; i < band_length; i++, k += 2)
|
||||
{
|
||||
half_in2[i] = WEBRTC_SPL_LSHIFT_W32((int32_t)in_data[k], 10);
|
||||
half_in1[i] = WEBRTC_SPL_LSHIFT_W32((int32_t)in_data[k + 1], 10);
|
||||
}
|
||||
|
||||
// All pass filter even and odd samples, independently.
|
||||
WebRtcSpl_AllPassQMF(half_in1, band_length, filter1,
|
||||
WebRtcSpl_kAllPassFilter1, filter_state1);
|
||||
WebRtcSpl_AllPassQMF(half_in2, band_length, filter2,
|
||||
WebRtcSpl_kAllPassFilter2, filter_state2);
|
||||
|
||||
// Take the sum and difference of filtered version of odd and even
|
||||
// branches to get upper & lower band.
|
||||
for (i = 0; i < band_length; i++)
|
||||
{
|
||||
tmp = (filter1[i] + filter2[i] + 1024) >> 11;
|
||||
low_band[i] = WebRtcSpl_SatW32ToW16(tmp);
|
||||
|
||||
tmp = (filter1[i] - filter2[i] + 1024) >> 11;
|
||||
high_band[i] = WebRtcSpl_SatW32ToW16(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
void WebRtcSpl_SynthesisQMF(const int16_t* low_band, const int16_t* high_band,
|
||||
size_t band_length, int16_t* out_data,
|
||||
int32_t* filter_state1, int32_t* filter_state2)
|
||||
{
|
||||
int32_t tmp;
|
||||
int32_t half_in1[kMaxBandFrameLength];
|
||||
int32_t half_in2[kMaxBandFrameLength];
|
||||
int32_t filter1[kMaxBandFrameLength];
|
||||
int32_t filter2[kMaxBandFrameLength];
|
||||
size_t i;
|
||||
int16_t k;
|
||||
assert(band_length <= kMaxBandFrameLength);
|
||||
|
||||
// Obtain the sum and difference channels out of upper and lower-band channels.
|
||||
// Also shift to Q10 domain.
|
||||
for (i = 0; i < band_length; i++)
|
||||
{
|
||||
tmp = (int32_t)low_band[i] + (int32_t)high_band[i];
|
||||
half_in1[i] = WEBRTC_SPL_LSHIFT_W32(tmp, 10);
|
||||
tmp = (int32_t)low_band[i] - (int32_t)high_band[i];
|
||||
half_in2[i] = WEBRTC_SPL_LSHIFT_W32(tmp, 10);
|
||||
}
|
||||
|
||||
// all-pass filter the sum and difference channels
|
||||
WebRtcSpl_AllPassQMF(half_in1, band_length, filter1,
|
||||
WebRtcSpl_kAllPassFilter2, filter_state1);
|
||||
WebRtcSpl_AllPassQMF(half_in2, band_length, filter2,
|
||||
WebRtcSpl_kAllPassFilter1, filter_state2);
|
||||
|
||||
// The filtered signals are even and odd samples of the output. Combine
|
||||
// them. The signals are Q10 should shift them back to Q0 and take care of
|
||||
// saturation.
|
||||
for (i = 0, k = 0; i < band_length; i++)
|
||||
{
|
||||
tmp = (filter2[i] + 512) >> 10;
|
||||
out_data[k++] = WebRtcSpl_SatW32ToW16(tmp);
|
||||
|
||||
tmp = (filter1[i] + 512) >> 10;
|
||||
out_data[k++] = WebRtcSpl_SatW32ToW16(tmp);
|
||||
}
|
||||
|
||||
}
|
35
third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c
vendored
Normal file
35
third_party/webrtc/src/webrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c
vendored
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the function WebRtcSpl_SqrtOfOneMinusXSquared().
|
||||
* The description header can be found in signal_processing_library.h
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// For each of the |vector_length| elements x of |xQ15| (Q15), writes
// sqrt(1 - x^2) in Q15 to the matching element of |yQ15|.
void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t *xQ15, size_t vector_length,
                                      int16_t *yQ15)
{
    size_t idx;

    for (idx = 0; idx < vector_length; idx++)
    {
        const int16_t x = xQ15[idx];
        // 1073741823 is 2^30 - 1, i.e. "1" (~0.99999999906) in Q30.
        const int32_t one_minus_x2 = 1073741823 - x * x;  // Q30
        const int32_t root = WebRtcSpl_Sqrt(one_minus_x2);  // Q15

        yQ15[idx] = (int16_t)root;
    }
}
|
165
third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c
vendored
Normal file
165
third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations.c
vendored
Normal file
|
@ -0,0 +1,165 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains implementations of the functions
|
||||
* WebRtcSpl_VectorBitShiftW16()
|
||||
* WebRtcSpl_VectorBitShiftW32()
|
||||
* WebRtcSpl_VectorBitShiftW32ToW16()
|
||||
* WebRtcSpl_ScaleVector()
|
||||
* WebRtcSpl_ScaleVectorWithSat()
|
||||
* WebRtcSpl_ScaleAndAddVectors()
|
||||
* WebRtcSpl_ScaleAndAddVectorsWithRoundC()
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// Shifts each of the |length| elements of |in| and stores the result in |res|.
// A positive |right_shifts| shifts right by that many bits; zero or a negative
// value shifts left by -right_shifts bits.
void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length,
                                 const int16_t *in, int16_t right_shifts)
{
    size_t i;

    if (right_shifts > 0)
    {
        for (i = 0; i < length; i++)
        {
            res[i] = (int16_t)(in[i] >> right_shifts);
        }
    } else
    {
        const int left_shifts = -right_shifts;

        for (i = 0; i < length; i++)
        {
            // Shift in unsigned arithmetic: left-shifting a negative signed
            // value is undefined behaviour. The low 16 bits of the result are
            // the same as with the signed shift.
            res[i] = (int16_t)((uint32_t)in[i] << left_shifts);
        }
    }
}
|
||||
|
||||
// Shifts each of the |vector_length| elements of |in_vector| and stores the
// result in |out_vector|. A positive |right_shifts| shifts right by that many
// bits; zero or a negative value shifts left by -right_shifts bits.
void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector,
                                 size_t vector_length,
                                 const int32_t *in_vector,
                                 int16_t right_shifts)
{
    size_t i;

    if (right_shifts > 0)
    {
        for (i = 0; i < vector_length; i++)
        {
            out_vector[i] = in_vector[i] >> right_shifts;
        }
    } else
    {
        const int left_shifts = -right_shifts;

        for (i = 0; i < vector_length; i++)
        {
            // Shift in unsigned arithmetic: left-shifting a negative signed
            // value (or shifting a bit into the sign position) is undefined
            // behaviour. The resulting bit pattern is unchanged.
            out_vector[i] = (int32_t)((uint32_t)in_vector[i] << left_shifts);
        }
    }
}
|
||||
|
||||
// Shifts each of the |length| 32-bit elements of |in| and stores the result,
// passed through WebRtcSpl_SatW32ToW16(), in |out|. A non-negative
// |right_shifts| shifts right; a negative value shifts left by -right_shifts.
void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length,
                                      const int32_t* in, int right_shifts) {
  size_t i;
  int32_t tmp_w32;

  if (right_shifts >= 0) {
    for (i = 0; i < length; i++) {
      tmp_w32 = in[i] >> right_shifts;
      out[i] = WebRtcSpl_SatW32ToW16(tmp_w32);
    }
  } else {
    const int left_shifts = -right_shifts;

    for (i = 0; i < length; i++) {
      // Shift in unsigned arithmetic: left-shifting a negative signed value
      // is undefined behaviour. The resulting bit pattern is unchanged.
      // NOTE(review): bits shifted out of the 32-bit word are lost before
      // saturation is applied, exactly as in the signed form.
      tmp_w32 = (int32_t)((uint32_t)in[i] << left_shifts);
      out[i] = WebRtcSpl_SatW32ToW16(tmp_w32);
    }
  }
}
|
||||
|
||||
// Computes out_vector[i] = (gain * in_vector[i]) >> right_shifts for each of
// the |in_vector_length| elements. The result is truncated to 16 bits without
// saturation.
void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector,
                           int16_t gain, size_t in_vector_length,
                           int16_t right_shifts)
{
    size_t idx;

    for (idx = 0; idx < in_vector_length; idx++)
    {
        out_vector[idx] = (int16_t)((in_vector[idx] * gain) >> right_shifts);
    }
}
|
||||
|
||||
// Computes (gain * in_vector[i]) >> right_shifts for each of the
// |in_vector_length| elements and stores it in |out_vector| after passing it
// through WebRtcSpl_SatW32ToW16().
void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector,
                                  int16_t gain, size_t in_vector_length,
                                  int16_t right_shifts)
{
    size_t idx;

    for (idx = 0; idx < in_vector_length; idx++)
    {
        const int32_t scaled = (in_vector[idx] * gain) >> right_shifts;

        out_vector[idx] = WebRtcSpl_SatW32ToW16(scaled);
    }
}
|
||||
|
||||
// Computes, for each of the |vector_length| elements,
//   out[i] = ((gain1 * in1[i]) >> shift1) + ((gain2 * in2[i]) >> shift2)
// Each scaled term is truncated to 16 bits before the addition, and the sum
// is truncated to 16 bits again when stored.
void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1,
                                  const int16_t *in2, int16_t gain2, int shift2,
                                  int16_t *out, size_t vector_length)
{
    size_t idx;

    for (idx = 0; idx < vector_length; idx++)
    {
        const int16_t term1 = (int16_t)((gain1 * in1[idx]) >> shift1);
        const int16_t term2 = (int16_t)((gain2 * in2[idx]) >> shift2);

        out[idx] = term1 + term2;
    }
}
|
||||
|
||||
// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
|
||||
// Computes, for each of the |length| elements,
//   out_vector[i] = (in_vector1[i] * in_vector1_scale +
//                    in_vector2[i] * in_vector2_scale +
//                    round_value) >> right_shifts
// where round_value is half of 1 << right_shifts (0 when right_shifts is 0).
// The result is truncated to 16 bits.
//
// Returns 0 on success, or -1 if any pointer is NULL, |length| is 0 or
// |right_shifts| is negative.
int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
                                           int16_t in_vector1_scale,
                                           const int16_t* in_vector2,
                                           int16_t in_vector2_scale,
                                           int right_shifts,
                                           int16_t* out_vector,
                                           size_t length) {
  size_t i = 0;
  int round_value = 0;

  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
      length == 0 || right_shifts < 0) {
    return -1;
  }

  // Computed only after validation: shifting by a negative count is undefined
  // behaviour, so this must not run before right_shifts has been checked.
  round_value = (1 << right_shifts) >> 1;

  for (i = 0; i < length; i++) {
    out_vector[i] = (int16_t)((
        in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale +
        round_value) >> right_shifts);
  }

  return 0;
}
|
57
third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c
vendored
Normal file
57
third_party/webrtc/src/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c
vendored
Normal file
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file contains implementations of the functions
|
||||
* WebRtcSpl_ScaleAndAddVectorsWithRound_mips()
|
||||
*/
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,
|
||||
int16_t in_vector1_scale,
|
||||
const int16_t* in_vector2,
|
||||
int16_t in_vector2_scale,
|
||||
int right_shifts,
|
||||
int16_t* out_vector,
|
||||
size_t length) {
|
||||
int16_t r0 = 0, r1 = 0;
|
||||
int16_t *in1 = (int16_t*)in_vector1;
|
||||
int16_t *in2 = (int16_t*)in_vector2;
|
||||
int16_t *out = out_vector;
|
||||
size_t i = 0;
|
||||
int value32 = 0;
|
||||
|
||||
if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
|
||||
length == 0 || right_shifts < 0) {
|
||||
return -1;
|
||||
}
|
||||
for (i = 0; i < length; i++) {
|
||||
__asm __volatile (
|
||||
"lh %[r0], 0(%[in1]) \n\t"
|
||||
"lh %[r1], 0(%[in2]) \n\t"
|
||||
"mult %[r0], %[in_vector1_scale] \n\t"
|
||||
"madd %[r1], %[in_vector2_scale] \n\t"
|
||||
"extrv_r.w %[value32], $ac0, %[right_shifts] \n\t"
|
||||
"addiu %[in1], %[in1], 2 \n\t"
|
||||
"addiu %[in2], %[in2], 2 \n\t"
|
||||
"sh %[value32], 0(%[out]) \n\t"
|
||||
"addiu %[out], %[out], 2 \n\t"
|
||||
: [value32] "=&r" (value32), [out] "+r" (out), [in1] "+r" (in1),
|
||||
[in2] "+r" (in2), [r0] "=&r" (r0), [r1] "=&r" (r1)
|
||||
: [in_vector1_scale] "r" (in_vector1_scale),
|
||||
[in_vector2_scale] "r" (in_vector2_scale),
|
||||
[right_shifts] "r" (right_shifts)
|
||||
: "hi", "lo", "memory"
|
||||
);
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_COMMON_AUDIO_WAV_FILE_H_
#define WEBRTC_COMMON_AUDIO_WAV_FILE_H_

// size_t and uint32_t are used by the C wrappers at the bottom of this header
// as well as by the C++ classes, and FILE is used by the classes, so these
// headers are included regardless of the language mode.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#ifdef __cplusplus

#include <cstddef>
#include <string>

#include "webrtc/base/constructormagic.h"

namespace webrtc {

// Interface to provide access to WAV file parameters.
class WavFile {
 public:
  virtual ~WavFile() {}

  virtual int sample_rate() const = 0;
  virtual int num_channels() const = 0;
  virtual uint32_t num_samples() const = 0;
};

// Simple C++ class for writing 16-bit PCM WAV files. All error handling is
// by calls to RTC_CHECK(), making it unsuitable for anything but debug code.
class WavWriter final : public WavFile {
 public:
  // Open a new WAV file for writing.
  WavWriter(const std::string& filename, int sample_rate, int num_channels);

  // Close the WAV file, after writing its header.
  ~WavWriter();

  // Write additional samples to the file. Each sample is in the range
  // [-32768,32767], and there must be the previously specified number of
  // interleaved channels.
  void WriteSamples(const float* samples, size_t num_samples);
  void WriteSamples(const int16_t* samples, size_t num_samples);

  int sample_rate() const override { return sample_rate_; }
  int num_channels() const override { return num_channels_; }
  uint32_t num_samples() const override { return num_samples_; }

 private:
  void Close();
  const int sample_rate_;
  const int num_channels_;
  uint32_t num_samples_;  // Total number of samples written to file.
  FILE* file_handle_;     // Output file, owned by this class.

  RTC_DISALLOW_COPY_AND_ASSIGN(WavWriter);
};

// Follows the conventions of WavWriter.
class WavReader final : public WavFile {
 public:
  // Opens an existing WAV file for reading.
  explicit WavReader(const std::string& filename);

  // Close the WAV file.
  ~WavReader();

  // Returns the number of samples read. If this is less than requested,
  // verifies that the end of the file was reached.
  size_t ReadSamples(size_t num_samples, float* samples);
  size_t ReadSamples(size_t num_samples, int16_t* samples);

  int sample_rate() const override { return sample_rate_; }
  int num_channels() const override { return num_channels_; }
  uint32_t num_samples() const override { return num_samples_; }

 private:
  void Close();
  int sample_rate_;
  int num_channels_;
  uint32_t num_samples_;  // Total number of samples in the file.
  uint32_t num_samples_remaining_;
  FILE* file_handle_;  // Input file, owned by this class.

  RTC_DISALLOW_COPY_AND_ASSIGN(WavReader);
};

}  // namespace webrtc

extern "C" {
#endif  // __cplusplus

// C wrappers for the WavWriter class.
typedef struct rtc_WavWriter rtc_WavWriter;
rtc_WavWriter* rtc_WavOpen(const char* filename,
                           int sample_rate,
                           int num_channels);
void rtc_WavClose(rtc_WavWriter* wf);
void rtc_WavWriteSamples(rtc_WavWriter* wf,
                         const float* samples,
                         size_t num_samples);
int rtc_WavSampleRate(const rtc_WavWriter* wf);
int rtc_WavNumChannels(const rtc_WavWriter* wf);
uint32_t rtc_WavNumSamples(const rtc_WavWriter* wf);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // WEBRTC_COMMON_AUDIO_WAV_FILE_H_
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_

#include "webrtc/typedefs.h"

// ALIGN16_BEG / ALIGN16_END wrap a declaration that is to be aligned to
// 16 bytes. Visual C++ puts its alignment specifier in front of the type,
// gcc-style compilers put the attribute behind it, so exactly one of the two
// macros expands to something on any given compiler.
#if defined(_MSC_VER)
/* Visual C++ */
#define ALIGN16_BEG __declspec(align(16))
#define ALIGN16_END
#else
/* gcc or icc */
#define ALIGN16_BEG
#define ALIGN16_END __attribute__((aligned(16)))
#endif

// 16-byte aligned tables of 65 floats each; only declared here.
extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65];
extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65];
extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65];

// Constants without an alignment requirement; only declared here.
extern const float WebRtcAec_kExtendedSmoothingCoefficients[2][2];
extern const float WebRtcAec_kNormalSmoothingCoefficients[2][2];
extern const float WebRtcAec_kMinFarendPSD;

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,129 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Specifies the interface for the AEC core.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
// Block-size constants shared by the AEC core. PART_LEN1 and PART_LEN2 are
// derived from PART_LEN.
#define FRAME_LEN 80
#define PART_LEN 64               // Length of partition
#define PART_LEN1 (PART_LEN + 1)  // Unique fft coefficients
#define PART_LEN2 (PART_LEN * 2)  // Length of partition * 2
#define NUM_HIGH_BANDS_MAX 2      // Max number of high bands

// One complex value stored as a pair of floats.
typedef float complex_t[2];
// For performance reasons, some arrays of complex numbers are replaced by twice
// as long arrays of float, all the real parts followed by all the imaginary
// ones (complex_t[SIZE] -> float[2][SIZE]). This allows SIMD optimizations and
// is better than two arrays (one for the real parts and one for the imaginary
// parts) as this other way would require two pointers instead of one and cause
// extra register spilling. This also allows the offsets to be calculated at
// compile time.
|
||||
|
||||
// Metrics
// NOTE(review): kOffsetLevel is not referenced in this header; its exact role
// (presumably a level offset used by the metrics code) should be confirmed
// against the implementation.
enum {
  kOffsetLevel = -100
};
|
||||
|
||||
// Accumulator for one echo metric. WebRtcAec_GetEchoStats() hands out three of
// these (ERL, ERLE, A_NLP).
// NOTE(review): the meaning of the individual fields is inferred from their
// names only; in particular the hi* fields look like a second accumulation
// restricted to "high" values - confirm against the metrics code.
typedef struct Stats {
  float instant;
  float average;
  float min;
  float max;
  float sum;
  float hisum;
  float himean;
  int counter;
  int hicounter;
} Stats;
|
||||
|
||||
// AEC core state. Only the struct tag is declared here, so users of this
// header handle it exclusively through AecCore pointers.
typedef struct AecCore AecCore;

// Creates an AEC core instance. Returns NULL on error.
// Declared with (void) so that this is a real prototype in C; an empty
// parameter list would leave the arguments unchecked.
AecCore* WebRtcAec_CreateAec(void);
// NOTE(review): presumably releases an instance obtained from
// WebRtcAec_CreateAec() - confirm NULL handling in the implementation.
void WebRtcAec_FreeAec(AecCore* aec);
// NOTE(review): the meaning of the int result is not visible from this header.
int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
// Architecture-specific initialisation hooks. The MIPS and NEON variants are
// only declared when the corresponding build macros are defined.
void WebRtcAec_InitAec_SSE2(void);
#if defined(MIPS_FPU_LE)
void WebRtcAec_InitAec_mips(void);
#endif
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
void WebRtcAec_InitAec_neon(void);
#endif
|
||||
|
||||
void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend);
|
||||
void WebRtcAec_ProcessFrames(AecCore* aec,
|
||||
const float* const* nearend,
|
||||
size_t num_bands,
|
||||
size_t num_samples,
|
||||
int knownDelay,
|
||||
float* const* out);
|
||||
|
||||
// A helper function to call WebRtc_MoveReadPtr() for all far-end buffers.
|
||||
// Returns the number of elements moved, and adjusts |system_delay| by the
|
||||
// corresponding amount in ms.
|
||||
int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements);
|
||||
|
||||
// Calculates the median, standard deviation and amount of poor values among the
|
||||
// delay estimates aggregated up to the first call to the function. After that
|
||||
// first call the metrics are aggregated and updated every second. With poor
|
||||
// values we mean values that most likely will cause the AEC to perform poorly.
|
||||
// TODO(bjornv): Consider changing tests and tools to handle constant
|
||||
// constant aggregation window throughout the session instead.
|
||||
int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std,
|
||||
float* fraction_poor_delays);
|
||||
|
||||
// Returns the echo state (1: echo, 0: no echo).
|
||||
int WebRtcAec_echo_state(AecCore* self);
|
||||
|
||||
// Gets statistics of the echo metrics ERL, ERLE, A_NLP.
|
||||
void WebRtcAec_GetEchoStats(AecCore* self,
|
||||
Stats* erl,
|
||||
Stats* erle,
|
||||
Stats* a_nlp);
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
void* WebRtcAec_far_time_buf(AecCore* self);
|
||||
#endif
|
||||
|
||||
// Sets local configuration modes.
|
||||
void WebRtcAec_SetConfigCore(AecCore* self,
|
||||
int nlp_mode,
|
||||
int metrics_mode,
|
||||
int delay_logging);
|
||||
|
||||
// Non-zero enables, zero disables.
|
||||
void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable);
|
||||
|
||||
// Returns non-zero if delay agnostic (i.e., signal based delay estimation) is
|
||||
// enabled and zero if disabled.
|
||||
int WebRtcAec_delay_agnostic_enabled(AecCore* self);
|
||||
|
||||
// Enables or disables extended filter mode. Non-zero enables, zero disables.
|
||||
void WebRtcAec_enable_extended_filter(AecCore* self, int enable);
|
||||
|
||||
// Returns non-zero if extended filter mode is enabled and zero if disabled.
|
||||
int WebRtcAec_extended_filter_enabled(AecCore* self);
|
||||
|
||||
// Returns the current |system_delay|, i.e., the buffered difference between
|
||||
// far-end and near-end.
|
||||
int WebRtcAec_system_delay(AecCore* self);
|
||||
|
||||
// Sets the |system_delay| to |value|. Note that if the value is changed
|
||||
// improperly, there can be a performance regression. So it should be used with
|
||||
// care.
|
||||
void WebRtcAec_SetSystemDelay(AecCore* self, int delay);
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
|
202
third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h
vendored
Normal file
202
third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_core_internal.h
vendored
Normal file
|
@ -0,0 +1,202 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
|
||||
|
||||
#include "webrtc/common_audio/ring_buffer.h"
|
||||
#include "webrtc/common_audio/wav_file.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_common.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
// Number of partitions for the extended filter mode. The first one is an enum
// to be used in array declarations, as it represents the maximum filter length.
enum {
  kExtendedNumPartitions = 32
};
static const int kNormalNumPartitions = 12;

// Delay estimator constants, used for logging and delay compensation if
// reported delays are disabled.
enum {
  kLookaheadBlocks = 15
};
enum {
  // 500 ms for 16 kHz which is equivalent with the limit of reported delays.
  kHistorySizeBlocks = 125
};

// Extended filter adaptation parameters.
// TODO(ajm): No narrowband tuning yet.
static const float kExtendedMu = 0.4f;
static const float kExtendedErrorThreshold = 1.0e-6f;
|
||||
|
||||
// Level tracker; struct AecCore keeps one each for the far-end, near-end,
// linear-output and NLP-output signals.
// NOTE(review): field semantics are inferred from names only ("sfr"/"fr"
// presumably sub-frame/frame accumulators) - confirm against the code that
// updates them.
typedef struct PowerLevel {
  float sfrsum;
  int sfrcounter;
  float framelevel;
  float frsum;
  int frcounter;
  float minlevel;
  float averagelevel;
} PowerLevel;
|
||||
|
||||
struct AecCore {
|
||||
int farBufWritePos, farBufReadPos;
|
||||
|
||||
int knownDelay;
|
||||
int inSamples, outSamples;
|
||||
int delayEstCtr;
|
||||
|
||||
RingBuffer* nearFrBuf;
|
||||
RingBuffer* outFrBuf;
|
||||
|
||||
RingBuffer* nearFrBufH[NUM_HIGH_BANDS_MAX];
|
||||
RingBuffer* outFrBufH[NUM_HIGH_BANDS_MAX];
|
||||
|
||||
float dBuf[PART_LEN2]; // nearend
|
||||
float eBuf[PART_LEN2]; // error
|
||||
|
||||
float dBufH[NUM_HIGH_BANDS_MAX][PART_LEN2]; // nearend
|
||||
|
||||
float xPow[PART_LEN1];
|
||||
float dPow[PART_LEN1];
|
||||
float dMinPow[PART_LEN1];
|
||||
float dInitMinPow[PART_LEN1];
|
||||
float* noisePow;
|
||||
|
||||
float xfBuf[2][kExtendedNumPartitions * PART_LEN1]; // farend fft buffer
|
||||
float wfBuf[2][kExtendedNumPartitions * PART_LEN1]; // filter fft
|
||||
complex_t sde[PART_LEN1]; // cross-psd of nearend and error
|
||||
complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend
|
||||
// Farend windowed fft buffer.
|
||||
complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1];
|
||||
|
||||
float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near, error psd
|
||||
float hNs[PART_LEN1];
|
||||
float hNlFbMin, hNlFbLocalMin;
|
||||
float hNlXdAvgMin;
|
||||
int hNlNewMin, hNlMinCtr;
|
||||
float overDrive, overDriveSm;
|
||||
int nlp_mode;
|
||||
float outBuf[PART_LEN];
|
||||
int delayIdx;
|
||||
|
||||
short stNearState, echoState;
|
||||
short divergeState;
|
||||
|
||||
int xfBufBlockPos;
|
||||
|
||||
RingBuffer* far_buf;
|
||||
RingBuffer* far_buf_windowed;
|
||||
int system_delay; // Current system delay buffered in AEC.
|
||||
|
||||
int mult; // sampling frequency multiple
|
||||
int sampFreq;
|
||||
size_t num_bands;
|
||||
uint32_t seed;
|
||||
|
||||
float normal_mu; // stepsize
|
||||
float normal_error_threshold; // error threshold
|
||||
|
||||
int noiseEstCtr;
|
||||
|
||||
PowerLevel farlevel;
|
||||
PowerLevel nearlevel;
|
||||
PowerLevel linoutlevel;
|
||||
PowerLevel nlpoutlevel;
|
||||
|
||||
int metricsMode;
|
||||
int stateCounter;
|
||||
Stats erl;
|
||||
Stats erle;
|
||||
Stats aNlp;
|
||||
Stats rerl;
|
||||
|
||||
// Quantities to control H band scaling for SWB input
|
||||
int freq_avg_ic; // initial bin for averaging nlp gain
|
||||
int flag_Hband_cn; // for comfort noise
|
||||
float cn_scale_Hband; // scale for comfort noise in H band
|
||||
|
||||
int delay_metrics_delivered;
|
||||
int delay_histogram[kHistorySizeBlocks];
|
||||
int num_delay_values;
|
||||
int delay_median;
|
||||
int delay_std;
|
||||
float fraction_poor_delays;
|
||||
int delay_logging_enabled;
|
||||
void* delay_estimator_farend;
|
||||
void* delay_estimator;
|
||||
// Variables associated with delay correction through signal based delay
|
||||
// estimation feedback.
|
||||
int signal_delay_correction;
|
||||
int previous_delay;
|
||||
int delay_correction_count;
|
||||
int shift_offset;
|
||||
float delay_quality_threshold;
|
||||
int frame_count;
|
||||
|
||||
// 0 = delay agnostic mode (signal based delay correction) disabled.
|
||||
// Otherwise enabled.
|
||||
int delay_agnostic_enabled;
|
||||
// 1 = extended filter mode enabled, 0 = disabled.
|
||||
int extended_filter_enabled;
|
||||
// Runtime selection of number of filter partitions.
|
||||
int num_partitions;
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
// Sequence number of this AEC instance, so that different instances can
|
||||
// choose different dump file names.
|
||||
int instance_index;
|
||||
|
||||
// Number of times we've restarted dumping; used to pick new dump file names
|
||||
// each time.
|
||||
int debug_dump_count;
|
||||
|
||||
RingBuffer* far_time_buf;
|
||||
rtc_WavWriter* farFile;
|
||||
rtc_WavWriter* nearFile;
|
||||
rtc_WavWriter* outFile;
|
||||
rtc_WavWriter* outLinearFile;
|
||||
FILE* e_fft_file;
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]);
|
||||
extern WebRtcAecFilterFar WebRtcAec_FilterFar;
|
||||
typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]);
|
||||
extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
|
||||
typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec,
|
||||
float* fft,
|
||||
float ef[2][PART_LEN1]);
|
||||
extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
|
||||
typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec,
|
||||
float hNl[PART_LEN1],
|
||||
const float hNlFb,
|
||||
float efw[2][PART_LEN1]);
|
||||
extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
|
||||
|
||||
typedef void (*WebRtcAecComfortNoise)(AecCore* aec,
|
||||
float efw[2][PART_LEN1],
|
||||
complex_t* comfortNoiseHband,
|
||||
const float* noisePow,
|
||||
const float* lambda);
|
||||
extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
|
||||
|
||||
typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec,
|
||||
float efw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
float* fft,
|
||||
float* cohde,
|
||||
float* cohxd);
|
||||
extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
|
|
@ -0,0 +1,774 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The core AEC algorithm, which is presented with time-aligned signals.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
|
||||
|
||||
static const int flagHbandCn = 1;  // flag for adding comfort noise in H band
// Lookup tables defined in another translation unit (65 entries each).
extern const float WebRtcAec_weightCurve[65];
extern const float WebRtcAec_overDriveCurve[65];
|
||||
|
||||
void WebRtcAec_ComfortNoise_mips(AecCore* aec,
|
||||
float efw[2][PART_LEN1],
|
||||
complex_t* comfortNoiseHband,
|
||||
const float* noisePow,
|
||||
const float* lambda) {
|
||||
int i, num;
|
||||
float rand[PART_LEN];
|
||||
float noise, noiseAvg, tmp, tmpAvg;
|
||||
int16_t randW16[PART_LEN];
|
||||
complex_t u[PART_LEN1];
|
||||
|
||||
const float pi2 = 6.28318530717959f;
|
||||
const float pi2t = pi2 / 32768;
|
||||
|
||||
// Generate a uniform random array on [0 1]
|
||||
WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
|
||||
|
||||
int16_t* randWptr = randW16;
|
||||
float randTemp, randTemp2, randTemp3, randTemp4;
|
||||
int32_t tmp1s, tmp2s, tmp3s, tmp4s;
|
||||
|
||||
for (i = 0; i < PART_LEN; i+=4) {
|
||||
__asm __volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"lh %[tmp1s], 0(%[randWptr]) \n\t"
|
||||
"lh %[tmp2s], 2(%[randWptr]) \n\t"
|
||||
"lh %[tmp3s], 4(%[randWptr]) \n\t"
|
||||
"lh %[tmp4s], 6(%[randWptr]) \n\t"
|
||||
"mtc1 %[tmp1s], %[randTemp] \n\t"
|
||||
"mtc1 %[tmp2s], %[randTemp2] \n\t"
|
||||
"mtc1 %[tmp3s], %[randTemp3] \n\t"
|
||||
"mtc1 %[tmp4s], %[randTemp4] \n\t"
|
||||
"cvt.s.w %[randTemp], %[randTemp] \n\t"
|
||||
"cvt.s.w %[randTemp2], %[randTemp2] \n\t"
|
||||
"cvt.s.w %[randTemp3], %[randTemp3] \n\t"
|
||||
"cvt.s.w %[randTemp4], %[randTemp4] \n\t"
|
||||
"addiu %[randWptr], %[randWptr], 8 \n\t"
|
||||
"mul.s %[randTemp], %[randTemp], %[pi2t] \n\t"
|
||||
"mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t"
|
||||
"mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t"
|
||||
"mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t"
|
||||
".set pop \n\t"
|
||||
: [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),
|
||||
[randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),
|
||||
[randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),
|
||||
[tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),
|
||||
[tmp4s] "=&r" (tmp4s)
|
||||
: [pi2t] "f" (pi2t)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
u[i+1][0] = cosf(randTemp);
|
||||
u[i+1][1] = sinf(randTemp);
|
||||
u[i+2][0] = cosf(randTemp2);
|
||||
u[i+2][1] = sinf(randTemp2);
|
||||
u[i+3][0] = cosf(randTemp3);
|
||||
u[i+3][1] = sinf(randTemp3);
|
||||
u[i+4][0] = cosf(randTemp4);
|
||||
u[i+4][1] = sinf(randTemp4);
|
||||
}
|
||||
|
||||
// Reject LF noise
|
||||
float* u_ptr = &u[1][0];
|
||||
float noise2, noise3, noise4;
|
||||
float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;
|
||||
|
||||
u[0][0] = 0;
|
||||
u[0][1] = 0;
|
||||
for (i = 1; i < PART_LEN1; i+=4) {
|
||||
__asm __volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"lwc1 %[noise], 4(%[noisePow]) \n\t"
|
||||
"lwc1 %[noise2], 8(%[noisePow]) \n\t"
|
||||
"lwc1 %[noise3], 12(%[noisePow]) \n\t"
|
||||
"lwc1 %[noise4], 16(%[noisePow]) \n\t"
|
||||
"sqrt.s %[noise], %[noise] \n\t"
|
||||
"sqrt.s %[noise2], %[noise2] \n\t"
|
||||
"sqrt.s %[noise3], %[noise3] \n\t"
|
||||
"sqrt.s %[noise4], %[noise4] \n\t"
|
||||
"lwc1 %[tmp1f], 0(%[u_ptr]) \n\t"
|
||||
"lwc1 %[tmp2f], 4(%[u_ptr]) \n\t"
|
||||
"lwc1 %[tmp3f], 8(%[u_ptr]) \n\t"
|
||||
"lwc1 %[tmp4f], 12(%[u_ptr]) \n\t"
|
||||
"lwc1 %[tmp5f], 16(%[u_ptr]) \n\t"
|
||||
"lwc1 %[tmp6f], 20(%[u_ptr]) \n\t"
|
||||
"lwc1 %[tmp7f], 24(%[u_ptr]) \n\t"
|
||||
"lwc1 %[tmp8f], 28(%[u_ptr]) \n\t"
|
||||
"addiu %[noisePow], %[noisePow], 16 \n\t"
|
||||
"mul.s %[tmp1f], %[tmp1f], %[noise] \n\t"
|
||||
"mul.s %[tmp2f], %[tmp2f], %[noise] \n\t"
|
||||
"mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t"
|
||||
"mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t"
|
||||
"mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t"
|
||||
"mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t"
|
||||
"swc1 %[tmp1f], 0(%[u_ptr]) \n\t"
|
||||
"swc1 %[tmp3f], 8(%[u_ptr]) \n\t"
|
||||
"mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t"
|
||||
"mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t"
|
||||
"neg.s %[tmp2f] \n\t"
|
||||
"neg.s %[tmp4f] \n\t"
|
||||
"neg.s %[tmp6f] \n\t"
|
||||
"neg.s %[tmp8f] \n\t"
|
||||
"swc1 %[tmp5f], 16(%[u_ptr]) \n\t"
|
||||
"swc1 %[tmp7f], 24(%[u_ptr]) \n\t"
|
||||
"swc1 %[tmp2f], 4(%[u_ptr]) \n\t"
|
||||
"swc1 %[tmp4f], 12(%[u_ptr]) \n\t"
|
||||
"swc1 %[tmp6f], 20(%[u_ptr]) \n\t"
|
||||
"swc1 %[tmp8f], 28(%[u_ptr]) \n\t"
|
||||
"addiu %[u_ptr], %[u_ptr], 32 \n\t"
|
||||
".set pop \n\t"
|
||||
: [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow),
|
||||
[noise] "=&f" (noise), [noise2] "=&f" (noise2),
|
||||
[noise3] "=&f" (noise3), [noise4] "=&f" (noise4),
|
||||
[tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),
|
||||
[tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),
|
||||
[tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),
|
||||
[tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)
|
||||
:
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
u[PART_LEN][1] = 0;
|
||||
noisePow -= PART_LEN;
|
||||
|
||||
u_ptr = &u[0][0];
|
||||
float* u_ptr_end = &u[PART_LEN][0];
|
||||
float* efw_ptr_0 = &efw[0][0];
|
||||
float* efw_ptr_1 = &efw[1][0];
|
||||
float tmp9f, tmp10f;
|
||||
const float tmp1c = 1.0;
|
||||
|
||||
__asm __volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"1: \n\t"
|
||||
"lwc1 %[tmp1f], 0(%[lambda]) \n\t"
|
||||
"lwc1 %[tmp6f], 4(%[lambda]) \n\t"
|
||||
"addiu %[lambda], %[lambda], 8 \n\t"
|
||||
"c.lt.s %[tmp1f], %[tmp1c] \n\t"
|
||||
"bc1f 4f \n\t"
|
||||
" nop \n\t"
|
||||
"c.lt.s %[tmp6f], %[tmp1c] \n\t"
|
||||
"bc1f 3f \n\t"
|
||||
" nop \n\t"
|
||||
"2: \n\t"
|
||||
"mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
|
||||
"mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
|
||||
"sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
|
||||
"sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
|
||||
"sqrt.s %[tmp1f], %[tmp1f] \n\t"
|
||||
"sqrt.s %[tmp6f], %[tmp6f] \n\t"
|
||||
"lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
|
||||
"lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
|
||||
"lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
|
||||
"lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
|
||||
"lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
|
||||
"lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
|
||||
"lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
|
||||
"lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
|
||||
#if !defined(MIPS32_R2_LE)
|
||||
"mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
|
||||
"add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
|
||||
"mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
|
||||
"add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
|
||||
"mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
|
||||
"add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
|
||||
"mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
|
||||
"add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
|
||||
#else // #if !defined(MIPS32_R2_LE)
|
||||
"madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
|
||||
"madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
|
||||
"madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
|
||||
"madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
|
||||
#endif // #if !defined(MIPS32_R2_LE)
|
||||
"swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
|
||||
"swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
|
||||
"swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
|
||||
"b 5f \n\t"
|
||||
" swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
|
||||
"3: \n\t"
|
||||
"mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
|
||||
"sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
|
||||
"sqrt.s %[tmp1f], %[tmp1f] \n\t"
|
||||
"lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
|
||||
"lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
|
||||
"lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
|
||||
"lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
|
||||
#if !defined(MIPS32_R2_LE)
|
||||
"mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
|
||||
"add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
|
||||
"mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
|
||||
"add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
|
||||
#else // #if !defined(MIPS32_R2_LE)
|
||||
"madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
|
||||
"madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
|
||||
#endif // #if !defined(MIPS32_R2_LE)
|
||||
"swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
|
||||
"b 5f \n\t"
|
||||
" swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
|
||||
"4: \n\t"
|
||||
"c.lt.s %[tmp6f], %[tmp1c] \n\t"
|
||||
"bc1f 5f \n\t"
|
||||
" nop \n\t"
|
||||
"mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
|
||||
"sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
|
||||
"sqrt.s %[tmp6f], %[tmp6f] \n\t"
|
||||
"lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
|
||||
"lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
|
||||
"lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
|
||||
"lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
|
||||
#if !defined(MIPS32_R2_LE)
|
||||
"mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
|
||||
"add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
|
||||
"mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
|
||||
"add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
|
||||
#else // #if !defined(MIPS32_R2_LE)
|
||||
"madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
|
||||
"madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
|
||||
#endif // #if !defined(MIPS32_R2_LE)
|
||||
"swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
|
||||
"swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
|
||||
"5: \n\t"
|
||||
"addiu %[u_ptr], %[u_ptr], 16 \n\t"
|
||||
"addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t"
|
||||
"bne %[u_ptr], %[u_ptr_end], 1b \n\t"
|
||||
" addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t"
|
||||
".set pop \n\t"
|
||||
: [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),
|
||||
[efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),
|
||||
[tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),
|
||||
[tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),
|
||||
[tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),
|
||||
[tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)
|
||||
: [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
lambda -= PART_LEN;
|
||||
tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
|
||||
//tmp = 1 - lambda[i];
|
||||
efw[0][PART_LEN] += tmp * u[PART_LEN][0];
|
||||
efw[1][PART_LEN] += tmp * u[PART_LEN][1];
|
||||
|
||||
// For H band comfort noise
|
||||
// TODO: don't compute noise and "tmp" twice. Use the previous results.
|
||||
noiseAvg = 0.0;
|
||||
tmpAvg = 0.0;
|
||||
num = 0;
|
||||
if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) {
|
||||
for (i = 0; i < PART_LEN; i++) {
|
||||
rand[i] = ((float)randW16[i]) / 32768;
|
||||
}
|
||||
|
||||
// average noise scale
|
||||
// average over second half of freq spectrum (i.e., 4->8khz)
|
||||
// TODO: we shouldn't need num. We know how many elements we're summing.
|
||||
for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
|
||||
num++;
|
||||
noiseAvg += sqrtf(noisePow[i]);
|
||||
}
|
||||
noiseAvg /= (float)num;
|
||||
|
||||
// average nlp scale
|
||||
// average over second half of freq spectrum (i.e., 4->8khz)
|
||||
// TODO: we shouldn't need num. We know how many elements we're summing.
|
||||
num = 0;
|
||||
for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
|
||||
num++;
|
||||
tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
|
||||
}
|
||||
tmpAvg /= (float)num;
|
||||
|
||||
// Use average noise for H band
|
||||
// TODO: we should probably have a new random vector here.
|
||||
// Reject LF noise
|
||||
u[0][0] = 0;
|
||||
u[0][1] = 0;
|
||||
for (i = 1; i < PART_LEN1; i++) {
|
||||
tmp = pi2 * rand[i - 1];
|
||||
|
||||
// Use average noise for H band
|
||||
u[i][0] = noiseAvg * (float)cos(tmp);
|
||||
u[i][1] = -noiseAvg * (float)sin(tmp);
|
||||
}
|
||||
u[PART_LEN][1] = 0;
|
||||
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
// Use average NLP weight for H band
|
||||
comfortNoiseHband[i][0] = tmpAvg * u[i][0];
|
||||
comfortNoiseHband[i][1] = tmpAvg * u[i][1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MIPS-optimized far-end filtering.
//
// For every partition i in [0, aec->num_partitions) and every bin, adds the
// complex product of the far-end spectrum (aec->xfBuf) and the filter
// (aec->wfBuf) to |yf|:
//   yf[0][k] += aRe * bRe - aIm * bIm
//   yf[1][k] += aRe * bIm + aIm * bRe
// |yf| is accumulated into, not overwritten, so the caller provides its
// initial contents.
//
// aec: supplies xfBuf, wfBuf, xfBufBlockPos and num_partitions.
// yf:  [0] real parts, [1] imaginary parts, PART_LEN1 bins each.
void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) {
  int i;
  for (i = 0; i < aec->num_partitions; i++) {
    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
    int pos = i * PART_LEN1;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
      xPos -= aec->num_partitions * (PART_LEN1);
    }
    float* yf0 = yf[0];
    float* yf1 = yf[1];
    float* aRe = aec->xfBuf[0] + xPos;
    float* aIm = aec->xfBuf[1] + xPos;
    float* bRe = aec->wfBuf[0] + pos;
    float* bIm = aec->wfBuf[1] + pos;
    // f11 and f12 are only referenced by the non-R2 instruction sequence.
    float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f11, f12;
    // The loop handles two bins per pass (PART_LEN1 >> 1 passes); the code
    // after the loop handles the one remaining bin.
    int len = PART_LEN1 >> 1;

    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
      "1: \n\t"
      "lwc1 %[f0], 0(%[aRe]) \n\t"
      "lwc1 %[f1], 0(%[bRe]) \n\t"
      "lwc1 %[f2], 0(%[bIm]) \n\t"
      "lwc1 %[f3], 0(%[aIm]) \n\t"
      "lwc1 %[f4], 4(%[aRe]) \n\t"
      "lwc1 %[f5], 4(%[bRe]) \n\t"
      "lwc1 %[f6], 4(%[bIm]) \n\t"
      "mul.s %[f8], %[f0], %[f1] \n\t"
      "mul.s %[f0], %[f0], %[f2] \n\t"
      "mul.s %[f9], %[f4], %[f5] \n\t"
      "mul.s %[f4], %[f4], %[f6] \n\t"
      "lwc1 %[f7], 4(%[aIm]) \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s %[f12], %[f2], %[f3] \n\t"
      "mul.s %[f1], %[f3], %[f1] \n\t"
      "mul.s %[f11], %[f6], %[f7] \n\t"
      "addiu %[aRe], %[aRe], 8 \n\t"
      "addiu %[aIm], %[aIm], 8 \n\t"
      "addiu %[len], %[len], -1 \n\t"
      "sub.s %[f8], %[f8], %[f12] \n\t"
      "mul.s %[f12], %[f7], %[f5] \n\t"
      "lwc1 %[f2], 0(%[yf0]) \n\t"
      "add.s %[f1], %[f0], %[f1] \n\t"
      "lwc1 %[f3], 0(%[yf1]) \n\t"
      "sub.s %[f9], %[f9], %[f11] \n\t"
      "lwc1 %[f6], 4(%[yf0]) \n\t"
      "add.s %[f4], %[f4], %[f12] \n\t"
#else  // #if !defined(MIPS32_R2_LE)
      "addiu %[aRe], %[aRe], 8 \n\t"
      "addiu %[aIm], %[aIm], 8 \n\t"
      "addiu %[len], %[len], -1 \n\t"
      "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t"
      "lwc1 %[f2], 0(%[yf0]) \n\t"
      "madd.s %[f1], %[f0], %[f3], %[f1] \n\t"
      "lwc1 %[f3], 0(%[yf1]) \n\t"
      "nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t"
      "lwc1 %[f6], 4(%[yf0]) \n\t"
      "madd.s %[f4], %[f4], %[f7], %[f5] \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
      "lwc1 %[f5], 4(%[yf1]) \n\t"
      "add.s %[f2], %[f2], %[f8] \n\t"
      "addiu %[bRe], %[bRe], 8 \n\t"
      "addiu %[bIm], %[bIm], 8 \n\t"
      "add.s %[f3], %[f3], %[f1] \n\t"
      "add.s %[f6], %[f6], %[f9] \n\t"
      "add.s %[f5], %[f5], %[f4] \n\t"
      "swc1 %[f2], 0(%[yf0]) \n\t"
      "swc1 %[f3], 0(%[yf1]) \n\t"
      "swc1 %[f6], 4(%[yf0]) \n\t"
      "swc1 %[f5], 4(%[yf1]) \n\t"
      "addiu %[yf0], %[yf0], 8 \n\t"
      "bgtz %[len], 1b \n\t"
      " addiu %[yf1], %[yf1], 8 \n\t"
      "lwc1 %[f0], 0(%[aRe]) \n\t"
      "lwc1 %[f1], 0(%[bRe]) \n\t"
      "lwc1 %[f2], 0(%[bIm]) \n\t"
      "lwc1 %[f3], 0(%[aIm]) \n\t"
      "mul.s %[f8], %[f0], %[f1] \n\t"
      "mul.s %[f0], %[f0], %[f2] \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s %[f12], %[f2], %[f3] \n\t"
      "mul.s %[f1], %[f3], %[f1] \n\t"
      "sub.s %[f8], %[f8], %[f12] \n\t"
      "lwc1 %[f2], 0(%[yf0]) \n\t"
      "add.s %[f1], %[f0], %[f1] \n\t"
      "lwc1 %[f3], 0(%[yf1]) \n\t"
#else  // #if !defined(MIPS32_R2_LE)
      "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t"
      "lwc1 %[f2], 0(%[yf0]) \n\t"
      "madd.s %[f1], %[f0], %[f3], %[f1] \n\t"
      "lwc1 %[f3], 0(%[yf1]) \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
      "add.s %[f2], %[f2], %[f8] \n\t"
      "add.s %[f3], %[f3], %[f1] \n\t"
      "swc1 %[f2], 0(%[yf0]) \n\t"
      "swc1 %[f3], 0(%[yf1]) \n\t"
      ".set pop \n\t"
      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
        [f9] "=&f" (f9), [f11] "=&f" (f11), [f12] "=&f" (f12),
        [aRe] "+r" (aRe), [aIm] "+r" (aIm), [bRe] "+r" (bRe),
        [bIm] "+r" (bIm), [yf0] "+r" (yf0), [yf1] "+r" (yf1),
        [len] "+r" (len)
      :
      : "memory"
    );
  }
}
|
||||
|
||||
void WebRtcAec_FilterAdaptation_mips(AecCore* aec,
|
||||
float* fft,
|
||||
float ef[2][PART_LEN1]) {
|
||||
int i;
|
||||
for (i = 0; i < aec->num_partitions; i++) {
|
||||
int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
|
||||
int pos;
|
||||
// Check for wrap
|
||||
if (i + aec->xfBufBlockPos >= aec->num_partitions) {
|
||||
xPos -= aec->num_partitions * PART_LEN1;
|
||||
}
|
||||
|
||||
pos = i * PART_LEN1;
|
||||
float* aRe = aec->xfBuf[0] + xPos;
|
||||
float* aIm = aec->xfBuf[1] + xPos;
|
||||
float* bRe = ef[0];
|
||||
float* bIm = ef[1];
|
||||
float* fft_tmp;
|
||||
|
||||
float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12;
|
||||
int len = PART_LEN >> 1;
|
||||
|
||||
__asm __volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"addiu %[fft_tmp], %[fft], 0 \n\t"
|
||||
"1: \n\t"
|
||||
"lwc1 %[f0], 0(%[aRe]) \n\t"
|
||||
"lwc1 %[f1], 0(%[bRe]) \n\t"
|
||||
"lwc1 %[f2], 0(%[bIm]) \n\t"
|
||||
"lwc1 %[f4], 4(%[aRe]) \n\t"
|
||||
"lwc1 %[f5], 4(%[bRe]) \n\t"
|
||||
"lwc1 %[f6], 4(%[bIm]) \n\t"
|
||||
"addiu %[aRe], %[aRe], 8 \n\t"
|
||||
"addiu %[bRe], %[bRe], 8 \n\t"
|
||||
"mul.s %[f8], %[f0], %[f1] \n\t"
|
||||
"mul.s %[f0], %[f0], %[f2] \n\t"
|
||||
"lwc1 %[f3], 0(%[aIm]) \n\t"
|
||||
"mul.s %[f9], %[f4], %[f5] \n\t"
|
||||
"lwc1 %[f7], 4(%[aIm]) \n\t"
|
||||
"mul.s %[f4], %[f4], %[f6] \n\t"
|
||||
#if !defined(MIPS32_R2_LE)
|
||||
"mul.s %[f10], %[f3], %[f2] \n\t"
|
||||
"mul.s %[f1], %[f3], %[f1] \n\t"
|
||||
"mul.s %[f11], %[f7], %[f6] \n\t"
|
||||
"mul.s %[f5], %[f7], %[f5] \n\t"
|
||||
"addiu %[aIm], %[aIm], 8 \n\t"
|
||||
"addiu %[bIm], %[bIm], 8 \n\t"
|
||||
"addiu %[len], %[len], -1 \n\t"
|
||||
"add.s %[f8], %[f8], %[f10] \n\t"
|
||||
"sub.s %[f1], %[f0], %[f1] \n\t"
|
||||
"add.s %[f9], %[f9], %[f11] \n\t"
|
||||
"sub.s %[f5], %[f4], %[f5] \n\t"
|
||||
#else // #if !defined(MIPS32_R2_LE)
|
||||
"addiu %[aIm], %[aIm], 8 \n\t"
|
||||
"addiu %[bIm], %[bIm], 8 \n\t"
|
||||
"addiu %[len], %[len], -1 \n\t"
|
||||
"madd.s %[f8], %[f8], %[f3], %[f2] \n\t"
|
||||
"nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t"
|
||||
"madd.s %[f9], %[f9], %[f7], %[f6] \n\t"
|
||||
"nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t"
|
||||
#endif // #if !defined(MIPS32_R2_LE)
|
||||
"swc1 %[f8], 0(%[fft_tmp]) \n\t"
|
||||
"swc1 %[f1], 4(%[fft_tmp]) \n\t"
|
||||
"swc1 %[f9], 8(%[fft_tmp]) \n\t"
|
||||
"swc1 %[f5], 12(%[fft_tmp]) \n\t"
|
||||
"bgtz %[len], 1b \n\t"
|
||||
" addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
|
||||
"lwc1 %[f0], 0(%[aRe]) \n\t"
|
||||
"lwc1 %[f1], 0(%[bRe]) \n\t"
|
||||
"lwc1 %[f2], 0(%[bIm]) \n\t"
|
||||
"lwc1 %[f3], 0(%[aIm]) \n\t"
|
||||
"mul.s %[f8], %[f0], %[f1] \n\t"
|
||||
#if !defined(MIPS32_R2_LE)
|
||||
"mul.s %[f10], %[f3], %[f2] \n\t"
|
||||
"add.s %[f8], %[f8], %[f10] \n\t"
|
||||
#else // #if !defined(MIPS32_R2_LE)
|
||||
"madd.s %[f8], %[f8], %[f3], %[f2] \n\t"
|
||||
#endif // #if !defined(MIPS32_R2_LE)
|
||||
"swc1 %[f8], 4(%[fft]) \n\t"
|
||||
".set pop \n\t"
|
||||
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
|
||||
[f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
|
||||
[f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
|
||||
[f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
|
||||
[f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm),
|
||||
[bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp),
|
||||
[len] "+r" (len)
|
||||
: [fft] "r" (fft)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
aec_rdft_inverse_128(fft);
|
||||
memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
|
||||
|
||||
// fft scaling
|
||||
{
|
||||
float scale = 2.0f / PART_LEN2;
|
||||
__asm __volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"addiu %[fft_tmp], %[fft], 0 \n\t"
|
||||
"addiu %[len], $zero, 8 \n\t"
|
||||
"1: \n\t"
|
||||
"addiu %[len], %[len], -1 \n\t"
|
||||
"lwc1 %[f0], 0(%[fft_tmp]) \n\t"
|
||||
"lwc1 %[f1], 4(%[fft_tmp]) \n\t"
|
||||
"lwc1 %[f2], 8(%[fft_tmp]) \n\t"
|
||||
"lwc1 %[f3], 12(%[fft_tmp]) \n\t"
|
||||
"mul.s %[f0], %[f0], %[scale] \n\t"
|
||||
"mul.s %[f1], %[f1], %[scale] \n\t"
|
||||
"mul.s %[f2], %[f2], %[scale] \n\t"
|
||||
"mul.s %[f3], %[f3], %[scale] \n\t"
|
||||
"lwc1 %[f4], 16(%[fft_tmp]) \n\t"
|
||||
"lwc1 %[f5], 20(%[fft_tmp]) \n\t"
|
||||
"lwc1 %[f6], 24(%[fft_tmp]) \n\t"
|
||||
"lwc1 %[f7], 28(%[fft_tmp]) \n\t"
|
||||
"mul.s %[f4], %[f4], %[scale] \n\t"
|
||||
"mul.s %[f5], %[f5], %[scale] \n\t"
|
||||
"mul.s %[f6], %[f6], %[scale] \n\t"
|
||||
"mul.s %[f7], %[f7], %[scale] \n\t"
|
||||
"swc1 %[f0], 0(%[fft_tmp]) \n\t"
|
||||
"swc1 %[f1], 4(%[fft_tmp]) \n\t"
|
||||
"swc1 %[f2], 8(%[fft_tmp]) \n\t"
|
||||
"swc1 %[f3], 12(%[fft_tmp]) \n\t"
|
||||
"swc1 %[f4], 16(%[fft_tmp]) \n\t"
|
||||
"swc1 %[f5], 20(%[fft_tmp]) \n\t"
|
||||
"swc1 %[f6], 24(%[fft_tmp]) \n\t"
|
||||
"swc1 %[f7], 28(%[fft_tmp]) \n\t"
|
||||
"bgtz %[len], 1b \n\t"
|
||||
" addiu %[fft_tmp], %[fft_tmp], 32 \n\t"
|
||||
".set pop \n\t"
|
||||
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
|
||||
[f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
|
||||
[f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
|
||||
[fft_tmp] "=&r" (fft_tmp)
|
||||
: [scale] "f" (scale), [fft] "r" (fft)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
aec_rdft_forward_128(fft);
|
||||
aRe = aec->wfBuf[0] + pos;
|
||||
aIm = aec->wfBuf[1] + pos;
|
||||
__asm __volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"addiu %[fft_tmp], %[fft], 0 \n\t"
|
||||
"addiu %[len], $zero, 31 \n\t"
|
||||
"lwc1 %[f0], 0(%[aRe]) \n\t"
|
||||
"lwc1 %[f1], 0(%[fft_tmp]) \n\t"
|
||||
"lwc1 %[f2], 256(%[aRe]) \n\t"
|
||||
"lwc1 %[f3], 4(%[fft_tmp]) \n\t"
|
||||
"lwc1 %[f4], 4(%[aRe]) \n\t"
|
||||
"lwc1 %[f5], 8(%[fft_tmp]) \n\t"
|
||||
"lwc1 %[f6], 4(%[aIm]) \n\t"
|
||||
"lwc1 %[f7], 12(%[fft_tmp]) \n\t"
|
||||
"add.s %[f0], %[f0], %[f1] \n\t"
|
||||
"add.s %[f2], %[f2], %[f3] \n\t"
|
||||
"add.s %[f4], %[f4], %[f5] \n\t"
|
||||
"add.s %[f6], %[f6], %[f7] \n\t"
|
||||
"addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
|
||||
"swc1 %[f0], 0(%[aRe]) \n\t"
|
||||
"swc1 %[f2], 256(%[aRe]) \n\t"
|
||||
"swc1 %[f4], 4(%[aRe]) \n\t"
|
||||
"addiu %[aRe], %[aRe], 8 \n\t"
|
||||
"swc1 %[f6], 4(%[aIm]) \n\t"
|
||||
"addiu %[aIm], %[aIm], 8 \n\t"
|
||||
"1: \n\t"
|
||||
"lwc1 %[f0], 0(%[aRe]) \n\t"
|
||||
"lwc1 %[f1], 0(%[fft_tmp]) \n\t"
|
||||
"lwc1 %[f2], 0(%[aIm]) \n\t"
|
||||
"lwc1 %[f3], 4(%[fft_tmp]) \n\t"
|
||||
"lwc1 %[f4], 4(%[aRe]) \n\t"
|
||||
"lwc1 %[f5], 8(%[fft_tmp]) \n\t"
|
||||
"lwc1 %[f6], 4(%[aIm]) \n\t"
|
||||
"lwc1 %[f7], 12(%[fft_tmp]) \n\t"
|
||||
"add.s %[f0], %[f0], %[f1] \n\t"
|
||||
"add.s %[f2], %[f2], %[f3] \n\t"
|
||||
"add.s %[f4], %[f4], %[f5] \n\t"
|
||||
"add.s %[f6], %[f6], %[f7] \n\t"
|
||||
"addiu %[len], %[len], -1 \n\t"
|
||||
"addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
|
||||
"swc1 %[f0], 0(%[aRe]) \n\t"
|
||||
"swc1 %[f2], 0(%[aIm]) \n\t"
|
||||
"swc1 %[f4], 4(%[aRe]) \n\t"
|
||||
"addiu %[aRe], %[aRe], 8 \n\t"
|
||||
"swc1 %[f6], 4(%[aIm]) \n\t"
|
||||
"bgtz %[len], 1b \n\t"
|
||||
" addiu %[aIm], %[aIm], 8 \n\t"
|
||||
".set pop \n\t"
|
||||
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
|
||||
[f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
|
||||
[f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
|
||||
[fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm)
|
||||
: [fft] "r" (fft)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
|
||||
float hNl[PART_LEN1],
|
||||
const float hNlFb,
|
||||
float efw[2][PART_LEN1]) {
|
||||
int i;
|
||||
const float one = 1.0;
|
||||
float* p_hNl;
|
||||
float* p_efw0;
|
||||
float* p_efw1;
|
||||
float* p_WebRtcAec_wC;
|
||||
float temp1, temp2, temp3, temp4;
|
||||
|
||||
p_hNl = &hNl[0];
|
||||
p_efw0 = &efw[0][0];
|
||||
p_efw1 = &efw[1][0];
|
||||
p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0];
|
||||
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
// Weight subbands
|
||||
__asm __volatile (
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"lwc1 %[temp1], 0(%[p_hNl]) \n\t"
|
||||
"lwc1 %[temp2], 0(%[p_wC]) \n\t"
|
||||
"c.lt.s %[hNlFb], %[temp1] \n\t"
|
||||
"bc1f 1f \n\t"
|
||||
" mul.s %[temp3], %[temp2], %[hNlFb] \n\t"
|
||||
"sub.s %[temp4], %[one], %[temp2] \n\t"
|
||||
#if !defined(MIPS32_R2_LE)
|
||||
"mul.s %[temp1], %[temp1], %[temp4] \n\t"
|
||||
"add.s %[temp1], %[temp3], %[temp1] \n\t"
|
||||
#else // #if !defined(MIPS32_R2_LE)
|
||||
"madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t"
|
||||
#endif // #if !defined(MIPS32_R2_LE)
|
||||
"swc1 %[temp1], 0(%[p_hNl]) \n\t"
|
||||
"1: \n\t"
|
||||
"addiu %[p_wC], %[p_wC], 4 \n\t"
|
||||
".set pop \n\t"
|
||||
: [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
|
||||
[temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC)
|
||||
: [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
|
||||
|
||||
__asm __volatile (
|
||||
"lwc1 %[temp1], 0(%[p_hNl]) \n\t"
|
||||
"lwc1 %[temp3], 0(%[p_efw1]) \n\t"
|
||||
"lwc1 %[temp2], 0(%[p_efw0]) \n\t"
|
||||
"addiu %[p_hNl], %[p_hNl], 4 \n\t"
|
||||
"mul.s %[temp3], %[temp3], %[temp1] \n\t"
|
||||
"mul.s %[temp2], %[temp2], %[temp1] \n\t"
|
||||
"addiu %[p_efw0], %[p_efw0], 4 \n\t"
|
||||
"addiu %[p_efw1], %[p_efw1], 4 \n\t"
|
||||
"neg.s %[temp4], %[temp3] \n\t"
|
||||
"swc1 %[temp2], -4(%[p_efw0]) \n\t"
|
||||
"swc1 %[temp4], -4(%[p_efw1]) \n\t"
|
||||
: [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
|
||||
[temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1),
|
||||
[p_hNl] "+r" (p_hNl)
|
||||
:
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// MIPS version of the error-signal scaling step.
//
// For each of the PART_LEN1 bins the complex error (ef[0][i], ef[1][i]) is
//   1. divided by (aec->xPow[i] + 1e-10),
//   2. if its squared magnitude exceeds threshold^2, multiplied by
//      threshold / (magnitude + 1e-10),
//   3. multiplied by the step size mu.
//
// The step size and threshold come from the extended-filter constants when
// aec->extended_filter_enabled is set, otherwise from aec->normal_mu and
// aec->normal_error_threshold. ef is updated in place.
void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
  float step_size;
  float threshold;
  float threshold_sq;
  float eps = 1e-10f;
  int remaining = (PART_LEN1);
  float* err_re = ef[0];
  float* err_im = ef[1];
  float* far_pow = aec->xPow;
  float r0, r1, r2;
#if !defined(MIPS32_R2_LE)
  // Extra scratch register: without madd.s the sum of squares needs a
  // separate product.
  float r3;
#endif

  if (aec->extended_filter_enabled) {
    step_size = kExtendedMu;
    threshold = kExtendedErrorThreshold;
  } else {
    step_size = aec->normal_mu;
    threshold = aec->normal_error_threshold;
  }
  // The assembly compares squared magnitudes, so sqrt.s is only executed for
  // bins that actually need limiting.
  threshold_sq = threshold * threshold;

  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "1: \n\t"
    "lwc1 %[f0], 0(%[xPow]) \n\t"
    "lwc1 %[f1], 0(%[ef0]) \n\t"
    "lwc1 %[f2], 0(%[ef1]) \n\t"
    "add.s %[f0], %[f0], %[fac1] \n\t"
    "div.s %[f1], %[f1], %[f0] \n\t"
    "div.s %[f2], %[f2], %[f0] \n\t"
    "mul.s %[f0], %[f1], %[f1] \n\t"
#if defined(MIPS32_R2_LE)
    "madd.s %[f0], %[f0], %[f2], %[f2] \n\t"
#else
    "mul.s %[f3], %[f2], %[f2] \n\t"
    "add.s %[f0], %[f0], %[f3] \n\t"
#endif
    "c.le.s %[f0], %[err_th2] \n\t"
    "nop \n\t"
    "bc1t 2f \n\t"
    " nop \n\t"
    "sqrt.s %[f0], %[f0] \n\t"
    "add.s %[f0], %[f0], %[fac1] \n\t"
    "div.s %[f0], %[err_th], %[f0] \n\t"
    "mul.s %[f1], %[f1], %[f0] \n\t"
    "mul.s %[f2], %[f2], %[f0] \n\t"
    "2: \n\t"
    "mul.s %[f1], %[f1], %[mu] \n\t"
    "mul.s %[f2], %[f2], %[mu] \n\t"
    "swc1 %[f1], 0(%[ef0]) \n\t"
    "swc1 %[f2], 0(%[ef1]) \n\t"
    "addiu %[len], %[len], -1 \n\t"
    "addiu %[xPow], %[xPow], 4 \n\t"
    "addiu %[ef0], %[ef0], 4 \n\t"
    "bgtz %[len], 1b \n\t"
    " addiu %[ef1], %[ef1], 4 \n\t"
    ".set pop \n\t"
    : [f0] "=&f" (r0), [f1] "=&f" (r1), [f2] "=&f" (r2),
#if !defined(MIPS32_R2_LE)
      [f3] "=&f" (r3),
#endif
      [xPow] "+r" (far_pow), [ef0] "+r" (err_re), [ef1] "+r" (err_im),
      [len] "+r" (remaining)
    : [fac1] "f" (eps), [err_th2] "f" (threshold_sq), [mu] "f" (step_size),
      [err_th] "f" (threshold)
    : "memory"
  );
}
|
||||
|
||||
void WebRtcAec_InitAec_mips(void) {
|
||||
WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
|
||||
WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
|
||||
WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
|
||||
WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
|
||||
WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
|
||||
}
|
||||
|
|
@ -0,0 +1,736 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The core AEC algorithm, neon version of speed-critical functions.
|
||||
*
|
||||
* Based on aec_core_sse2.c.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include <math.h>
|
||||
#include <string.h> // memset
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_common.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
|
||||
|
||||
// Shift amounts used when manipulating the bit pattern of single-precision
// floats.
enum {
  // Right shift that moves the exponent field into the top bits of the
  // mantissa field.
  kShiftExponentIntoTopMantissa = 8,
  // Left shift that moves an integer into the exponent field.
  kFloatExponentShift = 23
};
|
||||
|
||||
// Real part of the complex product (aRe + i*aIm) * (bRe + i*bIm).
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
  return (aRe * bRe) - (aIm * bIm);
}
|
||||
|
||||
// Imaginary part of the complex product (aRe + i*aIm) * (bRe + i*bIm).
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
  return (aRe * bIm) + (aIm * bRe);
}
|
||||
|
||||
// Adds to yf, for every filter partition, the bin-wise complex product of the
// buffered far-end spectrum (aec->xfBuf) and the filter partition
// (aec->wfBuf). yf[0] holds real parts, yf[1] imaginary parts.
static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) {
  const int num_partitions = aec->num_partitions;
  float* const out_re = yf[0];
  float* const out_im = yf[1];
  int part;

  for (part = 0; part < num_partitions; part++) {
    // The far-end buffer is read starting at block xfBufBlockPos and wraps
    // around after num_partitions blocks.
    const int raw_block = part + aec->xfBufBlockPos;
    const int block =
        raw_block >= num_partitions ? raw_block - num_partitions : raw_block;
    const float* const x_re = &aec->xfBuf[0][block * PART_LEN1];
    const float* const x_im = &aec->xfBuf[1][block * PART_LEN1];
    const float* const w_re = &aec->wfBuf[0][part * PART_LEN1];
    const float* const w_im = &aec->wfBuf[1][part * PART_LEN1];
    int k;

    // Four bins per iteration.
    for (k = 0; k + 3 < PART_LEN1; k += 4) {
      const float32x4_t xr = vld1q_f32(x_re + k);
      const float32x4_t xi = vld1q_f32(x_im + k);
      const float32x4_t wr = vld1q_f32(w_re + k);
      const float32x4_t wi = vld1q_f32(w_im + k);
      const float32x4_t acc_re = vld1q_f32(out_re + k);
      const float32x4_t acc_im = vld1q_f32(out_im + k);
      // re = xr*wr - xi*wi, im = xr*wi + xi*wr
      const float32x4_t prod_re = vmlsq_f32(vmulq_f32(xr, wr), xi, wi);
      const float32x4_t prod_im = vmlaq_f32(vmulq_f32(xr, wi), xi, wr);
      vst1q_f32(out_re + k, vaddq_f32(acc_re, prod_re));
      vst1q_f32(out_im + k, vaddq_f32(acc_im, prod_im));
    }

    // Leftover bins that do not fill a whole vector.
    for (; k < PART_LEN1; k++) {
      out_re[k] += MulRe(x_re[k], x_im[k], w_re[k], w_im[k]);
      out_im[k] += MulIm(x_re[k], x_im[k], w_re[k], w_im[k]);
    }
  }
}
|
||||
|
||||
// ARM64's arm_neon.h already defines vdivq_f32 and vsqrtq_f32.
|
||||
#if !defined (WEBRTC_ARCH_ARM64)
|
||||
// Lane-wise approximation of a / b, computed as a * (1 / b).
//
// The reciprocal starts from the VRECPE estimate and is refined with two
// Newton-Raphson steps, x[n+1] = x[n] * (2 - b * x[n]), where vrecpsq_f32
// supplies the (2 - b * x[n]) factor. The original authors noted that the
// precision did not improve after 2 iterations.
static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) {
  float32x4_t recip = vrecpeq_f32(b);
  recip = vmulq_f32(vrecpsq_f32(b, recip), recip);  // refinement step 1
  recip = vmulq_f32(vrecpsq_f32(b, recip), recip);  // refinement step 2
  return vmulq_f32(a, recip);
}
|
||||
|
||||
// Lane-wise approximation of sqrt(s), computed as s * (1 / sqrt(s)).
//
// The reciprocal square root starts from the VRSQRTE estimate and is refined
// with two Newton-Raphson steps,
//   x[n+1] = x[n] * (3 - s * (x[n] * x[n])) / 2,
// where vrsqrtsq_f32 supplies the (3 - s * x[n]^2) / 2 factor. The original
// authors noted that the precision did not improve after 2 iterations.
static float32x4_t vsqrtq_f32(float32x4_t s) {
  const uint32x4_t pos_inf_bits = vdupq_n_u32(0x7F800000);
  float32x4_t x = vrsqrteq_f32(s);

  // sqrt(0) must be 0, but the estimate for a zero input is +infinity.
  // Lanes whose estimate is exactly +infinity are cleared to zero so that
  // the final multiplication by s yields 0.
  const uint32x4_t is_pos_inf =
      vceqq_u32(pos_inf_bits, vreinterpretq_u32_f32(x));
  x = vreinterpretq_f32_u32(vandq_u32(vmvnq_u32(is_pos_inf),
                                      vreinterpretq_u32_f32(x)));

  x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);  // refinement step 1
  x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);  // refinement step 2

  // sqrt(s) = s * 1/sqrt(s)
  return vmulq_f32(s, x);
}
|
||||
#endif // WEBRTC_ARCH_ARM64
|
||||
|
||||
// Normalizes, limits and scales the complex error spectrum ef in place.
//
// For every bin: the error is divided by (aec->xPow[i] + 1e-10); if its
// magnitude then exceeds the error threshold it is multiplied by
// threshold / (magnitude + 1e-10); finally it is multiplied by the step
// size mu. The extended-filter constants are used when
// aec->extended_filter_enabled is set, the aec->normal_* fields otherwise.
static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
  const float error_threshold = aec->extended_filter_enabled ?
      kExtendedErrorThreshold : aec->normal_error_threshold;
  const float32x4_t vec_eps = vdupq_n_f32(1e-10f);
  const float32x4_t vec_mu = vmovq_n_f32(mu);
  const float32x4_t vec_limit = vmovq_n_f32(error_threshold);
  int i;

  // Four bins per iteration.
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
    const float32x4_t far_pow = vld1q_f32(&aec->xPow[i]);
    const float32x4_t raw_re = vld1q_f32(&ef[0][i]);
    const float32x4_t raw_im = vld1q_f32(&ef[1][i]);
    const float32x4_t denom = vaddq_f32(far_pow, vec_eps);
    const float32x4_t norm_re = vdivq_f32(raw_re, denom);
    const float32x4_t norm_im = vdivq_f32(raw_im, denom);
    const float32x4_t mag =
        vsqrtq_f32(vmlaq_f32(vmulq_f32(norm_re, norm_re), norm_im, norm_im));
    // All-ones in the lanes whose magnitude exceeds the threshold.
    const uint32x4_t too_big = vcgtq_f32(mag, vec_limit);
    const float32x4_t shrink = vdivq_f32(vec_limit, vaddq_f32(mag, vec_eps));
    // Bitwise select: limited value where too_big, unchanged value elsewhere.
    const float32x4_t sel_re =
        vbslq_f32(too_big, vmulq_f32(norm_re, shrink), norm_re);
    const float32x4_t sel_im =
        vbslq_f32(too_big, vmulq_f32(norm_im, shrink), norm_im);
    vst1q_f32(&ef[0][i], vmulq_f32(sel_re, vec_mu));
    vst1q_f32(&ef[1][i], vmulq_f32(sel_im, vec_mu));
  }

  // Leftover bins that do not fill a whole vector.
  for (; i < PART_LEN1; i++) {
    const float denom = aec->xPow[i] + 1e-10f;
    float re = ef[0][i] / denom;
    float im = ef[1][i] / denom;
    const float mag = sqrtf(re * re + im * im);

    if (mag > error_threshold) {
      const float shrink = error_threshold / (mag + 1e-10f);
      re *= shrink;
      im *= shrink;
    }

    // Stepsize factor
    ef[0][i] = re * mu;
    ef[1][i] = im * mu;
  }
}
|
||||
|
||||
static void FilterAdaptationNEON(AecCore* aec,
|
||||
float* fft,
|
||||
float ef[2][PART_LEN1]) {
|
||||
int i;
|
||||
const int num_partitions = aec->num_partitions;
|
||||
for (i = 0; i < num_partitions; i++) {
|
||||
int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
|
||||
int pos = i * PART_LEN1;
|
||||
int j;
|
||||
// Check for wrap
|
||||
if (i + aec->xfBufBlockPos >= num_partitions) {
|
||||
xPos -= num_partitions * PART_LEN1;
|
||||
}
|
||||
|
||||
// Process the whole array...
|
||||
for (j = 0; j < PART_LEN; j += 4) {
|
||||
// Load xfBuf and ef.
|
||||
const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
|
||||
const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
|
||||
const float32x4_t ef_re = vld1q_f32(&ef[0][j]);
|
||||
const float32x4_t ef_im = vld1q_f32(&ef[1][j]);
|
||||
// Calculate the product of conjugate(xfBuf) by ef.
|
||||
// re(conjugate(a) * b) = aRe * bRe + aIm * bIm
|
||||
// im(conjugate(a) * b)= aRe * bIm - aIm * bRe
|
||||
const float32x4_t a = vmulq_f32(xfBuf_re, ef_re);
|
||||
const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im);
|
||||
const float32x4_t c = vmulq_f32(xfBuf_re, ef_im);
|
||||
const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re);
|
||||
// Interleave real and imaginary parts.
|
||||
const float32x4x2_t g_n_h = vzipq_f32(e, f);
|
||||
// Store
|
||||
vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]);
|
||||
vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]);
|
||||
}
|
||||
// ... and fixup the first imaginary entry.
|
||||
fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
|
||||
-aec->xfBuf[1][xPos + PART_LEN],
|
||||
ef[0][PART_LEN],
|
||||
ef[1][PART_LEN]);
|
||||
|
||||
aec_rdft_inverse_128(fft);
|
||||
memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
|
||||
|
||||
// fft scaling
|
||||
{
|
||||
const float scale = 2.0f / PART_LEN2;
|
||||
const float32x4_t scale_ps = vmovq_n_f32(scale);
|
||||
for (j = 0; j < PART_LEN; j += 4) {
|
||||
const float32x4_t fft_ps = vld1q_f32(&fft[j]);
|
||||
const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps);
|
||||
vst1q_f32(&fft[j], fft_scale);
|
||||
}
|
||||
}
|
||||
aec_rdft_forward_128(fft);
|
||||
|
||||
{
|
||||
const float wt1 = aec->wfBuf[1][pos];
|
||||
aec->wfBuf[0][pos + PART_LEN] += fft[1];
|
||||
for (j = 0; j < PART_LEN; j += 4) {
|
||||
float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
|
||||
float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
|
||||
const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]);
|
||||
const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]);
|
||||
const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4);
|
||||
wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]);
|
||||
wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]);
|
||||
|
||||
vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re);
|
||||
vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im);
|
||||
}
|
||||
aec->wfBuf[1][pos] = wt1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Lane-wise approximation of a^b, computed as exp2(b * log2(a)) with
// polynomial approximations for log2 and exp2.
static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) {
  const uint32x4_t a_bits = vreinterpretq_u32_f32(a);

  // ---- log2(a) ------------------------------------------------------------
  // Write a = y * 2^n with integer n and y in [1.0, 2.0), so that
  // log2(a) = n + log2(y).
  //
  // n: isolate the biased exponent and shift it into the top of the mantissa
  // field, then OR in the exponent bits of 256.0f (0x43800000). Read as a
  // float this is 256 + biased_exponent. Subtracting 383.0f (0x43BF8000,
  // i.e. 256 + 127) removes both the 256 offset and the exponent bias.
  const uint32x4_t exponent_mask = vdupq_n_u32(0x7F800000);
  const uint32x4_t bits_256 = vdupq_n_u32(0x43800000);
  const uint32x4_t bits_383 = vdupq_n_u32(0x43BF8000);
  const uint32x4_t exponent_bits = vandq_u32(a_bits, exponent_mask);
  const uint32x4_t exponent_in_mantissa =
      vshrq_n_u32(exponent_bits, kShiftExponentIntoTopMantissa);
  const float32x4_t log_n =
      vsubq_f32(vreinterpretq_f32_u32(vorrq_u32(exponent_in_mantissa,
                                                bits_256)),
                vreinterpretq_f32_u32(bits_383));

  // y: keep the mantissa of a and force the exponent field to that of 1.0f.
  const uint32x4_t mantissa_mask = vdupq_n_u32(0x007FFFFF);
  const uint32x4_t bits_one = vdupq_n_u32(0x3F800000);
  const float32x4_t log_y =
      vreinterpretq_f32_u32(vorrq_u32(vandq_u32(a_bits, mantissa_mask),
                                      bits_one));

  // log2(y) ~= (y - 1) * pol5(y), with pol5 evaluated by Horner's rule from
  // the highest coefficient down. The original authors report Remez-fitted
  // coefficients with a maximum relative error of 0.00086%.
  float32x4_t log_poly = vdupq_n_f32(-3.4436006e-2f);               // C5
  log_poly = vmlaq_f32(vdupq_n_f32(3.1821337e-1f), log_y, log_poly);  // C4
  log_poly = vmlaq_f32(vdupq_n_f32(-1.2315303f), log_y, log_poly);    // C3
  log_poly = vmlaq_f32(vdupq_n_f32(2.5988452f), log_y, log_poly);     // C2
  log_poly = vmlaq_f32(vdupq_n_f32(-3.3241990f), log_y, log_poly);    // C1
  log_poly = vmlaq_f32(vdupq_n_f32(3.1157899f), log_y, log_poly);     // C0

  const float32x4_t log_y_minus_one =
      vsubq_f32(log_y, vreinterpretq_f32_u32(bits_one));
  const float32x4_t log2_a =
      vaddq_f32(log_n, vmulq_f32(log_y_minus_one, log_poly));

  // ---- x = b * log2(a) ----------------------------------------------------
  const float32x4_t x_raw = vmulq_f32(b, log2_a);

  // ---- exp2(x) ------------------------------------------------------------
  // Write x = n + y with integer n, so that 2^x = 2^n * 2^y.
  // x is first clamped to [-126.99999, 129] to avoid over/underflow.
  const float32x4_t x_upper_clamped = vminq_f32(x_raw, vdupq_n_f32(129.f));
  const float32x4_t x = vmaxq_f32(x_upper_clamped, vdupq_n_f32(-126.99999f));

  // n is taken from x - 0.5 converted to integer.
  // NOTE(review): vcvtq_s32_f32 converts by truncating toward zero, which is
  // not a floor for negative values - confirm that the resulting range of y
  // matches the interval the exp2 polynomial was fitted on.
  const float32x4_t x_minus_half = vsubq_f32(x, vdupq_n_f32(0.5f));
  const int32x4_t exp_n = vcvtq_s32_f32(x_minus_half);

  // 2^n: add the exponent bias (127) and shift into the exponent field.
  const int32x4_t biased_n = vaddq_s32(exp_n, vdupq_n_s32(127));
  const float32x4_t two_pow_n =
      vreinterpretq_f32_s32(vshlq_n_s32(biased_n, kFloatExponentShift));

  // y = x - n.
  const float32x4_t exp_y = vsubq_f32(x, vcvtq_f32_s32(exp_n));

  // 2^y ~= C2 * y^2 + C1 * y + C0 (Horner's rule). The original authors
  // report Remez-fitted coefficients with a maximum relative error of 0.17%.
  float32x4_t exp_poly = vdupq_n_f32(3.3718944e-1f);                  // C2
  exp_poly = vmlaq_f32(vdupq_n_f32(6.5763628e-1f), exp_y, exp_poly);  // C1
  exp_poly = vmlaq_f32(vdupq_n_f32(1.0017247f), exp_y, exp_poly);     // C0

  return vmulq_f32(exp_poly, two_pow_n);
}
|
||||
|
||||
static void OverdriveAndSuppressNEON(AecCore* aec,
|
||||
float hNl[PART_LEN1],
|
||||
const float hNlFb,
|
||||
float efw[2][PART_LEN1]) {
|
||||
int i;
|
||||
const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
|
||||
const float32x4_t vec_one = vdupq_n_f32(1.0f);
|
||||
const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
|
||||
const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm);
|
||||
|
||||
// vectorized code (four at once)
|
||||
for (i = 0; i + 3 < PART_LEN1; i += 4) {
|
||||
// Weight subbands
|
||||
float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
|
||||
const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);
|
||||
const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);
|
||||
const float32x4_t vec_weightCurve_hNlFb = vmulq_f32(vec_weightCurve,
|
||||
vec_hNlFb);
|
||||
const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);
|
||||
const float32x4_t vec_one_weightCurve_hNl = vmulq_f32(vec_one_weightCurve,
|
||||
vec_hNl);
|
||||
const uint32x4_t vec_if0 = vandq_u32(vmvnq_u32(bigger),
|
||||
vreinterpretq_u32_f32(vec_hNl));
|
||||
const float32x4_t vec_one_weightCurve_add =
|
||||
vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);
|
||||
const uint32x4_t vec_if1 =
|
||||
vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));
|
||||
|
||||
vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));
|
||||
|
||||
{
|
||||
const float32x4_t vec_overDriveCurve =
|
||||
vld1q_f32(&WebRtcAec_overDriveCurve[i]);
|
||||
const float32x4_t vec_overDriveSm_overDriveCurve =
|
||||
vmulq_f32(vec_overDriveSm, vec_overDriveCurve);
|
||||
vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
|
||||
vst1q_f32(&hNl[i], vec_hNl);
|
||||
}
|
||||
|
||||
// Suppress error signal
|
||||
{
|
||||
float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
|
||||
float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
|
||||
vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
|
||||
vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
|
||||
|
||||
// Ooura fft returns incorrect sign on imaginary component. It matters
|
||||
// here because we are making an additive change with comfort noise.
|
||||
vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
|
||||
vst1q_f32(&efw[0][i], vec_efw_re);
|
||||
vst1q_f32(&efw[1][i], vec_efw_im);
|
||||
}
|
||||
}
|
||||
|
||||
// scalar code for the remaining items.
|
||||
for (; i < PART_LEN1; i++) {
|
||||
// Weight subbands
|
||||
if (hNl[i] > hNlFb) {
|
||||
hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
|
||||
(1 - WebRtcAec_weightCurve[i]) * hNl[i];
|
||||
}
|
||||
|
||||
hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
|
||||
|
||||
// Suppress error signal
|
||||
efw[0][i] *= hNl[i];
|
||||
efw[1][i] *= hNl[i];
|
||||
|
||||
// Ooura fft returns incorrect sign on imaginary component. It matters
|
||||
// here because we are making an additive change with comfort noise.
|
||||
efw[1][i] *= -1;
|
||||
}
|
||||
}
|
||||
|
||||
static int PartitionDelay(const AecCore* aec) {
|
||||
// Measures the energy in each filter partition and returns the partition with
|
||||
// highest energy.
|
||||
// TODO(bjornv): Spread computational cost by computing one partition per
|
||||
// block?
|
||||
float wfEnMax = 0;
|
||||
int i;
|
||||
int delay = 0;
|
||||
|
||||
for (i = 0; i < aec->num_partitions; i++) {
|
||||
int j;
|
||||
int pos = i * PART_LEN1;
|
||||
float wfEn = 0;
|
||||
float32x4_t vec_wfEn = vdupq_n_f32(0.0f);
|
||||
// vectorized code (four at once)
|
||||
for (j = 0; j + 3 < PART_LEN1; j += 4) {
|
||||
const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]);
|
||||
const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]);
|
||||
vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0);
|
||||
vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1);
|
||||
}
|
||||
{
|
||||
float32x2_t vec_total;
|
||||
// A B C D
|
||||
vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn));
|
||||
// A+B C+D
|
||||
vec_total = vpadd_f32(vec_total, vec_total);
|
||||
// A+B+C+D A+B+C+D
|
||||
wfEn = vget_lane_f32(vec_total, 0);
|
||||
}
|
||||
|
||||
// scalar code for the remaining items.
|
||||
for (; j < PART_LEN1; j++) {
|
||||
wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
|
||||
aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
|
||||
}
|
||||
|
||||
if (wfEn > wfEnMax) {
|
||||
wfEnMax = wfEn;
|
||||
delay = i;
|
||||
}
|
||||
}
|
||||
return delay;
|
||||
}
|
||||
|
||||
// Updates the following smoothed Power Spectral Densities (PSD):
|
||||
// - sd : near-end
|
||||
// - se : residual echo
|
||||
// - sx : far-end
|
||||
// - sde : cross-PSD of near-end and residual echo
|
||||
// - sxd : cross-PSD of near-end and far-end
|
||||
//
|
||||
// In addition to updating the PSDs, also the filter diverge state is determined
|
||||
// upon which actions are taken.
|
||||
static void SmoothedPSD(AecCore* aec,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1]) {
|
||||
// Power estimate smoothing coefficients.
|
||||
const float* ptrGCoh = aec->extended_filter_enabled
|
||||
? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
|
||||
: WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
|
||||
int i;
|
||||
float sdSum = 0, seSum = 0;
|
||||
const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD);
|
||||
float32x4_t vec_sdSum = vdupq_n_f32(0.0f);
|
||||
float32x4_t vec_seSum = vdupq_n_f32(0.0f);
|
||||
|
||||
for (i = 0; i + 3 < PART_LEN1; i += 4) {
|
||||
const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]);
|
||||
const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]);
|
||||
const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]);
|
||||
const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
|
||||
const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
|
||||
const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
|
||||
float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]);
|
||||
float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]);
|
||||
float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]);
|
||||
float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
|
||||
float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
|
||||
float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);
|
||||
|
||||
vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1);
|
||||
vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1);
|
||||
vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1);
|
||||
vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15);
|
||||
vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]);
|
||||
vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
|
||||
vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);
|
||||
|
||||
vst1q_f32(&aec->sd[i], vec_sd);
|
||||
vst1q_f32(&aec->se[i], vec_se);
|
||||
vst1q_f32(&aec->sx[i], vec_sx);
|
||||
|
||||
{
|
||||
float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
|
||||
float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
|
||||
float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
|
||||
vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
|
||||
vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]);
|
||||
vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1);
|
||||
vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0);
|
||||
vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]);
|
||||
vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]);
|
||||
vst2q_f32(&aec->sde[i][0], vec_sde);
|
||||
}
|
||||
|
||||
{
|
||||
float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
|
||||
float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0);
|
||||
float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1);
|
||||
vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]);
|
||||
vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]);
|
||||
vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1);
|
||||
vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0);
|
||||
vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]);
|
||||
vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]);
|
||||
vst2q_f32(&aec->sxd[i][0], vec_sxd);
|
||||
}
|
||||
|
||||
vec_sdSum = vaddq_f32(vec_sdSum, vec_sd);
|
||||
vec_seSum = vaddq_f32(vec_seSum, vec_se);
|
||||
}
|
||||
{
|
||||
float32x2_t vec_sdSum_total;
|
||||
float32x2_t vec_seSum_total;
|
||||
// A B C D
|
||||
vec_sdSum_total = vpadd_f32(vget_low_f32(vec_sdSum),
|
||||
vget_high_f32(vec_sdSum));
|
||||
vec_seSum_total = vpadd_f32(vget_low_f32(vec_seSum),
|
||||
vget_high_f32(vec_seSum));
|
||||
// A+B C+D
|
||||
vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total);
|
||||
vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total);
|
||||
// A+B+C+D A+B+C+D
|
||||
sdSum = vget_lane_f32(vec_sdSum_total, 0);
|
||||
seSum = vget_lane_f32(vec_seSum_total, 0);
|
||||
}
|
||||
|
||||
// scalar code for the remaining items.
|
||||
for (; i < PART_LEN1; i++) {
|
||||
aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
|
||||
ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
|
||||
aec->se[i] = ptrGCoh[0] * aec->se[i] +
|
||||
ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
|
||||
// We threshold here to protect against the ill-effects of a zero farend.
|
||||
// The threshold is not arbitrarily chosen, but balances protection and
|
||||
// adverse interaction with the algorithm's tuning.
|
||||
// TODO(bjornv): investigate further why this is so sensitive.
|
||||
aec->sx[i] =
|
||||
ptrGCoh[0] * aec->sx[i] +
|
||||
ptrGCoh[1] * WEBRTC_SPL_MAX(
|
||||
xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
|
||||
WebRtcAec_kMinFarendPSD);
|
||||
|
||||
aec->sde[i][0] =
|
||||
ptrGCoh[0] * aec->sde[i][0] +
|
||||
ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
|
||||
aec->sde[i][1] =
|
||||
ptrGCoh[0] * aec->sde[i][1] +
|
||||
ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
|
||||
|
||||
aec->sxd[i][0] =
|
||||
ptrGCoh[0] * aec->sxd[i][0] +
|
||||
ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
|
||||
aec->sxd[i][1] =
|
||||
ptrGCoh[0] * aec->sxd[i][1] +
|
||||
ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
|
||||
|
||||
sdSum += aec->sd[i];
|
||||
seSum += aec->se[i];
|
||||
}
|
||||
|
||||
// Divergent filter safeguard.
|
||||
aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
|
||||
|
||||
if (aec->divergeState)
|
||||
memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
|
||||
|
||||
// Reset if error is significantly larger than nearend (13 dB).
|
||||
if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
|
||||
memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
|
||||
}
|
||||
|
||||
// Applies the WebRtcAec_sqrtHanning table as a window to time-domain data
// before the fft.
//
// Reads x[0 .. 2 * PART_LEN) and writes the same range of x_windowed. The
// first PART_LEN samples are multiplied by the table in forward order, the
// second PART_LEN samples by the table read backwards starting at index
// PART_LEN. Four samples are handled per iteration, so PART_LEN is presumably
// a multiple of four.
__inline static void WindowData(float* x_windowed, const float* x) {
  int k = 0;
  while (k < PART_LEN) {
    const float32x4_t first_half = vld1q_f32(x + k);
    const float32x4_t second_half = vld1q_f32(x + PART_LEN + k);
    const float32x4_t win_forward = vld1q_f32(WebRtcAec_sqrtHanning + k);
    // Lanes as loaded: A B C D.
    const float32x4_t win_tail =
        vld1q_f32(WebRtcAec_sqrtHanning + (PART_LEN - k - 3));
    // Swap within each 64-bit half: B A D C.
    const float32x4_t win_swapped = vrev64q_f32(win_tail);
    // Exchange the two halves: D C B A.
    const float32x4_t win_backward =
        vcombine_f32(vget_high_f32(win_swapped), vget_low_f32(win_swapped));

    vst1q_f32(x_windowed + k, vmulq_f32(first_half, win_forward));
    vst1q_f32(x_windowed + PART_LEN + k,
              vmulq_f32(second_half, win_backward));
    k += 4;
  }
}
|
||||
|
||||
// Splits interleaved fft output into separate real and imaginary rows.
//
// data is read as (re, im) pairs: element 2 * k goes to data_complex[0][k]
// and element 2 * k + 1 to data_complex[1][k] for k in [0, PART_LEN).
// Afterwards the first and last bins are patched: their imaginary parts are
// forced to zero, bin 0 takes its real part from data[0] and bin PART_LEN
// takes its real part from data[1].
__inline static void StoreAsComplex(const float* data,
                                    float data_complex[2][PART_LEN1]) {
  float* const re = data_complex[0];
  float* const im = data_complex[1];
  int bin;

  for (bin = 0; bin < PART_LEN; bin += 4) {
    // De-interleaving load: val[0] holds four real parts, val[1] the
    // matching four imaginary parts.
    const float32x4x2_t pairs = vld2q_f32(data + 2 * bin);
    vst1q_f32(re + bin, pairs.val[0]);
    vst1q_f32(im + bin, pairs.val[1]);
  }

  // Patch the first and last bins.
  im[0] = 0;
  im[PART_LEN] = 0;
  re[0] = data[0];
  re[PART_LEN] = data[1];
}
|
||||
|
||||
// NEON version of the subband coherence computation.
//
// Steps, in order:
//   1. When aec->delayEstCtr is zero, refresh aec->delayIdx via
//      PartitionDelay().
//   2. Copy the far-end spectrum selected by aec->delayIdx from aec->xfwBuf
//      into xfw.
//   3. Window + forward-fft aec->dBuf into a local spectrum and aec->eBuf
//      into efw, using fft as scratch space.
//   4. Update the smoothed PSDs with SmoothedPSD().
//   5. For every bin k in [0, PART_LEN1):
//        cohde[k] = |sde[k]|^2 / (sd[k] * se[k] + 1e-10)
//        cohxd[k] = |sxd[k]|^2 / (sx[k] * sd[k] + 1e-10)
static void SubbandCoherenceNEON(AecCore* aec,
                                 float efw[2][PART_LEN1],
                                 float xfw[2][PART_LEN1],
                                 float* fft,
                                 float* cohde,
                                 float* cohxd) {
  // Small constant added to every denominator below.
  const float32x4_t eps = vdupq_n_f32(1e-10f);
  float dfw[2][PART_LEN1];
  int k;

  if (aec->delayEstCtr == 0) {
    aec->delayIdx = PartitionDelay(aec);
  }

  // Use delayed far.
  memcpy(xfw,
         aec->xfwBuf + aec->delayIdx * PART_LEN1,
         sizeof(xfw[0][0]) * 2 * PART_LEN1);

  // Spectrum of the windowed aec->dBuf.
  WindowData(fft, aec->dBuf);
  aec_rdft_forward_128(fft);
  StoreAsComplex(fft, dfw);

  // Spectrum of the windowed aec->eBuf.
  WindowData(fft, aec->eBuf);
  aec_rdft_forward_128(fft);
  StoreAsComplex(fft, efw);

  SmoothedPSD(aec, efw, dfw, xfw);

  // Vectorized coherence, four bins per iteration.
  for (k = 0; k + 3 < PART_LEN1; k += 4) {
    const float32x4_t psd_d = vld1q_f32(&aec->sd[k]);
    const float32x4_t psd_e = vld1q_f32(&aec->se[k]);
    const float32x4_t psd_x = vld1q_f32(&aec->sx[k]);
    // De-interleaving loads: val[0] = real parts, val[1] = imaginary parts.
    const float32x4x2_t cross_de = vld2q_f32(&aec->sde[k][0]);
    const float32x4x2_t cross_xd = vld2q_f32(&aec->sxd[k][0]);

    const float32x4_t denom_de = vmlaq_f32(eps, psd_d, psd_e);
    const float32x4_t denom_xd = vmlaq_f32(eps, psd_d, psd_x);
    const float32x4_t num_de =
        vmlaq_f32(vmulq_f32(cross_de.val[0], cross_de.val[0]),
                  cross_de.val[1], cross_de.val[1]);
    const float32x4_t num_xd =
        vmlaq_f32(vmulq_f32(cross_xd.val[0], cross_xd.val[0]),
                  cross_xd.val[1], cross_xd.val[1]);

    vst1q_f32(&cohde[k], vdivq_f32(num_de, denom_de));
    vst1q_f32(&cohxd[k], vdivq_f32(num_xd, denom_xd));
  }

  // Scalar code for the bins the vector loop did not reach.
  for (; k < PART_LEN1; k++) {
    const float de_re = aec->sde[k][0];
    const float de_im = aec->sde[k][1];
    const float xd_re = aec->sxd[k][0];
    const float xd_im = aec->sxd[k][1];

    cohde[k] = (de_re * de_re + de_im * de_im) /
               (aec->sd[k] * aec->se[k] + 1e-10f);
    cohxd[k] = (xd_re * xd_re + xd_im * xd_im) /
               (aec->sx[k] * aec->sd[k] + 1e-10f);
  }
}
|
||||
|
||||
void WebRtcAec_InitAec_neon(void) {
|
||||
WebRtcAec_FilterFar = FilterFarNEON;
|
||||
WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
|
||||
WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
|
||||
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
|
||||
WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
|
||||
}
|
||||
|
|
@ -0,0 +1,731 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The core AEC algorithm, SSE2 version of speed-critical functions.
|
||||
*/
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <math.h>
|
||||
#include <string.h> // memset
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_common.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
|
||||
|
||||
// Real part of the complex product (aRe + j*aIm) * (bRe + j*bIm).
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
  const float re_times_re = aRe * bRe;
  const float im_times_im = aIm * bIm;
  return re_times_re - im_times_im;
}
|
||||
|
||||
// Imaginary part of the complex product (aRe + j*aIm) * (bRe + j*bIm).
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
  const float re_times_im = aRe * bIm;
  const float im_times_re = aIm * bRe;
  return re_times_im + im_times_re;
}
|
||||
|
||||
// Accumulates into yf the complex product of aec->xfBuf and aec->wfBuf,
// summed over all aec->num_partitions partitions.
//
// Partition `part` of wfBuf is paired with block
// (part + aec->xfBufBlockPos) of xfBuf, wrapping around at num_partitions.
// yf is only added to, never cleared, by this function.
static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) {
  const int num_partitions = aec->num_partitions;
  int part;

  for (part = 0; part < num_partitions; part++) {
    const int block = part + aec->xfBufBlockPos;
    // Wrap the xfBuf block index back into [0, num_partitions).
    const int x_off =
        (block >= num_partitions ? block - num_partitions : block) * PART_LEN1;
    const int w_off = part * PART_LEN1;
    const float* x_re = aec->xfBuf[0] + x_off;
    const float* x_im = aec->xfBuf[1] + x_off;
    const float* w_re = aec->wfBuf[0] + w_off;
    const float* w_im = aec->wfBuf[1] + w_off;
    int k;

    // Vectorized code (four bins at once).
    for (k = 0; k + 3 < PART_LEN1; k += 4) {
      const __m128 xr = _mm_loadu_ps(x_re + k);
      const __m128 xi = _mm_loadu_ps(x_im + k);
      const __m128 wr = _mm_loadu_ps(w_re + k);
      const __m128 wi = _mm_loadu_ps(w_im + k);
      const __m128 acc_re = _mm_loadu_ps(&yf[0][k]);
      const __m128 acc_im = _mm_loadu_ps(&yf[1][k]);
      // (xr + j*xi) * (wr + j*wi)
      const __m128 prod_re =
          _mm_sub_ps(_mm_mul_ps(xr, wr), _mm_mul_ps(xi, wi));
      const __m128 prod_im =
          _mm_add_ps(_mm_mul_ps(xr, wi), _mm_mul_ps(xi, wr));

      _mm_storeu_ps(&yf[0][k], _mm_add_ps(acc_re, prod_re));
      _mm_storeu_ps(&yf[1][k], _mm_add_ps(acc_im, prod_im));
    }

    // Scalar code for the remaining bins.
    for (; k < PART_LEN1; k++) {
      yf[0][k] += MulRe(x_re[k], x_im[k], w_re[k], w_im[k]);
      yf[1][k] += MulIm(x_re[k], x_im[k], w_re[k], w_im[k]);
    }
  }
}
|
||||
|
||||
// Normalizes, magnitude-limits and step-size-scales the error spectrum ef
// in place.
//
// For every bin: ef is divided by (aec->xPow + 1e-10). If the resulting
// magnitude exceeds the error threshold it is rescaled by
// threshold / (magnitude + 1e-10). Finally the bin is multiplied by the
// step size. Step size and threshold are kExtendedMu /
// kExtendedErrorThreshold when aec->extended_filter_enabled is set, and
// aec->normal_mu / aec->normal_error_threshold otherwise.
static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
  const float step =
      aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
  const float limit = aec->extended_filter_enabled
                          ? kExtendedErrorThreshold
                          : aec->normal_error_threshold;
  const __m128 eps_v = _mm_set1_ps(1e-10f);
  const __m128 step_v = _mm_set1_ps(step);
  const __m128 limit_v = _mm_set1_ps(limit);
  int k;

  // Vectorized code (four bins at once).
  for (k = 0; k + 3 < PART_LEN1; k += 4) {
    const __m128 denom = _mm_add_ps(_mm_loadu_ps(&aec->xPow[k]), eps_v);
    const __m128 norm_re = _mm_div_ps(_mm_loadu_ps(&ef[0][k]), denom);
    const __m128 norm_im = _mm_div_ps(_mm_loadu_ps(&ef[1][k]), denom);
    const __m128 magnitude = _mm_sqrt_ps(_mm_add_ps(
        _mm_mul_ps(norm_re, norm_re), _mm_mul_ps(norm_im, norm_im)));
    // All-ones lanes where the magnitude is above the threshold.
    const __m128 too_big = _mm_cmpgt_ps(magnitude, limit_v);
    const __m128 shrink = _mm_div_ps(limit_v, _mm_add_ps(magnitude, eps_v));
    // Branch-free select: limited value where too_big, original elsewhere.
    const __m128 limited_re = _mm_and_ps(too_big, _mm_mul_ps(norm_re, shrink));
    const __m128 limited_im = _mm_and_ps(too_big, _mm_mul_ps(norm_im, shrink));
    const __m128 passed_re = _mm_andnot_ps(too_big, norm_re);
    const __m128 passed_im = _mm_andnot_ps(too_big, norm_im);

    _mm_storeu_ps(&ef[0][k],
                  _mm_mul_ps(_mm_or_ps(passed_re, limited_re), step_v));
    _mm_storeu_ps(&ef[1][k],
                  _mm_mul_ps(_mm_or_ps(passed_im, limited_im), step_v));
  }

  // Scalar code for the remaining bins.
  for (; k < (PART_LEN1); k++) {
    const float denom = aec->xPow[k] + 1e-10f;
    float re = ef[0][k] / denom;
    float im = ef[1][k] / denom;
    const float magnitude = sqrtf(re * re + im * im);

    if (magnitude > limit) {
      const float shrink = limit / (magnitude + 1e-10f);
      re *= shrink;
      im *= shrink;
    }

    // Stepsize factor
    ef[0][k] = re * step;
    ef[1][k] = im * step;
  }
}
|
||||
|
||||
static void FilterAdaptationSSE2(AecCore* aec,
|
||||
float* fft,
|
||||
float ef[2][PART_LEN1]) {
|
||||
int i, j;
|
||||
const int num_partitions = aec->num_partitions;
|
||||
for (i = 0; i < num_partitions; i++) {
|
||||
int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
|
||||
int pos = i * PART_LEN1;
|
||||
// Check for wrap
|
||||
if (i + aec->xfBufBlockPos >= num_partitions) {
|
||||
xPos -= num_partitions * PART_LEN1;
|
||||
}
|
||||
|
||||
// Process the whole array...
|
||||
for (j = 0; j < PART_LEN; j += 4) {
|
||||
// Load xfBuf and ef.
|
||||
const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
|
||||
const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
|
||||
const __m128 ef_re = _mm_loadu_ps(&ef[0][j]);
|
||||
const __m128 ef_im = _mm_loadu_ps(&ef[1][j]);
|
||||
// Calculate the product of conjugate(xfBuf) by ef.
|
||||
// re(conjugate(a) * b) = aRe * bRe + aIm * bIm
|
||||
// im(conjugate(a) * b)= aRe * bIm - aIm * bRe
|
||||
const __m128 a = _mm_mul_ps(xfBuf_re, ef_re);
|
||||
const __m128 b = _mm_mul_ps(xfBuf_im, ef_im);
|
||||
const __m128 c = _mm_mul_ps(xfBuf_re, ef_im);
|
||||
const __m128 d = _mm_mul_ps(xfBuf_im, ef_re);
|
||||
const __m128 e = _mm_add_ps(a, b);
|
||||
const __m128 f = _mm_sub_ps(c, d);
|
||||
// Interleave real and imaginary parts.
|
||||
const __m128 g = _mm_unpacklo_ps(e, f);
|
||||
const __m128 h = _mm_unpackhi_ps(e, f);
|
||||
// Store
|
||||
_mm_storeu_ps(&fft[2 * j + 0], g);
|
||||
_mm_storeu_ps(&fft[2 * j + 4], h);
|
||||
}
|
||||
// ... and fixup the first imaginary entry.
|
||||
fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
|
||||
-aec->xfBuf[1][xPos + PART_LEN],
|
||||
ef[0][PART_LEN],
|
||||
ef[1][PART_LEN]);
|
||||
|
||||
aec_rdft_inverse_128(fft);
|
||||
memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
|
||||
|
||||
// fft scaling
|
||||
{
|
||||
float scale = 2.0f / PART_LEN2;
|
||||
const __m128 scale_ps = _mm_load_ps1(&scale);
|
||||
for (j = 0; j < PART_LEN; j += 4) {
|
||||
const __m128 fft_ps = _mm_loadu_ps(&fft[j]);
|
||||
const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);
|
||||
_mm_storeu_ps(&fft[j], fft_scale);
|
||||
}
|
||||
}
|
||||
aec_rdft_forward_128(fft);
|
||||
|
||||
{
|
||||
float wt1 = aec->wfBuf[1][pos];
|
||||
aec->wfBuf[0][pos + PART_LEN] += fft[1];
|
||||
for (j = 0; j < PART_LEN; j += 4) {
|
||||
__m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
|
||||
__m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
|
||||
const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
|
||||
const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
|
||||
const __m128 fft_re =
|
||||
_mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
const __m128 fft_im =
|
||||
_mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));
|
||||
wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);
|
||||
wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);
|
||||
_mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re);
|
||||
_mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im);
|
||||
}
|
||||
aec->wfBuf[1][pos] = wt1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Approximates a^b lane-wise for four floats using
//   a^b = exp2(b * log2(a)),
// where both log2() and exp2() are polynomial approximations.
//
// The accuracy figures quoted below are the ones stated by the original
// authors of the approximation (coefficients fitted with the Remez
// algorithm); they are not re-derived here.
static __m128 mm_pow_ps(__m128 a, __m128 b) {
  __m128 log2_of_a;
  __m128 exponent;
  __m128 result;

  // Step 1: log2(a).
  //
  // Write a = y * 2^n with integer n and y in [1.0, 2.0); then
  // log2(a) = n + log2(y).
  {
    // Bit patterns used to take the float apart.
    const __m128 exp_field_mask =
        _mm_castsi128_ps(_mm_set1_epi32(0x7F800000));
    const __m128 frac_field_mask =
        _mm_castsi128_ps(_mm_set1_epi32(0x007FFFFF));
    const __m128 biased_eight = _mm_castsi128_ps(_mm_set1_epi32(0x43800000));
    const __m128 leading_one = _mm_castsi128_ps(_mm_set1_epi32(0x43BF8000));
    const __m128 one = _mm_castsi128_ps(_mm_set1_epi32(0x3F800000));

    // n: isolate the exponent field, shift it right by eight bits so that it
    // lands in the top of the mantissa, OR in a biased exponent of eight so
    // the shifted bits read as an integer-valued float, then subtract the
    // constant that removes both the implicit leading one and the exponent
    // bias.
    const __m128 exp_bits = _mm_and_ps(a, exp_field_mask);
    const __m128 exp_in_mantissa = _mm_castsi128_ps(
        _mm_srli_epi32(_mm_castps_si128(exp_bits), 8));
    const __m128 int_part =
        _mm_sub_ps(_mm_or_ps(exp_in_mantissa, biased_eight), leading_one);

    // y: keep the mantissa bits and force the exponent field to that of 1.0.
    const __m128 y = _mm_or_ps(_mm_and_ps(a, frac_field_mask), one);

    // log2(y) ~= (y - 1) * pol5(y), with
    //   pol5(y) = C5*y^5 + C4*y^4 + C3*y^3 + C2*y^2 + C1*y + C0
    // evaluated by Horner's rule (quoted maximum relative error: 0.00086%).
    __m128 poly = _mm_mul_ps(y, _mm_set1_ps(-3.4436006e-2f));  // C5
    poly = _mm_add_ps(poly, _mm_set1_ps(3.1821337e-1f));       // C4
    poly = _mm_mul_ps(poly, y);
    poly = _mm_add_ps(poly, _mm_set1_ps(-1.2315303f));         // C3
    poly = _mm_mul_ps(poly, y);
    poly = _mm_add_ps(poly, _mm_set1_ps(2.5988452f));          // C2
    poly = _mm_mul_ps(poly, y);
    poly = _mm_add_ps(poly, _mm_set1_ps(-3.3241990f));         // C1
    poly = _mm_mul_ps(poly, y);
    poly = _mm_add_ps(poly, _mm_set1_ps(3.1157899f));          // C0

    // Combine parts.
    log2_of_a = _mm_add_ps(int_part, _mm_mul_ps(_mm_sub_ps(y, one), poly));
  }

  // Step 2: b * log2(a).
  exponent = _mm_mul_ps(b, log2_of_a);

  // Step 3: exp2(x) with x = b * log2(a).
  //
  // Write x = n + y with integer n taken from (x - 0.5) converted by
  // _mm_cvtps_epi32; then 2^x = 2^n * 2^y. 2^n is built directly in the
  // float exponent field and 2^y comes from a second-order polynomial
  // (quoted maximum relative error: 0.17%).
  {
    // Clamp the input to [-126.99999, 129] to avoid over/underflow of the
    // exponent field built below.
    const __m128 clamped =
        _mm_max_ps(_mm_min_ps(exponent, _mm_set1_ps(129.f)),
                   _mm_set1_ps(-126.99999f));

    // n, as packed 32-bit integers.
    const __m128i whole =
        _mm_cvtps_epi32(_mm_sub_ps(clamped, _mm_set1_ps(0.5f)));

    // 2^n: add the exponent bias (127) and shift into the exponent field
    // (bit 23 upwards).
    const __m128 pow2_whole = _mm_castsi128_ps(
        _mm_slli_epi32(_mm_add_epi32(whole, _mm_set1_epi32(127)), 23));

    // y = x - n.
    const __m128 y = _mm_sub_ps(clamped, _mm_cvtepi32_ps(whole));

    // 2^y ~= C2*y^2 + C1*y + C0, Horner's rule.
    __m128 poly = _mm_mul_ps(y, _mm_set1_ps(3.3718944e-1f));  // C2
    poly = _mm_add_ps(poly, _mm_set1_ps(6.5763628e-1f));      // C1
    poly = _mm_mul_ps(poly, y);
    poly = _mm_add_ps(poly, _mm_set1_ps(1.0017247f));         // C0

    // Combine parts.
    result = _mm_mul_ps(poly, pow2_whole);
  }

  return result;
}
|
||||
|
||||
static void OverdriveAndSuppressSSE2(AecCore* aec,
|
||||
float hNl[PART_LEN1],
|
||||
const float hNlFb,
|
||||
float efw[2][PART_LEN1]) {
|
||||
int i;
|
||||
const __m128 vec_hNlFb = _mm_set1_ps(hNlFb);
|
||||
const __m128 vec_one = _mm_set1_ps(1.0f);
|
||||
const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
|
||||
const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm);
|
||||
// vectorized code (four at once)
|
||||
for (i = 0; i + 3 < PART_LEN1; i += 4) {
|
||||
// Weight subbands
|
||||
__m128 vec_hNl = _mm_loadu_ps(&hNl[i]);
|
||||
const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]);
|
||||
const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb);
|
||||
const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb);
|
||||
const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve);
|
||||
const __m128 vec_one_weightCurve_hNl =
|
||||
_mm_mul_ps(vec_one_weightCurve, vec_hNl);
|
||||
const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl);
|
||||
const __m128 vec_if1 = _mm_and_ps(
|
||||
bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));
|
||||
vec_hNl = _mm_or_ps(vec_if0, vec_if1);
|
||||
|
||||
{
|
||||
const __m128 vec_overDriveCurve =
|
||||
_mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
|
||||
const __m128 vec_overDriveSm_overDriveCurve =
|
||||
_mm_mul_ps(vec_overDriveSm, vec_overDriveCurve);
|
||||
vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
|
||||
_mm_storeu_ps(&hNl[i], vec_hNl);
|
||||
}
|
||||
|
||||
// Suppress error signal
|
||||
{
|
||||
__m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);
|
||||
__m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);
|
||||
vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);
|
||||
vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);
|
||||
|
||||
// Ooura fft returns incorrect sign on imaginary component. It matters
|
||||
// here because we are making an additive change with comfort noise.
|
||||
vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);
|
||||
_mm_storeu_ps(&efw[0][i], vec_efw_re);
|
||||
_mm_storeu_ps(&efw[1][i], vec_efw_im);
|
||||
}
|
||||
}
|
||||
// scalar code for the remaining items.
|
||||
for (; i < PART_LEN1; i++) {
|
||||
// Weight subbands
|
||||
if (hNl[i] > hNlFb) {
|
||||
hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
|
||||
(1 - WebRtcAec_weightCurve[i]) * hNl[i];
|
||||
}
|
||||
hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
|
||||
|
||||
// Suppress error signal
|
||||
efw[0][i] *= hNl[i];
|
||||
efw[1][i] *= hNl[i];
|
||||
|
||||
// Ooura fft returns incorrect sign on imaginary component. It matters
|
||||
// here because we are making an additive change with comfort noise.
|
||||
efw[1][i] *= -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Horizontal add: writes the sum of the four lanes of `sum` to *dst.
__inline static void _mm_add_ps_4x1(__m128 sum, float *dst) {
  // Bring lanes 2 and 3 down to positions 0 and 1 and add:
  // lane 0 = A + C, lane 1 = B + D.
  const __m128 upper_half = _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2));
  const __m128 pair_sums = _mm_add_ps(sum, upper_half);
  // Broadcast lane 1 and add: lane 0 = A + B + C + D.
  const __m128 lane_one =
      _mm_shuffle_ps(pair_sums, pair_sums, _MM_SHUFFLE(1, 1, 1, 1));
  _mm_store_ss(dst, _mm_add_ps(pair_sums, lane_one));
}
|
||||
static int PartitionDelay(const AecCore* aec) {
|
||||
// Measures the energy in each filter partition and returns the partition with
|
||||
// highest energy.
|
||||
// TODO(bjornv): Spread computational cost by computing one partition per
|
||||
// block?
|
||||
float wfEnMax = 0;
|
||||
int i;
|
||||
int delay = 0;
|
||||
|
||||
for (i = 0; i < aec->num_partitions; i++) {
|
||||
int j;
|
||||
int pos = i * PART_LEN1;
|
||||
float wfEn = 0;
|
||||
__m128 vec_wfEn = _mm_set1_ps(0.0f);
|
||||
// vectorized code (four at once)
|
||||
for (j = 0; j + 3 < PART_LEN1; j += 4) {
|
||||
const __m128 vec_wfBuf0 = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
|
||||
const __m128 vec_wfBuf1 = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
|
||||
vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf0, vec_wfBuf0));
|
||||
vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf1, vec_wfBuf1));
|
||||
}
|
||||
_mm_add_ps_4x1(vec_wfEn, &wfEn);
|
||||
|
||||
// scalar code for the remaining items.
|
||||
for (; j < PART_LEN1; j++) {
|
||||
wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
|
||||
aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
|
||||
}
|
||||
|
||||
if (wfEn > wfEnMax) {
|
||||
wfEnMax = wfEn;
|
||||
delay = i;
|
||||
}
|
||||
}
|
||||
return delay;
|
||||
}
|
||||
|
||||
// Updates the following smoothed Power Spectral Densities (PSD):
|
||||
// - sd : near-end
|
||||
// - se : residual echo
|
||||
// - sx : far-end
|
||||
// - sde : cross-PSD of near-end and residual echo
|
||||
// - sxd : cross-PSD of near-end and far-end
|
||||
//
|
||||
// In addition to updating the PSDs, also the filter diverge state is determined
|
||||
// upon actions are taken.
|
||||
static void SmoothedPSD(AecCore* aec,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1]) {
|
||||
// Power estimate smoothing coefficients.
|
||||
const float* ptrGCoh = aec->extended_filter_enabled
|
||||
? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
|
||||
: WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
|
||||
int i;
|
||||
float sdSum = 0, seSum = 0;
|
||||
const __m128 vec_15 = _mm_set1_ps(WebRtcAec_kMinFarendPSD);
|
||||
const __m128 vec_GCoh0 = _mm_set1_ps(ptrGCoh[0]);
|
||||
const __m128 vec_GCoh1 = _mm_set1_ps(ptrGCoh[1]);
|
||||
__m128 vec_sdSum = _mm_set1_ps(0.0f);
|
||||
__m128 vec_seSum = _mm_set1_ps(0.0f);
|
||||
|
||||
for (i = 0; i + 3 < PART_LEN1; i += 4) {
|
||||
const __m128 vec_dfw0 = _mm_loadu_ps(&dfw[0][i]);
|
||||
const __m128 vec_dfw1 = _mm_loadu_ps(&dfw[1][i]);
|
||||
const __m128 vec_efw0 = _mm_loadu_ps(&efw[0][i]);
|
||||
const __m128 vec_efw1 = _mm_loadu_ps(&efw[1][i]);
|
||||
const __m128 vec_xfw0 = _mm_loadu_ps(&xfw[0][i]);
|
||||
const __m128 vec_xfw1 = _mm_loadu_ps(&xfw[1][i]);
|
||||
__m128 vec_sd = _mm_mul_ps(_mm_loadu_ps(&aec->sd[i]), vec_GCoh0);
|
||||
__m128 vec_se = _mm_mul_ps(_mm_loadu_ps(&aec->se[i]), vec_GCoh0);
|
||||
__m128 vec_sx = _mm_mul_ps(_mm_loadu_ps(&aec->sx[i]), vec_GCoh0);
|
||||
__m128 vec_dfw_sumsq = _mm_mul_ps(vec_dfw0, vec_dfw0);
|
||||
__m128 vec_efw_sumsq = _mm_mul_ps(vec_efw0, vec_efw0);
|
||||
__m128 vec_xfw_sumsq = _mm_mul_ps(vec_xfw0, vec_xfw0);
|
||||
vec_dfw_sumsq = _mm_add_ps(vec_dfw_sumsq, _mm_mul_ps(vec_dfw1, vec_dfw1));
|
||||
vec_efw_sumsq = _mm_add_ps(vec_efw_sumsq, _mm_mul_ps(vec_efw1, vec_efw1));
|
||||
vec_xfw_sumsq = _mm_add_ps(vec_xfw_sumsq, _mm_mul_ps(vec_xfw1, vec_xfw1));
|
||||
vec_xfw_sumsq = _mm_max_ps(vec_xfw_sumsq, vec_15);
|
||||
vec_sd = _mm_add_ps(vec_sd, _mm_mul_ps(vec_dfw_sumsq, vec_GCoh1));
|
||||
vec_se = _mm_add_ps(vec_se, _mm_mul_ps(vec_efw_sumsq, vec_GCoh1));
|
||||
vec_sx = _mm_add_ps(vec_sx, _mm_mul_ps(vec_xfw_sumsq, vec_GCoh1));
|
||||
_mm_storeu_ps(&aec->sd[i], vec_sd);
|
||||
_mm_storeu_ps(&aec->se[i], vec_se);
|
||||
_mm_storeu_ps(&aec->sx[i], vec_sx);
|
||||
|
||||
{
|
||||
const __m128 vec_3210 = _mm_loadu_ps(&aec->sde[i][0]);
|
||||
const __m128 vec_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
|
||||
__m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
|
||||
_MM_SHUFFLE(2, 0, 2, 0));
|
||||
__m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
|
||||
_MM_SHUFFLE(3, 1, 3, 1));
|
||||
__m128 vec_dfwefw0011 = _mm_mul_ps(vec_dfw0, vec_efw0);
|
||||
__m128 vec_dfwefw0110 = _mm_mul_ps(vec_dfw0, vec_efw1);
|
||||
vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
|
||||
vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
|
||||
vec_dfwefw0011 = _mm_add_ps(vec_dfwefw0011,
|
||||
_mm_mul_ps(vec_dfw1, vec_efw1));
|
||||
vec_dfwefw0110 = _mm_sub_ps(vec_dfwefw0110,
|
||||
_mm_mul_ps(vec_dfw1, vec_efw0));
|
||||
vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwefw0011, vec_GCoh1));
|
||||
vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwefw0110, vec_GCoh1));
|
||||
_mm_storeu_ps(&aec->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b));
|
||||
_mm_storeu_ps(&aec->sde[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
|
||||
}
|
||||
|
||||
{
|
||||
const __m128 vec_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
|
||||
const __m128 vec_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
|
||||
__m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
|
||||
_MM_SHUFFLE(2, 0, 2, 0));
|
||||
__m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
|
||||
_MM_SHUFFLE(3, 1, 3, 1));
|
||||
__m128 vec_dfwxfw0011 = _mm_mul_ps(vec_dfw0, vec_xfw0);
|
||||
__m128 vec_dfwxfw0110 = _mm_mul_ps(vec_dfw0, vec_xfw1);
|
||||
vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
|
||||
vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
|
||||
vec_dfwxfw0011 = _mm_add_ps(vec_dfwxfw0011,
|
||||
_mm_mul_ps(vec_dfw1, vec_xfw1));
|
||||
vec_dfwxfw0110 = _mm_sub_ps(vec_dfwxfw0110,
|
||||
_mm_mul_ps(vec_dfw1, vec_xfw0));
|
||||
vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwxfw0011, vec_GCoh1));
|
||||
vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwxfw0110, vec_GCoh1));
|
||||
_mm_storeu_ps(&aec->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b));
|
||||
_mm_storeu_ps(&aec->sxd[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
|
||||
}
|
||||
|
||||
vec_sdSum = _mm_add_ps(vec_sdSum, vec_sd);
|
||||
vec_seSum = _mm_add_ps(vec_seSum, vec_se);
|
||||
}
|
||||
|
||||
_mm_add_ps_4x1(vec_sdSum, &sdSum);
|
||||
_mm_add_ps_4x1(vec_seSum, &seSum);
|
||||
|
||||
for (; i < PART_LEN1; i++) {
|
||||
aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
|
||||
ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
|
||||
aec->se[i] = ptrGCoh[0] * aec->se[i] +
|
||||
ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
|
||||
// We threshold here to protect against the ill-effects of a zero farend.
|
||||
// The threshold is not arbitrarily chosen, but balances protection and
|
||||
// adverse interaction with the algorithm's tuning.
|
||||
// TODO(bjornv): investigate further why this is so sensitive.
|
||||
aec->sx[i] =
|
||||
ptrGCoh[0] * aec->sx[i] +
|
||||
ptrGCoh[1] * WEBRTC_SPL_MAX(
|
||||
xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
|
||||
WebRtcAec_kMinFarendPSD);
|
||||
|
||||
aec->sde[i][0] =
|
||||
ptrGCoh[0] * aec->sde[i][0] +
|
||||
ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
|
||||
aec->sde[i][1] =
|
||||
ptrGCoh[0] * aec->sde[i][1] +
|
||||
ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
|
||||
|
||||
aec->sxd[i][0] =
|
||||
ptrGCoh[0] * aec->sxd[i][0] +
|
||||
ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
|
||||
aec->sxd[i][1] =
|
||||
ptrGCoh[0] * aec->sxd[i][1] +
|
||||
ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
|
||||
|
||||
sdSum += aec->sd[i];
|
||||
seSum += aec->se[i];
|
||||
}
|
||||
|
||||
// Divergent filter safeguard.
|
||||
aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
|
||||
|
||||
if (aec->divergeState)
|
||||
memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
|
||||
|
||||
// Reset if error is significantly larger than nearend (13 dB).
|
||||
if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
|
||||
memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
|
||||
}
|
||||
|
||||
// Window time domain data to be used by the fft.
|
||||
__inline static void WindowData(float* x_windowed, const float* x) {
|
||||
int i;
|
||||
for (i = 0; i < PART_LEN; i += 4) {
|
||||
const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]);
|
||||
const __m128 vec_Buf2 = _mm_loadu_ps(&x[PART_LEN + i]);
|
||||
const __m128 vec_sqrtHanning = _mm_load_ps(&WebRtcAec_sqrtHanning[i]);
|
||||
// A B C D
|
||||
__m128 vec_sqrtHanning_rev =
|
||||
_mm_loadu_ps(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]);
|
||||
// D C B A
|
||||
vec_sqrtHanning_rev =
|
||||
_mm_shuffle_ps(vec_sqrtHanning_rev, vec_sqrtHanning_rev,
|
||||
_MM_SHUFFLE(0, 1, 2, 3));
|
||||
_mm_storeu_ps(&x_windowed[i], _mm_mul_ps(vec_Buf1, vec_sqrtHanning));
|
||||
_mm_storeu_ps(&x_windowed[PART_LEN + i],
|
||||
_mm_mul_ps(vec_Buf2, vec_sqrtHanning_rev));
|
||||
}
|
||||
}
|
||||
|
||||
// Puts fft output data into a complex valued array.
|
||||
__inline static void StoreAsComplex(const float* data,
|
||||
float data_complex[2][PART_LEN1]) {
|
||||
int i;
|
||||
for (i = 0; i < PART_LEN; i += 4) {
|
||||
const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]);
|
||||
const __m128 vec_fft4 = _mm_loadu_ps(&data[2 * i + 4]);
|
||||
const __m128 vec_a = _mm_shuffle_ps(vec_fft0, vec_fft4,
|
||||
_MM_SHUFFLE(2, 0, 2, 0));
|
||||
const __m128 vec_b = _mm_shuffle_ps(vec_fft0, vec_fft4,
|
||||
_MM_SHUFFLE(3, 1, 3, 1));
|
||||
_mm_storeu_ps(&data_complex[0][i], vec_a);
|
||||
_mm_storeu_ps(&data_complex[1][i], vec_b);
|
||||
}
|
||||
// fix beginning/end values
|
||||
data_complex[1][0] = 0;
|
||||
data_complex[1][PART_LEN] = 0;
|
||||
data_complex[0][0] = data[0];
|
||||
data_complex[0][PART_LEN] = data[1];
|
||||
}
|
||||
|
||||
static void SubbandCoherenceSSE2(AecCore* aec,
|
||||
float efw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
float* fft,
|
||||
float* cohde,
|
||||
float* cohxd) {
|
||||
float dfw[2][PART_LEN1];
|
||||
int i;
|
||||
|
||||
if (aec->delayEstCtr == 0)
|
||||
aec->delayIdx = PartitionDelay(aec);
|
||||
|
||||
// Use delayed far.
|
||||
memcpy(xfw,
|
||||
aec->xfwBuf + aec->delayIdx * PART_LEN1,
|
||||
sizeof(xfw[0][0]) * 2 * PART_LEN1);
|
||||
|
||||
// Windowed near fft
|
||||
WindowData(fft, aec->dBuf);
|
||||
aec_rdft_forward_128(fft);
|
||||
StoreAsComplex(fft, dfw);
|
||||
|
||||
// Windowed error fft
|
||||
WindowData(fft, aec->eBuf);
|
||||
aec_rdft_forward_128(fft);
|
||||
StoreAsComplex(fft, efw);
|
||||
|
||||
SmoothedPSD(aec, efw, dfw, xfw);
|
||||
|
||||
{
|
||||
const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f);
|
||||
|
||||
// Subband coherence
|
||||
for (i = 0; i + 3 < PART_LEN1; i += 4) {
|
||||
const __m128 vec_sd = _mm_loadu_ps(&aec->sd[i]);
|
||||
const __m128 vec_se = _mm_loadu_ps(&aec->se[i]);
|
||||
const __m128 vec_sx = _mm_loadu_ps(&aec->sx[i]);
|
||||
const __m128 vec_sdse = _mm_add_ps(vec_1eminus10,
|
||||
_mm_mul_ps(vec_sd, vec_se));
|
||||
const __m128 vec_sdsx = _mm_add_ps(vec_1eminus10,
|
||||
_mm_mul_ps(vec_sd, vec_sx));
|
||||
const __m128 vec_sde_3210 = _mm_loadu_ps(&aec->sde[i][0]);
|
||||
const __m128 vec_sde_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
|
||||
const __m128 vec_sxd_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
|
||||
const __m128 vec_sxd_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
|
||||
const __m128 vec_sde_0 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654,
|
||||
_MM_SHUFFLE(2, 0, 2, 0));
|
||||
const __m128 vec_sde_1 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654,
|
||||
_MM_SHUFFLE(3, 1, 3, 1));
|
||||
const __m128 vec_sxd_0 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654,
|
||||
_MM_SHUFFLE(2, 0, 2, 0));
|
||||
const __m128 vec_sxd_1 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654,
|
||||
_MM_SHUFFLE(3, 1, 3, 1));
|
||||
__m128 vec_cohde = _mm_mul_ps(vec_sde_0, vec_sde_0);
|
||||
__m128 vec_cohxd = _mm_mul_ps(vec_sxd_0, vec_sxd_0);
|
||||
vec_cohde = _mm_add_ps(vec_cohde, _mm_mul_ps(vec_sde_1, vec_sde_1));
|
||||
vec_cohde = _mm_div_ps(vec_cohde, vec_sdse);
|
||||
vec_cohxd = _mm_add_ps(vec_cohxd, _mm_mul_ps(vec_sxd_1, vec_sxd_1));
|
||||
vec_cohxd = _mm_div_ps(vec_cohxd, vec_sdsx);
|
||||
_mm_storeu_ps(&cohde[i], vec_cohde);
|
||||
_mm_storeu_ps(&cohxd[i], vec_cohxd);
|
||||
}
|
||||
|
||||
// scalar code for the remaining items.
|
||||
for (; i < PART_LEN1; i++) {
|
||||
cohde[i] =
|
||||
(aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
|
||||
(aec->sd[i] * aec->se[i] + 1e-10f);
|
||||
cohxd[i] =
|
||||
(aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
|
||||
(aec->sx[i] * aec->sd[i] + 1e-10f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WebRtcAec_InitAec_SSE2(void) {
|
||||
WebRtcAec_FilterFar = FilterFarSSE2;
|
||||
WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
|
||||
WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
|
||||
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
|
||||
WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
|
||||
}
|
|
@ -0,0 +1,589 @@
|
|||
/*
|
||||
* http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
|
||||
* Copyright Takuya OOURA, 1996-2001
|
||||
*
|
||||
* You may use, copy, modify and distribute this code for any purpose (include
|
||||
* commercial use) and without fee. Please refer to this package when you modify
|
||||
* this code.
|
||||
*
|
||||
* Changes by the WebRTC authors:
|
||||
* - Trivial type modifications.
|
||||
* - Minimal code subset to do rdft of length 128.
|
||||
* - Optimizations because of known length.
|
||||
*
|
||||
* All changes are covered by the WebRTC license and IP grant:
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
// Precomputed constant tables for the 128-point real FFT. They used to be
// filled in at run time; the original initialization code can be seen at:
// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564

// rdft_w[0..31] is read two floats at a time (rdft_w[k], rdft_w[k + 1]) by
// the C butterfly stages; rdft_w + 32 is the coefficient table used by
// rftfsub_128_C() and rftbsub_128_C().
const float rdft_w[64] = {
    /* [ 0] */ 1.0000000000f, 0.0000000000f,
    /* [ 2] */ 0.7071067691f, 0.7071067691f,
    /* [ 4] */ 0.9238795638f, 0.3826834559f,
    /* [ 6] */ 0.3826834559f, 0.9238795638f,
    /* [ 8] */ 0.9807852507f, 0.1950903237f,
    /* [10] */ 0.5555702448f, 0.8314695954f,
    /* [12] */ 0.8314695954f, 0.5555702448f,
    /* [14] */ 0.1950903237f, 0.9807852507f,
    /* [16] */ 0.9951847196f, 0.0980171412f,
    /* [18] */ 0.6343933344f, 0.7730104327f,
    /* [20] */ 0.8819212914f, 0.4713967443f,
    /* [22] */ 0.2902846634f, 0.9569403529f,
    /* [24] */ 0.9569403529f, 0.2902846634f,
    /* [26] */ 0.4713967443f, 0.8819212914f,
    /* [28] */ 0.7730104327f, 0.6343933344f,
    /* [30] */ 0.0980171412f, 0.9951847196f,
    /* [32] */ 0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f,
    /* [36] */ 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
    /* [40] */ 0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f,
    /* [44] */ 0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f,
    /* [48] */ 0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f,
    /* [52] */ 0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f,
    /* [56] */ 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
    /* [60] */ 0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
};

// Third-twiddle (wk3r, wk3i) pairs used by the first half of each butterfly
// group in the C path.
const float rdft_wk3ri_first[16] = {
    /* [ 0] */ 1.000000000f, 0.000000000f,
    /* [ 2] */ 0.382683456f, 0.923879564f,
    /* [ 4] */ 0.831469536f, 0.555570245f,
    /* [ 6] */ -0.195090353f, 0.980785251f,
    /* [ 8] */ 0.956940353f, 0.290284693f,
    /* [10] */ 0.098017156f, 0.995184720f,
    /* [12] */ 0.634393334f, 0.773010492f,
    /* [14] */ -0.471396863f, 0.881921172f,
};

// Third-twiddle (wk3r, wk3i) pairs used by the second half of each butterfly
// group in the C path.
const float rdft_wk3ri_second[16] = {
    /* [ 0] */ -0.707106769f, 0.707106769f,
    /* [ 2] */ -0.923879564f, -0.382683456f,
    /* [ 4] */ -0.980785251f, 0.195090353f,
    /* [ 6] */ -0.555570245f, -0.831469536f,
    /* [ 8] */ -0.881921172f, 0.471396863f,
    /* [10] */ -0.773010492f, -0.634393334f,
    /* [12] */ -0.995184720f, -0.098017156f,
    /* [14] */ -0.290284693f, -0.956940353f,
};
|
||||
ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = {
|
||||
1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f,
|
||||
0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f,
|
||||
0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f,
|
||||
0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f,
|
||||
0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f,
|
||||
0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f,
|
||||
0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f,
|
||||
0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f,
|
||||
};
|
||||
ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = {
|
||||
1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f,
|
||||
0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f,
|
||||
0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f,
|
||||
0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
|
||||
0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f,
|
||||
0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f,
|
||||
0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f,
|
||||
0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f,
|
||||
};
|
||||
ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = {
|
||||
1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f,
|
||||
0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
|
||||
0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f,
|
||||
-0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f,
|
||||
0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f,
|
||||
0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f,
|
||||
0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f,
|
||||
-0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f,
|
||||
};
|
||||
ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = {
|
||||
-0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
|
||||
-0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
|
||||
-0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f,
|
||||
-0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f,
|
||||
-0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f,
|
||||
-0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f,
|
||||
-0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f,
|
||||
-0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f,
|
||||
};
|
||||
ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = {
|
||||
-0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f,
|
||||
-0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f,
|
||||
-0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
|
||||
-0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f,
|
||||
-0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f,
|
||||
-0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f,
|
||||
-0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f,
|
||||
-0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f,
|
||||
};
|
||||
ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = {
|
||||
-0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
|
||||
-0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f,
|
||||
-0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f,
|
||||
-0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f,
|
||||
-0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f,
|
||||
-0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f,
|
||||
-0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f,
|
||||
-0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f,
|
||||
};
|
||||
ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = {
|
||||
0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f,
|
||||
};
|
||||
|
||||
// Exchanges the two-float (complex) elements that start at a[p] and a[q].
static void bitrv2_128_swap_pair(float* a, unsigned int p, unsigned int q) {
  const float p_re = a[p + 0];
  const float p_im = a[p + 1];
  const float q_re = a[q + 0];
  const float q_im = a[q + 1];
  a[p + 0] = q_re;
  a[p + 1] = q_im;
  a[q + 0] = p_re;
  a[q + 1] = p_im;
}

// Reordering pass of the 128-float transform: permutes the complex elements
// of `a` in place through a fixed sequence of pairwise swaps.
//
// Variants that were tried and found to be no faster:
//   (a) keeping the swap indexes in a LUT (the index arithmetic is 'free'
//       while waiting on memory/L1);
//   (b) moving two consecutive floats as one 64 bit integer (execution is
//       memory/L1 bound);
//   (c) mixing floats and 64 bit integers to maximize register utilization
//       (execution is memory/L1 bound);
//   (d) replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5);
//   (e) hard-coding the offsets to eliminate index calculations entirely.
static void bitrv2_128_C(float* a) {
  static const int ip[4] = {0, 64, 32, 96};
  unsigned int row, col;

  for (row = 0; row < 4; row++) {
    // Off-diagonal (col < row): four swaps per (row, col) combination.
    for (col = 0; col < row; col++) {
      const unsigned int p = 2 * col + ip[row];
      const unsigned int q = 2 * row + ip[col];
      bitrv2_128_swap_pair(a, p, q);
      bitrv2_128_swap_pair(a, p + 8, q + 16);
      bitrv2_128_swap_pair(a, p + 16, q + 8);
      bitrv2_128_swap_pair(a, p + 24, q + 24);
    }
    // Diagonal: a single swap per row.
    {
      const unsigned int p = 2 * row + 8 + ip[row];
      bitrv2_128_swap_pair(a, p, p + 8);
    }
  }
}
|
||||
|
||||
static void cft1st_128_C(float* a) {
|
||||
const int n = 128;
|
||||
int j, k1, k2;
|
||||
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
|
||||
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
||||
|
||||
// The processing of the first set of elements was simplified in C to avoid
|
||||
// some operations (multiplication by zero or one, addition of two elements
|
||||
// multiplied by the same weight, ...).
|
||||
x0r = a[0] + a[2];
|
||||
x0i = a[1] + a[3];
|
||||
x1r = a[0] - a[2];
|
||||
x1i = a[1] - a[3];
|
||||
x2r = a[4] + a[6];
|
||||
x2i = a[5] + a[7];
|
||||
x3r = a[4] - a[6];
|
||||
x3i = a[5] - a[7];
|
||||
a[0] = x0r + x2r;
|
||||
a[1] = x0i + x2i;
|
||||
a[4] = x0r - x2r;
|
||||
a[5] = x0i - x2i;
|
||||
a[2] = x1r - x3i;
|
||||
a[3] = x1i + x3r;
|
||||
a[6] = x1r + x3i;
|
||||
a[7] = x1i - x3r;
|
||||
wk1r = rdft_w[2];
|
||||
x0r = a[8] + a[10];
|
||||
x0i = a[9] + a[11];
|
||||
x1r = a[8] - a[10];
|
||||
x1i = a[9] - a[11];
|
||||
x2r = a[12] + a[14];
|
||||
x2i = a[13] + a[15];
|
||||
x3r = a[12] - a[14];
|
||||
x3i = a[13] - a[15];
|
||||
a[8] = x0r + x2r;
|
||||
a[9] = x0i + x2i;
|
||||
a[12] = x2i - x0i;
|
||||
a[13] = x0r - x2r;
|
||||
x0r = x1r - x3i;
|
||||
x0i = x1i + x3r;
|
||||
a[10] = wk1r * (x0r - x0i);
|
||||
a[11] = wk1r * (x0r + x0i);
|
||||
x0r = x3i + x1r;
|
||||
x0i = x3r - x1i;
|
||||
a[14] = wk1r * (x0i - x0r);
|
||||
a[15] = wk1r * (x0i + x0r);
|
||||
k1 = 0;
|
||||
for (j = 16; j < n; j += 16) {
|
||||
k1 += 2;
|
||||
k2 = 2 * k1;
|
||||
wk2r = rdft_w[k1 + 0];
|
||||
wk2i = rdft_w[k1 + 1];
|
||||
wk1r = rdft_w[k2 + 0];
|
||||
wk1i = rdft_w[k2 + 1];
|
||||
wk3r = rdft_wk3ri_first[k1 + 0];
|
||||
wk3i = rdft_wk3ri_first[k1 + 1];
|
||||
x0r = a[j + 0] + a[j + 2];
|
||||
x0i = a[j + 1] + a[j + 3];
|
||||
x1r = a[j + 0] - a[j + 2];
|
||||
x1i = a[j + 1] - a[j + 3];
|
||||
x2r = a[j + 4] + a[j + 6];
|
||||
x2i = a[j + 5] + a[j + 7];
|
||||
x3r = a[j + 4] - a[j + 6];
|
||||
x3i = a[j + 5] - a[j + 7];
|
||||
a[j + 0] = x0r + x2r;
|
||||
a[j + 1] = x0i + x2i;
|
||||
x0r -= x2r;
|
||||
x0i -= x2i;
|
||||
a[j + 4] = wk2r * x0r - wk2i * x0i;
|
||||
a[j + 5] = wk2r * x0i + wk2i * x0r;
|
||||
x0r = x1r - x3i;
|
||||
x0i = x1i + x3r;
|
||||
a[j + 2] = wk1r * x0r - wk1i * x0i;
|
||||
a[j + 3] = wk1r * x0i + wk1i * x0r;
|
||||
x0r = x1r + x3i;
|
||||
x0i = x1i - x3r;
|
||||
a[j + 6] = wk3r * x0r - wk3i * x0i;
|
||||
a[j + 7] = wk3r * x0i + wk3i * x0r;
|
||||
wk1r = rdft_w[k2 + 2];
|
||||
wk1i = rdft_w[k2 + 3];
|
||||
wk3r = rdft_wk3ri_second[k1 + 0];
|
||||
wk3i = rdft_wk3ri_second[k1 + 1];
|
||||
x0r = a[j + 8] + a[j + 10];
|
||||
x0i = a[j + 9] + a[j + 11];
|
||||
x1r = a[j + 8] - a[j + 10];
|
||||
x1i = a[j + 9] - a[j + 11];
|
||||
x2r = a[j + 12] + a[j + 14];
|
||||
x2i = a[j + 13] + a[j + 15];
|
||||
x3r = a[j + 12] - a[j + 14];
|
||||
x3i = a[j + 13] - a[j + 15];
|
||||
a[j + 8] = x0r + x2r;
|
||||
a[j + 9] = x0i + x2i;
|
||||
x0r -= x2r;
|
||||
x0i -= x2i;
|
||||
a[j + 12] = -wk2i * x0r - wk2r * x0i;
|
||||
a[j + 13] = -wk2i * x0i + wk2r * x0r;
|
||||
x0r = x1r - x3i;
|
||||
x0i = x1i + x3r;
|
||||
a[j + 10] = wk1r * x0r - wk1i * x0i;
|
||||
a[j + 11] = wk1r * x0i + wk1i * x0r;
|
||||
x0r = x1r + x3i;
|
||||
x0i = x1i - x3r;
|
||||
a[j + 14] = wk3r * x0r - wk3i * x0i;
|
||||
a[j + 15] = wk3r * x0i + wk3i * x0r;
|
||||
}
|
||||
}
|
||||
|
||||
static void cftmdl_128_C(float* a) {
|
||||
const int l = 8;
|
||||
const int n = 128;
|
||||
const int m = 32;
|
||||
int j0, j1, j2, j3, k, k1, k2, m2;
|
||||
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
|
||||
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
||||
|
||||
for (j0 = 0; j0 < l; j0 += 2) {
|
||||
j1 = j0 + 8;
|
||||
j2 = j0 + 16;
|
||||
j3 = j0 + 24;
|
||||
x0r = a[j0 + 0] + a[j1 + 0];
|
||||
x0i = a[j0 + 1] + a[j1 + 1];
|
||||
x1r = a[j0 + 0] - a[j1 + 0];
|
||||
x1i = a[j0 + 1] - a[j1 + 1];
|
||||
x2r = a[j2 + 0] + a[j3 + 0];
|
||||
x2i = a[j2 + 1] + a[j3 + 1];
|
||||
x3r = a[j2 + 0] - a[j3 + 0];
|
||||
x3i = a[j2 + 1] - a[j3 + 1];
|
||||
a[j0 + 0] = x0r + x2r;
|
||||
a[j0 + 1] = x0i + x2i;
|
||||
a[j2 + 0] = x0r - x2r;
|
||||
a[j2 + 1] = x0i - x2i;
|
||||
a[j1 + 0] = x1r - x3i;
|
||||
a[j1 + 1] = x1i + x3r;
|
||||
a[j3 + 0] = x1r + x3i;
|
||||
a[j3 + 1] = x1i - x3r;
|
||||
}
|
||||
wk1r = rdft_w[2];
|
||||
for (j0 = m; j0 < l + m; j0 += 2) {
|
||||
j1 = j0 + 8;
|
||||
j2 = j0 + 16;
|
||||
j3 = j0 + 24;
|
||||
x0r = a[j0 + 0] + a[j1 + 0];
|
||||
x0i = a[j0 + 1] + a[j1 + 1];
|
||||
x1r = a[j0 + 0] - a[j1 + 0];
|
||||
x1i = a[j0 + 1] - a[j1 + 1];
|
||||
x2r = a[j2 + 0] + a[j3 + 0];
|
||||
x2i = a[j2 + 1] + a[j3 + 1];
|
||||
x3r = a[j2 + 0] - a[j3 + 0];
|
||||
x3i = a[j2 + 1] - a[j3 + 1];
|
||||
a[j0 + 0] = x0r + x2r;
|
||||
a[j0 + 1] = x0i + x2i;
|
||||
a[j2 + 0] = x2i - x0i;
|
||||
a[j2 + 1] = x0r - x2r;
|
||||
x0r = x1r - x3i;
|
||||
x0i = x1i + x3r;
|
||||
a[j1 + 0] = wk1r * (x0r - x0i);
|
||||
a[j1 + 1] = wk1r * (x0r + x0i);
|
||||
x0r = x3i + x1r;
|
||||
x0i = x3r - x1i;
|
||||
a[j3 + 0] = wk1r * (x0i - x0r);
|
||||
a[j3 + 1] = wk1r * (x0i + x0r);
|
||||
}
|
||||
k1 = 0;
|
||||
m2 = 2 * m;
|
||||
for (k = m2; k < n; k += m2) {
|
||||
k1 += 2;
|
||||
k2 = 2 * k1;
|
||||
wk2r = rdft_w[k1 + 0];
|
||||
wk2i = rdft_w[k1 + 1];
|
||||
wk1r = rdft_w[k2 + 0];
|
||||
wk1i = rdft_w[k2 + 1];
|
||||
wk3r = rdft_wk3ri_first[k1 + 0];
|
||||
wk3i = rdft_wk3ri_first[k1 + 1];
|
||||
for (j0 = k; j0 < l + k; j0 += 2) {
|
||||
j1 = j0 + 8;
|
||||
j2 = j0 + 16;
|
||||
j3 = j0 + 24;
|
||||
x0r = a[j0 + 0] + a[j1 + 0];
|
||||
x0i = a[j0 + 1] + a[j1 + 1];
|
||||
x1r = a[j0 + 0] - a[j1 + 0];
|
||||
x1i = a[j0 + 1] - a[j1 + 1];
|
||||
x2r = a[j2 + 0] + a[j3 + 0];
|
||||
x2i = a[j2 + 1] + a[j3 + 1];
|
||||
x3r = a[j2 + 0] - a[j3 + 0];
|
||||
x3i = a[j2 + 1] - a[j3 + 1];
|
||||
a[j0 + 0] = x0r + x2r;
|
||||
a[j0 + 1] = x0i + x2i;
|
||||
x0r -= x2r;
|
||||
x0i -= x2i;
|
||||
a[j2 + 0] = wk2r * x0r - wk2i * x0i;
|
||||
a[j2 + 1] = wk2r * x0i + wk2i * x0r;
|
||||
x0r = x1r - x3i;
|
||||
x0i = x1i + x3r;
|
||||
a[j1 + 0] = wk1r * x0r - wk1i * x0i;
|
||||
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
|
||||
x0r = x1r + x3i;
|
||||
x0i = x1i - x3r;
|
||||
a[j3 + 0] = wk3r * x0r - wk3i * x0i;
|
||||
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
|
||||
}
|
||||
wk1r = rdft_w[k2 + 2];
|
||||
wk1i = rdft_w[k2 + 3];
|
||||
wk3r = rdft_wk3ri_second[k1 + 0];
|
||||
wk3i = rdft_wk3ri_second[k1 + 1];
|
||||
for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
|
||||
j1 = j0 + 8;
|
||||
j2 = j0 + 16;
|
||||
j3 = j0 + 24;
|
||||
x0r = a[j0 + 0] + a[j1 + 0];
|
||||
x0i = a[j0 + 1] + a[j1 + 1];
|
||||
x1r = a[j0 + 0] - a[j1 + 0];
|
||||
x1i = a[j0 + 1] - a[j1 + 1];
|
||||
x2r = a[j2 + 0] + a[j3 + 0];
|
||||
x2i = a[j2 + 1] + a[j3 + 1];
|
||||
x3r = a[j2 + 0] - a[j3 + 0];
|
||||
x3i = a[j2 + 1] - a[j3 + 1];
|
||||
a[j0 + 0] = x0r + x2r;
|
||||
a[j0 + 1] = x0i + x2i;
|
||||
x0r -= x2r;
|
||||
x0i -= x2i;
|
||||
a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
|
||||
a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
|
||||
x0r = x1r - x3i;
|
||||
x0i = x1i + x3r;
|
||||
a[j1 + 0] = wk1r * x0r - wk1i * x0i;
|
||||
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
|
||||
x0r = x1r + x3i;
|
||||
x0i = x1i - x3r;
|
||||
a[j3 + 0] = wk3r * x0r - wk3i * x0i;
|
||||
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Forward complex FFT driver (portable C path): runs the currently selected
// cft1st_128 and cftmdl_128 stages, then a final twiddle-free butterfly
// across the four 32-float quarters of `a`.
static void cftfsub_128_C(float* a) {
  const int quarter = 32;
  int j;

  cft1st_128(a);
  cftmdl_128(a);

  for (j = 0; j < quarter; j += 2) {
    float* const q0 = a + j;
    float* const q1 = q0 + quarter;
    float* const q2 = q1 + quarter;
    float* const q3 = q2 + quarter;

    const float s0r = q0[0] + q1[0];
    const float s0i = q0[1] + q1[1];
    const float d0r = q0[0] - q1[0];
    const float d0i = q0[1] - q1[1];
    const float s1r = q2[0] + q3[0];
    const float s1i = q2[1] + q3[1];
    const float d1r = q2[0] - q3[0];
    const float d1i = q2[1] - q3[1];

    q0[0] = s0r + s1r;
    q0[1] = s0i + s1i;
    q2[0] = s0r - s1r;
    q2[1] = s0i - s1i;
    q1[0] = d0r - d1i;
    q1[1] = d0i + d1r;
    q3[0] = d0r + d1i;
    q3[1] = d0i - d1r;
  }
}
|
||||
|
||||
// Backward complex FFT driver (portable C path): runs the currently selected
// cft1st_128 and cftmdl_128 stages, then a final butterfly across the four
// 32-float quarters of `a`. Compared with cftfsub_128_C the imaginary parts
// of the first two quarters enter negated and several output signs differ.
static void cftbsub_128_C(float* a) {
  const int quarter = 32;
  int j;

  cft1st_128(a);
  cftmdl_128(a);

  for (j = 0; j < quarter; j += 2) {
    float* const q0 = a + j;
    float* const q1 = q0 + quarter;
    float* const q2 = q1 + quarter;
    float* const q3 = q2 + quarter;

    const float s0r = q0[0] + q1[0];
    const float s0i = -q0[1] - q1[1];
    const float d0r = q0[0] - q1[0];
    const float d0i = -q0[1] + q1[1];
    const float s1r = q2[0] + q3[0];
    const float s1i = q2[1] + q3[1];
    const float d1r = q2[0] - q3[0];
    const float d1i = q2[1] - q3[1];

    q0[0] = s0r + s1r;
    q0[1] = s0i - s1i;
    q2[0] = s0r - s1r;
    q2[1] = s0i + s1i;
    q1[0] = d0r - d1i;
    q1[1] = d0i - d1r;
    q3[0] = d0r + d1i;
    q3[1] = d0i + d1r;
  }
}
|
||||
|
||||
static void rftfsub_128_C(float* a) {
|
||||
const float* c = rdft_w + 32;
|
||||
int j1, j2, k1, k2;
|
||||
float wkr, wki, xr, xi, yr, yi;
|
||||
|
||||
for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
|
||||
k2 = 128 - j2;
|
||||
k1 = 32 - j1;
|
||||
wkr = 0.5f - c[k1];
|
||||
wki = c[j1];
|
||||
xr = a[j2 + 0] - a[k2 + 0];
|
||||
xi = a[j2 + 1] + a[k2 + 1];
|
||||
yr = wkr * xr - wki * xi;
|
||||
yi = wkr * xi + wki * xr;
|
||||
a[j2 + 0] -= yr;
|
||||
a[j2 + 1] -= yi;
|
||||
a[k2 + 0] += yr;
|
||||
a[k2 + 1] -= yi;
|
||||
}
|
||||
}
|
||||
|
||||
static void rftbsub_128_C(float* a) {
|
||||
const float* c = rdft_w + 32;
|
||||
int j1, j2, k1, k2;
|
||||
float wkr, wki, xr, xi, yr, yi;
|
||||
|
||||
a[1] = -a[1];
|
||||
for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
|
||||
k2 = 128 - j2;
|
||||
k1 = 32 - j1;
|
||||
wkr = 0.5f - c[k1];
|
||||
wki = c[j1];
|
||||
xr = a[j2 + 0] - a[k2 + 0];
|
||||
xi = a[j2 + 1] + a[k2 + 1];
|
||||
yr = wkr * xr + wki * xi;
|
||||
yi = wkr * xi - wki * xr;
|
||||
a[j2 + 0] = a[j2 + 0] - yr;
|
||||
a[j2 + 1] = yi - a[j2 + 1];
|
||||
a[k2 + 0] = yr + a[k2 + 0];
|
||||
a[k2 + 1] = yi - a[k2 + 1];
|
||||
}
|
||||
a[65] = -a[65];
|
||||
}
|
||||
|
||||
// In-place forward real FFT of the 128 floats in `a`, dispatched through the
// currently selected stage function pointers. After the stages have run,
// a[0] and a[1] are replaced by their sum and difference respectively.
void aec_rdft_forward_128(float* a) {
  bitrv2_128(a);
  cftfsub_128(a);
  rftfsub_128(a);
  {
    const float sum = a[0] + a[1];
    const float diff = a[0] - a[1];
    a[0] = sum;
    a[1] = diff;
  }
}
|
||||
|
||||
// In-place inverse real FFT of the 128 floats in `a`, dispatched through the
// currently selected stage function pointers. Before the stages run, a[1]
// becomes half the difference of a[0] and a[1], and that value is then
// subtracted from a[0].
void aec_rdft_inverse_128(float* a) {
  const float half_diff = 0.5f * (a[0] - a[1]);
  a[1] = half_diff;
  a[0] = a[0] - half_diff;

  rftbsub_128(a);
  bitrv2_128(a);
  cftbsub_128(a);
}
|
||||
|
||||
// code path selection
|
||||
RftSub128 cft1st_128;
|
||||
RftSub128 cftmdl_128;
|
||||
RftSub128 rftfsub_128;
|
||||
RftSub128 rftbsub_128;
|
||||
RftSub128 cftfsub_128;
|
||||
RftSub128 cftbsub_128;
|
||||
RftSub128 bitrv2_128;
|
||||
|
||||
void aec_rdft_init(void) {
|
||||
cft1st_128 = cft1st_128_C;
|
||||
cftmdl_128 = cftmdl_128_C;
|
||||
rftfsub_128 = rftfsub_128_C;
|
||||
rftbsub_128 = rftbsub_128_C;
|
||||
cftfsub_128 = cftfsub_128_C;
|
||||
cftbsub_128 = cftbsub_128_C;
|
||||
bitrv2_128 = bitrv2_128_C;
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
if (WebRtc_GetCPUInfo(kSSE2)) {
|
||||
aec_rdft_init_sse2();
|
||||
}
|
||||
#endif
|
||||
#if defined(MIPS_FPU_LE)
|
||||
aec_rdft_init_mips();
|
||||
#endif
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
aec_rdft_init_neon();
|
||||
#elif defined(WEBRTC_DETECT_NEON)
|
||||
if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
|
||||
aec_rdft_init_neon();
|
||||
}
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
|
||||
|
||||
#include "webrtc/modules/audio_processing/aec/aec_common.h"
|
||||
|
||||
// Compatibility shims: the two cast intrinsics below were unavailable before
// VS 2008, so they are supplied here for older MSVC versions by
// reinterpreting the vector register type through a pointer cast.
// TODO(andrew): move to a common file.
#if defined(_MSC_VER) && _MSC_VER < 1500
#include <emmintrin.h>

static __inline __m128 _mm_castsi128_ps(__m128i a) {
  __m128* const as_ps = (__m128*)&a;
  return *as_ps;
}

static __inline __m128i _mm_castps_si128(__m128 a) {
  __m128i* const as_si = (__m128i*)&a;
  return *as_si;
}
#endif
|
||||
|
||||
// Constants shared by all paths (C, SSE2, NEON).
|
||||
extern const float rdft_w[64];
|
||||
// Constants used by the C path.
|
||||
extern const float rdft_wk3ri_first[16];
|
||||
extern const float rdft_wk3ri_second[16];
|
||||
// Constants used by SSE2 and NEON but initialized in the C path.
|
||||
extern ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32];
|
||||
extern ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32];
|
||||
extern ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32];
|
||||
extern ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32];
|
||||
extern ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32];
|
||||
extern ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32];
|
||||
extern ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4];
|
||||
|
||||
// code path selection function pointers
|
||||
typedef void (*RftSub128)(float* a);
|
||||
extern RftSub128 rftfsub_128;
|
||||
extern RftSub128 rftbsub_128;
|
||||
extern RftSub128 cft1st_128;
|
||||
extern RftSub128 cftmdl_128;
|
||||
extern RftSub128 cftfsub_128;
|
||||
extern RftSub128 cftbsub_128;
|
||||
extern RftSub128 bitrv2_128;
|
||||
|
||||
// entry points
|
||||
void aec_rdft_init(void);
|
||||
void aec_rdft_init_sse2(void);
|
||||
void aec_rdft_forward_128(float* a);
|
||||
void aec_rdft_inverse_128(float* a);
|
||||
|
||||
#if defined(MIPS_FPU_LE)
|
||||
void aec_rdft_init_mips(void);
|
||||
#endif
|
||||
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
|
||||
void aec_rdft_init_neon(void);
|
||||
#endif
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
|
1187
third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c
vendored
Normal file
1187
third_party/webrtc/src/webrtc/modules/audio_processing/aec/aec_rdft_mips.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,355 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The rdft AEC algorithm, neon version of speed-critical functions.
|
||||
*
|
||||
* Based on the sse2 version.
|
||||
*/
|
||||
|
||||
|
||||
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
static const ALIGN16_BEG float ALIGN16_END
|
||||
k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
|
||||
|
||||
static void cft1st_128_neon(float* a) {
|
||||
const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
|
||||
int j, k2;
|
||||
|
||||
for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
|
||||
float32x4_t a00v = vld1q_f32(&a[j + 0]);
|
||||
float32x4_t a04v = vld1q_f32(&a[j + 4]);
|
||||
float32x4_t a08v = vld1q_f32(&a[j + 8]);
|
||||
float32x4_t a12v = vld1q_f32(&a[j + 12]);
|
||||
float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v));
|
||||
float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v));
|
||||
float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v));
|
||||
float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v));
|
||||
const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]);
|
||||
const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]);
|
||||
const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]);
|
||||
const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]);
|
||||
const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]);
|
||||
const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]);
|
||||
float32x4_t x0v = vaddq_f32(a01v, a23v);
|
||||
const float32x4_t x1v = vsubq_f32(a01v, a23v);
|
||||
const float32x4_t x2v = vaddq_f32(a45v, a67v);
|
||||
const float32x4_t x3v = vsubq_f32(a45v, a67v);
|
||||
const float32x4_t x3w = vrev64q_f32(x3v);
|
||||
float32x4_t x0w;
|
||||
a01v = vaddq_f32(x0v, x2v);
|
||||
x0v = vsubq_f32(x0v, x2v);
|
||||
x0w = vrev64q_f32(x0v);
|
||||
a45v = vmulq_f32(wk2rv, x0v);
|
||||
a45v = vmlaq_f32(a45v, wk2iv, x0w);
|
||||
x0v = vmlaq_f32(x1v, x3w, vec_swap_sign);
|
||||
x0w = vrev64q_f32(x0v);
|
||||
a23v = vmulq_f32(wk1rv, x0v);
|
||||
a23v = vmlaq_f32(a23v, wk1iv, x0w);
|
||||
x0v = vmlsq_f32(x1v, x3w, vec_swap_sign);
|
||||
x0w = vrev64q_f32(x0v);
|
||||
a67v = vmulq_f32(wk3rv, x0v);
|
||||
a67v = vmlaq_f32(a67v, wk3iv, x0w);
|
||||
a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v));
|
||||
a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v));
|
||||
a08v = vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v));
|
||||
a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v));
|
||||
vst1q_f32(&a[j + 0], a00v);
|
||||
vst1q_f32(&a[j + 4], a04v);
|
||||
vst1q_f32(&a[j + 8], a08v);
|
||||
vst1q_f32(&a[j + 12], a12v);
|
||||
}
|
||||
}
|
||||
|
||||
// NEON implementation of cftmdl_128 (installed by aec_rdft_init_neon).
//
// Works in place on the 128-float array |a|, which holds interleaved
// (real, imaginary) pairs. The array is processed as two halves of 64 floats.
// Inside a half, every loop iteration reads the float pairs located at offsets
// 0, 8, 16, 24, 32, 40, 48 and 56 from the loop index, combines them, and
// writes the results back to exactly those offsets.
static void cftmdl_128_neon(float* a) {
  const int span = 8;
  // Per-lane multipliers from k_swap_sign (defined elsewhere in this file).
  const float32x4_t sign_flip = vld1q_f32((float32_t*)k_swap_sign);
  const float32x4_t w1r_lo = vld1q_f32(cftmdl_wk1r);
  int pos;

  // Lower half, a[0..63]: cftmdl_wk1r is the only multiplier table used.
  for (pos = 0; pos < span; pos += 2) {
    // Each vector packs the pair at |pos + n| (low lanes) together with the
    // pair at |pos + n + 32| (high lanes), so two groups run side by side.
    const float32x4_t in0 =
        vcombine_f32(vld1_f32(&a[pos + 0]), vld1_f32(&a[pos + 32]));
    const float32x4_t in1 =
        vcombine_f32(vld1_f32(&a[pos + 8]), vld1_f32(&a[pos + 40]));
    const float32x4_t in2 =
        vcombine_f32(vld1_f32(&a[pos + 16]), vld1_f32(&a[pos + 48]));
    const float32x4_t in3 =
        vcombine_f32(vld1_f32(&a[pos + 24]), vld1_f32(&a[pos + 56]));
    const float32x4_t sum01 = vaddq_f32(in0, in1);
    const float32x4_t dif01 = vsubq_f32(in0, in1);
    const float32x4_t sum23 = vaddq_f32(in2, in3);
    const float32x4_t dif23 = vsubq_f32(in2, in3);
    const float32x4_t out_sum = vaddq_f32(sum01, sum23);
    const float32x4_t out_dif = vsubq_f32(sum01, sum23);
    // dif23 with the two floats of every pair exchanged; the sign_flip
    // scaling is folded into the multiply-accumulate / multiply-subtract.
    const float32x4_t dif23_sw = vrev64q_f32(dif23);
    const float32x4_t rot_add = vmlaq_f32(dif01, sign_flip, dif23_sw);
    const float32x4_t rot_sub = vmlsq_f32(dif01, sign_flip, dif23_sw);
    // Only the high halves (the "+32" group) go through the extra mixing
    // step and the cftmdl_wk1r scaling.
    const float32x2_t add_hi = vget_high_f32(rot_add);
    const float32x2_t sub_hi = vget_high_f32(rot_sub);
    const float32x4_t lane0 =
        vcombine_f32(vdup_lane_f32(add_hi, 0), vdup_lane_f32(sub_hi, 0));
    const float32x4_t lane1 =
        vcombine_f32(vdup_lane_f32(add_hi, 1), vdup_lane_f32(sub_hi, 1));
    const float32x4_t scaled =
        vmulq_f32(w1r_lo, vmlaq_f32(lane0, sign_flip, lane1));

    vst1_f32(&a[pos + 0], vget_low_f32(out_sum));
    vst1_f32(&a[pos + 8], vget_low_f32(rot_add));
    vst1_f32(&a[pos + 16], vget_low_f32(out_dif));
    vst1_f32(&a[pos + 24], vget_low_f32(rot_sub));
    vst1_f32(&a[pos + 32], vget_high_f32(out_sum));
    vst1_f32(&a[pos + 40], vget_low_f32(scaled));
    // The high pair of out_dif is stored with its two floats exchanged and
    // the first of them negated afterwards.
    vst1_f32(&a[pos + 48], vget_high_f32(vrev64q_f32(out_dif)));
    a[pos + 48] = -a[pos + 48];
    vst1_f32(&a[pos + 56], vget_high_f32(vrev64q_f32(scaled)));
  }

  // Upper half, a[64..127]: three multiplier pairs taken from the rdft_wk*
  // tables at index 4.
  {
    const int base = 64;
    const int tw = 2 * 2;
    const float32x4_t w1r = vld1q_f32(&rdft_wk1r[tw + 0]);
    const float32x4_t w1i = vld1q_f32(&rdft_wk1i[tw + 0]);
    const float32x4_t w2r = vld1q_f32(&rdft_wk2r[tw + 0]);
    const float32x4_t w2i = vld1q_f32(&rdft_wk2i[tw + 0]);
    const float32x4_t w3r = vld1q_f32(&rdft_wk3r[tw + 0]);
    const float32x4_t w3i = vld1q_f32(&rdft_wk3i[tw + 0]);

    for (pos = base; pos < span + base; pos += 2) {
      const float32x4_t in0 =
          vcombine_f32(vld1_f32(&a[pos + 0]), vld1_f32(&a[pos + 32]));
      const float32x4_t in1 =
          vcombine_f32(vld1_f32(&a[pos + 8]), vld1_f32(&a[pos + 40]));
      const float32x4_t in2 =
          vcombine_f32(vld1_f32(&a[pos + 16]), vld1_f32(&a[pos + 48]));
      const float32x4_t in3 =
          vcombine_f32(vld1_f32(&a[pos + 24]), vld1_f32(&a[pos + 56]));
      const float32x4_t sum01 = vaddq_f32(in0, in1);
      const float32x4_t dif01 = vsubq_f32(in0, in1);
      const float32x4_t sum23 = vaddq_f32(in2, in3);
      const float32x4_t dif23 = vsubq_f32(in2, in3);
      const float32x4_t out_sum = vaddq_f32(sum01, sum23);
      const float32x4_t mid = vsubq_f32(sum01, sum23);
      const float32x4_t dif23_sw = vrev64q_f32(dif23);
      const float32x4_t rot_add = vmlaq_f32(dif01, sign_flip, dif23_sw);
      const float32x4_t rot_sub = vmlsq_f32(dif01, sign_flip, dif23_sw);
      // result = wr * x + wi * swap(x) for each of the three multiplier pairs.
      const float32x4_t out_w2 =
          vmlaq_f32(vmulq_f32(w2r, mid), w2i, vrev64q_f32(mid));
      const float32x4_t out_w1 =
          vmlaq_f32(vmulq_f32(w1r, rot_add), w1i, vrev64q_f32(rot_add));
      const float32x4_t out_w3 =
          vmlaq_f32(vmulq_f32(w3r, rot_sub), w3i, vrev64q_f32(rot_sub));

      vst1_f32(&a[pos + 0], vget_low_f32(out_sum));
      vst1_f32(&a[pos + 8], vget_low_f32(out_w1));
      vst1_f32(&a[pos + 16], vget_low_f32(out_w2));
      vst1_f32(&a[pos + 24], vget_low_f32(out_w3));
      vst1_f32(&a[pos + 32], vget_high_f32(out_sum));
      vst1_f32(&a[pos + 40], vget_high_f32(out_w1));
      vst1_f32(&a[pos + 48], vget_high_f32(out_w2));
      vst1_f32(&a[pos + 56], vget_high_f32(out_w3));
    }
  }
}
|
||||
|
||||
// Returns |in| with its four lanes in reverse order: (A, B, C, D) becomes
// (D, C, B, A).
__inline static float32x4_t reverse_order_f32x4(float32x4_t in) {
  const float32x2_t upper = vget_high_f32(in);  // C D
  const float32x2_t lower = vget_low_f32(in);   // A B
  // (C, D, A, B), then swap the floats inside each 64-bit half.
  return vrev64q_f32(vcombine_f32(upper, lower));
}
|
||||
|
||||
// NEON implementation of rftfsub_128 (installed by aec_rdft_init_neon).
//
// Updates the 128-float array |a| in place. Every float pair at index j2
// (2, 4, ..., 62) is combined with the pair at k2 = 128 - j2 using weights
// read from rdft_w + 32:
//   wkr = 0.5 - c[32 - j1],  wki = c[j1]          (j1 = j2 / 2)
//   xr  = a[j2] - a[k2],     xi  = a[j2 + 1] + a[k2 + 1]
//   yr  = wkr * xr - wki * xi
//   yi  = wkr * xi + wki * xr
//   a[j2] -= yr;  a[j2 + 1] -= yi;  a[k2] += yr;  a[k2 + 1] -= yi
// The first loop handles four pairs per iteration, the second loop finishes
// the remaining ones one at a time.
static void rftfsub_128_neon(float* a) {
  const float* c = rdft_w + 32;
  const float32x4_t half = vdupq_n_f32(0.5f);
  int w_idx;
  int lo_idx;

  // Index notes below refer to the first iteration (w_idx = 1, lo_idx = 2).
  for (w_idx = 1, lo_idx = 2; lo_idx + 7 < 64; w_idx += 4, lo_idx += 8) {
    // Weights: wki from c[1..4]; wkr from 0.5 - c[28..31], reversed so that
    // lane n uses c[31 - n].
    const float32x4_t wki_v = vld1q_f32(&c[w_idx]);
    const float32x4_t wkr_fwd = vsubq_f32(half, vld1q_f32(&c[29 - w_idx]));
    const float32x4_t wkr_v = reverse_order_f32x4(wkr_fwd);
    // De-interleaving loads: val[0] = a[2, 4, 6, 8], val[1] = a[3, 5, 7, 9].
    float32x4x2_t lo_pairs = vld2q_f32(&a[0 + lo_idx]);
    // val[0] = a[120, 122, 124, 126], val[1] = a[121, 123, 125, 127].
    const float32x4x2_t hi_pairs = vld2q_f32(&a[122 - lo_idx]);
    // Reverse so lane n of the high side mirrors lane n of the low side:
    // a[126, 124, 122, 120] and a[127, 125, 123, 121].
    const float32x4_t hi_even = reverse_order_f32x4(hi_pairs.val[0]);
    const float32x4_t hi_odd = reverse_order_f32x4(hi_pairs.val[1]);
    const float32x4_t xr_v = vsubq_f32(lo_pairs.val[0], hi_even);
    const float32x4_t xi_v = vaddq_f32(lo_pairs.val[1], hi_odd);
    const float32x4_t prod_rr = vmulq_f32(wkr_v, xr_v);
    const float32x4_t prod_ii = vmulq_f32(wki_v, xi_v);
    const float32x4_t prod_ri = vmulq_f32(wkr_v, xi_v);
    const float32x4_t prod_ir = vmulq_f32(wki_v, xr_v);
    const float32x4_t yr_v = vsubq_f32(prod_rr, prod_ii);
    const float32x4_t yi_v = vaddq_f32(prod_ri, prod_ir);
    // New high-side values, still in mirrored lane order.
    const float32x4_t hi_even_new = vaddq_f32(hi_even, yr_v);
    const float32x4_t hi_odd_new = vsubq_f32(hi_odd, yi_v);
    // Swap within 64-bit halves and interleave: val[0] = a[124..127],
    // val[1] = a[120..123].
    const float32x4x2_t hi_out =
        vzipq_f32(vrev64q_f32(hi_even_new), vrev64q_f32(hi_odd_new));

    lo_pairs.val[0] = vsubq_f32(lo_pairs.val[0], yr_v);
    lo_pairs.val[1] = vsubq_f32(lo_pairs.val[1], yi_v);
    // Re-interleaving store to a[2..9].
    vst2q_f32(&a[0 + lo_idx], lo_pairs);

    vst1q_f32(&a[122 - lo_idx], hi_out.val[1]);
    vst1q_f32(&a[126 - lo_idx], hi_out.val[0]);
  }

  // Scalar tail for the pairs the vector loop did not reach.
  for (; lo_idx < 64; w_idx += 1, lo_idx += 2) {
    const int hi_idx = 128 - lo_idx;
    const float wkr = 0.5f - c[32 - w_idx];
    const float wki = c[w_idx];
    const float xr = a[lo_idx + 0] - a[hi_idx + 0];
    const float xi = a[lo_idx + 1] + a[hi_idx + 1];
    const float yr = wkr * xr - wki * xi;
    const float yi = wkr * xi + wki * xr;
    a[lo_idx + 0] -= yr;
    a[lo_idx + 1] -= yi;
    a[hi_idx + 0] += yr;
    a[hi_idx + 1] -= yi;
  }
}
|
||||
|
||||
// NEON implementation of rftbsub_128 (installed by aec_rdft_init_neon).
//
// Updates the 128-float array |a| in place. a[1] is negated first and a[65]
// last. In between, every float pair at index j2 (2, 4, ..., 62) is combined
// with the pair at k2 = 128 - j2 using weights read from rdft_w + 32:
//   wkr = 0.5 - c[32 - j1],  wki = c[j1]          (j1 = j2 / 2)
//   xr  = a[j2] - a[k2],     xi  = a[j2 + 1] + a[k2 + 1]
//   yr  = wkr * xr + wki * xi
//   yi  = wkr * xi - wki * xr
//   a[j2]     = a[j2] - yr;   a[j2 + 1] = yi - a[j2 + 1]
//   a[k2]     = yr + a[k2];   a[k2 + 1] = yi - a[k2 + 1]
// The first loop handles four pairs per iteration, the second loop finishes
// the remaining ones one at a time.
static void rftbsub_128_neon(float* a) {
  const float* c = rdft_w + 32;
  const float32x4_t half = vdupq_n_f32(0.5f);
  int w_idx;
  int lo_idx;

  a[1] = -a[1];

  // Index notes below refer to the first iteration (w_idx = 1, lo_idx = 2).
  for (w_idx = 1, lo_idx = 2; lo_idx + 7 < 64; w_idx += 4, lo_idx += 8) {
    // Weights: wki from c[1..4]; wkr from 0.5 - c[28..31], reversed so that
    // lane n uses c[31 - n].
    const float32x4_t wki_v = vld1q_f32(&c[w_idx]);
    const float32x4_t wkr_fwd = vsubq_f32(half, vld1q_f32(&c[29 - w_idx]));
    const float32x4_t wkr_v = reverse_order_f32x4(wkr_fwd);
    // De-interleaving loads: val[0] = a[2, 4, 6, 8], val[1] = a[3, 5, 7, 9].
    float32x4x2_t lo_pairs = vld2q_f32(&a[0 + lo_idx]);
    // val[0] = a[120, 122, 124, 126], val[1] = a[121, 123, 125, 127].
    const float32x4x2_t hi_pairs = vld2q_f32(&a[122 - lo_idx]);
    // Reverse so lane n of the high side mirrors lane n of the low side:
    // a[126, 124, 122, 120] and a[127, 125, 123, 121].
    const float32x4_t hi_even = reverse_order_f32x4(hi_pairs.val[0]);
    const float32x4_t hi_odd = reverse_order_f32x4(hi_pairs.val[1]);
    const float32x4_t xr_v = vsubq_f32(lo_pairs.val[0], hi_even);
    const float32x4_t xi_v = vaddq_f32(lo_pairs.val[1], hi_odd);
    const float32x4_t prod_rr = vmulq_f32(wkr_v, xr_v);
    const float32x4_t prod_ii = vmulq_f32(wki_v, xi_v);
    const float32x4_t prod_ri = vmulq_f32(wkr_v, xi_v);
    const float32x4_t prod_ir = vmulq_f32(wki_v, xr_v);
    const float32x4_t yr_v = vaddq_f32(prod_rr, prod_ii);
    const float32x4_t yi_v = vsubq_f32(prod_ri, prod_ir);
    // New high-side values, still in mirrored lane order.
    const float32x4_t hi_even_new = vaddq_f32(hi_even, yr_v);
    const float32x4_t hi_odd_new = vsubq_f32(yi_v, hi_odd);
    // Swap within 64-bit halves and interleave: val[0] = a[124..127],
    // val[1] = a[120..123].
    const float32x4x2_t hi_out =
        vzipq_f32(vrev64q_f32(hi_even_new), vrev64q_f32(hi_odd_new));

    lo_pairs.val[0] = vsubq_f32(lo_pairs.val[0], yr_v);
    lo_pairs.val[1] = vsubq_f32(yi_v, lo_pairs.val[1]);
    // Re-interleaving store to a[2..9].
    vst2q_f32(&a[0 + lo_idx], lo_pairs);

    vst1q_f32(&a[122 - lo_idx], hi_out.val[1]);
    vst1q_f32(&a[126 - lo_idx], hi_out.val[0]);
  }

  // Scalar tail for the pairs the vector loop did not reach.
  for (; lo_idx < 64; w_idx += 1, lo_idx += 2) {
    const int hi_idx = 128 - lo_idx;
    const float wkr = 0.5f - c[32 - w_idx];
    const float wki = c[w_idx];
    const float xr = a[lo_idx + 0] - a[hi_idx + 0];
    const float xi = a[lo_idx + 1] + a[hi_idx + 1];
    const float yr = wkr * xr + wki * xi;
    const float yi = wkr * xi - wki * xr;
    a[lo_idx + 0] = a[lo_idx + 0] - yr;
    a[lo_idx + 1] = yi - a[lo_idx + 1];
    a[hi_idx + 0] = yr + a[hi_idx + 0];
    a[hi_idx + 1] = yi - a[hi_idx + 1];
  }

  a[65] = -a[65];
}
|
||||
|
||||
void aec_rdft_init_neon(void) {
|
||||
cft1st_128 = cft1st_128_neon;
|
||||
cftmdl_128 = cftmdl_128_neon;
|
||||
rftfsub_128 = rftfsub_128_neon;
|
||||
rftbsub_128 = rftbsub_128_neon;
|
||||
}
|
||||
|
|
@ -0,0 +1,427 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
// Per-lane multipliers (-1, +1, -1, +1). cft1st_128_SSE2 and cftmdl_128_SSE2
// read it with _mm_load_ps and multiply it into vectors whose adjacent lanes
// have been swapped, negating lanes 0 and 2. The ALIGN16_* macros are defined
// outside this file; presumably they give the 16-byte alignment that
// _mm_load_ps requires -- TODO confirm.
static const ALIGN16_BEG float ALIGN16_END
|
||||
k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
|
||||
|
||||
static void cft1st_128_SSE2(float* a) {
|
||||
const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
|
||||
int j, k2;
|
||||
|
||||
for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
|
||||
__m128 a00v = _mm_loadu_ps(&a[j + 0]);
|
||||
__m128 a04v = _mm_loadu_ps(&a[j + 4]);
|
||||
__m128 a08v = _mm_loadu_ps(&a[j + 8]);
|
||||
__m128 a12v = _mm_loadu_ps(&a[j + 12]);
|
||||
__m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0));
|
||||
__m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2));
|
||||
__m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0));
|
||||
__m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2));
|
||||
|
||||
const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]);
|
||||
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]);
|
||||
const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]);
|
||||
const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]);
|
||||
const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]);
|
||||
const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]);
|
||||
__m128 x0v = _mm_add_ps(a01v, a23v);
|
||||
const __m128 x1v = _mm_sub_ps(a01v, a23v);
|
||||
const __m128 x2v = _mm_add_ps(a45v, a67v);
|
||||
const __m128 x3v = _mm_sub_ps(a45v, a67v);
|
||||
__m128 x0w;
|
||||
a01v = _mm_add_ps(x0v, x2v);
|
||||
x0v = _mm_sub_ps(x0v, x2v);
|
||||
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
{
|
||||
const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
|
||||
const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
|
||||
a45v = _mm_add_ps(a45_0v, a45_1v);
|
||||
}
|
||||
{
|
||||
__m128 a23_0v, a23_1v;
|
||||
const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w);
|
||||
x0v = _mm_add_ps(x1v, x3s);
|
||||
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
a23_0v = _mm_mul_ps(wk1rv, x0v);
|
||||
a23_1v = _mm_mul_ps(wk1iv, x0w);
|
||||
a23v = _mm_add_ps(a23_0v, a23_1v);
|
||||
|
||||
x0v = _mm_sub_ps(x1v, x3s);
|
||||
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
}
|
||||
{
|
||||
const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
|
||||
const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w);
|
||||
a67v = _mm_add_ps(a67_0v, a67_1v);
|
||||
}
|
||||
|
||||
a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0));
|
||||
a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0));
|
||||
a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2));
|
||||
a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2));
|
||||
_mm_storeu_ps(&a[j + 0], a00v);
|
||||
_mm_storeu_ps(&a[j + 4], a04v);
|
||||
_mm_storeu_ps(&a[j + 8], a08v);
|
||||
_mm_storeu_ps(&a[j + 12], a12v);
|
||||
}
|
||||
}
|
||||
|
||||
static void cftmdl_128_SSE2(float* a) {
|
||||
const int l = 8;
|
||||
const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
|
||||
int j0;
|
||||
|
||||
__m128 wk1rv = _mm_load_ps(cftmdl_wk1r);
|
||||
for (j0 = 0; j0 < l; j0 += 2) {
|
||||
const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
|
||||
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
|
||||
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
|
||||
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
|
||||
const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
|
||||
_mm_castsi128_ps(a_32),
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
|
||||
_mm_castsi128_ps(a_40),
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
|
||||
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
|
||||
|
||||
const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
|
||||
const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
|
||||
const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
|
||||
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
|
||||
const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
|
||||
_mm_castsi128_ps(a_48),
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
|
||||
_mm_castsi128_ps(a_56),
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
|
||||
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
|
||||
|
||||
const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
|
||||
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
|
||||
|
||||
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
|
||||
_mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
|
||||
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
|
||||
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
|
||||
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
|
||||
|
||||
const __m128 yy0 =
|
||||
_mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
const __m128 yy1 =
|
||||
_mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1);
|
||||
const __m128 yy3 = _mm_add_ps(yy0, yy2);
|
||||
const __m128 yy4 = _mm_mul_ps(wk1rv, yy3);
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 32],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 48],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3)));
|
||||
a[j0 + 48] = -a[j0 + 48];
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add));
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub));
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 56],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3)));
|
||||
}
|
||||
|
||||
{
|
||||
int k = 64;
|
||||
int k1 = 2;
|
||||
int k2 = 2 * k1;
|
||||
const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]);
|
||||
const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]);
|
||||
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]);
|
||||
const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]);
|
||||
const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]);
|
||||
wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]);
|
||||
for (j0 = k; j0 < l + k; j0 += 2) {
|
||||
const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
|
||||
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
|
||||
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
|
||||
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
|
||||
const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
|
||||
_mm_castsi128_ps(a_32),
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
|
||||
_mm_castsi128_ps(a_40),
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
|
||||
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
|
||||
|
||||
const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
|
||||
const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
|
||||
const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
|
||||
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
|
||||
const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
|
||||
_mm_castsi128_ps(a_48),
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
|
||||
_mm_castsi128_ps(a_56),
|
||||
_MM_SHUFFLE(1, 0, 1, 0));
|
||||
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
|
||||
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
|
||||
|
||||
const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
|
||||
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
|
||||
const __m128 xx2 = _mm_mul_ps(xx1, wk2rv);
|
||||
const __m128 xx3 =
|
||||
_mm_mul_ps(wk2iv,
|
||||
_mm_castsi128_ps(_mm_shuffle_epi32(
|
||||
_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1))));
|
||||
const __m128 xx4 = _mm_add_ps(xx2, xx3);
|
||||
|
||||
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
|
||||
_mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
|
||||
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
|
||||
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
|
||||
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
|
||||
|
||||
const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
|
||||
const __m128 xx11 = _mm_mul_ps(
|
||||
wk1iv,
|
||||
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
|
||||
_MM_SHUFFLE(2, 3, 0, 1))));
|
||||
const __m128 xx12 = _mm_add_ps(xx10, xx11);
|
||||
|
||||
const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
|
||||
const __m128 xx21 = _mm_mul_ps(
|
||||
wk3iv,
|
||||
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
|
||||
_MM_SHUFFLE(2, 3, 0, 1))));
|
||||
const __m128 xx22 = _mm_add_ps(xx20, xx21);
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 32],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 48],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 40],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
|
||||
_mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22));
|
||||
_mm_storel_epi64(
|
||||
(__m128i*)&a[j0 + 56],
|
||||
_mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void rftfsub_128_SSE2(float* a) {
|
||||
const float* c = rdft_w + 32;
|
||||
int j1, j2, k1, k2;
|
||||
float wkr, wki, xr, xi, yr, yi;
|
||||
|
||||
static const ALIGN16_BEG float ALIGN16_END
|
||||
k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
|
||||
const __m128 mm_half = _mm_load_ps(k_half);
|
||||
|
||||
// Vectorized code (four at once).
|
||||
// Note: commented number are indexes for the first iteration of the loop.
|
||||
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
|
||||
// Load 'wk'.
|
||||
const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4,
|
||||
const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31,
|
||||
const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31,
|
||||
const __m128 wkr_ =
|
||||
_mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28,
|
||||
const __m128 wki_ = c_j1; // 1, 2, 3, 4,
|
||||
// Load and shuffle 'a'.
|
||||
const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5,
|
||||
const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9,
|
||||
const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123,
|
||||
const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127,
|
||||
const __m128 a_j2_p0 = _mm_shuffle_ps(
|
||||
a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8,
|
||||
const __m128 a_j2_p1 = _mm_shuffle_ps(
|
||||
a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9,
|
||||
const __m128 a_k2_p0 = _mm_shuffle_ps(
|
||||
a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120,
|
||||
const __m128 a_k2_p1 = _mm_shuffle_ps(
|
||||
a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121,
|
||||
// Calculate 'x'.
|
||||
const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
|
||||
// 2-126, 4-124, 6-122, 8-120,
|
||||
const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
|
||||
// 3-127, 5-125, 7-123, 9-121,
|
||||
// Calculate product into 'y'.
|
||||
// yr = wkr * xr - wki * xi;
|
||||
// yi = wkr * xi + wki * xr;
|
||||
const __m128 a_ = _mm_mul_ps(wkr_, xr_);
|
||||
const __m128 b_ = _mm_mul_ps(wki_, xi_);
|
||||
const __m128 c_ = _mm_mul_ps(wkr_, xi_);
|
||||
const __m128 d_ = _mm_mul_ps(wki_, xr_);
|
||||
const __m128 yr_ = _mm_sub_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120,
|
||||
const __m128 yi_ = _mm_add_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121,
|
||||
// Update 'a'.
|
||||
// a[j2 + 0] -= yr;
|
||||
// a[j2 + 1] -= yi;
|
||||
// a[k2 + 0] += yr;
|
||||
// a[k2 + 1] -= yi;
|
||||
const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8,
|
||||
const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_); // 3, 5, 7, 9,
|
||||
const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_); // 126, 124, 122, 120,
|
||||
const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_); // 127, 125, 123, 121,
|
||||
// Shuffle in right order and store.
|
||||
const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
|
||||
// 2, 3, 4, 5,
|
||||
const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
|
||||
// 6, 7, 8, 9,
|
||||
const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
|
||||
// 122, 123, 120, 121,
|
||||
const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
|
||||
// 126, 127, 124, 125,
|
||||
const __m128 a_k2_0n = _mm_shuffle_ps(
|
||||
a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123,
|
||||
const __m128 a_k2_4n = _mm_shuffle_ps(
|
||||
a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127,
|
||||
_mm_storeu_ps(&a[0 + j2], a_j2_0n);
|
||||
_mm_storeu_ps(&a[4 + j2], a_j2_4n);
|
||||
_mm_storeu_ps(&a[122 - j2], a_k2_0n);
|
||||
_mm_storeu_ps(&a[126 - j2], a_k2_4n);
|
||||
}
|
||||
// Scalar code for the remaining items.
|
||||
for (; j2 < 64; j1 += 1, j2 += 2) {
|
||||
k2 = 128 - j2;
|
||||
k1 = 32 - j1;
|
||||
wkr = 0.5f - c[k1];
|
||||
wki = c[j1];
|
||||
xr = a[j2 + 0] - a[k2 + 0];
|
||||
xi = a[j2 + 1] + a[k2 + 1];
|
||||
yr = wkr * xr - wki * xi;
|
||||
yi = wkr * xi + wki * xr;
|
||||
a[j2 + 0] -= yr;
|
||||
a[j2 + 1] -= yi;
|
||||
a[k2 + 0] += yr;
|
||||
a[k2 + 1] -= yi;
|
||||
}
|
||||
}
|
||||
|
||||
static void rftbsub_128_SSE2(float* a) {
|
||||
const float* c = rdft_w + 32;
|
||||
int j1, j2, k1, k2;
|
||||
float wkr, wki, xr, xi, yr, yi;
|
||||
|
||||
static const ALIGN16_BEG float ALIGN16_END
|
||||
k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
|
||||
const __m128 mm_half = _mm_load_ps(k_half);
|
||||
|
||||
a[1] = -a[1];
|
||||
// Vectorized code (four at once).
|
||||
// Note: commented number are indexes for the first iteration of the loop.
|
||||
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
|
||||
// Load 'wk'.
|
||||
const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4,
|
||||
const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31,
|
||||
const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31,
|
||||
const __m128 wkr_ =
|
||||
_mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28,
|
||||
const __m128 wki_ = c_j1; // 1, 2, 3, 4,
|
||||
// Load and shuffle 'a'.
|
||||
const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5,
|
||||
const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9,
|
||||
const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123,
|
||||
const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127,
|
||||
const __m128 a_j2_p0 = _mm_shuffle_ps(
|
||||
a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8,
|
||||
const __m128 a_j2_p1 = _mm_shuffle_ps(
|
||||
a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9,
|
||||
const __m128 a_k2_p0 = _mm_shuffle_ps(
|
||||
a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120,
|
||||
const __m128 a_k2_p1 = _mm_shuffle_ps(
|
||||
a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121,
|
||||
// Calculate 'x'.
|
||||
const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
|
||||
// 2-126, 4-124, 6-122, 8-120,
|
||||
const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
|
||||
// 3-127, 5-125, 7-123, 9-121,
|
||||
// Calculate product into 'y'.
|
||||
// yr = wkr * xr + wki * xi;
|
||||
// yi = wkr * xi - wki * xr;
|
||||
const __m128 a_ = _mm_mul_ps(wkr_, xr_);
|
||||
const __m128 b_ = _mm_mul_ps(wki_, xi_);
|
||||
const __m128 c_ = _mm_mul_ps(wkr_, xi_);
|
||||
const __m128 d_ = _mm_mul_ps(wki_, xr_);
|
||||
const __m128 yr_ = _mm_add_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120,
|
||||
const __m128 yi_ = _mm_sub_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121,
|
||||
// Update 'a'.
|
||||
// a[j2 + 0] = a[j2 + 0] - yr;
|
||||
// a[j2 + 1] = yi - a[j2 + 1];
|
||||
// a[k2 + 0] = yr + a[k2 + 0];
|
||||
// a[k2 + 1] = yi - a[k2 + 1];
|
||||
const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8,
|
||||
const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1); // 3, 5, 7, 9,
|
||||
const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_); // 126, 124, 122, 120,
|
||||
const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1); // 127, 125, 123, 121,
|
||||
// Shuffle in right order and store.
|
||||
const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
|
||||
// 2, 3, 4, 5,
|
||||
const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
|
||||
// 6, 7, 8, 9,
|
||||
const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
|
||||
// 122, 123, 120, 121,
|
||||
const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
|
||||
// 126, 127, 124, 125,
|
||||
const __m128 a_k2_0n = _mm_shuffle_ps(
|
||||
a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123,
|
||||
const __m128 a_k2_4n = _mm_shuffle_ps(
|
||||
a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127,
|
||||
_mm_storeu_ps(&a[0 + j2], a_j2_0n);
|
||||
_mm_storeu_ps(&a[4 + j2], a_j2_4n);
|
||||
_mm_storeu_ps(&a[122 - j2], a_k2_0n);
|
||||
_mm_storeu_ps(&a[126 - j2], a_k2_4n);
|
||||
}
|
||||
// Scalar code for the remaining items.
|
||||
for (; j2 < 64; j1 += 1, j2 += 2) {
|
||||
k2 = 128 - j2;
|
||||
k1 = 32 - j1;
|
||||
wkr = 0.5f - c[k1];
|
||||
wki = c[j1];
|
||||
xr = a[j2 + 0] - a[k2 + 0];
|
||||
xi = a[j2 + 1] + a[k2 + 1];
|
||||
yr = wkr * xr + wki * xi;
|
||||
yi = wkr * xi - wki * xr;
|
||||
a[j2 + 0] = a[j2 + 0] - yr;
|
||||
a[j2 + 1] = yi - a[j2 + 1];
|
||||
a[k2 + 0] = yr + a[k2 + 0];
|
||||
a[k2 + 1] = yi - a[k2 + 1];
|
||||
}
|
||||
a[65] = -a[65];
|
||||
}
|
||||
|
||||
void aec_rdft_init_sse2(void) {
|
||||
cft1st_128 = cft1st_128_SSE2;
|
||||
cftmdl_128 = cftmdl_128_SSE2;
|
||||
rftfsub_128 = rftfsub_128_SSE2;
|
||||
rftbsub_128 = rftbsub_128_SSE2;
|
||||
}
|
|
@ -0,0 +1,209 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for
|
||||
* clock skew by resampling the farend signal.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/aec/aec_resampler.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core.h"
|
||||
|
||||
// Number of raw skew measurements gathered before the skew is estimated.
enum { kEstimateLengthFrames = 400 };
|
||||
|
||||
typedef struct {
|
||||
float buffer[kResamplerBufferSize];
|
||||
float position;
|
||||
|
||||
int deviceSampleRateHz;
|
||||
int skewData[kEstimateLengthFrames];
|
||||
int skewDataIndex;
|
||||
float skewEstimate;
|
||||
} AecResampler;
|
||||
|
||||
// Estimates the skew from |size| raw measurements in |rawSkew|.
// |deviceSampleRateHz| scales the outlier-rejection limits. The estimate is
// written to |skewEst| (0 on failure). Returns 0 on success and -1 when no
// measurement survives outlier rejection.
static int EstimateSkew(const int* rawSkew,
                        int size,
                        int deviceSampleRateHz,
                        float* skewEst);
|
||||
|
||||
void* WebRtcAec_CreateResampler() {
|
||||
return malloc(sizeof(AecResampler));
|
||||
}
|
||||
|
||||
int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz) {
|
||||
AecResampler* obj = (AecResampler*)resampInst;
|
||||
memset(obj->buffer, 0, sizeof(obj->buffer));
|
||||
obj->position = 0.0;
|
||||
|
||||
obj->deviceSampleRateHz = deviceSampleRateHz;
|
||||
memset(obj->skewData, 0, sizeof(obj->skewData));
|
||||
obj->skewDataIndex = 0;
|
||||
obj->skewEstimate = 0.0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Releases a resampler obtained from WebRtcAec_CreateResampler().
// Passing NULL is harmless because free(NULL) is a no-op.
void WebRtcAec_FreeResampler(void* resampInst) {
  free(resampInst);
}
|
||||
|
||||
void WebRtcAec_ResampleLinear(void* resampInst,
|
||||
const float* inspeech,
|
||||
size_t size,
|
||||
float skew,
|
||||
float* outspeech,
|
||||
size_t* size_out) {
|
||||
AecResampler* obj = (AecResampler*)resampInst;
|
||||
|
||||
float* y;
|
||||
float be, tnew;
|
||||
size_t tn, mm;
|
||||
|
||||
assert(size <= 2 * FRAME_LEN);
|
||||
assert(resampInst != NULL);
|
||||
assert(inspeech != NULL);
|
||||
assert(outspeech != NULL);
|
||||
assert(size_out != NULL);
|
||||
|
||||
// Add new frame data in lookahead
|
||||
memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay],
|
||||
inspeech,
|
||||
size * sizeof(inspeech[0]));
|
||||
|
||||
// Sample rate ratio
|
||||
be = 1 + skew;
|
||||
|
||||
// Loop over input frame
|
||||
mm = 0;
|
||||
y = &obj->buffer[FRAME_LEN]; // Point at current frame
|
||||
|
||||
tnew = be * mm + obj->position;
|
||||
tn = (size_t)tnew;
|
||||
|
||||
while (tn < size) {
|
||||
|
||||
// Interpolation
|
||||
outspeech[mm] = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]);
|
||||
mm++;
|
||||
|
||||
tnew = be * mm + obj->position;
|
||||
tn = (int)tnew;
|
||||
}
|
||||
|
||||
*size_out = mm;
|
||||
obj->position += (*size_out) * be - size;
|
||||
|
||||
// Shift buffer
|
||||
memmove(obj->buffer,
|
||||
&obj->buffer[size],
|
||||
(kResamplerBufferSize - size) * sizeof(obj->buffer[0]));
|
||||
}
|
||||
|
||||
int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst) {
|
||||
AecResampler* obj = (AecResampler*)resampInst;
|
||||
int err = 0;
|
||||
|
||||
if (obj->skewDataIndex < kEstimateLengthFrames) {
|
||||
obj->skewData[obj->skewDataIndex] = rawSkew;
|
||||
obj->skewDataIndex++;
|
||||
} else if (obj->skewDataIndex == kEstimateLengthFrames) {
|
||||
err = EstimateSkew(
|
||||
obj->skewData, kEstimateLengthFrames, obj->deviceSampleRateHz, skewEst);
|
||||
obj->skewEstimate = *skewEst;
|
||||
obj->skewDataIndex++;
|
||||
} else {
|
||||
*skewEst = obj->skewEstimate;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
// Returns non-zero when |value| lies strictly inside (-limit, limit).
static int SkewWithinAbsLimit(int value, int limit) {
  return value < limit && value > -limit;
}

// Estimates the skew as the least-squares slope of the cumulative sum of the
// accepted raw skew values against their running count.
//
// Outliers are rejected in two stages:
//  1. values outside +/-(0.04 * deviceSampleRateHz) are ignored when the mean
//     and the mean absolute deviation are computed;
//  2. a value takes part in the fit if it is inside
//     +/-(0.0025 * deviceSampleRateHz) or within five mean absolute
//     deviations (rounded outwards) of the mean.
//
// |*skewEst| is set to 0 up front and stays 0 when the fit is degenerate.
// Returns 0 on success and -1 when a stage is left with no values.
int EstimateSkew(const int* rawSkew,
                 int size,
                 int deviceSampleRateHz,
                 float* skewEst) {
  const int absLimitOuter = (int)(0.04f * deviceSampleRateHz);
  const int absLimitInner = (int)(0.0025f * deviceSampleRateHz);
  int k;
  int kept = 0;
  float mean = 0;
  float meanAbsDev = 0;
  int upper;
  int lower;
  float runningSum = 0;
  float sumX = 0;
  float sumX2 = 0;
  float sumY = 0;
  float sumXY = 0;
  float meanX;
  float denom;

  *skewEst = 0;  // Set in case of error below.

  // Stage 1: mean of the values inside the outer limit.
  for (k = 0; k < size; k++) {
    if (SkewWithinAbsLimit(rawSkew[k], absLimitOuter)) {
      kept++;
      mean += rawSkew[k];
    }
  }
  if (kept == 0) {
    return -1;
  }
  mean /= kept;

  // Mean absolute deviation of the same values.
  for (k = 0; k < size; k++) {
    if (SkewWithinAbsLimit(rawSkew[k], absLimitOuter)) {
      const float dev = rawSkew[k] - mean;
      meanAbsDev += dev >= 0 ? dev : -dev;
    }
  }
  meanAbsDev /= kept;

  upper = (int)(mean + 5 * meanAbsDev + 1);  // +1 for ceiling.
  lower = (int)(mean - 5 * meanAbsDev - 1);  // -1 for floor.

  // Stage 2: accumulate the regression sums over the accepted values.
  kept = 0;
  for (k = 0; k < size; k++) {
    if (SkewWithinAbsLimit(rawSkew[k], absLimitInner) ||
        (rawSkew[k] < upper && rawSkew[k] > lower)) {
      kept++;
      runningSum += rawSkew[k];
      sumX += kept;
      sumX2 += kept * kept;
      sumY += runningSum;
      sumXY += kept * runningSum;
    }
  }
  if (kept == 0) {
    return -1;
  }

  meanX = sumX / kept;
  denom = sumX2 - meanX * sumX;

  // A zero denominator (e.g. a single accepted value) leaves the estimate 0.
  if (denom != 0) {
    *skewEst = (sumXY - meanX * sumY) / denom;
  }

  return 0;
}
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
|
||||
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core.h"
|
||||
|
||||
enum {
|
||||
kResamplingDelay = 1
|
||||
};
|
||||
enum {
|
||||
kResamplerBufferSize = FRAME_LEN * 4
|
||||
};
|
||||
|
||||
// Unless otherwise specified, functions return 0 on success and -1 on error.

// Allocates a resampler instance. Returns NULL on error. The instance must be
// initialized with WebRtcAec_InitResampler() before use.
void* WebRtcAec_CreateResampler(void);

// Resets the resampler state and stores the device sample rate.
int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz);

// Releases an instance returned by WebRtcAec_CreateResampler().
void WebRtcAec_FreeResampler(void* resampInst);

// Estimates skew from raw measurement.
int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst);

// Resamples input using linear interpolation. |size_out| receives the number
// of samples written to |outspeech|.
void WebRtcAec_ResampleLinear(void* resampInst,
                              const float* inspeech,
                              size_t size,
                              float skew,
                              float* outspeech,
                              size_t* size_out);
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
|
923
third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c
vendored
Normal file
923
third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation.c
vendored
Normal file
|
@ -0,0 +1,923 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Contains the API functions for the AEC.
|
||||
*/
|
||||
#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
|
||||
|
||||
#include <math.h>
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/common_audio/ring_buffer.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_resampler.h"
|
||||
#include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
// Measured delays [ms]
|
||||
// Device Chrome GTP
|
||||
// MacBook Air 10
|
||||
// MacBook Retina 10 100
|
||||
// MacPro 30?
|
||||
//
|
||||
// Win7 Desktop 70 80?
|
||||
// Win7 T430s 110
|
||||
// Win8 T420s 70
|
||||
//
|
||||
// Daisy 50
|
||||
// Pixel (w/ preproc?) 240
|
||||
// Pixel (w/o preproc?) 110 110
|
||||
|
||||
// The extended filter mode gives us the flexibility to ignore the system's
|
||||
// reported delays. We do this for platforms which we believe provide results
|
||||
// which are incompatible with the AEC's expectations. Based on measurements
|
||||
// (some provided above) we set a conservative (i.e. lower than measured)
|
||||
// fixed delay.
|
||||
//
|
||||
// WEBRTC_UNTRUSTED_DELAY will only have an impact when |extended_filter_mode|
|
||||
// is enabled. See the note along with |DelayCorrection| in
|
||||
// echo_cancellation_impl.h for more details on the mode.
|
||||
//
|
||||
// Justification:
|
||||
// Chromium/Mac: Here, the true latency is so low (~10-20 ms), that it plays
|
||||
// havoc with the AEC's buffering. To avoid this, we set a fixed delay of 20 ms
|
||||
// and then compensate by rewinding by 10 ms (in wideband) through
|
||||
// kDelayDiffOffsetSamples. This trick does not seem to work for larger rewind
|
||||
// values, but fortunately this is sufficient.
|
||||
//
|
||||
// Chromium/Linux(ChromeOS): The values we get on this platform don't correspond
|
||||
// well to reality. The variance doesn't match the AEC's buffer changes, and the
|
||||
// bulk values tend to be too low. However, the range across different hardware
|
||||
// appears to be too large to choose a single value.
|
||||
//
|
||||
// GTP/Linux(ChromeOS): TBD, but for the moment we will trust the values.
|
||||
#if defined(WEBRTC_CHROMIUM_BUILD) && defined(WEBRTC_MAC)
|
||||
#define WEBRTC_UNTRUSTED_DELAY
|
||||
#endif
|
||||
|
||||
#if defined(WEBRTC_UNTRUSTED_DELAY) && defined(WEBRTC_MAC)
|
||||
static const int kDelayDiffOffsetSamples = -160;
|
||||
#else
|
||||
// Not enabled for now.
|
||||
static const int kDelayDiffOffsetSamples = 0;
|
||||
#endif
|
||||
|
||||
#if defined(WEBRTC_MAC)
|
||||
static const int kFixedDelayMs = 20;
|
||||
#else
|
||||
static const int kFixedDelayMs = 50;
|
||||
#endif
|
||||
#if !defined(WEBRTC_UNTRUSTED_DELAY)
|
||||
static const int kMinTrustedDelayMs = 20;
|
||||
#endif
|
||||
static const int kMaxTrustedDelayMs = 500;
|
||||
|
||||
// Maximum length of resampled signal. Must be an integer multiple of frames
|
||||
// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN
|
||||
// The factor of 2 handles wb, and the + 1 is as a safety margin
|
||||
// TODO(bjornv): Replace with kResamplerBufferSize
|
||||
#define MAX_RESAMP_LEN (5 * FRAME_LEN)
|
||||
|
||||
static const int kMaxBufSizeStart = 62; // In partitions
|
||||
static const int sampMsNb = 8; // samples per ms in nb
|
||||
static const int initCheck = 42;
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
int webrtc_aec_instance_count = 0;
|
||||
#endif
|
||||
|
||||
// Estimates delay to set the position of the far-end buffer read pointer
|
||||
// (controlled by knownDelay)
|
||||
static void EstBufDelayNormal(Aec* aecInst);
|
||||
static void EstBufDelayExtended(Aec* aecInst);
|
||||
// Processes one near-end frame in normal (non-extended) filter mode.
// Parameter names match the definition further down in this file.
static int ProcessNormal(Aec* aecpc,
                         const float* const* nearend,
                         size_t num_bands,
                         float* const* out,
                         size_t nrOfSamples,
                         int16_t msInSndCardBuf,
                         int32_t skew);
|
||||
static void ProcessExtended(Aec* self,
|
||||
const float* const* near,
|
||||
size_t num_bands,
|
||||
float* const* out,
|
||||
size_t num_samples,
|
||||
int16_t reported_delay_ms,
|
||||
int32_t skew);
|
||||
|
||||
void* WebRtcAec_Create() {
|
||||
Aec* aecpc = malloc(sizeof(Aec));
|
||||
|
||||
if (!aecpc) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
aecpc->aec = WebRtcAec_CreateAec();
|
||||
if (!aecpc->aec) {
|
||||
WebRtcAec_Free(aecpc);
|
||||
return NULL;
|
||||
}
|
||||
aecpc->resampler = WebRtcAec_CreateResampler();
|
||||
if (!aecpc->resampler) {
|
||||
WebRtcAec_Free(aecpc);
|
||||
return NULL;
|
||||
}
|
||||
// Create far-end pre-buffer. The buffer size has to be large enough for
|
||||
// largest possible drift compensation (kResamplerBufferSize) + "almost" an
|
||||
// FFT buffer (PART_LEN2 - 1).
|
||||
aecpc->far_pre_buf =
|
||||
WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, sizeof(float));
|
||||
if (!aecpc->far_pre_buf) {
|
||||
WebRtcAec_Free(aecpc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
aecpc->initFlag = 0;
|
||||
aecpc->lastError = 0;
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
{
|
||||
char filename[64];
|
||||
sprintf(filename, "aec_buf%d.dat", webrtc_aec_instance_count);
|
||||
aecpc->bufFile = fopen(filename, "wb");
|
||||
sprintf(filename, "aec_skew%d.dat", webrtc_aec_instance_count);
|
||||
aecpc->skewFile = fopen(filename, "wb");
|
||||
sprintf(filename, "aec_delay%d.dat", webrtc_aec_instance_count);
|
||||
aecpc->delayFile = fopen(filename, "wb");
|
||||
webrtc_aec_instance_count++;
|
||||
}
|
||||
#endif
|
||||
|
||||
return aecpc;
|
||||
}
|
||||
|
||||
void WebRtcAec_Free(void* aecInst) {
|
||||
Aec* aecpc = aecInst;
|
||||
|
||||
if (aecpc == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
WebRtc_FreeBuffer(aecpc->far_pre_buf);
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
fclose(aecpc->bufFile);
|
||||
fclose(aecpc->skewFile);
|
||||
fclose(aecpc->delayFile);
|
||||
#endif
|
||||
|
||||
WebRtcAec_FreeAec(aecpc->aec);
|
||||
WebRtcAec_FreeResampler(aecpc->resampler);
|
||||
free(aecpc);
|
||||
}
|
||||
|
||||
int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) {
|
||||
Aec* aecpc = aecInst;
|
||||
AecConfig aecConfig;
|
||||
|
||||
if (sampFreq != 8000 &&
|
||||
sampFreq != 16000 &&
|
||||
sampFreq != 32000 &&
|
||||
sampFreq != 48000) {
|
||||
aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
aecpc->sampFreq = sampFreq;
|
||||
|
||||
if (scSampFreq < 1 || scSampFreq > 96000) {
|
||||
aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
aecpc->scSampFreq = scSampFreq;
|
||||
|
||||
// Initialize echo canceller core
|
||||
if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) {
|
||||
aecpc->lastError = AEC_UNSPECIFIED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) {
|
||||
aecpc->lastError = AEC_UNSPECIFIED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
WebRtc_InitBuffer(aecpc->far_pre_buf);
|
||||
WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); // Start overlap.
|
||||
|
||||
aecpc->initFlag = initCheck; // indicates that initialization has been done
|
||||
|
||||
if (aecpc->sampFreq == 32000 || aecpc->sampFreq == 48000) {
|
||||
aecpc->splitSampFreq = 16000;
|
||||
} else {
|
||||
aecpc->splitSampFreq = sampFreq;
|
||||
}
|
||||
|
||||
aecpc->delayCtr = 0;
|
||||
aecpc->sampFactor = (aecpc->scSampFreq * 1.0f) / aecpc->splitSampFreq;
|
||||
// Sampling frequency multiplier (SWB is processed as 160 frame size).
|
||||
aecpc->rate_factor = aecpc->splitSampFreq / 8000;
|
||||
|
||||
aecpc->sum = 0;
|
||||
aecpc->counter = 0;
|
||||
aecpc->checkBuffSize = 1;
|
||||
aecpc->firstVal = 0;
|
||||
|
||||
// We skip the startup_phase completely (setting to 0) if DA-AEC is enabled,
|
||||
// but not extended_filter mode.
|
||||
aecpc->startup_phase = WebRtcAec_extended_filter_enabled(aecpc->aec) ||
|
||||
!WebRtcAec_delay_agnostic_enabled(aecpc->aec);
|
||||
aecpc->bufSizeStart = 0;
|
||||
aecpc->checkBufSizeCtr = 0;
|
||||
aecpc->msInSndCardBuf = 0;
|
||||
aecpc->filtDelay = -1; // -1 indicates an initialized state.
|
||||
aecpc->timeForDelayChange = 0;
|
||||
aecpc->knownDelay = 0;
|
||||
aecpc->lastDelayDiff = 0;
|
||||
|
||||
aecpc->skewFrCtr = 0;
|
||||
aecpc->resample = kAecFalse;
|
||||
aecpc->highSkewCtr = 0;
|
||||
aecpc->skew = 0;
|
||||
|
||||
aecpc->farend_started = 0;
|
||||
|
||||
// Default settings.
|
||||
aecConfig.nlpMode = kAecNlpModerate;
|
||||
aecConfig.skewMode = kAecFalse;
|
||||
aecConfig.metricsMode = kAecFalse;
|
||||
aecConfig.delay_logging = kAecFalse;
|
||||
|
||||
if (WebRtcAec_set_config(aecpc, aecConfig) == -1) {
|
||||
aecpc->lastError = AEC_UNSPECIFIED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// only buffer L band for farend
|
||||
int32_t WebRtcAec_BufferFarend(void* aecInst,
|
||||
const float* farend,
|
||||
size_t nrOfSamples) {
|
||||
Aec* aecpc = aecInst;
|
||||
size_t newNrOfSamples = nrOfSamples;
|
||||
float new_farend[MAX_RESAMP_LEN];
|
||||
const float* farend_ptr = farend;
|
||||
|
||||
if (farend == NULL) {
|
||||
aecpc->lastError = AEC_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (aecpc->initFlag != initCheck) {
|
||||
aecpc->lastError = AEC_UNINITIALIZED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// number of samples == 160 for SWB input
|
||||
if (nrOfSamples != 80 && nrOfSamples != 160) {
|
||||
aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
|
||||
// Resample and get a new number of samples
|
||||
WebRtcAec_ResampleLinear(aecpc->resampler,
|
||||
farend,
|
||||
nrOfSamples,
|
||||
aecpc->skew,
|
||||
new_farend,
|
||||
&newNrOfSamples);
|
||||
farend_ptr = new_farend;
|
||||
}
|
||||
|
||||
aecpc->farend_started = 1;
|
||||
WebRtcAec_SetSystemDelay(
|
||||
aecpc->aec, WebRtcAec_system_delay(aecpc->aec) + (int)newNrOfSamples);
|
||||
|
||||
// Write the time-domain data to |far_pre_buf|.
|
||||
WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_ptr, newNrOfSamples);
|
||||
|
||||
// Transform to frequency domain if we have enough data.
|
||||
while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) {
|
||||
// We have enough data to pass to the FFT, hence read PART_LEN2 samples.
|
||||
{
|
||||
float* ptmp = NULL;
|
||||
float tmp[PART_LEN2];
|
||||
WebRtc_ReadBuffer(aecpc->far_pre_buf, (void**)&ptmp, tmp, PART_LEN2);
|
||||
WebRtcAec_BufferFarendPartition(aecpc->aec, ptmp);
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
WebRtc_WriteBuffer(
|
||||
WebRtcAec_far_time_buf(aecpc->aec), &ptmp[PART_LEN], 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Rewind |far_pre_buf| PART_LEN samples for overlap before continuing.
|
||||
WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAec_Process(void* aecInst,
|
||||
const float* const* nearend,
|
||||
size_t num_bands,
|
||||
float* const* out,
|
||||
size_t nrOfSamples,
|
||||
int16_t msInSndCardBuf,
|
||||
int32_t skew) {
|
||||
Aec* aecpc = aecInst;
|
||||
int32_t retVal = 0;
|
||||
|
||||
if (out == NULL) {
|
||||
aecpc->lastError = AEC_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (aecpc->initFlag != initCheck) {
|
||||
aecpc->lastError = AEC_UNINITIALIZED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// number of samples == 160 for SWB input
|
||||
if (nrOfSamples != 80 && nrOfSamples != 160) {
|
||||
aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (msInSndCardBuf < 0) {
|
||||
msInSndCardBuf = 0;
|
||||
aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
|
||||
retVal = -1;
|
||||
} else if (msInSndCardBuf > kMaxTrustedDelayMs) {
|
||||
// The clamping is now done in ProcessExtended/Normal().
|
||||
aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
|
||||
retVal = -1;
|
||||
}
|
||||
|
||||
// This returns the value of aec->extended_filter_enabled.
|
||||
if (WebRtcAec_extended_filter_enabled(aecpc->aec)) {
|
||||
ProcessExtended(aecpc,
|
||||
nearend,
|
||||
num_bands,
|
||||
out,
|
||||
nrOfSamples,
|
||||
msInSndCardBuf,
|
||||
skew);
|
||||
} else {
|
||||
if (ProcessNormal(aecpc,
|
||||
nearend,
|
||||
num_bands,
|
||||
out,
|
||||
nrOfSamples,
|
||||
msInSndCardBuf,
|
||||
skew) != 0) {
|
||||
retVal = -1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
{
|
||||
int16_t far_buf_size_ms = (int16_t)(WebRtcAec_system_delay(aecpc->aec) /
|
||||
(sampMsNb * aecpc->rate_factor));
|
||||
(void)fwrite(&far_buf_size_ms, 2, 1, aecpc->bufFile);
|
||||
(void)fwrite(
|
||||
&aecpc->knownDelay, sizeof(aecpc->knownDelay), 1, aecpc->delayFile);
|
||||
}
|
||||
#endif
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
// Applies |config| to an initialized AEC instance.
//
// Every field is validated before any state is modified, so a rejected
// configuration leaves the previous settings (including |skewMode|) intact.
// Returns 0 on success. Returns -1 with |lastError| set to
// AEC_UNINITIALIZED_ERROR if the instance is not initialized, or to
// AEC_BAD_PARAMETER_ERROR if any field holds an unsupported value.
int WebRtcAec_set_config(void* handle, AecConfig config) {
  Aec* self = (Aec*)handle;

  if (self->initFlag != initCheck) {
    self->lastError = AEC_UNINITIALIZED_ERROR;
    return -1;
  }

  if ((config.skewMode != kAecFalse && config.skewMode != kAecTrue) ||
      (config.nlpMode != kAecNlpConservative &&
       config.nlpMode != kAecNlpModerate &&
       config.nlpMode != kAecNlpAggressive) ||
      (config.metricsMode != kAecFalse && config.metricsMode != kAecTrue) ||
      (config.delay_logging != kAecFalse &&
       config.delay_logging != kAecTrue)) {
    self->lastError = AEC_BAD_PARAMETER_ERROR;
    return -1;
  }

  // All fields are valid: commit.
  self->skewMode = config.skewMode;
  WebRtcAec_SetConfigCore(
      self->aec, config.nlpMode, config.metricsMode, config.delay_logging);
  return 0;
}
|
||||
|
||||
int WebRtcAec_get_echo_status(void* handle, int* status) {
|
||||
Aec* self = (Aec*)handle;
|
||||
if (status == NULL) {
|
||||
self->lastError = AEC_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
if (self->initFlag != initCheck) {
|
||||
self->lastError = AEC_UNINITIALIZED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
*status = WebRtcAec_echo_state(self->aec);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) {
|
||||
const float kUpWeight = 0.7f;
|
||||
float dtmp;
|
||||
int stmp;
|
||||
Aec* self = (Aec*)handle;
|
||||
Stats erl;
|
||||
Stats erle;
|
||||
Stats a_nlp;
|
||||
|
||||
if (handle == NULL) {
|
||||
return -1;
|
||||
}
|
||||
if (metrics == NULL) {
|
||||
self->lastError = AEC_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
if (self->initFlag != initCheck) {
|
||||
self->lastError = AEC_UNINITIALIZED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp);
|
||||
|
||||
// ERL
|
||||
metrics->erl.instant = (int)erl.instant;
|
||||
|
||||
if ((erl.himean > kOffsetLevel) && (erl.average > kOffsetLevel)) {
|
||||
// Use a mix between regular average and upper part average.
|
||||
dtmp = kUpWeight * erl.himean + (1 - kUpWeight) * erl.average;
|
||||
metrics->erl.average = (int)dtmp;
|
||||
} else {
|
||||
metrics->erl.average = kOffsetLevel;
|
||||
}
|
||||
|
||||
metrics->erl.max = (int)erl.max;
|
||||
|
||||
if (erl.min < (kOffsetLevel * (-1))) {
|
||||
metrics->erl.min = (int)erl.min;
|
||||
} else {
|
||||
metrics->erl.min = kOffsetLevel;
|
||||
}
|
||||
|
||||
// ERLE
|
||||
metrics->erle.instant = (int)erle.instant;
|
||||
|
||||
if ((erle.himean > kOffsetLevel) && (erle.average > kOffsetLevel)) {
|
||||
// Use a mix between regular average and upper part average.
|
||||
dtmp = kUpWeight * erle.himean + (1 - kUpWeight) * erle.average;
|
||||
metrics->erle.average = (int)dtmp;
|
||||
} else {
|
||||
metrics->erle.average = kOffsetLevel;
|
||||
}
|
||||
|
||||
metrics->erle.max = (int)erle.max;
|
||||
|
||||
if (erle.min < (kOffsetLevel * (-1))) {
|
||||
metrics->erle.min = (int)erle.min;
|
||||
} else {
|
||||
metrics->erle.min = kOffsetLevel;
|
||||
}
|
||||
|
||||
// RERL
|
||||
if ((metrics->erl.average > kOffsetLevel) &&
|
||||
(metrics->erle.average > kOffsetLevel)) {
|
||||
stmp = metrics->erl.average + metrics->erle.average;
|
||||
} else {
|
||||
stmp = kOffsetLevel;
|
||||
}
|
||||
metrics->rerl.average = stmp;
|
||||
|
||||
// No other statistics needed, but returned for completeness.
|
||||
metrics->rerl.instant = stmp;
|
||||
metrics->rerl.max = stmp;
|
||||
metrics->rerl.min = stmp;
|
||||
|
||||
// A_NLP
|
||||
metrics->aNlp.instant = (int)a_nlp.instant;
|
||||
|
||||
if ((a_nlp.himean > kOffsetLevel) && (a_nlp.average > kOffsetLevel)) {
|
||||
// Use a mix between regular average and upper part average.
|
||||
dtmp = kUpWeight * a_nlp.himean + (1 - kUpWeight) * a_nlp.average;
|
||||
metrics->aNlp.average = (int)dtmp;
|
||||
} else {
|
||||
metrics->aNlp.average = kOffsetLevel;
|
||||
}
|
||||
|
||||
metrics->aNlp.max = (int)a_nlp.max;
|
||||
|
||||
if (a_nlp.min < (kOffsetLevel * (-1))) {
|
||||
metrics->aNlp.min = (int)a_nlp.min;
|
||||
} else {
|
||||
metrics->aNlp.min = kOffsetLevel;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WebRtcAec_GetDelayMetrics(void* handle,
|
||||
int* median,
|
||||
int* std,
|
||||
float* fraction_poor_delays) {
|
||||
Aec* self = handle;
|
||||
if (median == NULL) {
|
||||
self->lastError = AEC_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
if (std == NULL) {
|
||||
self->lastError = AEC_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
if (self->initFlag != initCheck) {
|
||||
self->lastError = AEC_UNINITIALIZED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
if (WebRtcAec_GetDelayMetricsCore(self->aec, median, std,
|
||||
fraction_poor_delays) ==
|
||||
-1) {
|
||||
// Logging disabled.
|
||||
self->lastError = AEC_UNSUPPORTED_FUNCTION_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAec_get_error_code(void* aecInst) {
|
||||
Aec* aecpc = aecInst;
|
||||
return aecpc->lastError;
|
||||
}
|
||||
|
||||
AecCore* WebRtcAec_aec_core(void* handle) {
|
||||
if (!handle) {
|
||||
return NULL;
|
||||
}
|
||||
return ((Aec*)handle)->aec;
|
||||
}
|
||||
|
||||
// Processes one near-end frame in normal (non-extended) filter mode.
//
// The reported delay is capped at kMaxTrustedDelayMs, increased by 10 ms and
// stored in the instance. With skew mode enabled, the skew estimate is
// updated once the first 25 frames have passed.
//
// While the instance is in its startup phase the near-end signal is copied
// to |out| unprocessed and the far-end buffer start size is determined. Once
// the phase has ended, the buffer delay is estimated and the core processes
// the frame.
//
// Returns 0, or -1 if the skew estimation failed for this frame.
static int ProcessNormal(Aec* aecpc,
                         const float* const* nearend,
                         size_t num_bands,
                         float* const* out,
                         size_t nrOfSamples,
                         int16_t msInSndCardBuf,
                         int32_t skew) {
  // Limit resampling to doubling/halving of signal
  const float kSkewLowerBound = -0.5f;
  const float kSkewUpperBound = 1.0f;
  int status = 0;
  size_t band;
  size_t blocks10ms;

  if (msInSndCardBuf > kMaxTrustedDelayMs) {
    msInSndCardBuf = kMaxTrustedDelayMs;
  }
  // TODO(andrew): we need to investigate if this +10 is really wanted.
  msInSndCardBuf += 10;
  aecpc->msInSndCardBuf = msInSndCardBuf;

  if (aecpc->skewMode == kAecTrue) {
    if (aecpc->skewFrCtr < 25) {
      aecpc->skewFrCtr++;
    } else {
      status = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew);
      if (status == -1) {
        aecpc->skew = 0;
        aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
      }

      aecpc->skew /= aecpc->sampFactor * nrOfSamples;

      // Resampling is skipped for a negligible skew. This is decided before
      // the estimate is clamped below.
      aecpc->resample = (aecpc->skew < 1.0e-3 && aecpc->skew > -1.0e-3)
                            ? kAecFalse
                            : kAecTrue;

      if (aecpc->skew < kSkewLowerBound) {
        aecpc->skew = kSkewLowerBound;
      } else if (aecpc->skew > kSkewUpperBound) {
        aecpc->skew = kSkewUpperBound;
      }

#ifdef WEBRTC_AEC_DEBUG_DUMP
      (void)fwrite(&aecpc->skew, sizeof(aecpc->skew), 1, aecpc->skewFile);
#endif
    }
  }

  blocks10ms = nrOfSamples / (FRAME_LEN * aecpc->rate_factor);

  if (!aecpc->startup_phase) {
    // AEC is enabled.
    EstBufDelayNormal(aecpc);

    // Call the AEC.
    // TODO(bjornv): Re-structure such that we don't have to pass
    // |aecpc->knownDelay| as input. Change name to something like
    // |system_buffer_diff|.
    WebRtcAec_ProcessFrames(aecpc->aec,
                            nearend,
                            num_bands,
                            nrOfSamples,
                            aecpc->knownDelay,
                            out);
    return status;
  }

  // The AEC is in the start up mode: it is disabled until the system delay
  // is OK, so the near-end signal is passed through.
  for (band = 0; band < num_bands; ++band) {
    // Only needed if they don't already point to the same place.
    if (nearend[band] != out[band]) {
      memcpy(out[band], nearend[band], sizeof(nearend[band][0]) * nrOfSamples);
    }
  }

  // Mechanism to ensure that the system delay is reasonably stable.
  if (aecpc->checkBuffSize) {
    aecpc->checkBufSizeCtr++;
    // Before we fill up the far-end buffer we require the system delay to
    // be stable compared to the first value: the difference must stay below
    // the larger of 20% of the current delay and |sampMsNb|. This comparison
    // is made during the following 6 consecutive 10 ms blocks. If it seems
    // to be stable then we start to fill up the far-end buffer.
    if (aecpc->counter == 0) {
      aecpc->firstVal = aecpc->msInSndCardBuf;
      aecpc->sum = 0;
    }

    if (abs(aecpc->firstVal - aecpc->msInSndCardBuf) <
        WEBRTC_SPL_MAX(0.2 * aecpc->msInSndCardBuf, sampMsNb)) {
      aecpc->sum += aecpc->msInSndCardBuf;
      aecpc->counter++;
    } else {
      aecpc->counter = 0;
    }

    if (aecpc->counter * blocks10ms >= 6) {
      // The far-end buffer size is determined in partitions of PART_LEN
      // samples. Use 75% of the average value of the system delay as buffer
      // size to start with.
      aecpc->bufSizeStart =
          WEBRTC_SPL_MIN((3 * aecpc->sum * aecpc->rate_factor * 8) /
                             (4 * aecpc->counter * PART_LEN),
                         kMaxBufSizeStart);
      // Buffer size has now been determined.
      aecpc->checkBuffSize = 0;
    }

    if (aecpc->checkBufSizeCtr * blocks10ms > 50) {
      // For really bad systems, don't disable the echo canceller for more
      // than 0.5 sec.
      aecpc->bufSizeStart = WEBRTC_SPL_MIN(
          (aecpc->msInSndCardBuf * aecpc->rate_factor * 3) / 40,
          kMaxBufSizeStart);
      aecpc->checkBuffSize = 0;
    }
  }

  // If |checkBuffSize| changed in the if-statement above.
  if (!aecpc->checkBuffSize) {
    // The system delay is now reasonably stable (or has been unstable for
    // too long). When the far-end buffer is filled with approximately the
    // same amount of data as reported by the system we end the startup
    // phase.
    int overhead_elements =
        WebRtcAec_system_delay(aecpc->aec) / PART_LEN - aecpc->bufSizeStart;

    if (overhead_elements >= 0) {
      if (overhead_elements > 0) {
        // TODO(bjornv): Do we need a check on how much we actually moved
        // the read pointer? It should always be possible to move the
        // pointer |overhead_elements| since we have only added data to the
        // buffer and no delay compensation nor AEC processing has been
        // done.
        WebRtcAec_MoveFarReadPtr(aecpc->aec, overhead_elements);
      }

      // Enable the AEC
      aecpc->startup_phase = 0;
    }
  }

  return status;
}
|
||||
|
||||
// Processes one near-end frame in "extended" mode.
//
// Steps, in order:
//   1. Sanitize |reported_delay_ms| (or replace it with |kFixedDelayMs| when
//      WEBRTC_UNTRUSTED_DELAY is defined) and store it in
//      |self->msInSndCardBuf|.
//   2. If no far-end data has been seen yet (|self->farend_started| is zero),
//      copy |near| to |out| band by band and return.
//   3. On the first processed frame (|self->startup_phase| set), move the
//      far-end read pointer so the buffered amount approaches a target
//      derived from the delay, then clear |startup_phase|.
//   4. Update the delay estimate via EstBufDelayExtended() and run
//      WebRtcAec_ProcessFrames() with the (offset-adjusted) known delay.
//
// |skew| is accepted for signature symmetry but is not used in this function.
static void ProcessExtended(Aec* self,
                            const float* const* near,
                            size_t num_bands,
                            float* const* out,
                            size_t num_samples,
                            int16_t reported_delay_ms,
                            int32_t skew) {
  size_t band;
  int adjusted_known_delay;

#if defined(WEBRTC_UNTRUSTED_DELAY)
  reported_delay_ms = kFixedDelayMs;
#else
  // Usual mode: the reported system delay is trusted. With the longer filter
  // no extra 10 ms is added to reduce the chance of non-causality; a minimum
  // is enforced instead so the read pointer does not jump around needlessly.
  if (reported_delay_ms < kMinTrustedDelayMs) {
    reported_delay_ms = kMinTrustedDelayMs;
  }
  // A value at or above the trusted maximum looks bogus; fall back to the
  // measured fixed delay. ">=" is deliberate: higher layers may already clamp
  // to this maximum, which would otherwise go undetected here.
  if (reported_delay_ms >= kMaxTrustedDelayMs) {
    reported_delay_ms = kFixedDelayMs;
  }
#endif
  self->msInSndCardBuf = reported_delay_ms;

  if (!self->farend_started) {
    // Nothing to cancel yet: pass the near-end signal straight through.
    for (band = 0; band < num_bands; ++band) {
      // Skip the copy when input and output alias the same buffer.
      if (out[band] == near[band]) {
        continue;
      }
      memcpy(out[band], near[band], num_samples * sizeof(near[band][0]));
    }
    return;
  }

  if (self->startup_phase) {
    // Extended mode has no startup "phase", only a special action on the
    // first frame. In the trusted delay case the current reported delay is
    // used, unless it is less than the conservative fixed measurement.
    int startup_size_ms = reported_delay_ms;
    int target_delay;
    int overhead_elements;

    if (startup_size_ms < kFixedDelayMs) {
      startup_size_ms = kFixedDelayMs;
    }

    target_delay = startup_size_ms * self->rate_factor * 8;
#if !defined(WEBRTC_ANDROID)
    // To avoid putting the AEC in a non-causal state the target is scaled
    // conservatively by one half. On Android a fixed delay is used, so no
    // scaling is needed there.
    target_delay = target_delay / 2;
#endif

    overhead_elements =
        (WebRtcAec_system_delay(self->aec) - target_delay) / PART_LEN;
    WebRtcAec_MoveFarReadPtr(self->aec, overhead_elements);
    self->startup_phase = 0;
  }

  EstBufDelayExtended(self);

  // |kDelayDiffOffsetSamples| gives the option to manually rewind the delay
  // on very low delay platforms, which cannot be expressed purely through
  // |reported_delay_ms|. The adjusted value is never allowed below zero.
  adjusted_known_delay = self->knownDelay + kDelayDiffOffsetSamples;
  if (adjusted_known_delay < 0) {
    adjusted_known_delay = 0;
  }

  WebRtcAec_ProcessFrames(self->aec,
                          near,
                          num_bands,
                          num_samples,
                          adjusted_known_delay,
                          out);
}
|
||||
|
||||
// Updates the filtered buffer-delay estimate (|filtDelay|) and, once the
// estimate has been consistently off for long enough, the delay handed to the
// AEC core (|knownDelay|). Used in the normal (non-extended) mode.
//
// The raw estimate is the reported sound card delay converted with
// |sampMsNb| and |rate_factor|, minus the current AEC system delay, and then:
//   1) compensated for the frame(s) that will be read/processed,
//   2) compensated for drift resampling, when active,
//   3) compensated for non-causality by flushing one block, if needed, since
//      the estimated delay can't be negative.
static void EstBufDelayNormal(Aec* aecpc) {
  // Hysteresis band for |filtDelay - knownDelay| and update parameters.
  const int kUpperDiff = 224;
  const int kLowerDiff = 96;
  const int kKnownDelayMargin = 160;
  const int kCallsBeforeChange = 25;

  int reported_samples =
      aecpc->msInSndCardBuf * sampMsNb * aecpc->rate_factor;
  int current_delay = reported_samples - WebRtcAec_system_delay(aecpc->aec);
  int delay_difference = 0;
  int keep_counting = 0;
  short smoothed = 0;

  // 1) Compensate for the frame(s) that will be read/processed.
  current_delay += FRAME_LEN * aecpc->rate_factor;

  // 2) Account for resampling frame delay.
  if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
    current_delay -= kResamplingDelay;
  }

  // 3) Compensate for non-causality, if needed, by flushing one block.
  if (current_delay < PART_LEN) {
    current_delay += WebRtcAec_MoveFarReadPtr(aecpc->aec, 1) * PART_LEN;
  }

  // The "extended" implementation uses -1 to signal an initialized state;
  // treat that as zero here.
  if (aecpc->filtDelay < 0) {
    aecpc->filtDelay = 0;
  }

  // First-order smoothing (0.8 old, 0.2 new), floored at zero.
  smoothed = (short)(0.8 * aecpc->filtDelay + 0.2 * current_delay);
  aecpc->filtDelay = smoothed > 0 ? smoothed : 0;

  // Count consecutive calls where the filtered estimate sits outside the
  // [kLowerDiff, kUpperDiff] band around |knownDelay|. The counter restarts
  // when the difference is inside the band or when it swung directly from
  // one side of the band to the other since the previous call.
  delay_difference = aecpc->filtDelay - aecpc->knownDelay;
  if (delay_difference > kUpperDiff) {
    keep_counting = aecpc->lastDelayDiff >= kLowerDiff;
  } else if (delay_difference < kLowerDiff && aecpc->knownDelay > 0) {
    keep_counting = aecpc->lastDelayDiff <= kUpperDiff;
  } else {
    keep_counting = 0;
  }

  if (keep_counting) {
    aecpc->timeForDelayChange++;
  } else {
    aecpc->timeForDelayChange = 0;
  }
  aecpc->lastDelayDiff = delay_difference;

  // The deviation has persisted: re-anchor |knownDelay| a fixed margin below
  // the filtered estimate, never below zero.
  if (aecpc->timeForDelayChange > kCallsBeforeChange) {
    int new_known_delay = (int)aecpc->filtDelay - kKnownDelayMargin;
    aecpc->knownDelay = new_known_delay > 0 ? new_known_delay : 0;
  }
}
|
||||
|
||||
// Extended-mode counterpart of EstBufDelayNormal(): updates |filtDelay| and,
// after a persistent deviation, |knownDelay|.
//
// Differences visible here compared to the normal variant: two blocks are
// flushed when compensating for non-causality, |filtDelay == -1| marks the
// first call (the estimate is then seeded with half the current delay), the
// smoothing is slower (0.95 / 0.05) and the hysteresis band and margin are
// wider (128 / 384 / 256).
static void EstBufDelayExtended(Aec* self) {
  // Hysteresis band for |filtDelay - knownDelay| and update parameters.
  const int kUpperDiff = 384;
  const int kLowerDiff = 128;
  const int kKnownDelayMargin = 256;
  const int kCallsBeforeChange = 25;

  int reported_delay = self->msInSndCardBuf * sampMsNb * self->rate_factor;
  int current_delay = reported_delay - WebRtcAec_system_delay(self->aec);
  int delay_difference = 0;
  int keep_counting = 0;

  // Before filtering the delay estimate it is adjusted in three steps.

  // 1) Compensate for the frame(s) that will be read/processed.
  current_delay += FRAME_LEN * self->rate_factor;

  // 2) Account for resampling frame delay.
  if (self->skewMode == kAecTrue && self->resample == kAecTrue) {
    current_delay -= kResamplingDelay;
  }

  // 3) Compensate for non-causality, if needed, by flushing two blocks. The
  //    estimated delay can't be negative.
  if (current_delay < PART_LEN) {
    current_delay += WebRtcAec_MoveFarReadPtr(self->aec, 2) * PART_LEN;
  }

  if (self->filtDelay == -1) {
    // First call after initialization: seed with half the current delay,
    // floored at zero.
    const double seed = 0.5 * current_delay;
    self->filtDelay = (short)(seed > 0 ? seed : 0);
  } else {
    // First-order smoothing (0.95 old, 0.05 new), floored at zero.
    const short smoothed =
        (short)(0.95 * self->filtDelay + 0.05 * current_delay);
    self->filtDelay = smoothed > 0 ? smoothed : 0;
  }

  // Count consecutive calls where the filtered estimate sits outside the
  // [kLowerDiff, kUpperDiff] band around |knownDelay|. The counter restarts
  // when the difference is inside the band or when it swung directly from
  // one side of the band to the other since the previous call.
  delay_difference = self->filtDelay - self->knownDelay;
  if (delay_difference > kUpperDiff) {
    keep_counting = self->lastDelayDiff >= kLowerDiff;
  } else if (delay_difference < kLowerDiff && self->knownDelay > 0) {
    keep_counting = self->lastDelayDiff <= kUpperDiff;
  } else {
    keep_counting = 0;
  }

  if (keep_counting) {
    self->timeForDelayChange++;
  } else {
    self->timeForDelayChange = 0;
  }
  self->lastDelayDiff = delay_difference;

  // The deviation has persisted: re-anchor |knownDelay| a fixed margin below
  // the filtered estimate, never below zero.
  if (self->timeForDelayChange > kCallsBeforeChange) {
    int new_known_delay = (int)self->filtDelay - kKnownDelayMargin;
    self->knownDelay = new_known_delay > 0 ? new_known_delay : 0;
  }
}
|
67
third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h
vendored
Normal file
67
third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h
vendored
Normal file
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
|
||||
|
||||
#include "webrtc/common_audio/ring_buffer.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core.h"
|
||||
|
||||
typedef struct {
|
||||
int delayCtr;
|
||||
int sampFreq;
|
||||
int splitSampFreq;
|
||||
int scSampFreq;
|
||||
float sampFactor; // scSampRate / sampFreq
|
||||
short skewMode;
|
||||
int bufSizeStart;
|
||||
int knownDelay;
|
||||
int rate_factor;
|
||||
|
||||
short initFlag; // indicates if AEC has been initialized
|
||||
|
||||
// Variables used for averaging far end buffer size
|
||||
short counter;
|
||||
int sum;
|
||||
short firstVal;
|
||||
short checkBufSizeCtr;
|
||||
|
||||
// Variables used for delay shifts
|
||||
short msInSndCardBuf;
|
||||
short filtDelay; // Filtered delay estimate.
|
||||
int timeForDelayChange;
|
||||
int startup_phase;
|
||||
int checkBuffSize;
|
||||
short lastDelayDiff;
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
FILE* bufFile;
|
||||
FILE* delayFile;
|
||||
FILE* skewFile;
|
||||
#endif
|
||||
|
||||
// Structures
|
||||
void* resampler;
|
||||
|
||||
int skewFrCtr;
|
||||
int resample; // if the skew is small enough we don't resample
|
||||
int highSkewCtr;
|
||||
float skew;
|
||||
|
||||
RingBuffer* far_pre_buf; // Time domain far-end pre-buffer.
|
||||
|
||||
int lastError;
|
||||
|
||||
int farend_started;
|
||||
|
||||
AecCore* aec;
|
||||
} Aec;
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
|
48
third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc
vendored
Normal file
48
third_party/webrtc/src/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// TODO(bjornv): Make this a comprehensive test.
|
||||
|
||||
#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
extern "C" {
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core.h"
|
||||
}
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "webrtc/base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Creating an instance must yield a non-null handle, and freeing must accept
// both a null pointer and a valid handle.
TEST(EchoCancellationTest, CreateAndFreeHasExpectedBehavior) {
  void* aec_instance = WebRtcAec_Create();
  ASSERT_TRUE(aec_instance != nullptr);

  // Freeing a null pointer is exercised first, then the real instance.
  WebRtcAec_Free(nullptr);
  WebRtcAec_Free(aec_instance);
}
|
||||
|
||||
// The low-level core handle must be NULL for a NULL instance, non-NULL for a
// valid instance, and usable with the core-level setters/getters.
TEST(EchoCancellationTest, ApplyAecCoreHandle) {
  void* aec_instance = WebRtcAec_Create();
  ASSERT_TRUE(aec_instance);

  EXPECT_TRUE(NULL == WebRtcAec_aec_core(NULL));

  AecCore* core = WebRtcAec_aec_core(aec_instance);
  EXPECT_TRUE(NULL != core);

  // Simple round trip: a value written through the lower level |core| handle
  // must be read back unchanged.
  const int kDelay = 111;
  WebRtcAec_SetSystemDelay(core, kDelay);
  EXPECT_EQ(kDelay, WebRtcAec_system_delay(core));

  WebRtcAec_Free(aec_instance);
}
|
||||
|
||||
} // namespace webrtc
|
245
third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
vendored
Normal file
245
third_party/webrtc/src/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
vendored
Normal file
|
@ -0,0 +1,245 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
// Errors
|
||||
#define AEC_UNSPECIFIED_ERROR 12000
|
||||
#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001
|
||||
#define AEC_UNINITIALIZED_ERROR 12002
|
||||
#define AEC_NULL_POINTER_ERROR 12003
|
||||
#define AEC_BAD_PARAMETER_ERROR 12004
|
||||
|
||||
// Warnings
|
||||
#define AEC_BAD_PARAMETER_WARNING 12050
|
||||
|
||||
enum {
|
||||
kAecNlpConservative = 0,
|
||||
kAecNlpModerate,
|
||||
kAecNlpAggressive
|
||||
};
|
||||
|
||||
enum {
|
||||
kAecFalse = 0,
|
||||
kAecTrue
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
int16_t nlpMode; // default kAecNlpModerate
|
||||
int16_t skewMode; // default kAecFalse
|
||||
int16_t metricsMode; // default kAecFalse
|
||||
int delay_logging; // default kAecFalse
|
||||
// float realSkew;
|
||||
} AecConfig;
|
||||
|
||||
typedef struct {
|
||||
int instant;
|
||||
int average;
|
||||
int max;
|
||||
int min;
|
||||
} AecLevel;
|
||||
|
||||
typedef struct {
|
||||
AecLevel rerl;
|
||||
AecLevel erl;
|
||||
AecLevel erle;
|
||||
AecLevel aNlp;
|
||||
} AecMetrics;
|
||||
|
||||
struct AecCore;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Allocates the memory needed by the AEC. The memory needs to be initialized
|
||||
* separately using the WebRtcAec_Init() function. Returns a pointer to the
|
||||
* object or NULL on error.
|
||||
*/
|
||||
void* WebRtcAec_Create();
|
||||
|
||||
/*
|
||||
* This function releases the memory allocated by WebRtcAec_Create().
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecInst Pointer to the AEC instance
|
||||
*/
|
||||
void WebRtcAec_Free(void* aecInst);
|
||||
|
||||
/*
|
||||
* Initializes an AEC instance.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecInst Pointer to the AEC instance
|
||||
* int32_t sampFreq Sampling frequency of data
|
||||
* int32_t scSampFreq Soundcard sampling frequency
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq);
|
||||
|
||||
/*
|
||||
* Inserts an 80 or 160 sample block of data into the farend buffer.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecInst Pointer to the AEC instance
|
||||
* const float* farend In buffer containing one frame of
|
||||
* farend signal for L band
|
||||
 * size_t nrOfSamples Number of samples in farend buffer
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAec_BufferFarend(void* aecInst,
|
||||
const float* farend,
|
||||
size_t nrOfSamples);
|
||||
|
||||
/*
|
||||
 * Runs the echo canceller on 80 or 160 sample blocks of data.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecInst Pointer to the AEC instance
|
||||
 * const float* const* nearend In buffer containing one frame of
|
||||
* nearend+echo signal for each band
|
||||
 * size_t num_bands Number of bands in nearend buffer
|
||||
 * size_t nrOfSamples Number of samples in nearend buffer
|
||||
* int16_t msInSndCardBuf Delay estimate for sound card and
|
||||
* system buffers
|
||||
 * int32_t skew Difference between number of samples played
|
||||
* and recorded at the soundcard (for clock skew
|
||||
* compensation)
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* float* const* out Out buffer, one frame of processed nearend
|
||||
* for each band
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAec_Process(void* aecInst,
|
||||
const float* const* nearend,
|
||||
size_t num_bands,
|
||||
float* const* out,
|
||||
size_t nrOfSamples,
|
||||
int16_t msInSndCardBuf,
|
||||
int32_t skew);
|
||||
|
||||
/*
|
||||
* This function enables the user to set certain parameters on-the-fly.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* handle Pointer to the AEC instance
|
||||
* AecConfig config Config instance that contains all
|
||||
* properties to be set
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* int return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int WebRtcAec_set_config(void* handle, AecConfig config);
|
||||
|
||||
/*
|
||||
* Gets the current echo status of the nearend signal.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* handle Pointer to the AEC instance
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* int* status 0: Almost certainly nearend single-talk
|
||||
 * 1: Might not be nearend single-talk
|
||||
* int return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int WebRtcAec_get_echo_status(void* handle, int* status);
|
||||
|
||||
/*
|
||||
* Gets the current echo metrics for the session.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* handle Pointer to the AEC instance
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* AecMetrics* metrics Struct which will be filled out with the
|
||||
* current echo metrics.
|
||||
* int return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics);
|
||||
|
||||
/*
|
||||
* Gets the current delay metrics for the session.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* handle Pointer to the AEC instance
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* int* median Delay median value.
|
||||
* int* std Delay standard deviation.
|
||||
* float* fraction_poor_delays Fraction of the delay estimates that may
|
||||
* cause the AEC to perform poorly.
|
||||
*
|
||||
* int return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int WebRtcAec_GetDelayMetrics(void* handle,
|
||||
int* median,
|
||||
int* std,
|
||||
float* fraction_poor_delays);
|
||||
|
||||
/*
|
||||
* Gets the last error code.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecInst Pointer to the AEC instance
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
 * int32_t return 12000-12050: error or warning code (see the AEC_* defines)
|
||||
*/
|
||||
int32_t WebRtcAec_get_error_code(void* aecInst);
|
||||
|
||||
// Returns a pointer to the low level AEC handle.
|
||||
//
|
||||
// Input:
|
||||
// - handle : Pointer to the AEC instance.
|
||||
//
|
||||
// Return value:
|
||||
// - AecCore pointer : NULL for error.
|
||||
//
|
||||
struct AecCore* WebRtcAec_aec_core(void* handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
|
602
third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
vendored
Normal file
602
third_party/webrtc/src/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
vendored
Normal file
|
@ -0,0 +1,602 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
extern "C" {
|
||||
#include "webrtc/modules/audio_processing/aec/aec_core.h"
|
||||
}
|
||||
#include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h"
|
||||
#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
|
||||
#include "webrtc/test/testsupport/gtest_disable.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace {
|
||||
|
||||
class SystemDelayTest : public ::testing::Test {
|
||||
protected:
|
||||
SystemDelayTest();
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
// Initialization of AEC handle with respect to |sample_rate_hz|. Since the
|
||||
// device sample rate is unimportant we set that value to 48000 Hz.
|
||||
void Init(int sample_rate_hz);
|
||||
|
||||
// Makes one render call and one capture call in that specific order.
|
||||
void RenderAndCapture(int device_buffer_ms);
|
||||
|
||||
// Fills up the far-end buffer with respect to the default device buffer size.
|
||||
size_t BufferFillUp();
|
||||
|
||||
// Runs and verifies the behavior in a stable startup procedure.
|
||||
void RunStableStartup();
|
||||
|
||||
// Maps buffer size in ms into samples, taking the unprocessed frame into
|
||||
// account.
|
||||
int MapBufferSizeToSamples(int size_in_ms, bool extended_filter);
|
||||
|
||||
void* handle_;
|
||||
Aec* self_;
|
||||
size_t samples_per_frame_;
|
||||
// Dummy input/output speech data.
|
||||
static const int kSamplesPerChunk = 160;
|
||||
float far_[kSamplesPerChunk];
|
||||
float near_[kSamplesPerChunk];
|
||||
float out_[kSamplesPerChunk];
|
||||
const float* near_ptr_;
|
||||
float* out_ptr_;
|
||||
};
|
||||
|
||||
// Sets up the dummy audio buffers; the AEC instance itself is created in
// SetUp().
SystemDelayTest::SystemDelayTest()
    : handle_(NULL), self_(NULL), samples_per_frame_(0) {
  near_ptr_ = near_;
  out_ptr_ = out_;

  // The output starts out silent.
  memset(out_, 0, sizeof(out_));

  // The dummy input is filled with more or less arbitrary non-zero values.
  for (int sample = 0; sample < kSamplesPerChunk; ++sample) {
    near_[sample] = 514.0;
    far_[sample] = 257.0;
  }
}
|
||||
|
||||
// Creates the AEC instance used by each test and keeps a typed view of it so
// the tests can inspect the internal state.
void SystemDelayTest::SetUp() {
  handle_ = WebRtcAec_Create();
  ASSERT_TRUE(handle_ != NULL);
  self_ = static_cast<Aec*>(handle_);
}
|
||||
|
||||
// Releases the AEC instance created in SetUp() and clears the stored handle.
void SystemDelayTest::TearDown() {
  void* const instance = handle_;
  WebRtcAec_Free(instance);
  handle_ = NULL;
}
|
||||
|
||||
// In SWB mode nothing is added to the buffer handling with respect to
|
||||
// functionality compared to WB. We therefore only verify behavior in NB and WB.
|
||||
static const int kSampleRateHz[] = {8000, 16000};
|
||||
static const size_t kNumSampleRates =
|
||||
sizeof(kSampleRateHz) / sizeof(*kSampleRateHz);
|
||||
|
||||
// Default audio device buffer size used.
|
||||
static const int kDeviceBufMs = 100;
|
||||
|
||||
// Requirement for a stable device convergence time in ms. Should converge in
|
||||
// less than |kStableConvergenceMs|.
|
||||
static const int kStableConvergenceMs = 100;
|
||||
|
||||
// Maximum convergence time in ms. This means that we should leave the startup
|
||||
// phase after |kMaxConvergenceMs| independent of device buffer stability
|
||||
// conditions.
|
||||
static const int kMaxConvergenceMs = 500;
|
||||
|
||||
// (Re)initializes the AEC for |sample_rate_hz| with a 48000 Hz device rate,
// checks that the system delay starts at zero and records the frame size.
void SystemDelayTest::Init(int sample_rate_hz) {
  const int32_t init_result = WebRtcAec_Init(handle_, sample_rate_hz, 48000);
  EXPECT_EQ(0, init_result);
  EXPECT_EQ(0, WebRtcAec_system_delay(self_->aec));

  // A frame holds 10 ms of data, i.e. one hundredth of a second.
  const int frame_length = sample_rate_hz / 100;
  samples_per_frame_ = static_cast<size_t>(frame_length);
}
|
||||
|
||||
// Buffers one far-end (render) frame and then processes one near-end
// (capture) frame, reporting |device_buffer_ms| as the device delay and a
// skew of zero. Both calls are expected to succeed.
void SystemDelayTest::RenderAndCapture(int device_buffer_ms) {
  const int32_t render_result =
      WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_);
  EXPECT_EQ(0, render_result);

  const size_t kNumBands = 1;
  const int32_t kSkew = 0;
  const int32_t capture_result = WebRtcAec_Process(handle_,
                                                   &near_ptr_,
                                                   kNumBands,
                                                   &out_ptr_,
                                                   samples_per_frame_,
                                                   device_buffer_ms,
                                                   kSkew);
  EXPECT_EQ(0, capture_result);
}
|
||||
|
||||
size_t SystemDelayTest::BufferFillUp() {
|
||||
// To make sure we have a full buffer when we verify stability we first fill
|
||||
// up the far-end buffer with the same amount as we will report in through
|
||||
// Process().
|
||||
size_t buffer_size = 0;
|
||||
for (int i = 0; i < kDeviceBufMs / 10; i++) {
|
||||
EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
|
||||
buffer_size += samples_per_frame_;
|
||||
EXPECT_EQ(static_cast<int>(buffer_size),
|
||||
WebRtcAec_system_delay(self_->aec));
|
||||
}
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
void SystemDelayTest::RunStableStartup() {
|
||||
// To make sure we have a full buffer when we verify stability we first fill
|
||||
// up the far-end buffer with the same amount as we will report in through
|
||||
// Process().
|
||||
size_t buffer_size = BufferFillUp();
|
||||
|
||||
if (WebRtcAec_delay_agnostic_enabled(self_->aec) == 1) {
|
||||
// In extended_filter mode we set the buffer size after the first processed
|
||||
// 10 ms chunk. Hence, we don't need to wait for the reported system delay
|
||||
// values to become stable.
|
||||
RenderAndCapture(kDeviceBufMs);
|
||||
buffer_size += samples_per_frame_;
|
||||
EXPECT_EQ(0, self_->startup_phase);
|
||||
} else {
|
||||
// A stable device should be accepted and put in a regular process mode
|
||||
// within |kStableConvergenceMs|.
|
||||
int process_time_ms = 0;
|
||||
for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) {
|
||||
RenderAndCapture(kDeviceBufMs);
|
||||
buffer_size += samples_per_frame_;
|
||||
if (self_->startup_phase == 0) {
|
||||
// We have left the startup phase.
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Verify convergence time.
|
||||
EXPECT_GT(kStableConvergenceMs, process_time_ms);
|
||||
}
|
||||
// Verify that the buffer has been flushed.
|
||||
EXPECT_GE(static_cast<int>(buffer_size),
|
||||
WebRtcAec_system_delay(self_->aec));
|
||||
}
|
||||
|
||||
int SystemDelayTest::MapBufferSizeToSamples(int size_in_ms,
|
||||
bool extended_filter) {
|
||||
// If extended_filter is disabled we add an extra 10 ms for the unprocessed
|
||||
// frame. That is simply how the algorithm is constructed.
|
||||
return static_cast<int>(
|
||||
(size_in_ms + (extended_filter ? 0 : 10)) * samples_per_frame_ / 10);
|
||||
}
|
||||
|
||||
// The tests should meet basic requirements and not be adjusted to what is
|
||||
// actually implemented. If we don't get good code coverage this way we either
|
||||
// lack in tests or have unnecessary code.
|
||||
// General requirements:
|
||||
// 1) If we add far-end data the system delay should be increased with the same
|
||||
// amount we add.
|
||||
// 2) If the far-end buffer is full we should flush the oldest data to make room
|
||||
// for the new. In this case the system delay is unaffected.
|
||||
// 3) There should exist a startup phase in which the buffer size is to be
|
||||
// determined. In this phase no cancellation should be performed.
|
||||
// 4) Under stable conditions (small variations in device buffer sizes) the AEC
|
||||
// should determine an appropriate local buffer size within
|
||||
// |kStableConvergenceMs| ms.
|
||||
// 5) Under unstable conditions the AEC should make a decision within
|
||||
// |kMaxConvergenceMs| ms.
|
||||
// 6) If the local buffer runs out of data we should stuff the buffer with older
|
||||
// frames.
|
||||
// 7) The system delay should within |kMaxConvergenceMs| ms heal from
|
||||
// disturbances like drift, data glitches, toggling events and outliers.
|
||||
// 8) The system delay should never become negative.
|
||||
|
||||
// Adding data to the AEC far-end buffer must increase the internal system
// delay by exactly the amount of data added, independent of DA-AEC and
// extended_filter mode.
TEST_F(SystemDelayTest, CorrectIncreaseWhenBufferFarend) {
  const int kNumFramesToBuffer = 5;
  for (int ext_filter = 0; ext_filter <= 1; ++ext_filter) {
    WebRtcAec_enable_extended_filter(self_->aec, ext_filter);
    EXPECT_EQ(ext_filter, WebRtcAec_extended_filter_enabled(self_->aec));

    for (int delay_agnostic = 0; delay_agnostic <= 1; ++delay_agnostic) {
      WebRtcAec_enable_delay_agnostic(self_->aec, delay_agnostic);
      EXPECT_EQ(delay_agnostic, WebRtcAec_delay_agnostic_enabled(self_->aec));

      for (size_t rate = 0; rate < kNumSampleRates; ++rate) {
        Init(kSampleRateHz[rate]);
        // Buffer a couple of frames and check after each one that the system
        // delay has grown by one frame.
        for (int frames = 1; frames <= kNumFramesToBuffer; ++frames) {
          EXPECT_EQ(0,
                    WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
          EXPECT_EQ(static_cast<int>(frames * samples_per_frame_),
                    WebRtcAec_system_delay(self_->aec));
        }
      }
    }
  }
}
|
||||
|
||||
// TODO(bjornv): Add a test to verify behavior if the far-end buffer is full
|
||||
// when adding new data.
|
||||
|
||||
// Runs the system through a stable startup and then verifies that the system
// delay meets the requirements, independent of DA-AEC and extended_filter
// mode.
TEST_F(SystemDelayTest, CorrectDelayAfterStableStartup) {
  for (int ext_filter = 0; ext_filter <= 1; ++ext_filter) {
    WebRtcAec_enable_extended_filter(self_->aec, ext_filter);
    EXPECT_EQ(ext_filter, WebRtcAec_extended_filter_enabled(self_->aec));

    for (int delay_agnostic = 0; delay_agnostic <= 1; ++delay_agnostic) {
      WebRtcAec_enable_delay_agnostic(self_->aec, delay_agnostic);
      EXPECT_EQ(delay_agnostic, WebRtcAec_delay_agnostic_enabled(self_->aec));

      for (size_t rate = 0; rate < kNumSampleRates; ++rate) {
        Init(kSampleRateHz[rate]);
        RunStableStartup();

        // Requirement: |system_delay| lies in the interval [75%, 100%] of
        // what is reported on average. In extended_filter mode the target is
        // 50%, measured after one processed 10 ms chunk.
        const int average_reported_delay =
            static_cast<int>(kDeviceBufMs * samples_per_frame_ / 10);
        EXPECT_GE(average_reported_delay, WebRtcAec_system_delay(self_->aec));

        int lower_bound;
        if (WebRtcAec_extended_filter_enabled(self_->aec)) {
          lower_bound = static_cast<int>(average_reported_delay / 2 -
                                         samples_per_frame_);
        } else {
          lower_bound = average_reported_delay * 3 / 4;
        }
        EXPECT_LE(lower_bound, WebRtcAec_system_delay(self_->aec));
      }
    }
  }
}
|
||||
|
||||
// Verifies the system delay after a startup with an unstable reported device
// buffer size.
//
// The test does not apply in extended_filter mode, since only the first
// 10 ms chunk is used there to determine a reasonable buffer size. Neither
// does it apply if DA-AEC is on, because that overrides the startup
// procedure. Both modes are therefore switched off first.
TEST_F(SystemDelayTest, CorrectDelayAfterUnstableStartup) {
  WebRtcAec_enable_extended_filter(self_->aec, 0);
  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec));
  WebRtcAec_enable_delay_agnostic(self_->aec, 0);
  EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(self_->aec));

  // In an unstable system processing starts after |kMaxConvergenceMs|. On the
  // last frame the AEC buffer is adjusted to 60% of the last reported device
  // buffer size. The unstable system is constructed by alternating the
  // device buffer size between the two values |kDeviceBufMs| +- 25 ms.
  for (size_t rate = 0; rate < kNumSampleRates; ++rate) {
    Init(kSampleRateHz[rate]);

    // Start from a full far-end buffer: the same amount that will be
    // reported on average through Process().
    size_t buffer_size = BufferFillUp();

    int buffer_offset_ms = 25;
    int reported_delay_ms = 0;
    int process_time_ms = 0;
    while (process_time_ms <= kMaxConvergenceMs) {
      reported_delay_ms = kDeviceBufMs + buffer_offset_ms;
      RenderAndCapture(reported_delay_ms);
      buffer_size += samples_per_frame_;
      // Flip the sign so the next report lands on the other side.
      buffer_offset_ms = -buffer_offset_ms;
      if (self_->startup_phase == 0) {
        // The startup phase is over.
        break;
      }
      process_time_ms += 10;
    }
    // Verify convergence time.
    EXPECT_GE(kMaxConvergenceMs, process_time_ms);
    // Verify that the buffer has been flushed.
    EXPECT_GE(static_cast<int>(buffer_size),
              WebRtcAec_system_delay(self_->aec));

    // Requirement: |system_delay| lies in the interval [60%, 100%] of what
    // was last reported.
    const size_t last_reported_samples =
        reported_delay_ms * samples_per_frame_ / 10;
    EXPECT_GE(static_cast<int>(last_reported_samples),
              WebRtcAec_system_delay(self_->aec));
    EXPECT_LE(static_cast<int>(last_reported_samples * 3 / 5),
              WebRtcAec_system_delay(self_->aec));
  }
}
|
||||
|
||||
TEST_F(SystemDelayTest, CorrectDelayAfterStableBufferBuildUp) {
  // This test does not apply in extended_filter mode, since we only use the
  // first 10 ms chunk to determine a reasonable buffer size. Neither does
  // it apply if DA-AEC is on because that overrides the startup procedure.
  WebRtcAec_enable_extended_filter(self_->aec, 0);
  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec));
  WebRtcAec_enable_delay_agnostic(self_->aec, 0);
  EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(self_->aec));

  // In this test we start by establishing the device buffer size during stable
  // conditions, but with an empty internal far-end buffer. Once that is done we
  // verify that the system delay is increased correctly until we have reached
  // an internal buffer size of 75% of what's been reported.
  for (size_t i = 0; i < kNumSampleRates; i++) {
    Init(kSampleRateHz[i]);

    // We assume that running |kStableConvergenceMs| calls will put the
    // algorithm in a state where the device buffer size has been determined. We
    // can make that assumption since we have a separate stability test.
    int process_time_ms = 0;
    for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) {
      EXPECT_EQ(0,
                WebRtcAec_Process(handle_,
                                  &near_ptr_,
                                  1,
                                  &out_ptr_,
                                  samples_per_frame_,
                                  kDeviceBufMs,
                                  0));
    }
    // Verify that a buffer size has been established.
    EXPECT_EQ(0, self_->checkBuffSize);

    // We now have established the required buffer size. Let us verify that we
    // fill up before leaving the startup phase for normal processing.
    // (The previous running |buffer_size| counter was written but never read,
    // so it has been dropped.)
    size_t target_buffer_size = kDeviceBufMs * samples_per_frame_ / 10 * 3 / 4;
    process_time_ms = 0;
    for (; process_time_ms <= kMaxConvergenceMs; process_time_ms += 10) {
      RenderAndCapture(kDeviceBufMs);
      if (self_->startup_phase == 0) {
        // We have left the startup phase.
        break;
      }
    }
    // Verify convergence time.
    EXPECT_GT(kMaxConvergenceMs, process_time_ms);
    // Verify that the buffer has reached the desired size.
    EXPECT_LE(static_cast<int>(target_buffer_size),
              WebRtcAec_system_delay(self_->aec));

    // Verify normal behavior (system delay is kept constant) after startup by
    // running a couple of calls to BufferFarend() and Process().
    for (int j = 0; j < 6; j++) {
      int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
      RenderAndCapture(kDeviceBufMs);
      EXPECT_EQ(system_delay_before_calls, WebRtcAec_system_delay(self_->aec));
    }
  }
}
|
||||
|
||||
TEST_F(SystemDelayTest, CorrectDelayWhenBufferUnderrun) {
  // Buffer under-run scenario: repeatedly calling WebRtcAec_Process() without
  // feeding far-end data eventually drains the buffer, at which point the AEC
  // should stuff the buffer automatically. This is checked by making sure the
  // system delay never goes negative.
  // The behavior should not depend on DA-AEC or extended_filter mode, so all
  // four combinations are exercised.
  for (int use_extended = 0; use_extended <= 1; ++use_extended) {
    WebRtcAec_enable_extended_filter(self_->aec, use_extended);
    EXPECT_EQ(use_extended, WebRtcAec_extended_filter_enabled(self_->aec));
    for (int use_da = 0; use_da <= 1; ++use_da) {
      WebRtcAec_enable_delay_agnostic(self_->aec, use_da);
      EXPECT_EQ(use_da, WebRtcAec_delay_agnostic_enabled(self_->aec));
      for (size_t rate = 0; rate < kNumSampleRates; ++rate) {
        Init(kSampleRateHz[rate]);
        RunStableStartup();

        // The startup phase is over and the buffer holds at most
        // |kStableConvergenceMs| of data. Keep processing until the data runs
        // out, checking after every call that the delay stays non-negative.
        for (int elapsed_ms = 0; elapsed_ms <= kStableConvergenceMs;
             elapsed_ms += 10) {
          EXPECT_EQ(0, WebRtcAec_Process(handle_, &near_ptr_, 1, &out_ptr_,
                                         samples_per_frame_, kDeviceBufMs, 0));
          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
        }
      }
    }
  }
}
|
||||
|
||||
TEST_F(SystemDelayTest, CorrectDelayDuringDrift) {
  // Drift scenario: the system delay must never exceed the device buffer.
  // Drift is simulated by lowering the reported device buffer size by 1 ms
  // every 100 ms. Whenever the reported size drops below 30 ms, 10 ms is
  // added back so that the pattern repeats.
  //
  // The behavior should not depend on DA-AEC or extended_filter mode.
  for (int use_extended = 0; use_extended <= 1; ++use_extended) {
    WebRtcAec_enable_extended_filter(self_->aec, use_extended);
    EXPECT_EQ(use_extended, WebRtcAec_extended_filter_enabled(self_->aec));
    for (int use_da = 0; use_da <= 1; ++use_da) {
      WebRtcAec_enable_delay_agnostic(self_->aec, use_da);
      EXPECT_EQ(use_da, WebRtcAec_delay_agnostic_enabled(self_->aec));
      for (size_t rate = 0; rate < kNumSampleRates; ++rate) {
        Init(kSampleRateHz[rate]);
        RunStableStartup();

        // Startup is done; continue with normal processing.
        int jump_ms = 0;
        for (int frame = 0; frame < 1000; ++frame) {
          // Drift = -1 ms per 100 ms of data.
          int device_buf_ms = kDeviceBufMs - (frame / 10) + jump_ms;
          int device_buf =
              MapBufferSizeToSamples(device_buf_ms, use_extended == 1);

          if (device_buf_ms < 30) {
            // Add 10 ms of data, taking effect on the next frame.
            jump_ms += 10;
          }
          RenderAndCapture(device_buf_ms);

          // The system delay must not exceed the device buffer...
          EXPECT_GE(device_buf, WebRtcAec_system_delay(self_->aec));

          // ...and must stay non-negative.
          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
        }
      }
    }
  }
}
|
||||
|
||||
TEST_F(SystemDelayTest, ShouldRecoverAfterGlitch) {
  // Glitch scenario: the system delay must recover after a glitch in the
  // data. The glitch is built as 200 ms of far-end buffering that the device
  // never reports, after which the stable procedure continues.
  // The system is considered non-causal when the difference between the
  // device buffer and the system delay is less than one block (64 samples).
  //
  // The behavior should not depend on DA-AEC or extended_filter mode.
  for (int use_extended = 0; use_extended <= 1; ++use_extended) {
    WebRtcAec_enable_extended_filter(self_->aec, use_extended);
    EXPECT_EQ(use_extended, WebRtcAec_extended_filter_enabled(self_->aec));
    for (int use_da = 0; use_da <= 1; ++use_da) {
      WebRtcAec_enable_delay_agnostic(self_->aec, use_da);
      EXPECT_EQ(use_da, WebRtcAec_delay_agnostic_enabled(self_->aec));
      for (size_t rate = 0; rate < kNumSampleRates; ++rate) {
        Init(kSampleRateHz[rate]);
        RunStableStartup();
        int device_buf =
            MapBufferSizeToSamples(kDeviceBufMs, use_extended == 1);

        // Glitch state: 20 far-end frames without any capture processing.
        // The system delay itself is verified by a separate test.
        for (int frame = 0; frame < 20; ++frame) {
          EXPECT_EQ(0,
                    WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_));
        }
        // We must now be non-causal, i.e. |system_delay| > |device_buf|.
        EXPECT_LT(device_buf, WebRtcAec_system_delay(self_->aec));

        // Recovery state. At least 4 ms of data should be recovered per
        // 10 ms, so a 200 ms glitch takes at most 200 * 10 / 4 = 500 ms to
        // recover from.
        bool non_causal = true;  // Currently in a non-causal state.
        for (int frame = 0; frame < 50; ++frame) {
          int delay_before = WebRtcAec_system_delay(self_->aec);
          RenderAndCapture(kDeviceBufMs);
          int delay_after = WebRtcAec_system_delay(self_->aec);
          // Recovery is complete once
          // |device_buf| - |delay_after| >= PART_LEN (1 block).
          // While recovering, |delay_after| < |delay_before|; afterwards the
          // two are equal.
          if (!non_causal) {
            EXPECT_EQ(delay_before, delay_after);
          } else {
            EXPECT_LT(delay_after, delay_before);
            if (device_buf - delay_after >= PART_LEN) {
              non_causal = false;
            }
          }
          // The system delay must stay non-negative.
          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
        }
        // We must have recovered by now.
        EXPECT_FALSE(non_causal);
      }
    }
  }
}
|
||||
|
||||
TEST_F(SystemDelayTest, UnaffectedWhenSpuriousDeviceBufferValues) {
  // Not applicable in extended_filter mode, where only the first 10 ms chunk
  // is used to determine a reasonable buffer size.
  const int extended_filter = 0;
  WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
  EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));

  // The result should not depend on DA-AEC.
  for (int use_da = 0; use_da <= 1; ++use_da) {
    WebRtcAec_enable_delay_agnostic(self_->aec, use_da);
    EXPECT_EQ(use_da, WebRtcAec_delay_agnostic_enabled(self_->aec));
    // Spurious device buffer values: the system delay must be unaffected by
    // large outliers.
    // The system is considered non-causal when the difference between the
    // device buffer and the system delay is less than one block (64 samples).
    for (size_t rate = 0; rate < kNumSampleRates; ++rate) {
      Init(kSampleRateHz[rate]);
      RunStableStartup();
      int device_buf =
          MapBufferSizeToSamples(kDeviceBufMs, extended_filter == 1);

      // Normal state: we start out causal.
      bool non_causal = false;

      // Run 1 s, replacing the device buffer size with 500 ms every 100 ms.
      for (int frame = 0; frame < 100; ++frame) {
        int delay_before = WebRtcAec_system_delay(self_->aec);
        int device_buf_ms = kDeviceBufMs;
        if (frame % 10 == 0) {
          device_buf_ms = 500;
        }
        RenderAndCapture(device_buf_ms);

        // Latch non-causality once it has been observed.
        non_causal |=
            (device_buf - WebRtcAec_system_delay(self_->aec) < PART_LEN);
        EXPECT_FALSE(non_causal);
        EXPECT_EQ(delay_before, WebRtcAec_system_delay(self_->aec));

        // The system delay must stay non-negative.
        EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
      }
    }
  }
}
|
||||
|
||||
TEST_F(SystemDelayTest, CorrectImpactWhenTogglingDeviceBufferValues) {
  // This test aims at verifying that the system delay is "unaffected" by
  // toggling values reported by the device.
  // The test is constructed such that every other device buffer value is zero
  // and then 2 * |kDeviceBufMs|, hence the size is constant on the average. The
  // zero values will force us into a non-causal state and thereby lowering the
  // system delay until we basically run out of data. Once that happens the
  // buffer will be stuffed.
  // TODO(bjornv): This test will have a better impact if we verified that the
  // delay estimate goes up when the system delay goes down to meet the average
  // device buffer size.

  // This test does not apply if DA-AEC is enabled and extended_filter mode
  // disabled.
  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
      if (extended_filter == 0 && da_aec == 1) {
        continue;
      }
      for (size_t i = 0; i < kNumSampleRates; i++) {
        Init(kSampleRateHz[i]);
        RunStableStartup();
        const int device_buf = MapBufferSizeToSamples(kDeviceBufMs,
                                                      extended_filter == 1);

        // Normal state. We are currently not in a non-causal state.
        bool non_causal = false;

        // Loop through 100 frames (both render and capture), which equals 1 s
        // of data. Every odd frame we set the device buffer size to
        // 2 * |kDeviceBufMs| and even frames we set the device buffer size to
        // zero.
        for (int j = 0; j < 100; j++) {
          int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
          int device_buf_ms = 2 * (j % 2) * kDeviceBufMs;
          RenderAndCapture(device_buf_ms);

          // Check for non-causality, compared with the average device buffer
          // size. One block is PART_LEN samples; use the named constant, as
          // the other tests in this file do, instead of a literal 64.
          non_causal |=
              (device_buf - WebRtcAec_system_delay(self_->aec) < PART_LEN);
          EXPECT_GE(system_delay_before_calls,
                    WebRtcAec_system_delay(self_->aec));

          // Verify that the system delay is non-negative.
          EXPECT_LE(0, WebRtcAec_system_delay(self_->aec));
        }
        // Verify we are not in a non-causal state.
        EXPECT_FALSE(non_causal);
      }
    }
  }
}
|
||||
|
||||
} // namespace
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,434 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Performs echo control (suppression) with fft routines in fixed-point.
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
|
||||
|
||||
#include "webrtc/common_audio/ring_buffer.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_processing/aecm/aecm_defines.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
#ifdef _MSC_VER // visual c++
|
||||
#define ALIGN8_BEG __declspec(align(8))
|
||||
#define ALIGN8_END
|
||||
#else // gcc or icc
|
||||
#define ALIGN8_BEG
|
||||
#define ALIGN8_END __attribute__((aligned(8)))
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
int16_t real;
|
||||
int16_t imag;
|
||||
} ComplexInt16;
|
||||
|
||||
typedef struct {
|
||||
int farBufWritePos;
|
||||
int farBufReadPos;
|
||||
int knownDelay;
|
||||
int lastKnownDelay;
|
||||
int firstVAD; // Parameter to control poorly initialized channels
|
||||
|
||||
RingBuffer* farFrameBuf;
|
||||
RingBuffer* nearNoisyFrameBuf;
|
||||
RingBuffer* nearCleanFrameBuf;
|
||||
RingBuffer* outFrameBuf;
|
||||
|
||||
int16_t farBuf[FAR_BUF_LEN];
|
||||
|
||||
int16_t mult;
|
||||
uint32_t seed;
|
||||
|
||||
// Delay estimation variables
|
||||
void* delay_estimator_farend;
|
||||
void* delay_estimator;
|
||||
uint16_t currentDelay;
|
||||
// Far end history variables
|
||||
// TODO(bjornv): Replace |far_history| with ring_buffer.
|
||||
uint16_t far_history[PART_LEN1 * MAX_DELAY];
|
||||
int far_history_pos;
|
||||
int far_q_domains[MAX_DELAY];
|
||||
|
||||
int16_t nlpFlag;
|
||||
int16_t fixedDelay;
|
||||
|
||||
uint32_t totCount;
|
||||
|
||||
int16_t dfaCleanQDomain;
|
||||
int16_t dfaCleanQDomainOld;
|
||||
int16_t dfaNoisyQDomain;
|
||||
int16_t dfaNoisyQDomainOld;
|
||||
|
||||
int16_t nearLogEnergy[MAX_BUF_LEN];
|
||||
int16_t farLogEnergy;
|
||||
int16_t echoAdaptLogEnergy[MAX_BUF_LEN];
|
||||
int16_t echoStoredLogEnergy[MAX_BUF_LEN];
|
||||
|
||||
// The extra 16 or 32 bytes in the following buffers are for alignment based
|
||||
// Neon code.
|
||||
// It's designed this way since the current GCC compiler can't align a
|
||||
// buffer in 16 or 32 byte boundaries properly.
|
||||
int16_t channelStored_buf[PART_LEN1 + 8];
|
||||
int16_t channelAdapt16_buf[PART_LEN1 + 8];
|
||||
int32_t channelAdapt32_buf[PART_LEN1 + 8];
|
||||
int16_t xBuf_buf[PART_LEN2 + 16]; // farend
|
||||
int16_t dBufClean_buf[PART_LEN2 + 16]; // nearend
|
||||
int16_t dBufNoisy_buf[PART_LEN2 + 16]; // nearend
|
||||
int16_t outBuf_buf[PART_LEN + 8];
|
||||
|
||||
// Pointers to the above buffers
|
||||
int16_t *channelStored;
|
||||
int16_t *channelAdapt16;
|
||||
int32_t *channelAdapt32;
|
||||
int16_t *xBuf;
|
||||
int16_t *dBufClean;
|
||||
int16_t *dBufNoisy;
|
||||
int16_t *outBuf;
|
||||
|
||||
int32_t echoFilt[PART_LEN1];
|
||||
int16_t nearFilt[PART_LEN1];
|
||||
int32_t noiseEst[PART_LEN1];
|
||||
int noiseEstTooLowCtr[PART_LEN1];
|
||||
int noiseEstTooHighCtr[PART_LEN1];
|
||||
int16_t noiseEstCtr;
|
||||
int16_t cngMode;
|
||||
|
||||
int32_t mseAdaptOld;
|
||||
int32_t mseStoredOld;
|
||||
int32_t mseThreshold;
|
||||
|
||||
int16_t farEnergyMin;
|
||||
int16_t farEnergyMax;
|
||||
int16_t farEnergyMaxMin;
|
||||
int16_t farEnergyVAD;
|
||||
int16_t farEnergyMSE;
|
||||
int currentVADValue;
|
||||
int16_t vadUpdateCount;
|
||||
|
||||
int16_t startupState;
|
||||
int16_t mseChannelCount;
|
||||
int16_t supGain;
|
||||
int16_t supGainOld;
|
||||
|
||||
int16_t supGainErrParamA;
|
||||
int16_t supGainErrParamD;
|
||||
int16_t supGainErrParamDiffAB;
|
||||
int16_t supGainErrParamDiffBD;
|
||||
|
||||
struct RealFFT* real_fft;
|
||||
|
||||
#ifdef AEC_DEBUG
|
||||
FILE *farFile;
|
||||
FILE *nearFile;
|
||||
FILE *outFile;
|
||||
#endif
|
||||
} AecmCore;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_CreateCore()
|
||||
//
|
||||
// Allocates the memory needed by the AECM. The memory needs to be
|
||||
// initialized separately using the WebRtcAecm_InitCore() function.
|
||||
// Returns a pointer to the instance and a nullptr at failure.
|
||||
AecmCore* WebRtcAecm_CreateCore();
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_InitCore(...)
|
||||
//
|
||||
// This function initializes the AECM instance created with
|
||||
// WebRtcAecm_CreateCore()
|
||||
// Input:
|
||||
// - aecm : Pointer to the AECM instance
|
||||
// - samplingFreq : Sampling Frequency
|
||||
//
|
||||
// Output:
|
||||
// - aecm : Initialized instance
|
||||
//
|
||||
// Return value : 0 - Ok
|
||||
// -1 - Error
|
||||
//
|
||||
int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_FreeCore(...)
|
||||
//
|
||||
// This function releases the memory allocated by WebRtcAecm_CreateCore()
|
||||
// Input:
|
||||
// - aecm : Pointer to the AECM instance
|
||||
//
|
||||
void WebRtcAecm_FreeCore(AecmCore* aecm);
|
||||
|
||||
int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_InitEchoPathCore(...)
|
||||
//
|
||||
// This function resets the echo channel adaptation with the specified channel.
|
||||
// Input:
|
||||
// - aecm : Pointer to the AECM instance
|
||||
// - echo_path : Pointer to the data that should initialize the echo
|
||||
// path
|
||||
//
|
||||
// Output:
|
||||
// - aecm : Initialized instance
|
||||
//
|
||||
void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_ProcessFrame(...)
|
||||
//
|
||||
// This function processes frames and sends blocks to
|
||||
// WebRtcAecm_ProcessBlock(...)
|
||||
//
|
||||
// Inputs:
|
||||
// - aecm : Pointer to the AECM instance
|
||||
// - farend : In buffer containing one frame of echo signal
|
||||
// - nearendNoisy : In buffer containing one frame of nearend+echo signal
|
||||
// without NS
|
||||
// - nearendClean : In buffer containing one frame of nearend+echo signal
|
||||
// with NS
|
||||
//
|
||||
// Output:
|
||||
// - out : Out buffer, one frame of nearend signal :
|
||||
//
|
||||
//
|
||||
int WebRtcAecm_ProcessFrame(AecmCore* aecm,
|
||||
const int16_t* farend,
|
||||
const int16_t* nearendNoisy,
|
||||
const int16_t* nearendClean,
|
||||
int16_t* out);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_ProcessBlock(...)
|
||||
//
|
||||
// This function is called for every block within one frame
|
||||
// This function is called by WebRtcAecm_ProcessFrame(...)
|
||||
//
|
||||
// Inputs:
|
||||
// - aecm : Pointer to the AECM instance
|
||||
// - farend : In buffer containing one block of echo signal
|
||||
// - nearendNoisy : In buffer containing one frame of nearend+echo signal
|
||||
// without NS
|
||||
// - nearendClean : In buffer containing one frame of nearend+echo signal
|
||||
// with NS
|
||||
//
|
||||
// Output:
|
||||
// - out : Out buffer, one block of nearend signal :
|
||||
//
|
||||
//
|
||||
int WebRtcAecm_ProcessBlock(AecmCore* aecm,
|
||||
const int16_t* farend,
|
||||
const int16_t* nearendNoisy,
|
||||
const int16_t* noisyClean,
|
||||
int16_t* out);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_BufferFarFrame()
|
||||
//
|
||||
// Inserts a frame of data into farend buffer.
|
||||
//
|
||||
// Inputs:
|
||||
// - aecm : Pointer to the AECM instance
|
||||
// - farend : In buffer containing one frame of farend signal
|
||||
// - farLen : Length of frame
|
||||
//
|
||||
void WebRtcAecm_BufferFarFrame(AecmCore* const aecm,
|
||||
const int16_t* const farend,
|
||||
const int farLen);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_FetchFarFrame()
|
||||
//
|
||||
// Read the farend buffer to account for known delay
|
||||
//
|
||||
// Inputs:
|
||||
// - aecm : Pointer to the AECM instance
|
||||
// - farend : In buffer containing one frame of farend signal
|
||||
// - farLen : Length of frame
|
||||
// - knownDelay : known delay
|
||||
//
|
||||
void WebRtcAecm_FetchFarFrame(AecmCore* const aecm,
|
||||
int16_t* const farend,
|
||||
const int farLen,
|
||||
const int knownDelay);
|
||||
|
||||
// All the functions below are intended to be private
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_UpdateFarHistory()
|
||||
//
|
||||
// Moves the pointer to the next entry and inserts |far_spectrum| and
|
||||
// corresponding Q-domain in its buffer.
|
||||
//
|
||||
// Inputs:
|
||||
// - self : Pointer to the AECM instance
|
||||
// - far_spectrum : Pointer to the far end spectrum
|
||||
// - far_q : Q-domain of far end spectrum
|
||||
//
|
||||
void WebRtcAecm_UpdateFarHistory(AecmCore* self,
|
||||
uint16_t* far_spectrum,
|
||||
int far_q);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_AlignedFarend()
|
||||
//
|
||||
// Returns a pointer to the far end spectrum aligned to current near end
|
||||
// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been
|
||||
// called before AlignedFarend(...). Otherwise, you get the pointer to the
|
||||
// previous frame. The memory is only valid until the next call of
|
||||
// WebRtc_DelayEstimatorProcessFix(...).
|
||||
//
|
||||
// Inputs:
|
||||
// - self : Pointer to the AECM instance.
|
||||
// - delay : Current delay estimate.
|
||||
//
|
||||
// Output:
|
||||
// - far_q : The Q-domain of the aligned far end spectrum
|
||||
//
|
||||
// Return value:
|
||||
// - far_spectrum : Pointer to the aligned far end spectrum
|
||||
// NULL - Error
|
||||
//
|
||||
const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, int* far_q, int delay);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_CalcSuppressionGain()
|
||||
//
|
||||
// This function calculates the suppression gain that is used in the
|
||||
// Wiener filter.
|
||||
//
|
||||
// Inputs:
|
||||
// - aecm : Pointer to the AECM instance.
|
||||
//
|
||||
// Return value:
|
||||
// - supGain : Suppression gain with which to scale the noise
|
||||
// level (Q14).
|
||||
//
|
||||
int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_CalcEnergies()
|
||||
//
|
||||
// This function calculates the log of energies for nearend, farend and
|
||||
// estimated echoes. There is also an update of energy decision levels,
|
||||
// i.e. internal VAD.
|
||||
//
|
||||
// Inputs:
|
||||
// - aecm : Pointer to the AECM instance.
|
||||
// - far_spectrum : Pointer to farend spectrum.
|
||||
// - far_q : Q-domain of farend spectrum.
|
||||
// - nearEner : Near end energy for current block in
|
||||
// Q(aecm->dfaQDomain).
|
||||
//
|
||||
// Output:
|
||||
// - echoEst : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
|
||||
//
|
||||
void WebRtcAecm_CalcEnergies(AecmCore* aecm,
|
||||
const uint16_t* far_spectrum,
|
||||
const int16_t far_q,
|
||||
const uint32_t nearEner,
|
||||
int32_t* echoEst);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_CalcStepSize()
|
||||
//
|
||||
// This function calculates the step size used in channel estimation
|
||||
//
|
||||
// Inputs:
|
||||
// - aecm : Pointer to the AECM instance.
|
||||
//
|
||||
// Return value:
|
||||
// - mu : Stepsize in log2(), i.e. number of shifts.
|
||||
//
|
||||
int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// WebRtcAecm_UpdateChannel(...)
|
||||
//
|
||||
// This function performs channel estimation.
|
||||
// NLMS and decision on channel storage.
|
||||
//
|
||||
// Inputs:
|
||||
// - aecm : Pointer to the AECM instance.
|
||||
// - far_spectrum : Absolute value of the farend signal in Q(far_q)
|
||||
// - far_q : Q-domain of the farend signal
|
||||
// - dfa : Absolute value of the nearend signal
|
||||
// (Q[aecm->dfaQDomain])
|
||||
// - mu : NLMS step size.
|
||||
// Input/Output:
|
||||
// - echoEst : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
|
||||
//
|
||||
void WebRtcAecm_UpdateChannel(AecmCore* aecm,
|
||||
const uint16_t* far_spectrum,
|
||||
const int16_t far_q,
|
||||
const uint16_t* const dfa,
|
||||
const int16_t mu,
|
||||
int32_t* echoEst);
|
||||
|
||||
extern const int16_t WebRtcAecm_kCosTable[];
|
||||
extern const int16_t WebRtcAecm_kSinTable[];
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Some function pointers, for internal functions shared by ARM NEON and
|
||||
// generic C code.
|
||||
//
|
||||
typedef void (*CalcLinearEnergies)(AecmCore* aecm,
|
||||
const uint16_t* far_spectrum,
|
||||
int32_t* echoEst,
|
||||
uint32_t* far_energy,
|
||||
uint32_t* echo_energy_adapt,
|
||||
uint32_t* echo_energy_stored);
|
||||
extern CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
|
||||
|
||||
typedef void (*StoreAdaptiveChannel)(AecmCore* aecm,
|
||||
const uint16_t* far_spectrum,
|
||||
int32_t* echo_est);
|
||||
extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
|
||||
|
||||
typedef void (*ResetAdaptiveChannel)(AecmCore* aecm);
|
||||
extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
|
||||
|
||||
// For the above function pointers, functions for generic platforms are declared
|
||||
// and defined as static in file aecm_core.c, while those for ARM Neon platforms
|
||||
// are declared below and defined in file aecm_core_neon.c.
|
||||
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
|
||||
void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
|
||||
const uint16_t* far_spectrum,
|
||||
int32_t* echo_est,
|
||||
uint32_t* far_energy,
|
||||
uint32_t* echo_energy_adapt,
|
||||
uint32_t* echo_energy_stored);
|
||||
|
||||
void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
|
||||
const uint16_t* far_spectrum,
|
||||
int32_t* echo_est);
|
||||
|
||||
void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm);
|
||||
#endif
|
||||
|
||||
#if defined(MIPS32_LE)
|
||||
void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm,
|
||||
const uint16_t* far_spectrum,
|
||||
int32_t* echo_est,
|
||||
uint32_t* far_energy,
|
||||
uint32_t* echo_energy_adapt,
|
||||
uint32_t* echo_energy_stored);
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm,
|
||||
const uint16_t* far_spectrum,
|
||||
int32_t* echo_est);
|
||||
|
||||
void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,771 @@
|
|||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "webrtc/common_audio/ring_buffer.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/real_fft.h"
|
||||
#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
|
||||
#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
|
||||
#include "webrtc/system_wrappers/interface/compile_assert_c.h"
|
||||
#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
// Square root of Hanning window in Q14 (16384 == 1.0).
// The table holds 65 entries (indices 0..64) rising from 0 to 16384; the
// windowing code in this file reads it forwards (index i) and backwards
// (index PART_LEN - i).
|
||||
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
|
||||
// NEON builds share the table that is defined with external linkage in
// aecm_core_neon.c.
|
||||
extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END;
|
||||
#else
|
||||
static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
|
||||
0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
|
||||
3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
|
||||
6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
|
||||
9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
|
||||
11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
|
||||
13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
|
||||
15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
|
||||
16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
|
||||
};
|
||||
#endif
|
||||
|
||||
// Coefficients for the piecewise-linear magnitude approximation
//   |z| ~= alpha * max(|re|, |im|) + beta * min(|re|, |im|)
// used by TimeToFrequencyDomain() when AECM_WITH_ABS_APPROX is defined.
// Pair 1 is selected when max/4 > min, pair 2 when max/2 > min, and pair 3
// otherwise. All values are in Q15.
#ifdef AECM_WITH_ABS_APPROX
|
||||
// Q15 alpha = 0.99439986968132 (factor applied to the larger component).
|
||||
static const uint16_t kAlpha1 = 32584;
|
||||
// Q15 beta = 0.12967166976970 (factor applied to the smaller component).
|
||||
static const uint16_t kBeta1 = 4249;
|
||||
// Q15 alpha = 0.94234827210087 (factor applied to the larger component).
|
||||
static const uint16_t kAlpha2 = 30879;
|
||||
// Q15 beta = 0.33787806009150 (factor applied to the smaller component).
|
||||
static const uint16_t kBeta2 = 11072;
|
||||
// Q15 alpha = 0.82247698684306 (factor applied to the larger component).
|
||||
static const uint16_t kAlpha3 = 26951;
|
||||
// Q15 beta = 0.57762063060713 (factor applied to the smaller component).
|
||||
static const uint16_t kBeta3 = 18927;
|
||||
#endif
|
||||
|
||||
// Q-domain in which ComfortNoise() keeps its noise estimate.
static const int16_t kNoiseEstQDomain = 15;
|
||||
// Number of consecutive blocks ComfortNoise() waits before nudging a small
// noise estimate up or down by an incremental step.
static const int16_t kNoiseEstIncCount = 5;
|
||||
|
||||
static void ComfortNoise(AecmCore* aecm,
|
||||
const uint16_t* dfa,
|
||||
ComplexInt16* out,
|
||||
const int16_t* lambda);
|
||||
|
||||
static void WindowAndFFT(AecmCore* aecm,
|
||||
int16_t* fft,
|
||||
const int16_t* time_signal,
|
||||
ComplexInt16* freq_signal,
|
||||
int time_signal_scaling) {
|
||||
int i = 0;
|
||||
|
||||
// FFT of signal
|
||||
for (i = 0; i < PART_LEN; i++) {
|
||||
// Window time domain signal and insert into real part of
|
||||
// transformation array |fft|
|
||||
int16_t scaled_time_signal = time_signal[i] << time_signal_scaling;
|
||||
fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14);
|
||||
scaled_time_signal = time_signal[i + PART_LEN] << time_signal_scaling;
|
||||
fft[PART_LEN + i] = (int16_t)((
|
||||
scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14);
|
||||
}
|
||||
|
||||
// Do forward FFT, then take only the first PART_LEN complex samples,
|
||||
// and change signs of the imaginary parts.
|
||||
WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
|
||||
for (i = 0; i < PART_LEN; i++) {
|
||||
freq_signal[i].imag = -freq_signal[i].imag;
|
||||
}
|
||||
}
|
||||
|
||||
static void InverseFFTAndWindow(AecmCore* aecm,
|
||||
int16_t* fft,
|
||||
ComplexInt16* efw,
|
||||
int16_t* output,
|
||||
const int16_t* nearendClean) {
|
||||
int i, j, outCFFT;
|
||||
int32_t tmp32no1;
|
||||
// Reuse |efw| for the inverse FFT output after transferring
|
||||
// the contents to |fft|.
|
||||
int16_t* ifft_out = (int16_t*)efw;
|
||||
|
||||
// Synthesis
|
||||
for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) {
|
||||
fft[j] = efw[i].real;
|
||||
fft[j + 1] = -efw[i].imag;
|
||||
}
|
||||
fft[0] = efw[0].real;
|
||||
fft[1] = -efw[0].imag;
|
||||
|
||||
fft[PART_LEN2] = efw[PART_LEN].real;
|
||||
fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
|
||||
|
||||
// Inverse FFT. Keep outCFFT to scale the samples in the next block.
|
||||
outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out);
|
||||
for (i = 0; i < PART_LEN; i++) {
|
||||
ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14);
|
||||
tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i],
|
||||
outCFFT - aecm->dfaCleanQDomain);
|
||||
output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
|
||||
tmp32no1 + aecm->outBuf[i],
|
||||
WEBRTC_SPL_WORD16_MIN);
|
||||
|
||||
tmp32no1 = (ifft_out[PART_LEN + i] *
|
||||
WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14;
|
||||
tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
|
||||
outCFFT - aecm->dfaCleanQDomain);
|
||||
aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
|
||||
tmp32no1,
|
||||
WEBRTC_SPL_WORD16_MIN);
|
||||
}
|
||||
|
||||
// Copy the current block to the old position
|
||||
// (aecm->outBuf is shifted elsewhere)
|
||||
memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
|
||||
memcpy(aecm->dBufNoisy,
|
||||
aecm->dBufNoisy + PART_LEN,
|
||||
sizeof(int16_t) * PART_LEN);
|
||||
if (nearendClean != NULL)
|
||||
{
|
||||
memcpy(aecm->dBufClean,
|
||||
aecm->dBufClean + PART_LEN,
|
||||
sizeof(int16_t) * PART_LEN);
|
||||
}
|
||||
}
|
||||
|
||||
// Transforms a time domain signal into the frequency domain, outputting the
|
||||
// complex valued signal, absolute value and sum of absolute values.
|
||||
//
|
||||
// time_signal [in] Pointer to time domain signal
|
||||
// freq_signal_real [out] Pointer to real part of frequency domain array
|
||||
// freq_signal_imag [out] Pointer to imaginary part of frequency domain
|
||||
// array
|
||||
// freq_signal_abs [out] Pointer to absolute value of frequency domain
|
||||
// array
|
||||
// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in
|
||||
// the frequency domain array
|
||||
// return value The Q-domain of current frequency values
|
||||
//
|
||||
static int TimeToFrequencyDomain(AecmCore* aecm,
|
||||
const int16_t* time_signal,
|
||||
ComplexInt16* freq_signal,
|
||||
uint16_t* freq_signal_abs,
|
||||
uint32_t* freq_signal_sum_abs) {
|
||||
int i = 0;
|
||||
int time_signal_scaling = 0;
|
||||
|
||||
int32_t tmp32no1 = 0;
|
||||
int32_t tmp32no2 = 0;
|
||||
|
||||
// In fft_buf, +16 for 32-byte alignment.
|
||||
int16_t fft_buf[PART_LEN4 + 16];
|
||||
int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
|
||||
|
||||
int16_t tmp16no1;
|
||||
#ifndef WEBRTC_ARCH_ARM_V7
|
||||
int16_t tmp16no2;
|
||||
#endif
|
||||
#ifdef AECM_WITH_ABS_APPROX
|
||||
int16_t max_value = 0;
|
||||
int16_t min_value = 0;
|
||||
uint16_t alpha = 0;
|
||||
uint16_t beta = 0;
|
||||
#endif
|
||||
|
||||
#ifdef AECM_DYNAMIC_Q
|
||||
tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
|
||||
time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
|
||||
#endif
|
||||
|
||||
WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
|
||||
|
||||
// Extract imaginary and real part, calculate the magnitude for
|
||||
// all frequency bins
|
||||
freq_signal[0].imag = 0;
|
||||
freq_signal[PART_LEN].imag = 0;
|
||||
freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
|
||||
freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
|
||||
freq_signal[PART_LEN].real);
|
||||
(*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
|
||||
(uint32_t)(freq_signal_abs[PART_LEN]);
|
||||
|
||||
for (i = 1; i < PART_LEN; i++)
|
||||
{
|
||||
if (freq_signal[i].real == 0)
|
||||
{
|
||||
freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
|
||||
}
|
||||
else if (freq_signal[i].imag == 0)
|
||||
{
|
||||
freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Approximation for magnitude of complex fft output
|
||||
// magn = sqrt(real^2 + imag^2)
|
||||
// magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
|
||||
//
|
||||
// The parameters alpha and beta are stored in Q15
|
||||
|
||||
#ifdef AECM_WITH_ABS_APPROX
|
||||
tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
|
||||
tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
|
||||
|
||||
if(tmp16no1 > tmp16no2)
|
||||
{
|
||||
max_value = tmp16no1;
|
||||
min_value = tmp16no2;
|
||||
} else
|
||||
{
|
||||
max_value = tmp16no2;
|
||||
min_value = tmp16no1;
|
||||
}
|
||||
|
||||
// Magnitude in Q(-6)
|
||||
if ((max_value >> 2) > min_value)
|
||||
{
|
||||
alpha = kAlpha1;
|
||||
beta = kBeta1;
|
||||
} else if ((max_value >> 1) > min_value)
|
||||
{
|
||||
alpha = kAlpha2;
|
||||
beta = kBeta2;
|
||||
} else
|
||||
{
|
||||
alpha = kAlpha3;
|
||||
beta = kBeta3;
|
||||
}
|
||||
tmp16no1 = (int16_t)((max_value * alpha) >> 15);
|
||||
tmp16no2 = (int16_t)((min_value * beta) >> 15);
|
||||
freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2;
|
||||
#else
|
||||
#ifdef WEBRTC_ARCH_ARM_V7
|
||||
__asm __volatile(
|
||||
"smulbb %[tmp32no1], %[real], %[real]\n\t"
|
||||
"smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t"
|
||||
:[tmp32no1]"+&r"(tmp32no1),
|
||||
[tmp32no2]"=r"(tmp32no2)
|
||||
:[real]"r"(freq_signal[i].real),
|
||||
[imag]"r"(freq_signal[i].imag)
|
||||
);
|
||||
#else
|
||||
tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
|
||||
tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
|
||||
tmp32no1 = tmp16no1 * tmp16no1;
|
||||
tmp32no2 = tmp16no2 * tmp16no2;
|
||||
tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
|
||||
#endif // WEBRTC_ARCH_ARM_V7
|
||||
tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
|
||||
|
||||
freq_signal_abs[i] = (uint16_t)tmp32no1;
|
||||
#endif // AECM_WITH_ABS_APPROX
|
||||
}
|
||||
(*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
|
||||
}
|
||||
|
||||
return time_signal_scaling;
|
||||
}
|
||||
|
||||
// Runs the echo canceller on one block of PART_LEN samples.
//
// aecm          [in/out] AECM instance; buffers, filters and counters are
//                        updated.
// farend        [in]     PART_LEN far-end samples.
// nearendNoisy  [in]     PART_LEN near-end samples.
// nearendClean  [in]     Optional PART_LEN near-end samples (may be NULL, in
//                        which case the noisy signal is used instead).
// output        [out]    PART_LEN processed samples.
// return value           0 on success, -1 if the far spectrum could not be
//                        added to the delay estimator, the delay estimation
//                        failed, or no aligned far-end spectrum is
//                        available.
int WebRtcAecm_ProcessBlock(AecmCore* aecm,
                            const int16_t* farend,
                            const int16_t* nearendNoisy,
                            const int16_t* nearendClean,
                            int16_t* output) {
  int i;

  uint32_t xfaSum;
  uint32_t dfaNoisySum;
  uint32_t dfaCleanSum;
  uint32_t echoEst32Gained;
  uint32_t tmpU32;

  int32_t tmp32no1;

  uint16_t xfa[PART_LEN1];
  uint16_t dfaNoisy[PART_LEN1];
  uint16_t dfaClean[PART_LEN1];
  uint16_t* ptrDfaClean = dfaClean;
  const uint16_t* far_spectrum_ptr = NULL;

  // 32 byte aligned buffers (with +8 or +16).
  // TODO(kma): define fft with ComplexInt16.
  int16_t fft_buf[PART_LEN4 + 2 + 16];  // +2 to make a loop safe.
  int32_t echoEst32_buf[PART_LEN1 + 8];
  int32_t dfw_buf[PART_LEN2 + 8];
  int32_t efw_buf[PART_LEN2 + 8];

  int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~31);
  int32_t* echoEst32 = (int32_t*)(((uintptr_t)echoEst32_buf + 31) & ~31);
  ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31);
  ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31);

  int16_t hnl[PART_LEN1];
  int16_t numPosCoef = 0;
  int16_t nlpGain = ONE_Q14;
  int delay;
  int16_t tmp16no1;
  int16_t tmp16no2;
  int16_t mu;
  int16_t supGain;
  int16_t zeros32, zeros16;
  int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
  int far_q;
  int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;

  const int kMinPrefBand = 4;
  const int kMaxPrefBand = 24;
  int32_t avgHnl32 = 0;

  // Determine startup state. There are three states:
  // (0) the first CONV_LEN blocks
  // (1) another CONV_LEN blocks
  // (2) the rest

  if (aecm->startupState < 2) {
    aecm->startupState = (aecm->totCount >= CONV_LEN) +
                         (aecm->totCount >= CONV_LEN2);
  }
  // END: Determine startup state

  // Buffer near and far end signals
  memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
  memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
  if (nearendClean != NULL) {
    memcpy(aecm->dBufClean + PART_LEN,
           nearendClean,
           sizeof(int16_t) * PART_LEN);
  }

  // Transform far end signal from time domain to frequency domain.
  // Only the magnitudes |xfa| are used afterwards; |dfw| is overwritten by
  // the near-end transform below.
  far_q = TimeToFrequencyDomain(aecm,
                                aecm->xBuf,
                                dfw,
                                xfa,
                                &xfaSum);

  // Transform noisy near end signal from time domain to frequency domain.
  zerosDBufNoisy = TimeToFrequencyDomain(aecm,
                                         aecm->dBufNoisy,
                                         dfw,
                                         dfaNoisy,
                                         &dfaNoisySum);
  aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
  aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;

  if (nearendClean == NULL) {
    ptrDfaClean = dfaNoisy;
    aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
    aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
    dfaCleanSum = dfaNoisySum;
  } else {
    // Transform clean near end signal from time domain to frequency domain.
    zerosDBufClean = TimeToFrequencyDomain(aecm,
                                           aecm->dBufClean,
                                           dfw,
                                           dfaClean,
                                           &dfaCleanSum);
    aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
    aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
  }

  // Get the delay
  // Save far-end history and estimate delay
  WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
  if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend,
                               xfa,
                               PART_LEN1,
                               far_q) == -1) {
    return -1;
  }
  delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
                                          dfaNoisy,
                                          PART_LEN1,
                                          zerosDBufNoisy);
  if (delay == -1) {
    return -1;
  } else if (delay == -2) {
    // If the delay is unknown, we assume zero.
    // NOTE: this will have to be adjusted if we ever add lookahead.
    delay = 0;
  }

  if (aecm->fixedDelay >= 0) {
    // Use fixed delay
    delay = aecm->fixedDelay;
  }

  // Get aligned far end spectrum
  far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
  zerosXBuf = (int16_t)far_q;
  if (far_spectrum_ptr == NULL) {
    return -1;
  }

  // Calculate log(energy) and update energy threshold levels
  WebRtcAecm_CalcEnergies(aecm,
                          far_spectrum_ptr,
                          zerosXBuf,
                          dfaNoisySum,
                          echoEst32);

  // Calculate stepsize
  mu = WebRtcAecm_CalcStepSize(aecm);

  // Update counters
  aecm->totCount++;

  // This is the channel estimation algorithm.
  // It is based on NLMS but has a variable step length,
  // which was calculated above.
  WebRtcAecm_UpdateChannel(aecm,
                           far_spectrum_ptr,
                           zerosXBuf,
                           dfaNoisy,
                           mu,
                           echoEst32);
  supGain = WebRtcAecm_CalcSuppressionGain(aecm);

  // Calculate Wiener filter hnl[]
  for (i = 0; i < PART_LEN1; i++) {
    // Far end signal through channel estimate in Q8
    // How much can we shift right to preserve resolution
    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
    aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;

    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
    if (zeros32 + zeros16 > 16) {
      // Multiplication is safe
      // Result in
      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
      //   aecm->xfaQDomainBuf[diff])
      echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
                                              (uint16_t)supGain);
      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
    } else {
      tmp16no1 = 17 - zeros32 - zeros16;
      resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
                       RESOLUTION_SUPGAIN;
      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
      if (zeros32 > tmp16no1) {
        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
                                                supGain >> tmp16no1);
      } else {
        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
        echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
      }
    }

    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
    assert(zeros16 >= 0);  // |zeros16| is a norm, hence non-negative.
    dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
    if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
      tmp16no1 = aecm->nearFilt[i] << zeros16;
      qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
      tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
    } else {
      tmp16no1 = dfa_clean_q_domain_diff < 0
          ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
          : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
      qDomainDiff = 0;
      tmp16no2 = ptrDfaClean[i];
    }
    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
    tmp16no2 = (int16_t)(tmp32no1 >> 4);
    tmp16no2 += tmp16no1;
    zeros16 = WebRtcSpl_NormW16(tmp16no2);
    // Saturate when the value is non-zero and the required left shift
    // exceeds the available headroom. A logical test is required here: a
    // bitwise AND with the comparison result would only inspect the lowest
    // bit of |tmp16no2| and let even values overflow in the shift below.
    if ((tmp16no2 != 0) && (-qDomainDiff > zeros16)) {
      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
    } else {
      aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff
                                          : tmp16no2 >> qDomainDiff;
    }

    // Wiener filter coefficients, resulting hnl in Q14
    if (echoEst32Gained == 0) {
      hnl[i] = ONE_Q14;
    } else if (aecm->nearFilt[i] == 0) {
      hnl[i] = 0;
    } else {
      // Multiply the suppression gain
      // Rounding
      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
      tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
                                   (uint16_t)aecm->nearFilt[i]);

      // Current resolution is
      // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
      // Make sure we are in Q14
      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
      if (tmp32no1 > ONE_Q14) {
        hnl[i] = 0;
      } else if (tmp32no1 < 0) {
        hnl[i] = ONE_Q14;
      } else {
        // 1-echoEst/dfa
        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
        if (hnl[i] < 0) {
          hnl[i] = 0;
        }
      }
    }
    if (hnl[i]) {
      numPosCoef++;
    }
  }

  // Only in wideband. Prevent the gain in upper band from being larger than
  // in lower band.
  if (aecm->mult == 2) {
    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
    // speech distortion in double-talk.
    for (i = 0; i < PART_LEN1; i++) {
      hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14);
    }

    for (i = kMinPrefBand; i <= kMaxPrefBand; i++) {
      avgHnl32 += (int32_t)hnl[i];
    }
    assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
    avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);

    for (i = kMaxPrefBand; i < PART_LEN1; i++) {
      if (hnl[i] > (int16_t)avgHnl32) {
        hnl[i] = (int16_t)avgHnl32;
      }
    }
  }

  // Calculate NLP gain, result is in Q14
  if (aecm->nlpFlag) {
    // Remove outliers: with fewer than three positive coefficients the
    // whole block is muted. The gain does not depend on the bin, so it is
    // computed once.
    nlpGain = (numPosCoef < 3) ? 0 : ONE_Q14;

    for (i = 0; i < PART_LEN1; i++) {
      // Truncate values close to zero and one.
      if (hnl[i] > NLP_COMP_HIGH) {
        hnl[i] = ONE_Q14;
      } else if (hnl[i] < NLP_COMP_LOW) {
        hnl[i] = 0;
      }

      // NLP
      if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14)) {
        hnl[i] = ONE_Q14;
      } else {
        hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14);
      }

      // multiply with Wiener coefficients
      efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
                                                                   hnl[i], 14));
      efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
                                                                   hnl[i], 14));
    }
  } else {
    // multiply with Wiener coefficients
    for (i = 0; i < PART_LEN1; i++) {
      efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
                                                                   hnl[i], 14));
      efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
                                                                   hnl[i], 14));
    }
  }

  if (aecm->cngMode == AecmTrue) {
    ComfortNoise(aecm, ptrDfaClean, efw, hnl);
  }

  InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);

  return 0;
}
|
||||
|
||||
static void ComfortNoise(AecmCore* aecm,
|
||||
const uint16_t* dfa,
|
||||
ComplexInt16* out,
|
||||
const int16_t* lambda) {
|
||||
int16_t i;
|
||||
int16_t tmp16;
|
||||
int32_t tmp32;
|
||||
|
||||
int16_t randW16[PART_LEN];
|
||||
int16_t uReal[PART_LEN1];
|
||||
int16_t uImag[PART_LEN1];
|
||||
int32_t outLShift32;
|
||||
int16_t noiseRShift16[PART_LEN1];
|
||||
|
||||
int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
|
||||
int16_t minTrackShift;
|
||||
|
||||
assert(shiftFromNearToNoise >= 0);
|
||||
assert(shiftFromNearToNoise < 16);
|
||||
|
||||
if (aecm->noiseEstCtr < 100)
|
||||
{
|
||||
// Track the minimum more quickly initially.
|
||||
aecm->noiseEstCtr++;
|
||||
minTrackShift = 6;
|
||||
} else
|
||||
{
|
||||
minTrackShift = 9;
|
||||
}
|
||||
|
||||
// Estimate noise power.
|
||||
for (i = 0; i < PART_LEN1; i++)
|
||||
{
|
||||
// Shift to the noise domain.
|
||||
tmp32 = (int32_t)dfa[i];
|
||||
outLShift32 = tmp32 << shiftFromNearToNoise;
|
||||
|
||||
if (outLShift32 < aecm->noiseEst[i])
|
||||
{
|
||||
// Reset "too low" counter
|
||||
aecm->noiseEstTooLowCtr[i] = 0;
|
||||
// Track the minimum.
|
||||
if (aecm->noiseEst[i] < (1 << minTrackShift))
|
||||
{
|
||||
// For small values, decrease noiseEst[i] every
|
||||
// |kNoiseEstIncCount| block. The regular approach below can not
|
||||
// go further down due to truncation.
|
||||
aecm->noiseEstTooHighCtr[i]++;
|
||||
if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount)
|
||||
{
|
||||
aecm->noiseEst[i]--;
|
||||
aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32)
|
||||
>> minTrackShift);
|
||||
}
|
||||
} else
|
||||
{
|
||||
// Reset "too high" counter
|
||||
aecm->noiseEstTooHighCtr[i] = 0;
|
||||
// Ramp slowly upwards until we hit the minimum again.
|
||||
if ((aecm->noiseEst[i] >> 19) > 0)
|
||||
{
|
||||
// Avoid overflow.
|
||||
// Multiplication with 2049 will cause wrap around. Scale
|
||||
// down first and then multiply
|
||||
aecm->noiseEst[i] >>= 11;
|
||||
aecm->noiseEst[i] *= 2049;
|
||||
}
|
||||
else if ((aecm->noiseEst[i] >> 11) > 0)
|
||||
{
|
||||
// Large enough for relative increase
|
||||
aecm->noiseEst[i] *= 2049;
|
||||
aecm->noiseEst[i] >>= 11;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Make incremental increases based on size every
|
||||
// |kNoiseEstIncCount| block
|
||||
aecm->noiseEstTooLowCtr[i]++;
|
||||
if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount)
|
||||
{
|
||||
aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1;
|
||||
aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < PART_LEN1; i++)
|
||||
{
|
||||
tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise;
|
||||
if (tmp32 > 32767)
|
||||
{
|
||||
tmp32 = 32767;
|
||||
aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;
|
||||
}
|
||||
noiseRShift16[i] = (int16_t)tmp32;
|
||||
|
||||
tmp16 = ONE_Q14 - lambda[i];
|
||||
noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14);
|
||||
}
|
||||
|
||||
// Generate a uniform random array on [0 2^15-1].
|
||||
WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
|
||||
|
||||
// Generate noise according to estimated energy.
|
||||
uReal[0] = 0; // Reject LF noise.
|
||||
uImag[0] = 0;
|
||||
for (i = 1; i < PART_LEN1; i++)
|
||||
{
|
||||
// Get a random index for the cos and sin tables over [0 359].
|
||||
tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15);
|
||||
|
||||
// Tables are in Q13.
|
||||
uReal[i] = (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >>
|
||||
13);
|
||||
uImag[i] = (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >>
|
||||
13);
|
||||
}
|
||||
uImag[PART_LEN] = 0;
|
||||
|
||||
for (i = 0; i < PART_LEN1; i++)
|
||||
{
|
||||
out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]);
|
||||
out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]);
|
||||
}
|
||||
}
|
||||
|
1566
third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c
vendored
Normal file
1566
third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_mips.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
212
third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
vendored
Normal file
212
third_party/webrtc/src/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
vendored
Normal file
|
@ -0,0 +1,212 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/real_fft.h"
|
||||
|
||||
// TODO(kma): Re-write the corresponding assembly file, the offset
|
||||
// generating script and makefile, to replace these C functions.
|
||||
|
||||
// Square root of Hanning window in Q14 (16384 == 1.0), 65 entries.
// This is the externally visible definition that aecm_core_c.c refers to
// through an "extern" declaration when WEBRTC_DETECT_NEON or
// WEBRTC_HAS_NEON is defined.
|
||||
const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
|
||||
0,
|
||||
399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
|
||||
3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
|
||||
6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
|
||||
9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
|
||||
11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
|
||||
13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
|
||||
15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
|
||||
16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
|
||||
};
|
||||
|
||||
// Stores the sum of the four 32-bit lanes of |v| in |*ptr|.
static inline void AddLanes(uint32_t* ptr, uint32x4_t v) {
#if defined(WEBRTC_ARCH_ARM64)
  // AArch64 has a single across-vector add.
  *ptr = vaddvq_u32(v);
#else
  // Fold high onto low half, then add the remaining two lanes pairwise.
  uint32x2_t folded = vadd_u32(vget_low_u32(v), vget_high_u32(v));
  folded = vpadd_u32(folded, folded);
  *ptr = vget_lane_u32(folded, 0);
#endif
}
|
||||
|
||||
// NEON version of the linear energy calculation. Equivalent C code:
//   for (i = 0; i < PART_LEN1; i++) {
//     echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
//                                         far_spectrum[i]);
//     (*far_energy) += (uint32_t)(far_spectrum[i]);
//     *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i];
//     (*echo_energy_stored) += (uint32_t)echo_est[i];
//   }
//
// aecm                [in]  Instance providing channelStored/channelAdapt16.
// far_spectrum        [in]  Delayed far-end magnitude spectrum.
// echo_est            [out] Echo estimate from the stored channel.
// far_energy          [out] Sum of the far-end spectrum.
// echo_energy_adapt   [out] Echo energy using the adaptive channel.
// echo_energy_stored  [out] Echo energy using the stored channel.
void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
                                       const uint16_t* far_spectrum,
                                       int32_t* echo_est,
                                       uint32_t* far_energy,
                                       uint32_t* echo_energy_adapt,
                                       uint32_t* echo_energy_stored) {
  int k;
  uint32x4_t sum_far = vdupq_n_u32(0);
  uint32x4_t sum_adapt = vdupq_n_u32(0);
  uint32x4_t sum_stored = vdupq_n_u32(0);

  // Process the first PART_LEN bins eight at a time.
  for (k = 0; k < PART_LEN; k += 8) {
    const uint16x8_t far_v = vld1q_u16(far_spectrum + k);
    const int16x8_t adapt_v = vld1q_s16(aecm->channelAdapt16 + k);
    const int16x8_t stored_v = vld1q_s16(aecm->channelStored + k);
    uint32x4_t est_lo;
    uint32x4_t est_hi;

    // Far-end energy.
    sum_far = vaddw_u16(sum_far, vget_low_u16(far_v));
    sum_far = vaddw_u16(sum_far, vget_high_u16(far_v));

    // Echo estimate through the stored channel.
    est_lo = vmull_u16(vreinterpret_u16_s16(vget_low_s16(stored_v)),
                       vget_low_u16(far_v));
    est_hi = vmull_u16(vreinterpret_u16_s16(vget_high_s16(stored_v)),
                       vget_high_u16(far_v));
    vst1q_s32(echo_est + k, vreinterpretq_s32_u32(est_lo));
    vst1q_s32(echo_est + k + 4, vreinterpretq_s32_u32(est_hi));

    sum_stored = vaddq_u32(est_lo, sum_stored);
    sum_stored = vaddq_u32(est_hi, sum_stored);

    // Echo energy through the adaptive channel.
    sum_adapt = vmlal_u16(sum_adapt,
                          vreinterpret_u16_s16(vget_low_s16(adapt_v)),
                          vget_low_u16(far_v));
    sum_adapt = vmlal_u16(sum_adapt,
                          vreinterpret_u16_s16(vget_high_s16(adapt_v)),
                          vget_high_u16(far_v));
  }

  AddLanes(far_energy, sum_far);
  AddLanes(echo_energy_stored, sum_stored);
  AddLanes(echo_energy_adapt, sum_adapt);

  // The last bin (index PART_LEN) is handled in scalar code.
  echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
                                             far_spectrum[PART_LEN]);
  *echo_energy_stored += (uint32_t)echo_est[PART_LEN];
  *far_energy += (uint32_t)far_spectrum[PART_LEN];
  *echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];
}
|
||||
|
||||
// NEON version of storing the adaptive channel. Equivalent C code:
//   memcpy(aecm->channelStored, aecm->channelAdapt16,
//          sizeof(int16_t) * PART_LEN1);
//   for (i = 0; i < PART_LEN1; i++) {
//     echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
//                                         far_spectrum[i]);
//   }
//
// aecm          [in/out] channelAdapt16 is copied into channelStored.
// far_spectrum  [in]     Far-end magnitude spectrum.
// echo_est      [out]    Recalculated echo estimate; asserted to be 32-byte
//                        aligned.
void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
                                         const uint16_t* far_spectrum,
                                         int32_t* echo_est) {
  int k;

  assert((uintptr_t)echo_est % 32 == 0);
  assert((uintptr_t)(aecm->channelStored) % 16 == 0);
  assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0);

  // Eight bins per iteration over the first PART_LEN bins.
  for (k = 0; k < PART_LEN; k += 8) {
    const uint16x8_t far_v = vld1q_u16(far_spectrum + k);
    const int16x8_t adapt_v = vld1q_s16(aecm->channelAdapt16 + k);
    uint32x4_t est_lo;
    uint32x4_t est_hi;

    // During startup we store the channel every block.
    vst1q_s16(aecm->channelStored + k, adapt_v);

    // Recalculate the echo estimate from the newly stored channel.
    est_lo = vmull_u16(vget_low_u16(far_v),
                       vget_low_u16(vreinterpretq_u16_s16(adapt_v)));
    est_hi = vmull_u16(vget_high_u16(far_v),
                       vget_high_u16(vreinterpretq_u16_s16(adapt_v)));

    vst1q_s32(echo_est + k, vreinterpretq_s32_u32(est_lo));
    vst1q_s32(echo_est + k + 4, vreinterpretq_s32_u32(est_hi));
  }

  // Remaining bin at index PART_LEN.
  aecm->channelStored[PART_LEN] = aecm->channelAdapt16[PART_LEN];
  echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
                                             far_spectrum[PART_LEN]);
}
|
||||
|
||||
// Copies the stored channel back into both adaptive channels, eight
// coefficients per iteration, and handles the final coefficient at index
// PART_LEN separately.
//
// Scalar equivalent:
//   for (i = 0; i < PART_LEN1; i++) {
//     aecm->channelAdapt16[i] = aecm->channelStored[i];
//     aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;
//   }
void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) {
  // Alignment preconditions, checked in debug builds only.
  assert((uintptr_t)(aecm->channelStored) % 16 == 0);
  assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0);
  assert((uintptr_t)(aecm->channelAdapt32) % 32 == 0);

  const int16_t* stored = aecm->channelStored;
  int16_t* adapt16 = aecm->channelAdapt16;
  int32_t* adapt32 = aecm->channelAdapt32;
  int offset;

  for (offset = 0; offset < PART_LEN; offset += 8) {
    const int16x8_t block = vld1q_s16(stored + offset);

    // 16-bit copy of the block.
    vst1q_s16(adapt16 + offset, block);

    // Widened copy: each coefficient shifted up by 16 bits.
    const int32x4_t widened_low = vshll_n_s16(vget_low_s16(block), 16);
    const int32x4_t widened_high = vshll_n_s16(vget_high_s16(block), 16);

    vst1q_s32(adapt32 + offset, widened_low);
    vst1q_s32(adapt32 + offset + 4, widened_high);
  }

  // The vector loop covers PART_LEN entries; finish entry PART_LEN here.
  aecm->channelAdapt16[PART_LEN] = aecm->channelStored[PART_LEN];
  aecm->channelAdapt32[PART_LEN] = (int32_t)aecm->channelStored[PART_LEN] << 16;
}
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
|
||||
|
||||
#define AECM_DYNAMIC_Q /* Turn on/off dynamic Q-domain. */
|
||||
|
||||
/* Algorithm parameters */
|
||||
#define FRAME_LEN 80 /* Total frame length, 10 ms. */
|
||||
|
||||
#define PART_LEN 64 /* Length of partition. */
|
||||
#define PART_LEN_SHIFT 7 /* Length of (PART_LEN * 2) in base 2. */
|
||||
|
||||
#define PART_LEN1 (PART_LEN + 1) /* Unique fft coefficients. */
|
||||
#define PART_LEN2 (PART_LEN << 1) /* Length of partition * 2. */
|
||||
#define PART_LEN4 (PART_LEN << 2) /* Length of partition * 4. */
|
||||
#define FAR_BUF_LEN PART_LEN4 /* Length of buffers. */
|
||||
#define MAX_DELAY 100
|
||||
|
||||
/* Counter parameters */
|
||||
#define CONV_LEN 512 /* Convergence length used at startup. */
|
||||
#define CONV_LEN2 (CONV_LEN << 1) /* Used at startup. */
|
||||
|
||||
/* Energy parameters */
|
||||
#define MAX_BUF_LEN 64 /* History length of energy signals. */
|
||||
#define FAR_ENERGY_MIN 1025 /* Lowest Far energy level: At least 2 */
|
||||
/* in energy. */
|
||||
#define FAR_ENERGY_DIFF 929 /* Allowed difference between max */
|
||||
/* and min. */
|
||||
#define ENERGY_DEV_OFFSET 0 /* The energy error offset in Q8. */
|
||||
#define ENERGY_DEV_TOL 400 /* The energy estimation tolerance (Q8). */
|
||||
#define FAR_ENERGY_VAD_REGION 230 /* Far VAD tolerance region. */
|
||||
|
||||
/* Stepsize parameters */
|
||||
#define MU_MIN 10 /* Min stepsize 2^-MU_MIN (far end energy */
|
||||
/* dependent). */
|
||||
#define MU_MAX 1 /* Max stepsize 2^-MU_MAX (far end energy */
|
||||
/* dependent). */
|
||||
#define MU_DIFF 9 /* MU_MIN - MU_MAX */
|
||||
|
||||
/* Channel parameters */
|
||||
#define MIN_MSE_COUNT 20 /* Min number of consecutive blocks with enough */
|
||||
/* far end energy to compare channel estimates. */
|
||||
#define MIN_MSE_DIFF 29 /* The ratio between adapted and stored channel to */
|
||||
/* accept a new storage (0.8 in Q-MSE_RESOLUTION). */
|
||||
#define MSE_RESOLUTION 5 /* MSE parameter resolution. */
|
||||
#define RESOLUTION_CHANNEL16 12 /* W16 Channel in Q-RESOLUTION_CHANNEL16. */
|
||||
#define RESOLUTION_CHANNEL32 28 /* W32 Channel in Q-RESOLUTION_CHANNEL. */
|
||||
#define CHANNEL_VAD 16 /* Minimum energy in frequency band */
|
||||
/* to update channel. */
|
||||
|
||||
/* Suppression gain parameters: SUPGAIN parameters in Q-(RESOLUTION_SUPGAIN). */
|
||||
#define RESOLUTION_SUPGAIN 8 /* Channel in Q-(RESOLUTION_SUPGAIN). */
|
||||
#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN) /* Default. */
|
||||
#define SUPGAIN_ERROR_PARAM_A 3072 /* Estimation error parameter */
|
||||
/* (Maximum gain) (8 in Q8). */
|
||||
#define SUPGAIN_ERROR_PARAM_B 1536 /* Estimation error parameter */
|
||||
/* (Gain before going down). */
|
||||
#define SUPGAIN_ERROR_PARAM_D SUPGAIN_DEFAULT /* Estimation error parameter */
|
||||
/* (Should be the same as Default) (1 in Q8). */
|
||||
#define SUPGAIN_EPC_DT 200 /* SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL */
|
||||
|
||||
/* Defines for "check delay estimation" */
|
||||
#define CORR_WIDTH 31     /* Number of samples to correlate over. */
#define CORR_MAX 16       /* Maximum correlation offset. */
#define CORR_MAX_BUF 63
#define CORR_DEV 4
#define CORR_MAX_LEVEL 20
#define CORR_MAX_LOW 4
/* Fully parenthesized so that the expansion stays a single operand when the */
/* macro is used inside a larger expression (e.g. 2 * CORR_BUF_LEN). */
#define CORR_BUF_LEN ((CORR_MAX << 1) + 1)
/* Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN. */
|
||||
|
||||
#define ONE_Q14 (1 << 14)
|
||||
|
||||
/* NLP defines */
|
||||
#define NLP_COMP_LOW 3277 /* 0.2 in Q14 */
|
||||
#define NLP_COMP_HIGH ONE_Q14 /* 1 in Q14 */
|
||||
|
||||
#endif
|
702
third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c
vendored
Normal file
702
third_party/webrtc/src/webrtc/modules/audio_processing/aecm/echo_control_mobile.c
vendored
Normal file
|
@ -0,0 +1,702 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
|
||||
|
||||
#ifdef AEC_DEBUG
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "webrtc/common_audio/ring_buffer.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
|
||||
|
||||
#define BUF_SIZE_FRAMES 50 // buffer size (frames)
|
||||
// Maximum length of resampled signal. Must be an integer multiple of frames
|
||||
// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN
|
||||
// The factor of 2 handles wb, and the + 1 is as a safety margin
|
||||
#define MAX_RESAMP_LEN (5 * FRAME_LEN)
|
||||
|
||||
static const size_t kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples)
|
||||
static const int kSampMsNb = 8; // samples per ms in nb
|
||||
// Target suppression levels for nlp modes
|
||||
// log{0.001, 0.00001, 0.00000001}
|
||||
static const int kInitCheck = 42;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int sampFreq;
|
||||
int scSampFreq;
|
||||
short bufSizeStart;
|
||||
int knownDelay;
|
||||
|
||||
// Stores the last frame added to the farend buffer
|
||||
short farendOld[2][FRAME_LEN];
|
||||
short initFlag; // indicates if AEC has been initialized
|
||||
|
||||
// Variables used for averaging far end buffer size
|
||||
short counter;
|
||||
short sum;
|
||||
short firstVal;
|
||||
short checkBufSizeCtr;
|
||||
|
||||
// Variables used for delay shifts
|
||||
short msInSndCardBuf;
|
||||
short filtDelay;
|
||||
int timeForDelayChange;
|
||||
int ECstartup;
|
||||
int checkBuffSize;
|
||||
int delayChange;
|
||||
short lastDelayDiff;
|
||||
|
||||
int16_t echoMode;
|
||||
|
||||
#ifdef AEC_DEBUG
|
||||
FILE *bufFile;
|
||||
FILE *delayFile;
|
||||
FILE *preCompFile;
|
||||
FILE *postCompFile;
|
||||
#endif // AEC_DEBUG
|
||||
// Structures
|
||||
RingBuffer *farendBuf;
|
||||
|
||||
int lastError;
|
||||
|
||||
AecmCore* aecmCore;
|
||||
} AecMobile;
|
||||
|
||||
// Estimates delay to set the position of the farend buffer read pointer
|
||||
// (controlled by knownDelay)
|
||||
static int WebRtcAecm_EstBufDelay(AecMobile* aecmInst, short msInSndCardBuf);
|
||||
|
||||
// Stuffs the farend buffer if the estimated delay is too large
|
||||
static int WebRtcAecm_DelayComp(AecMobile* aecmInst);
|
||||
|
||||
void* WebRtcAecm_Create() {
|
||||
AecMobile* aecm = malloc(sizeof(AecMobile));
|
||||
|
||||
WebRtcSpl_Init();
|
||||
|
||||
aecm->aecmCore = WebRtcAecm_CreateCore();
|
||||
if (!aecm->aecmCore) {
|
||||
WebRtcAecm_Free(aecm);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
aecm->farendBuf = WebRtc_CreateBuffer(kBufSizeSamp,
|
||||
sizeof(int16_t));
|
||||
if (!aecm->farendBuf)
|
||||
{
|
||||
WebRtcAecm_Free(aecm);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
aecm->initFlag = 0;
|
||||
aecm->lastError = 0;
|
||||
|
||||
#ifdef AEC_DEBUG
|
||||
aecm->aecmCore->farFile = fopen("aecFar.pcm","wb");
|
||||
aecm->aecmCore->nearFile = fopen("aecNear.pcm","wb");
|
||||
aecm->aecmCore->outFile = fopen("aecOut.pcm","wb");
|
||||
//aecm->aecmCore->outLpFile = fopen("aecOutLp.pcm","wb");
|
||||
|
||||
aecm->bufFile = fopen("aecBuf.dat", "wb");
|
||||
aecm->delayFile = fopen("aecDelay.dat", "wb");
|
||||
aecm->preCompFile = fopen("preComp.pcm", "wb");
|
||||
aecm->postCompFile = fopen("postComp.pcm", "wb");
|
||||
#endif // AEC_DEBUG
|
||||
return aecm;
|
||||
}
|
||||
|
||||
void WebRtcAecm_Free(void* aecmInst) {
|
||||
AecMobile* aecm = aecmInst;
|
||||
|
||||
if (aecm == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef AEC_DEBUG
|
||||
fclose(aecm->aecmCore->farFile);
|
||||
fclose(aecm->aecmCore->nearFile);
|
||||
fclose(aecm->aecmCore->outFile);
|
||||
//fclose(aecm->aecmCore->outLpFile);
|
||||
|
||||
fclose(aecm->bufFile);
|
||||
fclose(aecm->delayFile);
|
||||
fclose(aecm->preCompFile);
|
||||
fclose(aecm->postCompFile);
|
||||
#endif // AEC_DEBUG
|
||||
WebRtcAecm_FreeCore(aecm->aecmCore);
|
||||
WebRtc_FreeBuffer(aecm->farendBuf);
|
||||
free(aecm);
|
||||
}
|
||||
|
||||
int32_t WebRtcAecm_Init(void *aecmInst, int32_t sampFreq)
|
||||
{
|
||||
AecMobile* aecm = aecmInst;
|
||||
AecmConfig aecConfig;
|
||||
|
||||
if (aecm == NULL)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (sampFreq != 8000 && sampFreq != 16000)
|
||||
{
|
||||
aecm->lastError = AECM_BAD_PARAMETER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
aecm->sampFreq = sampFreq;
|
||||
|
||||
// Initialize AECM core
|
||||
if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1)
|
||||
{
|
||||
aecm->lastError = AECM_UNSPECIFIED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Initialize farend buffer
|
||||
WebRtc_InitBuffer(aecm->farendBuf);
|
||||
|
||||
aecm->initFlag = kInitCheck; // indicates that initialization has been done
|
||||
|
||||
aecm->delayChange = 1;
|
||||
|
||||
aecm->sum = 0;
|
||||
aecm->counter = 0;
|
||||
aecm->checkBuffSize = 1;
|
||||
aecm->firstVal = 0;
|
||||
|
||||
aecm->ECstartup = 1;
|
||||
aecm->bufSizeStart = 0;
|
||||
aecm->checkBufSizeCtr = 0;
|
||||
aecm->filtDelay = 0;
|
||||
aecm->timeForDelayChange = 0;
|
||||
aecm->knownDelay = 0;
|
||||
aecm->lastDelayDiff = 0;
|
||||
|
||||
memset(&aecm->farendOld[0][0], 0, 160);
|
||||
|
||||
// Default settings.
|
||||
aecConfig.cngMode = AecmTrue;
|
||||
aecConfig.echoMode = 3;
|
||||
|
||||
if (WebRtcAecm_set_config(aecm, aecConfig) == -1)
|
||||
{
|
||||
aecm->lastError = AECM_UNSPECIFIED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAecm_BufferFarend(void *aecmInst, const int16_t *farend,
|
||||
size_t nrOfSamples)
|
||||
{
|
||||
AecMobile* aecm = aecmInst;
|
||||
int32_t retVal = 0;
|
||||
|
||||
if (aecm == NULL)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (farend == NULL)
|
||||
{
|
||||
aecm->lastError = AECM_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (aecm->initFlag != kInitCheck)
|
||||
{
|
||||
aecm->lastError = AECM_UNINITIALIZED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (nrOfSamples != 80 && nrOfSamples != 160)
|
||||
{
|
||||
aecm->lastError = AECM_BAD_PARAMETER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// TODO: Is this really a good idea?
|
||||
if (!aecm->ECstartup)
|
||||
{
|
||||
WebRtcAecm_DelayComp(aecm);
|
||||
}
|
||||
|
||||
WebRtc_WriteBuffer(aecm->farendBuf, farend, nrOfSamples);
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
int32_t WebRtcAecm_Process(void *aecmInst, const int16_t *nearendNoisy,
|
||||
const int16_t *nearendClean, int16_t *out,
|
||||
size_t nrOfSamples, int16_t msInSndCardBuf)
|
||||
{
|
||||
AecMobile* aecm = aecmInst;
|
||||
int32_t retVal = 0;
|
||||
size_t i;
|
||||
short nmbrOfFilledBuffers;
|
||||
size_t nBlocks10ms;
|
||||
size_t nFrames;
|
||||
#ifdef AEC_DEBUG
|
||||
short msInAECBuf;
|
||||
#endif
|
||||
|
||||
if (aecm == NULL)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (nearendNoisy == NULL)
|
||||
{
|
||||
aecm->lastError = AECM_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (out == NULL)
|
||||
{
|
||||
aecm->lastError = AECM_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (aecm->initFlag != kInitCheck)
|
||||
{
|
||||
aecm->lastError = AECM_UNINITIALIZED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (nrOfSamples != 80 && nrOfSamples != 160)
|
||||
{
|
||||
aecm->lastError = AECM_BAD_PARAMETER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (msInSndCardBuf < 0)
|
||||
{
|
||||
msInSndCardBuf = 0;
|
||||
aecm->lastError = AECM_BAD_PARAMETER_WARNING;
|
||||
retVal = -1;
|
||||
} else if (msInSndCardBuf > 500)
|
||||
{
|
||||
msInSndCardBuf = 500;
|
||||
aecm->lastError = AECM_BAD_PARAMETER_WARNING;
|
||||
retVal = -1;
|
||||
}
|
||||
msInSndCardBuf += 10;
|
||||
aecm->msInSndCardBuf = msInSndCardBuf;
|
||||
|
||||
nFrames = nrOfSamples / FRAME_LEN;
|
||||
nBlocks10ms = nFrames / aecm->aecmCore->mult;
|
||||
|
||||
if (aecm->ECstartup)
|
||||
{
|
||||
if (nearendClean == NULL)
|
||||
{
|
||||
if (out != nearendNoisy)
|
||||
{
|
||||
memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples);
|
||||
}
|
||||
} else if (out != nearendClean)
|
||||
{
|
||||
memcpy(out, nearendClean, sizeof(short) * nrOfSamples);
|
||||
}
|
||||
|
||||
nmbrOfFilledBuffers =
|
||||
(short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
|
||||
// The AECM is in the start up mode
|
||||
// AECM is disabled until the soundcard buffer and farend buffers are OK
|
||||
|
||||
// Mechanism to ensure that the soundcard buffer is reasonably stable.
|
||||
if (aecm->checkBuffSize)
|
||||
{
|
||||
aecm->checkBufSizeCtr++;
|
||||
// Before we fill up the far end buffer we require the amount of data on the
|
||||
// sound card to be stable (+/-8 ms) compared to the first value. This
|
||||
// comparison is made during the following 4 consecutive frames. If it seems
|
||||
// to be stable then we start to fill up the far end buffer.
|
||||
|
||||
if (aecm->counter == 0)
|
||||
{
|
||||
aecm->firstVal = aecm->msInSndCardBuf;
|
||||
aecm->sum = 0;
|
||||
}
|
||||
|
||||
if (abs(aecm->firstVal - aecm->msInSndCardBuf)
|
||||
< WEBRTC_SPL_MAX(0.2 * aecm->msInSndCardBuf, kSampMsNb))
|
||||
{
|
||||
aecm->sum += aecm->msInSndCardBuf;
|
||||
aecm->counter++;
|
||||
} else
|
||||
{
|
||||
aecm->counter = 0;
|
||||
}
|
||||
|
||||
if (aecm->counter * nBlocks10ms >= 6)
|
||||
{
|
||||
// The farend buffer size is determined in blocks of 80 samples
|
||||
// Use 75% of the average value of the soundcard buffer
|
||||
aecm->bufSizeStart
|
||||
= WEBRTC_SPL_MIN((3 * aecm->sum
|
||||
* aecm->aecmCore->mult) / (aecm->counter * 40), BUF_SIZE_FRAMES);
|
||||
// buffersize has now been determined
|
||||
aecm->checkBuffSize = 0;
|
||||
}
|
||||
|
||||
if (aecm->checkBufSizeCtr * nBlocks10ms > 50)
|
||||
{
|
||||
// for really bad sound cards, don't disable echocanceller for more than 0.5 sec
|
||||
aecm->bufSizeStart = WEBRTC_SPL_MIN((3 * aecm->msInSndCardBuf
|
||||
* aecm->aecmCore->mult) / 40, BUF_SIZE_FRAMES);
|
||||
aecm->checkBuffSize = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// if checkBuffSize changed in the if-statement above
|
||||
if (!aecm->checkBuffSize)
|
||||
{
|
||||
// soundcard buffer is now reasonably stable
|
||||
// When the far end buffer is filled with approximately the same amount of
|
||||
// data as the amount on the sound card we end the start up phase and start
|
||||
// to cancel echoes.
|
||||
|
||||
if (nmbrOfFilledBuffers == aecm->bufSizeStart)
|
||||
{
|
||||
aecm->ECstartup = 0; // Enable the AECM
|
||||
} else if (nmbrOfFilledBuffers > aecm->bufSizeStart)
|
||||
{
|
||||
WebRtc_MoveReadPtr(aecm->farendBuf,
|
||||
(int) WebRtc_available_read(aecm->farendBuf)
|
||||
- (int) aecm->bufSizeStart * FRAME_LEN);
|
||||
aecm->ECstartup = 0;
|
||||
}
|
||||
}
|
||||
|
||||
} else
|
||||
{
|
||||
// AECM is enabled
|
||||
|
||||
// Note only 1 block supported for nb and 2 blocks for wb
|
||||
for (i = 0; i < nFrames; i++)
|
||||
{
|
||||
int16_t farend[FRAME_LEN];
|
||||
const int16_t* farend_ptr = NULL;
|
||||
|
||||
nmbrOfFilledBuffers =
|
||||
(short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
|
||||
|
||||
// Check that there is data in the far end buffer
|
||||
if (nmbrOfFilledBuffers > 0)
|
||||
{
|
||||
// Get the next 80 samples from the farend buffer
|
||||
WebRtc_ReadBuffer(aecm->farendBuf, (void**) &farend_ptr, farend,
|
||||
FRAME_LEN);
|
||||
|
||||
// Always store the last frame for use when we run out of data
|
||||
memcpy(&(aecm->farendOld[i][0]), farend_ptr,
|
||||
FRAME_LEN * sizeof(short));
|
||||
} else
|
||||
{
|
||||
// We have no data so we use the last played frame
|
||||
memcpy(farend, &(aecm->farendOld[i][0]), FRAME_LEN * sizeof(short));
|
||||
farend_ptr = farend;
|
||||
}
|
||||
|
||||
// Call buffer delay estimator when all data is extracted,
|
||||
// i,e. i = 0 for NB and i = 1 for WB
|
||||
if ((i == 0 && aecm->sampFreq == 8000) || (i == 1 && aecm->sampFreq == 16000))
|
||||
{
|
||||
WebRtcAecm_EstBufDelay(aecm, aecm->msInSndCardBuf);
|
||||
}
|
||||
|
||||
// Call the AECM
|
||||
/*WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearend[FRAME_LEN * i],
|
||||
&out[FRAME_LEN * i], aecm->knownDelay);*/
|
||||
if (WebRtcAecm_ProcessFrame(aecm->aecmCore,
|
||||
farend_ptr,
|
||||
&nearendNoisy[FRAME_LEN * i],
|
||||
(nearendClean
|
||||
? &nearendClean[FRAME_LEN * i]
|
||||
: NULL),
|
||||
&out[FRAME_LEN * i]) == -1)
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef AEC_DEBUG
|
||||
msInAECBuf = (short) WebRtc_available_read(aecm->farendBuf) /
|
||||
(kSampMsNb * aecm->aecmCore->mult);
|
||||
fwrite(&msInAECBuf, 2, 1, aecm->bufFile);
|
||||
fwrite(&(aecm->knownDelay), sizeof(aecm->knownDelay), 1, aecm->delayFile);
|
||||
#endif
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
/*
 * Applies a configuration to an initialized instance.
 *
 * config.cngMode must be AecmFalse or AecmTrue and config.echoMode must lie
 * in 0..4. The echo mode scales the suppression gain parameters relative to
 * mode 3: modes 0, 1 and 2 shift them right by 3, 2 and 1 bits, mode 4 shifts
 * them left by 1 bit.
 *
 * Note that cngMode is stored before echoMode is validated.
 *
 * Returns 0 on success and -1 on failure (reason in lastError unless the
 * instance itself is NULL).
 */
int32_t WebRtcAecm_set_config(void *aecmInst, AecmConfig config)
{
    AecMobile* self = aecmInst;
    AecmCore* core;

    if (self == NULL) {
        return -1;
    }
    if (self->initFlag != kInitCheck) {
        self->lastError = AECM_UNINITIALIZED_ERROR;
        return -1;
    }

    if (!(config.cngMode == AecmFalse || config.cngMode == AecmTrue)) {
        self->lastError = AECM_BAD_PARAMETER_ERROR;
        return -1;
    }
    core = self->aecmCore;
    core->cngMode = config.cngMode;

    if (config.echoMode < 0 || config.echoMode > 4) {
        self->lastError = AECM_BAD_PARAMETER_ERROR;
        return -1;
    }
    self->echoMode = config.echoMode;

    if (self->echoMode < 4) {
        // Modes 0..3: attenuate by 3, 2, 1 or 0 bits respectively.
        const int shift = 3 - self->echoMode;

        core->supGain = SUPGAIN_DEFAULT >> shift;
        core->supGainOld = SUPGAIN_DEFAULT >> shift;
        core->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> shift;
        core->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> shift;
        core->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> shift)
            - (SUPGAIN_ERROR_PARAM_B >> shift);
        core->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> shift)
            - (SUPGAIN_ERROR_PARAM_D >> shift);
    } else {
        // Mode 4: amplify by one bit.
        core->supGain = SUPGAIN_DEFAULT << 1;
        core->supGainOld = SUPGAIN_DEFAULT << 1;
        core->supGainErrParamA = SUPGAIN_ERROR_PARAM_A << 1;
        core->supGainErrParamD = SUPGAIN_ERROR_PARAM_D << 1;
        core->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A << 1)
            - (SUPGAIN_ERROR_PARAM_B << 1);
        core->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B << 1)
            - (SUPGAIN_ERROR_PARAM_D << 1);
    }

    return 0;
}
|
||||
|
||||
/*
 * Copies the current cngMode and echoMode of an initialized instance into
 * *config.
 *
 * Returns 0 on success and -1 on failure (reason in lastError unless the
 * instance itself is NULL).
 */
int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config)
{
    AecMobile* self = aecmInst;

    if (self == NULL) {
        return -1;
    }
    if (config == NULL) {
        self->lastError = AECM_NULL_POINTER_ERROR;
        return -1;
    }
    if (self->initFlag != kInitCheck) {
        self->lastError = AECM_UNINITIALIZED_ERROR;
        return -1;
    }

    config->cngMode = self->aecmCore->cngMode;
    config->echoMode = self->echoMode;
    return 0;
}
|
||||
|
||||
int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
|
||||
const void* echo_path,
|
||||
size_t size_bytes)
|
||||
{
|
||||
AecMobile* aecm = aecmInst;
|
||||
const int16_t* echo_path_ptr = echo_path;
|
||||
|
||||
if (aecmInst == NULL) {
|
||||
return -1;
|
||||
}
|
||||
if (echo_path == NULL) {
|
||||
aecm->lastError = AECM_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
if (size_bytes != WebRtcAecm_echo_path_size_bytes())
|
||||
{
|
||||
// Input channel size does not match the size of AECM
|
||||
aecm->lastError = AECM_BAD_PARAMETER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
if (aecm->initFlag != kInitCheck)
|
||||
{
|
||||
aecm->lastError = AECM_UNINITIALIZED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
WebRtcAecm_InitEchoPathCore(aecm->aecmCore, echo_path_ptr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
|
||||
void* echo_path,
|
||||
size_t size_bytes)
|
||||
{
|
||||
AecMobile* aecm = aecmInst;
|
||||
int16_t* echo_path_ptr = echo_path;
|
||||
|
||||
if (aecmInst == NULL) {
|
||||
return -1;
|
||||
}
|
||||
if (echo_path == NULL) {
|
||||
aecm->lastError = AECM_NULL_POINTER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
if (size_bytes != WebRtcAecm_echo_path_size_bytes())
|
||||
{
|
||||
// Input channel size does not match the size of AECM
|
||||
aecm->lastError = AECM_BAD_PARAMETER_ERROR;
|
||||
return -1;
|
||||
}
|
||||
if (aecm->initFlag != kInitCheck)
|
||||
{
|
||||
aecm->lastError = AECM_UNINITIALIZED_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
memcpy(echo_path_ptr, aecm->aecmCore->channelStored, size_bytes);
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t WebRtcAecm_echo_path_size_bytes()
|
||||
{
|
||||
return (PART_LEN1 * sizeof(int16_t));
|
||||
}
|
||||
|
||||
int32_t WebRtcAecm_get_error_code(void *aecmInst)
|
||||
{
|
||||
AecMobile* aecm = aecmInst;
|
||||
|
||||
if (aecm == NULL)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
return aecm->lastError;
|
||||
}
|
||||
|
||||
/*
 * Updates the filtered delay estimate (filtDelay) from the reported sound
 * card delay and the far-end buffer fill level, and moves knownDelay to
 * follow it once the difference has stayed outside the 96..224 sample band
 * for more than 25 consecutive calls. Always returns 0.
 */
static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf) {
    short farSamples = (short) WebRtc_available_read(aecm->farendBuf);
    short sndCardSamples;
    short rawDelay;
    short delta;
    int restartTimer;

    sndCardSamples = msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
    rawDelay = sndCardSamples - farSamples;

    if (rawDelay < FRAME_LEN) {
        // Skip one frame in the far-end buffer and account for it.
        WebRtc_MoveReadPtr(aecm->farendBuf, FRAME_LEN);
        rawDelay += FRAME_LEN;
    }

    // Weighted update: 8 parts old estimate, 2 parts new, never below zero.
    aecm->filtDelay = WEBRTC_SPL_MAX(0, (8 * aecm->filtDelay + 2 * rawDelay) / 10);

    delta = aecm->filtDelay - aecm->knownDelay;

    // The timer keeps counting only while the difference stays on the same
    // side of the band; any other situation restarts it.
    if (delta > 224) {
        restartTimer = (aecm->lastDelayDiff < 96);
    } else if (delta < 96 && aecm->knownDelay > 0) {
        restartTimer = (aecm->lastDelayDiff > 224);
    } else {
        restartTimer = 1;
    }

    if (restartTimer) {
        aecm->timeForDelayChange = 0;
    } else {
        aecm->timeForDelayChange++;
    }
    aecm->lastDelayDiff = delta;

    if (aecm->timeForDelayChange > 25) {
        aecm->knownDelay = WEBRTC_SPL_MAX((int)aecm->filtDelay - 160, 0);
    }
    return 0;
}
|
||||
|
||||
/*
 * Stuffs the far-end buffer (by moving its read pointer backwards) when the
 * sound card holds so much more data than the far-end buffer that the
 * difference exceeds the maximum allowed known delay. Always returns 0.
 */
static int WebRtcAecm_DelayComp(AecMobile* aecm) {
    int farSamples = (int) WebRtc_available_read(aecm->farendBuf);
    const int stuffLimit = 10 * FRAME_LEN;
    int sndCardSamples;
    int delay;
    int stuffCount;

    sndCardSamples = aecm->msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
    delay = sndCardSamples - farSamples;

    if (!(delay > FAR_BUF_LEN - FRAME_LEN * aecm->aecmCore->mult)) {
        // Within the allowed range: nothing to compensate.
        return 0;
    }

    // Stuff at least one frame and at most stuffLimit samples.
    stuffCount = (int)(WEBRTC_SPL_MAX(((sndCardSamples >> 1) - farSamples),
                                      FRAME_LEN));
    stuffCount = WEBRTC_SPL_MIN(stuffCount, stuffLimit);

    WebRtc_MoveReadPtr(aecm->farendBuf, -stuffCount);
    aecm->delayChange = 1;  // the delay needs to be updated

    return 0;
}
|
218
third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h
vendored
Normal file
218
third_party/webrtc/src/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h
vendored
Normal file
|
@ -0,0 +1,218 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
enum {
|
||||
AecmFalse = 0,
|
||||
AecmTrue
|
||||
};
|
||||
|
||||
// Errors
|
||||
#define AECM_UNSPECIFIED_ERROR 12000
|
||||
#define AECM_UNSUPPORTED_FUNCTION_ERROR 12001
|
||||
#define AECM_UNINITIALIZED_ERROR 12002
|
||||
#define AECM_NULL_POINTER_ERROR 12003
|
||||
#define AECM_BAD_PARAMETER_ERROR 12004
|
||||
|
||||
// Warnings
|
||||
#define AECM_BAD_PARAMETER_WARNING 12100
|
||||
|
||||
typedef struct {
|
||||
int16_t cngMode; // AECM_FALSE, AECM_TRUE (default)
|
||||
int16_t echoMode; // 0, 1, 2, 3 (default), 4
|
||||
} AecmConfig;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Allocates the memory needed by the AECM. The memory needs to be
|
||||
* initialized separately using the WebRtcAecm_Init() function.
|
||||
* Returns a pointer to the instance and a nullptr at failure.
|
||||
*/
|
||||
void* WebRtcAecm_Create();
|
||||
|
||||
/*
|
||||
* This function releases the memory allocated by WebRtcAecm_Create()
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecmInst Pointer to the AECM instance
|
||||
*/
|
||||
void WebRtcAecm_Free(void* aecmInst);
|
||||
|
||||
/*
|
||||
* Initializes an AECM instance.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecmInst Pointer to the AECM instance
|
||||
* int32_t sampFreq Sampling frequency of data
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq);
|
||||
|
||||
/*
|
||||
* Inserts an 80 or 160 sample block of data into the farend buffer.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecmInst Pointer to the AECM instance
|
||||
* int16_t* farend In buffer containing one frame of
|
||||
* farend signal
|
||||
* size_t nrOfSamples Number of samples in farend buffer
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAecm_BufferFarend(void* aecmInst,
|
||||
const int16_t* farend,
|
||||
size_t nrOfSamples);
|
||||
|
||||
/*
|
||||
* Runs the AECM on an 80 or 160 sample block of data.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecmInst Pointer to the AECM instance
|
||||
* int16_t* nearendNoisy In buffer containing one frame of
|
||||
* reference nearend+echo signal. If
|
||||
* noise reduction is active, provide
|
||||
* the noisy signal here.
|
||||
* int16_t* nearendClean In buffer containing one frame of
|
||||
* nearend+echo signal. If noise
|
||||
* reduction is active, provide the
|
||||
* clean signal here. Otherwise pass a
|
||||
* NULL pointer.
|
||||
* size_t nrOfSamples Number of samples in nearend buffer
|
||||
* int16_t msInSndCardBuf Delay estimate for sound card and
|
||||
* system buffers
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* int16_t* out Out buffer, one frame of processed nearend
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAecm_Process(void* aecmInst,
|
||||
const int16_t* nearendNoisy,
|
||||
const int16_t* nearendClean,
|
||||
int16_t* out,
|
||||
size_t nrOfSamples,
|
||||
int16_t msInSndCardBuf);
|
||||
|
||||
/*
|
||||
* This function enables the user to set certain parameters on-the-fly
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecmInst Pointer to the AECM instance
|
||||
* AecmConfig config Config instance that contains all
|
||||
* properties to be set
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config);
|
||||
|
||||
/*
|
||||
* This function enables the user to read the current configuration on-the-fly
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecmInst Pointer to the AECM instance
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* AecmConfig* config Pointer to the config instance that
|
||||
* all properties will be written to
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config);
|
||||
|
||||
/*
|
||||
* This function enables the user to set the echo path on-the-fly.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecmInst Pointer to the AECM instance
|
||||
* void* echo_path Pointer to the echo path to be set
|
||||
* size_t size_bytes Size in bytes of the echo path
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
|
||||
const void* echo_path,
|
||||
size_t size_bytes);
|
||||
|
||||
/*
|
||||
* This function enables the user to get the currently used echo path
|
||||
* on-the-fly
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecmInst Pointer to the AECM instance
|
||||
* void* echo_path Pointer to echo path
|
||||
* size_t size_bytes Size in bytes of the echo path
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* int32_t return 0: OK
|
||||
* -1: error
|
||||
*/
|
||||
int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
|
||||
void* echo_path,
|
||||
size_t size_bytes);
|
||||
|
||||
/*
|
||||
* This function enables the user to get the echo path size in bytes
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* size_t return Size in bytes
|
||||
*/
|
||||
/* Declared with (void): an empty parameter list in C is not a prototype. */
size_t WebRtcAecm_echo_path_size_bytes(void);
|
||||
|
||||
/*
|
||||
* Gets the last error code.
|
||||
*
|
||||
* Inputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* void* aecmInst Pointer to the AECM instance
|
||||
*
|
||||
* Outputs Description
|
||||
* -------------------------------------------------------------------
|
||||
* int32_t return 11000-11100: error code
|
||||
*/
|
||||
int32_t WebRtcAecm_get_error_code(void *aecmInst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
|
86
third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h
vendored
Normal file
86
third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging.h
vendored
Normal file
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h"
|
||||
|
||||
// To enable AEC logging, invoke GYP with -Daec_debug_dump=1.
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
// Dumps wav data to file.
|
||||
#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \
|
||||
do { \
|
||||
rtc_WavWriteSamples(file, data, num_samples); \
|
||||
} while (0)
|
||||
|
||||
// (Re)opens a wav file for writing using the specified sample rate.
|
||||
#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate, \
|
||||
sample_rate, wav_file) \
|
||||
do { \
|
||||
WebRtcAec_ReopenWav(name, instance_index, process_rate, sample_rate, \
|
||||
wav_file); \
|
||||
} while (0)
|
||||
|
||||
// Closes a wav file.
|
||||
#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \
|
||||
do { \
|
||||
rtc_WavClose(wav_file); \
|
||||
} while (0)
|
||||
|
||||
// Dumps raw data to file. Does nothing when |file| is NULL (for example when
// the dump file could not be opened), because handing a null stream to
// fwrite() is undefined behavior. |file| is evaluated exactly once.
#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size)                   \
  do {                                                                   \
    FILE* rtc_aec_raw_write_file_ = (file);                              \
    if (rtc_aec_raw_write_file_ != NULL) {                               \
      (void) fwrite((data), (data_size), 1, rtc_aec_raw_write_file_);    \
    }                                                                    \
  } while (0)
|
||||
|
||||
// Opens a raw data file for writing, named after the given name and instance counter.
|
||||
#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \
|
||||
do { \
|
||||
WebRtcAec_RawFileOpen(name, instance_counter, file); \
|
||||
} while (0)
|
||||
|
||||
// Closes a raw data file. A NULL |file| (e.g. a dump file that failed to
// open) is ignored, because fclose(NULL) is undefined behavior. |file| is
// evaluated exactly once.
#define RTC_AEC_DEBUG_RAW_CLOSE(file)              \
  do {                                             \
    FILE* rtc_aec_raw_close_file_ = (file);        \
    if (rtc_aec_raw_close_file_ != NULL) {         \
      fclose(rtc_aec_raw_close_file_);             \
    }                                              \
  } while (0)
|
||||
|
||||
#else // WEBRTC_AEC_DEBUG_DUMP
|
||||
#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \
|
||||
do { \
|
||||
} while (0)
|
||||
|
||||
// No-op used when AEC debug dumping is compiled out. The parameter list
// follows the same order as the dumping variant of this macro:
// (name, instance_index, process_rate, sample_rate, wav_file).
#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate, \
                                 sample_rate, wav_file)              \
  do {                                                               \
  } while (0)
|
||||
|
||||
#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \
|
||||
do { \
|
||||
} while (0)
|
||||
|
||||
#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \
|
||||
do { \
|
||||
} while (0)
|
||||
|
||||
// No-op used when AEC debug dumping is compiled out. The parameter list
// follows the same order as the dumping variant of this macro:
// (name, instance_counter, file).
#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \
  do {                                                       \
  } while (0)
|
||||
|
||||
#define RTC_AEC_DEBUG_RAW_CLOSE(file) \
|
||||
do { \
|
||||
} while (0)
|
||||
|
||||
#endif // WEBRTC_AEC_DEBUG_DUMP
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
|
57
third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc
vendored
Normal file
57
third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc
vendored
Normal file
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "webrtc/base/checks.h"
|
||||
#include "webrtc/base/stringutils.h"
|
||||
#include "webrtc/common_audio/wav_file.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
void WebRtcAec_ReopenWav(const char* name,
|
||||
int instance_index,
|
||||
int process_rate,
|
||||
int sample_rate,
|
||||
rtc_WavWriter** wav_file) {
|
||||
if (*wav_file) {
|
||||
if (rtc_WavSampleRate(*wav_file) == sample_rate)
|
||||
return;
|
||||
rtc_WavClose(*wav_file);
|
||||
}
|
||||
char filename[64];
|
||||
int written = rtc::sprintfn(filename, sizeof(filename), "%s%d-%d.wav", name,
|
||||
instance_index, process_rate);
|
||||
|
||||
// Ensure there was no buffer output error.
|
||||
RTC_DCHECK_GE(written, 0);
|
||||
// Ensure that the buffer size was sufficient.
|
||||
RTC_DCHECK_LT(static_cast<size_t>(written), sizeof(filename));
|
||||
|
||||
*wav_file = rtc_WavOpen(filename, sample_rate, 1);
|
||||
}
|
||||
|
||||
void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file) {
|
||||
char filename[64];
|
||||
int written = rtc::sprintfn(filename, sizeof(filename), "%s_%d.dat", name,
|
||||
instance_index);
|
||||
|
||||
// Ensure there was no buffer output error.
|
||||
RTC_DCHECK_GE(written, 0);
|
||||
// Ensure that the buffer size was sufficient.
|
||||
RTC_DCHECK_LT(static_cast<size_t>(written), sizeof(filename));
|
||||
|
||||
*file = fopen(filename, "wb");
|
||||
}
|
||||
|
||||
#endif // WEBRTC_AEC_DEBUG_DUMP
|
41
third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h
vendored
Normal file
41
third_party/webrtc/src/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h
vendored
Normal file
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "webrtc/common_audio/wav_file.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef WEBRTC_AEC_DEBUG_DUMP
|
||||
// Opens a new Wav file for writing. If it was already open with a different
|
||||
// sample frequency, it closes it first.
|
||||
void WebRtcAec_ReopenWav(const char* name,
|
||||
int instance_index,
|
||||
int process_rate,
|
||||
int sample_rate,
|
||||
rtc_WavWriter** wav_file);
|
||||
|
||||
// Opens dumpfile with instance-specific filename.
|
||||
void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file);
|
||||
|
||||
#endif // WEBRTC_AEC_DEBUG_DUMP
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue