Re #2065: We still need to modify some source code to avoid build errors on Visual Studio 2005.



git-svn-id: https://svn.pjsip.org/repos/pjproject/trunk@5708 74dad513-b988-da41-8d7b-12977e46ad98
This commit is contained in:
Riza Sulistyo 2017-12-04 07:23:36 +00:00
parent a39e60661e
commit 126a428c1e
4 changed files with 163 additions and 90 deletions

View File

@ -2,3 +2,37 @@ Notes:
* Source code for libyuv from https://chromium.googlesource.com/libyuv/libyuv/ dated 17 November 2017.
* All code is compilable, except for compare_win.cc
- Use an older version (https://chromium.googlesource.com/libyuv/libyuv/+/baf6a3c1bd385e7ffe6b7634560e71fb49e4f589%5E%21/),
because the current version fails to compile on VS2005 at:
--------------------------------------------------------------------------------------
pmulld xmm0,xmm6
--------------------------------------------------------------------------------------
- On VS2015, error C2024: 'alignas' attribute applies to variables, data members and tag types only
--------------------------------------------------------------------------------------
__declspec(naked) __declspec(align(16))
Change to:
__declspec(naked)
--------------------------------------------------------------------------------------
* Added these lines to file include/libyuv/basic_types.h:
--
#if _MSC_VER==1400
# include <stdint.h> // for uint8_t
#endif
...
#if defined(_MSC_VER)
# pragma warning(disable:4996) // This function or variable may be unsafe.
#endif
--
* Modify compare_row.h:
- VS2005 doesn't support SSE4.2, which results in a compile error in HammingDistance_SSE42() at:
--------------------------------------------------------------------------------------
diff += __popcnt(x);
--------------------------------------------------------------------------------------
So, on VS2005 we use the default method HammingDistance_C() to calculate the Hamming distance.

View File

@ -14,11 +14,18 @@
#include <stddef.h> // for NULL, size_t
#if defined(_MSC_VER) && (_MSC_VER < 1600)
#if _MSC_VER==1400
# include <stdint.h> // for uint8_t
#endif
#include <sys/types.h> // for uintptr_t on x86
#else
#include <stdint.h> // for uintptr_t
#endif
#if defined(_MSC_VER)
# pragma warning(disable:4996) // This function or variable may be unsafe.
#endif
#ifndef GG_LONGLONG
#ifndef INT_TYPES_DEFINED
#define INT_TYPES_DEFINED

View File

@ -53,7 +53,10 @@ extern "C" {
(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86))
#define HAS_HASHDJB2_SSE41
#define HAS_SUMSQUAREERROR_SSE2
#define HAS_HAMMINGDISTANCE_SSE42
// HammingDistance_SSE42() needs popcnt support, which Visual Studio 2005
// (_MSC_VER == 1400) does not provide, so only that compiler (and older MSVC)
// is excluded. The macro tested here must be _MSC_VER: an undefined identifier
// such as MSC_VER evaluates to 0 in #if and would disable the SSE4.2 path for
// every compiler.
# if !defined(_MSC_VER) || (_MSC_VER > 1400)
# define HAS_HAMMINGDISTANCE_SSE42
# endif
#endif
// The following are available for Visual C and clangcl 32 bit:

View File

@ -9,8 +9,6 @@
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#include "libyuv/row.h"
#if defined(_MSC_VER)
@ -22,9 +20,10 @@ namespace libyuv {
extern "C" {
#endif
// This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
/* Visual Studio 2005 doesn't support __popcnt(). */
#if (_MSC_VER > 1400)
uint32 HammingDistance_SSE42(const uint8* src_a,
const uint8* src_b,
int count) {
@ -39,21 +38,28 @@ uint32 HammingDistance_SSE42(const uint8* src_a,
}
return diff;
}
#endif
__declspec(naked) uint32
SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
#if (_MSC_VER >= 1900)
__declspec(naked)
#else
__declspec(naked) __declspec(align(16))
#endif
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
mov ecx, [esp + 12] // count
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
mov ecx, [esp + 12] // count
pxor xmm0, xmm0
pxor xmm5, xmm5
align 4
wloop:
movdqu xmm1, [eax]
movdqa xmm1, [eax]
lea eax, [eax + 16]
movdqu xmm2, [edx]
movdqa xmm2, [edx]
lea edx, [edx + 16]
sub ecx, 16
movdqa xmm3, xmm1 // abs trick
psubusb xmm1, xmm2
psubusb xmm2, xmm3
@ -65,7 +71,6 @@ __declspec(naked) uint32
pmaddwd xmm2, xmm2
paddd xmm0, xmm1
paddd xmm0, xmm2
sub ecx, 16
jg wloop
pshufd xmm1, xmm0, 0xee
@ -80,21 +85,27 @@ __declspec(naked) uint32
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable : 4752)
__declspec(naked) uint32
SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
#pragma warning(disable: 4752)
#if (_MSC_VER >= 1900)
__declspec(naked)
#else
__declspec(naked) __declspec(align(16))
#endif
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
mov ecx, [esp + 12] // count
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
mov ecx, [esp + 12] // count
vpxor ymm0, ymm0, ymm0 // sum
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
sub edx, eax
align 4
wloop:
vmovdqu ymm1, [eax]
vmovdqu ymm2, [eax + edx]
lea eax, [eax + 32]
sub ecx, 32
vpsubusb ymm3, ymm1, ymm2 // abs difference trick
vpsubusb ymm2, ymm2, ymm1
vpor ymm1, ymm2, ymm3
@ -104,7 +115,6 @@ __declspec(naked) uint32
vpmaddwd ymm1, ymm1, ymm1
vpaddd ymm0, ymm0, ymm1
vpaddd ymm0, ymm0, ymm2
sub ecx, 32
jg wloop
vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.
@ -120,66 +130,81 @@ __declspec(naked) uint32
}
#endif // _MSC_VER >= 1700
uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16
uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
#define HAS_HASHDJB2_SSE41
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
static uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
static uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
static uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};
__declspec(naked) uint32
HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6
// 44: 66 0F 38 40 DD pmulld xmm3,xmm5
// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5
// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5
// 83: 66 0F 38 40 CD pmulld xmm1,xmm5
#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
_asm _emit 0x40 _asm _emit reg
#if (_MSC_VER >= 1900)
__declspec(naked)
#else
__declspec(naked) __declspec(align(16))
#endif
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
movd xmm0, [esp + 12] // seed
pxor xmm7, xmm7 // constant 0 for unpck
movdqa xmm6, xmmword ptr kHash16x33
pxor xmm7, xmm7 // constant 0 for unpck
movdqa xmm6, kHash16x33
align 4
wloop:
movdqu xmm1, [eax] // src[0-15]
movdqu xmm1, [eax] // src[0-15]
lea eax, [eax + 16]
pmulld xmm0, xmm6 // hash *= 33 ^ 16
movdqa xmm5, xmmword ptr kHashMul0
pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16
movdqa xmm5, kHashMul0
movdqa xmm2, xmm1
punpcklbw xmm2, xmm7 // src[0-7]
punpcklbw xmm2, xmm7 // src[0-7]
movdqa xmm3, xmm2
punpcklwd xmm3, xmm7 // src[0-3]
pmulld xmm3, xmm5
movdqa xmm5, xmmword ptr kHashMul1
punpcklwd xmm3, xmm7 // src[0-3]
pmulld(0xdd) // pmulld xmm3, xmm5
movdqa xmm5, kHashMul1
movdqa xmm4, xmm2
punpckhwd xmm4, xmm7 // src[4-7]
pmulld xmm4, xmm5
movdqa xmm5, xmmword ptr kHashMul2
punpckhbw xmm1, xmm7 // src[8-15]
punpckhwd xmm4, xmm7 // src[4-7]
pmulld(0xe5) // pmulld xmm4, xmm5
movdqa xmm5, kHashMul2
punpckhbw xmm1, xmm7 // src[8-15]
movdqa xmm2, xmm1
punpcklwd xmm2, xmm7 // src[8-11]
pmulld xmm2, xmm5
movdqa xmm5, xmmword ptr kHashMul3
punpckhwd xmm1, xmm7 // src[12-15]
pmulld xmm1, xmm5
paddd xmm3, xmm4 // add 16 results
punpcklwd xmm2, xmm7 // src[8-11]
pmulld(0xd5) // pmulld xmm2, xmm5
movdqa xmm5, kHashMul3
punpckhwd xmm1, xmm7 // src[12-15]
pmulld(0xcd) // pmulld xmm1, xmm5
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
sub ecx, 16
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
@ -187,53 +212,57 @@ __declspec(naked) uint32
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
sub ecx, 16
jg wloop
movd eax, xmm0 // return hash
movd eax, xmm0 // return hash
ret
}
}
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
__declspec(naked) uint32
HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
#if (_MSC_VER >= 1900)
__declspec(naked)
#else
__declspec(naked) __declspec(align(16))
#endif
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
vmovd xmm0, [esp + 12] // seed
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
movd xmm0, [esp + 12] // seed
movdqa xmm6, kHash16x33
align 4
wloop:
vpmovzxbd xmm3, [eax] // src[0-3]
vpmulld xmm0, xmm0, xmmword ptr kHash16x33 // hash *= 33 ^ 16
vpmovzxbd xmm4, [eax + 4] // src[4-7]
vpmulld xmm3, xmm3, xmmword ptr kHashMul0
vpmovzxbd xmm2, [eax + 8] // src[8-11]
vpmulld xmm4, xmm4, xmmword ptr kHashMul1
vpmovzxbd xmm1, [eax + 12] // src[12-15]
vpmulld xmm2, xmm2, xmmword ptr kHashMul2
vpmovzxbd xmm3, dword ptr [eax] // src[0-3]
pmulld xmm0, xmm6 // hash *= 33 ^ 16
vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7]
pmulld xmm3, kHashMul0
vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11]
pmulld xmm4, kHashMul1
vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15]
pmulld xmm2, kHashMul2
lea eax, [eax + 16]
vpmulld xmm1, xmm1, xmmword ptr kHashMul3
vpaddd xmm3, xmm3, xmm4 // add 16 results
vpaddd xmm1, xmm1, xmm2
vpaddd xmm1, xmm1, xmm3
vpshufd xmm2, xmm1, 0x0e // upper 2 dwords
vpaddd xmm1, xmm1,xmm2
vpshufd xmm2, xmm1, 0x01
vpaddd xmm1, xmm1, xmm2
vpaddd xmm0, xmm0, xmm1
pmulld xmm1, kHashMul3
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
sub ecx, 16
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
paddd xmm1, xmm2
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
jg wloop
vmovd eax, xmm0 // return hash
vzeroupper
movd eax, xmm0 // return hash
ret
}
}
#endif // _MSC_VER >= 1700
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#ifdef __cplusplus
} // extern "C"