/**
*** Copyright (C) 1985-2005 Intel Corporation. All rights reserved.
***
*** The information and source code contained herein is the exclusive
*** property of Intel Corporation and may not be disclosed, examined
*** or reproduced in whole or in part without explicit written authorization
*** from the company.
***
**/

/*
 * Include protection: #pragma once plus the _INCLUDED_TMM macro guard.
 * Everything below is skipped entirely when __midl is defined.
 */
#pragma once
#if !defined(__midl)
#if !defined(_INCLUDED_TMM)
#define _INCLUDED_TMM

#include <crtdefs.h>

/*
 * Compilation is rejected when _M_CEE_PURE is defined; the remainder of the
 * header lives in the #else branch of this check.
 */
#if defined(_M_CEE_PURE)
#error ERROR: XMM intrinsics not supported in the pure mode!
#else

#include <intrin.h> /* PSC and, by inclusion, X86 intrinsics */

/*
 * The integer Streaming SIMD Extensions intrinsics need the m64 type, so
 * <mmintrin.h> is included unless _MMINTRIN_H_INCLUDED is already defined.
 */
#if !defined(_MMINTRIN_H_INCLUDED)
#include <mmintrin.h>
#endif

/*
 * Support the old notation: when _MM2_FUNCTIONALITY is defined, make sure
 * _MM_FUNCTIONALITY is defined as well (without redefining it).
 */
#if defined(_MM2_FUNCTIONALITY) && !defined(_MM_FUNCTIONALITY)
#define _MM_FUNCTIONALITY
#endif

/*
 * Three-way selection keyed on __ICL and _MM_FUNCTIONALITY:
 *   - __ICL not defined:               include <malloc.h> (unless _INC_MALLOC
 *                                      is already defined) to pick up
 *                                      _mm_malloc() and _mm_free();
 *   - __ICL and _MM_FUNCTIONALITY:     include "xmm_func.h";
 *   - __ICL without _MM_FUNCTIONALITY: using real intrinsics.
 */
#if !defined(__ICL)
#if !defined(_INC_MALLOC)
#include <malloc.h>
#endif
#elif defined(_MM_FUNCTIONALITY)
#include "xmm_func.h"
#else
/* NOTE(review): the prototypes in this header use __m128i and __m64, not
 * __m128 -- confirm that this typedef name is the intended one. */
typedef long long __m128;
#endif

/* Wrap the prototypes below in extern "C" when compiled as C++. */
#if defined(__cplusplus)
extern "C" {
#endif

// Add horizonally packed [saturated] words, double words, |
57 |
|
// {X,}MM2/m{128,64} (b) to {X,}MM1 (a). |
58 |
|
|
59 |
|
extern __m128i _mm_hadd_epi16 (__m128i a, __m128i b); |
60 |
|
extern __m128i _mm_hadd_epi32 (__m128i a, __m128i b); |
61 |
|
extern __m128i _mm_hadds_epi16 (__m128i a, __m128i b); |
62 |
|
|
63 |
|
extern __m64 _mm_hadd_pi16 (__m64 a, __m64 b); |
64 |
|
extern __m64 _mm_hadd_pi32 (__m64 a, __m64 b); |
65 |
|
extern __m64 _mm_hadds_pi16 (__m64 a, __m64 b); |
66 |
|
|
67 |
|
// Subtract horizonally packed [saturated] words, double words, |
68 |
|
// {X,}MM2/m{128,64} (b) from {X,}MM1 (a). |
69 |
|
|
70 |
|
extern __m128i _mm_hsub_epi16 (__m128i a, __m128i b); |
71 |
|
extern __m128i _mm_hsub_epi32 (__m128i a, __m128i b); |
72 |
|
extern __m128i _mm_hsubs_epi16 (__m128i a, __m128i b); |
73 |
|
|
74 |
|
extern __m64 _mm_hsub_pi16 (__m64 a, __m64 b); |
75 |
|
extern __m64 _mm_hsub_pi32 (__m64 a, __m64 b); |
76 |
|
extern __m64 _mm_hsubs_pi16 (__m64 a, __m64 b); |
77 |
|
|
78 |
|
// Multiply and add packed words, |
79 |
|
// {X,}MM2/m{128,64} (b) to {X,}MM1 (a). |
80 |
|
|
81 |
|
extern __m128i _mm_maddubs_epi16 (__m128i a, __m128i b); |
82 |
|
|
83 |
|
extern __m64 _mm_maddubs_pi16 (__m64 a, __m64 b); |
84 |
|
|
95 |
|
extern __m128i _mm_shuffle_epi8 (__m128i a, __m128i b); |
96 |
|
|
97 |
|
extern __m64 _mm_shuffle_pi8 (__m64 a, __m64 b); |
98 |
|
|
99 |
|
// Packed byte, word, double word sign, {X,}MM2/m{128,64} (b) to |
100 |
|
// {X,}MM1 (a). |
101 |
|
|
102 |
|
extern __m128i _mm_sign_epi8 (__m128i a, __m128i b); |
103 |
|
extern __m128i _mm_sign_epi16 (__m128i a, __m128i b); |
104 |
|
extern __m128i _mm_sign_epi32 (__m128i a, __m128i b); |
105 |
|
|
106 |
|
extern __m64 _mm_sign_pi8 (__m64 a, __m64 b); |
107 |
|
extern __m64 _mm_sign_pi16 (__m64 a, __m64 b); |
108 |
|
extern __m64 _mm_sign_pi32 (__m64 a, __m64 b); |
109 |
|
|
110 |
|
// Packed align and shift right by n*8 bits, |
111 |
|
// {X,}MM2/m{128,64} (b) to {X,}MM1 (a). |
112 |
|
|
113 |
|
extern __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int n); |
114 |
|
|
115 |
|
extern __m64 _mm_alignr_pi8 (__m64 a, __m64 b, int n); |
116 |
|
|
117 |
|
// Packed byte, word, double word absolute value, |
118 |
|
// {X,}MM2/m{128,64} (b) to {X,}MM1 (a). |
119 |
|
|
120 |
|
extern __m128i _mm_abs_epi8 (__m128i a); |
121 |
|
extern __m128i _mm_abs_epi16 (__m128i a); |
122 |
|
extern __m128i _mm_abs_epi32 (__m128i a); |
123 |
|
|
124 |
|
extern __m64 _mm_abs_pi8 (__m64 a); |
125 |
|
extern __m64 _mm_abs_pi16 (__m64 a); |
126 |
|
extern __m64 _mm_abs_pi32 (__m64 a); |
127 |
|
|
128 |
|
/*
 * Close the extern "C" block for C++ translation units. No semicolon follows
 * the brace: a linkage specification is not terminated by ';', and a stray
 * one is an empty declaration that older C++ dialects reject or warn about.
 */
#ifdef __cplusplus
}
#endif

/* Each #endif below is labelled with the conditional it actually closes. */
#endif /* defined(_M_CEE_PURE) */

#endif /* _INCLUDED_TMM */

#endif /* __midl */