|
|
|
| 1 |
|
/** |
| 2 |
|
*** Copyright (C) 1985-2005 Intel Corporation. All rights reserved. |
| 3 |
|
*** |
| 4 |
|
*** The information and source code contained herein is the exclusive |
| 5 |
|
*** property of Intel Corporation and may not be disclosed, examined |
| 6 |
|
*** or reproduced in whole or in part without explicit written authorization |
| 7 |
|
*** from the company. |
| 8 |
|
*** |
| 9 |
|
**/ |
| 10 |
|
|
| 11 |
|
#pragma once |
| 12 |
|
#ifndef __midl |
| 13 |
|
#ifndef _INCLUDED_TMM |
| 14 |
|
#define _INCLUDED_TMM |
| 15 |
|
|
| 16 |
|
#include <crtdefs.h> |
| 17 |
|
|
| 18 |
|
#if defined(_M_CEE_PURE) |
| 19 |
|
#error ERROR: XMM intrinsics not supported in the pure mode! |
| 20 |
|
#else |
| 21 |
|
|
| 22 |
|
#include <intrin.h> // PSC, and by inclusion X86, intrinsics
| 23 |
|
|
| 24 |
|
/*
 * The __m64 type is required for the integer Streaming SIMD Extensions intrinsics.
 */
| 27 |
|
#ifndef _MMINTRIN_H_INCLUDED |
| 28 |
|
#include <mmintrin.h> |
| 29 |
|
#endif |
| 30 |
|
|
| 31 |
|
/*
 * Support old notation: when _MM2_FUNCTIONALITY is defined and
 * _MM_FUNCTIONALITY is not, define _MM_FUNCTIONALITY so that later checks
 * only need to test the one macro.
 */
#if defined(_MM2_FUNCTIONALITY) && !defined(_MM_FUNCTIONALITY)
#define _MM_FUNCTIONALITY
#endif
| 37 |
|
|
| 38 |
|
/*
 * Compiler-specific setup.
 *
 *  - __ICL defined: with _MM_FUNCTIONALITY, include "xmm_func.h";
 *    otherwise ("using real intrinsics") only a `long long` typedef named
 *    __m128 is supplied.
 *    NOTE(review): the reason __m128 is typedef'd as long long here is not
 *    visible in this file -- confirm before changing it.
 *
 *  - __ICL not defined: include <malloc.h> to pick up _mm_malloc() and
 *    _mm_free(), unless _INC_MALLOC is already defined (presumably
 *    <malloc.h>'s own include guard -- verify).
 */
#ifdef __ICL
#ifdef _MM_FUNCTIONALITY
#include "xmm_func.h"
#else
/* using real intrinsics */
typedef long long __m128;
#endif
#else
#ifndef _INC_MALLOC
/* pick up _mm_malloc() and _mm_free() */
#include <malloc.h>
#endif
#endif
| 51 |
|
|
| 52 |
|
/* When compiled as C++, give the declarations that follow C linkage. */
#ifdef __cplusplus
extern "C" {
#endif
| 55 |
|
|
| 56 |
|
// Add horizonally packed [saturated] words, double words, |
| 57 |
|
// {X,}MM2/m{128,64} (b) to {X,}MM1 (a). |
| 58 |
|
|
| 59 |
|
extern __m128i _mm_hadd_epi16 (__m128i a, __m128i b); |
| 60 |
|
extern __m128i _mm_hadd_epi32 (__m128i a, __m128i b); |
| 61 |
|
extern __m128i _mm_hadds_epi16 (__m128i a, __m128i b); |
| 62 |
|
|
| 63 |
|
extern __m64 _mm_hadd_pi16 (__m64 a, __m64 b); |
| 64 |
|
extern __m64 _mm_hadd_pi32 (__m64 a, __m64 b); |
| 65 |
|
extern __m64 _mm_hadds_pi16 (__m64 a, __m64 b); |
| 66 |
|
|
| 67 |
|
// Subtract horizonally packed [saturated] words, double words, |
| 68 |
|
// {X,}MM2/m{128,64} (b) from {X,}MM1 (a). |
| 69 |
|
|
| 70 |
|
extern __m128i _mm_hsub_epi16 (__m128i a, __m128i b); |
| 71 |
|
extern __m128i _mm_hsub_epi32 (__m128i a, __m128i b); |
| 72 |
|
extern __m128i _mm_hsubs_epi16 (__m128i a, __m128i b); |
| 73 |
|
|
| 74 |
|
extern __m64 _mm_hsub_pi16 (__m64 a, __m64 b); |
| 75 |
|
extern __m64 _mm_hsub_pi32 (__m64 a, __m64 b); |
| 76 |
|
extern __m64 _mm_hsubs_pi16 (__m64 a, __m64 b); |
| 77 |
|
|
| 78 |
|
// Multiply and add packed words, |
| 79 |
|
// {X,}MM2/m{128,64} (b) to {X,}MM1 (a). |
| 80 |
|
|
| 81 |
|
extern __m128i _mm_maddubs_epi16 (__m128i a, __m128i b); |
| 82 |
|
|
| 83 |
|
extern __m64 _mm_maddubs_pi16 (__m64 a, __m64 b); |
| 84 |
|
|
| 95 |
|
extern __m128i _mm_shuffle_epi8 (__m128i a, __m128i b); |
| 96 |
|
|
| 97 |
|
extern __m64 _mm_shuffle_pi8 (__m64 a, __m64 b); |
| 98 |
|
|
| 99 |
|
// Packed byte, word, double word sign, {X,}MM2/m{128,64} (b) to |
| 100 |
|
// {X,}MM1 (a). |
| 101 |
|
|
| 102 |
|
extern __m128i _mm_sign_epi8 (__m128i a, __m128i b); |
| 103 |
|
extern __m128i _mm_sign_epi16 (__m128i a, __m128i b); |
| 104 |
|
extern __m128i _mm_sign_epi32 (__m128i a, __m128i b); |
| 105 |
|
|
| 106 |
|
extern __m64 _mm_sign_pi8 (__m64 a, __m64 b); |
| 107 |
|
extern __m64 _mm_sign_pi16 (__m64 a, __m64 b); |
| 108 |
|
extern __m64 _mm_sign_pi32 (__m64 a, __m64 b); |
| 109 |
|
|
| 110 |
|
// Packed align and shift right by n*8 bits, |
| 111 |
|
// {X,}MM2/m{128,64} (b) to {X,}MM1 (a). |
| 112 |
|
|
| 113 |
|
extern __m128i _mm_alignr_epi8 (__m128i a, __m128i b, int n); |
| 114 |
|
|
| 115 |
|
extern __m64 _mm_alignr_pi8 (__m64 a, __m64 b, int n); |
| 116 |
|
|
| 117 |
|
// Packed byte, word, double word absolute value, |
| 118 |
|
// {X,}MM2/m{128,64} (b) to {X,}MM1 (a). |
| 119 |
|
|
| 120 |
|
extern __m128i _mm_abs_epi8 (__m128i a); |
| 121 |
|
extern __m128i _mm_abs_epi16 (__m128i a); |
| 122 |
|
extern __m128i _mm_abs_epi32 (__m128i a); |
| 123 |
|
|
| 124 |
|
extern __m64 _mm_abs_pi8 (__m64 a); |
| 125 |
|
extern __m64 _mm_abs_pi16 (__m64 a); |
| 126 |
|
extern __m64 _mm_abs_pi32 (__m64 a); |
| 127 |
|
|
| 128 |
|
/* Close the extern "C" linkage block in C++ builds (no trailing ';'). */
#ifdef __cplusplus
}
#endif
| 131 |
|
|
| 132 |
|
#endif /* defined(_M_CEE_PURE) */ |
| 133 |
|
|
| 134 |
|
#endif /* _INCLUDED_TMM */

#endif /* __midl */
| 137 |
|
|
|
|
|