|
|
|
| 1 |
|
/** |
| 2 |
|
*** Copyright (C) 1985-1999 Intel Corporation. All rights reserved. |
| 3 |
|
*** |
| 4 |
|
*** The information and source code contained herein is the exclusive |
| 5 |
|
*** property of Intel Corporation and may not be disclosed, examined |
| 6 |
|
*** or reproduced in whole or in part without explicit written authorization |
| 7 |
|
*** from the company. |
| 8 |
|
*** |
| 9 |
|
**/ |
| 10 |
|
|
| 11 |
|
/* |
| 12 |
|
* emmintrin.h |
| 13 |
|
* |
| 14 |
|
* Principal header file for Willamette New Instruction intrinsics |
| 15 |
|
* |
| 16 |
|
 * The intrinsics package can be used in 2 ways, based on whether or not |
| 17 |
|
* _EMM_FUNCTIONALITY is defined; if it is, the C implementation |
| 18 |
|
* will be used (the "functional intrinsics"). |
| 19 |
|
*/ |
| 20 |
|
|
| 21 |
|
#pragma once |
| 22 |
|
#ifndef __midl |
| 23 |
|
#ifndef _INCLUDED_EMM |
| 24 |
|
#define _INCLUDED_EMM |
| 25 |
|
|
| 26 |
|
#if defined(_M_CEE_PURE) |
| 27 |
|
#error ERROR: EMM intrinsics not supported in the pure mode! |
| 28 |
|
#else |
| 29 |
|
|
| 30 |
|
/* |
| 31 |
|
* the __m128 & __m64 types are required for the intrinsics |
| 32 |
|
*/ |
| 33 |
|
#include <crtdefs.h> |
| 34 |
|
#include <xmmintrin.h> |
| 35 |
|
|
| 36 |
|
/*
 * 128-bit integer vector type.
 *
 * Declared as a union so the same storage can be viewed as signed or
 * unsigned lanes of 8, 16, 32 or 64 bits (16, 8, 4 or 2 elements
 * respectively).  _CRT_ALIGN(16) comes from <crtdefs.h>; presumably it
 * requests 16-byte alignment.  __declspec(intrin_type) is a compiler
 * specific attribute, presumably marking this as a built-in vector type.
 */
typedef union __declspec(intrin_type) _CRT_ALIGN(16) __m128i {
    /* signed lane views */
    __int8              m128i_i8[16];
    __int16             m128i_i16[8];
    __int32             m128i_i32[4];
    __int64             m128i_i64[2];
    /* unsigned lane views */
    unsigned __int8     m128i_u8[16];
    unsigned __int16    m128i_u16[8];
    unsigned __int32    m128i_u32[4];
    unsigned __int64    m128i_u64[2];
} __m128i;
| 46 |
|
|
| 47 |
|
/*
 * 128-bit double-precision vector type: a struct wrapping two doubles.
 * Uses the same __declspec(intrin_type) / _CRT_ALIGN(16) decoration as
 * the __m128i type declared in this header.
 */
typedef struct __declspec(intrin_type) _CRT_ALIGN(16) __m128d {
    double              m128d_f64[2];
} __m128d;
| 50 |
|
|
| 51 |
|
/*
 * Macro function for shuffle
 *
 * Packs two selector values into one integer: x is placed in bit 1 and
 * y in bit 0, so _MM_SHUFFLE2(1,0) == 2 and _MM_SHUFFLE2(0,1) == 1.
 * Both arguments and the whole expansion are parenthesized, and each
 * argument is evaluated exactly once.
 */
#define _MM_SHUFFLE2(x,y) (((x)<<1) | (y))
| 55 |
|
|
| 56 |
|
/*****************************************************/ |
| 57 |
|
/* INTRINSICS FUNCTION PROTOTYPES START HERE */ |
| 58 |
|
/*****************************************************/ |
| 59 |
|
|
| 60 |
|
/*
 * When compiled as C++, open a C linkage block so the prototypes that
 * follow are declared with C linkage.
 */
#if defined __cplusplus
extern "C" { /* Begin "C" */
  /* Intrinsics use C name-mangling. */
#endif /* __cplusplus */
| 64 |
|
|
| 65 |
|
/* |
| 66 |
|
* DP, arithmetic |
| 67 |
|
*/ |
| 68 |
|
|
| 69 |
|
extern __m128d _mm_add_sd(__m128d _A, __m128d _B); |
| 70 |
|
extern __m128d _mm_add_pd(__m128d _A, __m128d _B); |
| 71 |
|
extern __m128d _mm_sub_sd(__m128d _A, __m128d _B); |
| 72 |
|
extern __m128d _mm_sub_pd(__m128d _A, __m128d _B); |
| 73 |
|
extern __m128d _mm_mul_sd(__m128d _A, __m128d _B); |
| 74 |
|
extern __m128d _mm_mul_pd(__m128d _A, __m128d _B); |
| 75 |
|
extern __m128d _mm_sqrt_sd(__m128d _A, __m128d _B); |
| 76 |
|
extern __m128d _mm_sqrt_pd(__m128d _A); |
| 77 |
|
extern __m128d _mm_div_sd(__m128d _A, __m128d _B); |
| 78 |
|
extern __m128d _mm_div_pd(__m128d _A, __m128d _B); |
| 79 |
|
extern __m128d _mm_min_sd(__m128d _A, __m128d _B); |
| 80 |
|
extern __m128d _mm_min_pd(__m128d _A, __m128d _B); |
| 81 |
|
extern __m128d _mm_max_sd(__m128d _A, __m128d _B); |
| 82 |
|
extern __m128d _mm_max_pd(__m128d _A, __m128d _B); |
| 83 |
|
|
| 84 |
|
/* |
| 85 |
|
* DP, logicals |
| 86 |
|
*/ |
| 87 |
|
|
| 88 |
|
extern __m128d _mm_and_pd(__m128d _A, __m128d _B); |
| 89 |
|
extern __m128d _mm_andnot_pd(__m128d _A, __m128d _B); |
| 90 |
|
extern __m128d _mm_or_pd(__m128d _A, __m128d _B); |
| 91 |
|
extern __m128d _mm_xor_pd(__m128d _A, __m128d _B); |
| 92 |
|
|
| 93 |
|
/* |
| 94 |
|
* DP, comparisons |
| 95 |
|
*/ |
| 96 |
|
|
| 97 |
|
extern __m128d _mm_cmpeq_sd(__m128d _A, __m128d _B); |
| 98 |
|
extern __m128d _mm_cmpeq_pd(__m128d _A, __m128d _B); |
| 99 |
|
extern __m128d _mm_cmplt_sd(__m128d _A, __m128d _B); |
| 100 |
|
extern __m128d _mm_cmplt_pd(__m128d _A, __m128d _B); |
| 101 |
|
extern __m128d _mm_cmple_sd(__m128d _A, __m128d _B); |
| 102 |
|
extern __m128d _mm_cmple_pd(__m128d _A, __m128d _B); |
| 103 |
|
extern __m128d _mm_cmpgt_sd(__m128d _A, __m128d _B); |
| 104 |
|
extern __m128d _mm_cmpgt_pd(__m128d _A, __m128d _B); |
| 105 |
|
extern __m128d _mm_cmpge_sd(__m128d _A, __m128d _B); |
| 106 |
|
extern __m128d _mm_cmpge_pd(__m128d _A, __m128d _B); |
| 107 |
|
extern __m128d _mm_cmpneq_sd(__m128d _A, __m128d _B); |
| 108 |
|
extern __m128d _mm_cmpneq_pd(__m128d _A, __m128d _B); |
| 109 |
|
extern __m128d _mm_cmpnlt_sd(__m128d _A, __m128d _B); |
| 110 |
|
extern __m128d _mm_cmpnlt_pd(__m128d _A, __m128d _B); |
| 350 |
|
|
| 351 |
|
/*
 * Store-style prototypes: each returns void and takes a destination
 * pointer plus __m128i operand(s).  Note that _mm_maskmoveu_si128 takes
 * its pointer (a char *) as the LAST argument, unlike the others.
 * NOTE(review): alignment requirements of _P are not stated here;
 * confirm against the Intel Intrinsics Guide.
 */
extern void _mm_store_si128(__m128i *_P, __m128i _B);
extern void _mm_storeu_si128(__m128i *_P, __m128i _B);
extern void _mm_storel_epi64(__m128i *_P, __m128i _Q);
extern void _mm_maskmoveu_si128(__m128i _D, __m128i _N, char *_P);
| 355 |
|
|
| 356 |
|
/* |
| 357 |
|
* Integer, moves |
| 358 |
|
*/ |
| 359 |
|
|
| 360 |
|
extern __m128i _mm_move_epi64(__m128i _Q); |
| 361 |
|
extern __m128i _mm_movpi64_epi64(__m64 _Q); |
| 362 |
|
extern __m64 _mm_movepi64_pi64(__m128i _Q); |
| 363 |
|
|
| 364 |
|
/* |
| 365 |
|
* Cacheability support |
| 366 |
|
*/ |
| 367 |
|
|
| 368 |
|
extern void _mm_stream_pd(double *_Dp, __m128d _A); |
| 369 |
|
extern void _mm_stream_si128(__m128i *_P, __m128i _A); |
| 370 |
|
extern void _mm_clflush(void const*_P); |
| 371 |
|
extern void _mm_lfence(void); |
| 372 |
|
extern void _mm_mfence(void); |
| 373 |
|
extern void _mm_stream_si32(int *_P, int _I); |
| 374 |
|
extern void _mm_pause(void); |
| 375 |
|
|
| 376 |
|
/* |
| 377 |
|
* New convert to float |
| 378 |
|
*/ |
| 379 |
|
|
| 380 |
|
extern double _mm_cvtsd_f64(__m128d _A); |
| 381 |
|
|
| 382 |
|
/* |
| 383 |
|
* Support for casting between various SP, DP, INT vector types. |
| 384 |
|
* Note that these do no conversion of values, they just change |
| 385 |
|
* the type. |
| 386 |
|
*/ |
| 387 |
|
|
| 388 |
|
extern __m128 _mm_castpd_ps(__m128d); |
| 389 |
|
extern __m128i _mm_castpd_si128(__m128d); |
| 390 |
|
extern __m128d _mm_castps_pd(__m128); |
| 391 |
|
extern __m128i _mm_castps_si128(__m128); |
| 392 |
|
extern __m128 _mm_castsi128_ps(__m128i); |
| 393 |
|
extern __m128d _mm_castsi128_pd(__m128i); |
| 394 |
|
|
| 395 |
|
/*
 * Support for 64-bit extension intrinsics
 *
 * Declared only when _M_AMD64 is defined.  These convert between
 * __int64 and the __m128d / __m128i vector types.  Parameters are named
 * to match the other prototypes in this header.
 */

#if defined(_M_AMD64)
extern __int64 _mm_cvtsd_si64(__m128d _A);
extern __int64 _mm_cvttsd_si64(__m128d _A);
extern __m128d _mm_cvtsi64_sd(__m128d _A, __int64 _B);
extern __m128i _mm_cvtsi64_si128(__int64 _A);
extern __int64 _mm_cvtsi128_si64(__m128i _A);
/*
 * Alternate intrinsic name definitions
 * Makes _mm_stream_si64 expand to _mm_stream_si64x.
 * NOTE(review): _mm_stream_si64x is not declared in the visible part
 * of this header; confirm it is declared elsewhere.
 */
#define _mm_stream_si64 _mm_stream_si64x
#endif
| 408 |
|
|
| 409 |
|
/*
 * When compiled as C++, close the C linkage block opened before the
 * prototypes.  No semicolon follows the brace: a linkage specification
 * is not terminated by ';', and a stray one is an empty declaration
 * that pedantic pre-C++11 compilers reject.
 */
#if defined __cplusplus
} /* End "C" */
#endif /* __cplusplus */
| 412 |
|
|
| 413 |
|
#endif /* defined(_M_CEE_PURE) */ |
| 414 |
|
|
| 415 |
|
#endif |
| 416 |
|
#endif |
| 417 |
|
|
|
|
|