x86 intrinsics list

This document lists intrinsics that the Microsoft C/C++ compiler supports when x86 is targeted.

For information about individual intrinsics, see these resources, as appropriate for the processor you're targeting:

x86 intrinsics

The following table lists the intrinsics available on x86 processors. The Technology column lists required instruction-set support. Use the __cpuid intrinsic to determine instruction-set support at run time. If two entries are in one row, they represent different entry points for the same intrinsic. [Macro] indicates the prototype is a macro. The header required for the function prototype is listed in the Header column. The intrin.h header includes both immintrin.h and ammintrin.h for simplicity.

Intrinsic name Technology Header Function prototype
_addcarry_u16 intrin.h unsigned char _addcarry_u16(unsigned char, unsigned short, unsigned short, unsigned short *);
_addcarry_u32 intrin.h unsigned char _addcarry_u32(unsigned char, unsigned int, unsigned int, unsigned int *);
_addcarry_u8 intrin.h unsigned char _addcarry_u8(unsigned char, unsigned char, unsigned char, unsigned char *);
_addcarryx_u32 ADX immintrin.h unsigned char _addcarryx_u32(unsigned char, unsigned int, unsigned int, unsigned int *);
__addfsbyte intrin.h void __addfsbyte(unsigned long, unsigned char);
__addfsdword intrin.h void __addfsdword(unsigned long, unsigned long);
__addfsword intrin.h void __addfsword(unsigned long, unsigned short);
_AddressOfReturnAddress intrin.h void * _AddressOfReturnAddress(void);
_andn_u32 BMI ammintrin.h unsigned int _andn_u32(unsigned int, unsigned int);
_bextr_u32 BMI ammintrin.h, immintrin.h unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int);
_bextri_u32 ABM ammintrin.h unsigned int _bextri_u32(unsigned int, unsigned int);
_BitScanForward intrin.h unsigned char _BitScanForward(unsigned long*, unsigned long);
_BitScanReverse intrin.h unsigned char _BitScanReverse(unsigned long*, unsigned long);
_bittest intrin.h unsigned char _bittest(long const *, long);
_bittestandcomplement intrin.h unsigned char _bittestandcomplement(long *, long);
_bittestandreset intrin.h unsigned char _bittestandreset(long *, long);
_bittestandset intrin.h unsigned char _bittestandset(long *, long);
_blcfill_u32 ABM ammintrin.h unsigned int _blcfill_u32(unsigned int);
_blci_u32 ABM ammintrin.h unsigned int _blci_u32(unsigned int);
_blcic_u32 ABM ammintrin.h unsigned int _blcic_u32(unsigned int);
_blcmsk_u32 ABM ammintrin.h unsigned int _blcmsk_u32(unsigned int);
_blcs_u32 ABM ammintrin.h unsigned int _blcs_u32(unsigned int);
_blsfill_u32 ABM ammintrin.h unsigned int _blsfill_u32(unsigned int);
_blsi_u32 BMI ammintrin.h, immintrin.h unsigned int _blsi_u32(unsigned int);
_blsic_u32 ABM ammintrin.h unsigned int _blsic_u32(unsigned int);
_blsmsk_u32 BMI ammintrin.h, immintrin.h unsigned int _blsmsk_u32(unsigned int);
_blsr_u32 BMI ammintrin.h, immintrin.h unsigned int _blsr_u32(unsigned int);
_bzhi_u32 BMI immintrin.h unsigned int _bzhi_u32(unsigned int, unsigned int);
_castf32_u32 immintrin.h unsigned __int32 _castf32_u32 (float);
_castf64_u64 immintrin.h unsigned __int64 _castf64_u64 (double);
_castu32_f32 immintrin.h float _castu32_f32 (unsigned __int32);
_castu64_f64 immintrin.h double _castu64_f64 (unsigned __int64);
_clac SMAP intrin.h void _clac(void);
__cpuid intrin.h void __cpuid(int *, int);
__cpuidex intrin.h void __cpuidex(int *, int, int);
__debugbreak intrin.h void __debugbreak(void);
_disable intrin.h void _disable(void);
_div64 intrin.h int _div64(__int64, int, int *);
__emul intrin.h __int64 [pascal/cdecl] __emul(int, int);
__emulu intrin.h unsigned __int64 [pascal/cdecl] __emulu(unsigned int, unsigned int);
_enable intrin.h void _enable(void);
__fastfail intrin.h void __fastfail(unsigned int);
_fxrstor FXSR immintrin.h void _fxrstor(void const*);
_fxsave FXSR immintrin.h void _fxsave(void*);
__getcallerseflags intrin.h unsigned int __getcallerseflags(void);
__halt intrin.h void __halt(void);
__inbyte intrin.h unsigned char __inbyte(unsigned short);
__inbytestring intrin.h void __inbytestring(unsigned short, unsigned char *, unsigned long);
__incfsbyte intrin.h void __incfsbyte(unsigned long);
__incfsdword intrin.h void __incfsdword(unsigned long);
__incfsword intrin.h void __incfsword(unsigned long);
__indword intrin.h unsigned long __indword(unsigned short);
__indwordstring intrin.h void __indwordstring(unsigned short, unsigned long *, unsigned long);
__int2c intrin.h void __int2c(void);
_InterlockedAddLargeStatistic intrin.h long _InterlockedAddLargeStatistic(__int64 volatile *, long);
_InterlockedAnd intrin.h long _InterlockedAnd(long volatile *, long);
_InterlockedAnd_HLEAcquire HLE immintrin.h long _InterlockedAnd_HLEAcquire(long volatile *, long);
_InterlockedAnd_HLERelease HLE immintrin.h long _InterlockedAnd_HLERelease(long volatile *, long);
_InterlockedAnd16 intrin.h short _InterlockedAnd16(short volatile *, short);
_InterlockedAnd8 intrin.h char _InterlockedAnd8(char volatile *, char);
_interlockedbittestandreset intrin.h unsigned char _interlockedbittestandreset(long *, long);
_interlockedbittestandreset_HLEAcquire HLE immintrin.h unsigned char _interlockedbittestandreset_HLEAcquire(long *, long);
_interlockedbittestandreset_HLERelease HLE immintrin.h unsigned char _interlockedbittestandreset_HLERelease(long *, long);
_interlockedbittestandset intrin.h unsigned char _interlockedbittestandset(long *, long);
_interlockedbittestandset_HLEAcquire HLE immintrin.h unsigned char _interlockedbittestandset_HLEAcquire(long *, long);
_interlockedbittestandset_HLERelease HLE immintrin.h unsigned char _interlockedbittestandset_HLERelease(long *, long);
_InterlockedCompareExchange intrin.h long _InterlockedCompareExchange (long volatile *, long, long);
_InterlockedCompareExchange_HLEAcquire HLE immintrin.h long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long);
_InterlockedCompareExchange_HLERelease HLE immintrin.h long _InterlockedCompareExchange_HLERelease(long volatile *, long, long);
_InterlockedCompareExchange16 intrin.h short _InterlockedCompareExchange16(short volatile *, short, short);
_InterlockedCompareExchange64 intrin.h __int64 _InterlockedCompareExchange64(__int64 volatile *, __int64, __int64);
_InterlockedCompareExchange64_HLEAcquire HLE immintrin.h __int64 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *, __int64, __int64);
_InterlockedCompareExchange64_HLERelease HLE immintrin.h __int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, __int64);
_InterlockedCompareExchange8 intrin.h char _InterlockedCompareExchange8(char volatile *, char, char);
_InterlockedCompareExchangePointer intrin.h void *_InterlockedCompareExchangePointer (void *volatile *, void *, void *);
_InterlockedCompareExchangePointer_HLEAcquire HLE immintrin.h void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *, void *);
_InterlockedCompareExchangePointer_HLERelease HLE immintrin.h void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *, void *);
_InterlockedDecrement intrin.h long _InterlockedDecrement(long volatile *);
_InterlockedDecrement16 intrin.h short _InterlockedDecrement16(short volatile *);
_InterlockedExchange intrin.h long _InterlockedExchange(long volatile *, long);
_InterlockedExchange_HLEAcquire HLE immintrin.h long _InterlockedExchange_HLEAcquire(long volatile *, long);
_InterlockedExchange_HLERelease HLE immintrin.h long _InterlockedExchange_HLERelease(long volatile *, long);
_InterlockedExchange16 intrin.h short _InterlockedExchange16(short volatile *, short);
_InterlockedExchange8 intrin.h char _InterlockedExchange8(char volatile *, char);
_InterlockedExchangeAdd intrin.h long _InterlockedExchangeAdd(long volatile *, long);
_InterlockedExchangeAdd_HLEAcquire HLE immintrin.h long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);
_InterlockedExchangeAdd_HLERelease HLE immintrin.h long _InterlockedExchangeAdd_HLERelease(long volatile *, long);
_InterlockedExchangeAdd16 intrin.h short _InterlockedExchangeAdd16(short volatile *, short);
_InterlockedExchangeAdd8 intrin.h char _InterlockedExchangeAdd8(char volatile *, char);
_InterlockedExchangePointer intrin.h void * _InterlockedExchangePointer(void *volatile *, void *);
_InterlockedExchangePointer_HLEAcquire HLE immintrin.h void * _InterlockedExchangePointer_HLEAcquire(void *volatile *, void *);
_InterlockedExchangePointer_HLERelease HLE immintrin.h void * _InterlockedExchangePointer_HLERelease(void *volatile *, void *);
_InterlockedIncrement intrin.h long _InterlockedIncrement(long volatile *);
_InterlockedIncrement16 intrin.h short _InterlockedIncrement16(short volatile *);
_InterlockedOr intrin.h long _InterlockedOr(long volatile *, long);
_InterlockedOr_HLEAcquire HLE immintrin.h long _InterlockedOr_HLEAcquire(long volatile *, long);
_InterlockedOr_HLERelease HLE immintrin.h long _InterlockedOr_HLERelease(long volatile *, long);
_InterlockedOr16 intrin.h short _InterlockedOr16(short volatile *, short);
_InterlockedOr8 intrin.h char _InterlockedOr8(char volatile *, char);
_InterlockedXor intrin.h long _InterlockedXor(long volatile *, long);
_InterlockedXor_HLEAcquire HLE immintrin.h long _InterlockedXor_HLEAcquire(long volatile *, long);
_InterlockedXor_HLERelease HLE immintrin.h long _InterlockedXor_HLERelease(long volatile *, long);
_InterlockedXor16 intrin.h short _InterlockedXor16(short volatile *, short);
_InterlockedXor8 intrin.h char _InterlockedXor8(char volatile *, char);
__invlpg intrin.h void __invlpg(void*);
_invpcid INVPCID immintrin.h void _invpcid(unsigned int, void *);
__inword intrin.h unsigned short __inword(unsigned short);
__inwordstring intrin.h void __inwordstring(unsigned short, unsigned short *, unsigned long);
_lgdt intrin.h void _lgdt(void*);
__lidt intrin.h void __lidt(void*);
__ll_lshift intrin.h unsigned __int64 [pascal/cdecl] __ll_lshift(unsigned __int64, int);
__ll_rshift intrin.h __int64 [pascal/cdecl] __ll_rshift(__int64, int);
_loadbe_i16 MOVBE immintrin.h short _loadbe_i16(void const*); [Macro]
_loadbe_i32 MOVBE immintrin.h int _loadbe_i32(void const*); [Macro]
_load_be_u16 MOVBE immintrin.h unsigned short _load_be_u16(void const*); [Macro]
_load_be_u32 MOVBE immintrin.h unsigned int _load_be_u32(void const*); [Macro]
__llwpcb LWP ammintrin.h void __llwpcb(void *);
__lwpins32 LWP ammintrin.h unsigned char __lwpins32(unsigned int, unsigned int, unsigned int);
__lwpval32 LWP ammintrin.h void __lwpval32(unsigned int, unsigned int, unsigned int);
__lzcnt LZCNT intrin.h unsigned int __lzcnt(unsigned int);
_lzcnt_u32 BMI ammintrin.h, immintrin.h unsigned int _lzcnt_u32(unsigned int);
__lzcnt16 LZCNT intrin.h unsigned short __lzcnt16(unsigned short);
_m_empty MMX intrin.h void _m_empty(void);
_m_femms 3DNOW intrin.h void _m_femms(void);
_m_from_float 3DNOW intrin.h __m64 _m_from_float(float);
_m_from_int MMX intrin.h __m64 _m_from_int(int);
_m_maskmovq SSE intrin.h void _m_maskmovq(__m64, __m64, char*);
_m_packssdw MMX intrin.h __m64 _m_packssdw(__m64, __m64);
_m_packsswb MMX intrin.h __m64 _m_packsswb(__m64, __m64);
_m_packuswb MMX intrin.h __m64 _m_packuswb(__m64, __m64);
_m_paddb MMX intrin.h __m64 _m_paddb(__m64, __m64);
_m_paddd MMX intrin.h __m64 _m_paddd(__m64, __m64);
_m_paddsb MMX intrin.h __m64 _m_paddsb(__m64, __m64);
_m_paddsw MMX intrin.h __m64 _m_paddsw(__m64, __m64);
_m_paddusb MMX intrin.h __m64 _m_paddusb(__m64, __m64);
_m_paddusw MMX intrin.h __m64 _m_paddusw(__m64, __m64);
_m_paddw MMX intrin.h __m64 _m_paddw(__m64, __m64);
_m_pand MMX intrin.h __m64 _m_pand(__m64, __m64);
_m_pandn MMX intrin.h __m64 _m_pandn(__m64, __m64);
_m_pavgb SSE intrin.h __m64 _m_pavgb(__m64, __m64);
_m_pavgusb 3DNOW intrin.h __m64 _m_pavgusb(__m64, __m64);
_m_pavgw SSE intrin.h __m64 _m_pavgw(__m64, __m64);
_m_pcmpeqb MMX intrin.h __m64 _m_pcmpeqb(__m64, __m64);
_m_pcmpeqd MMX intrin.h __m64 _m_pcmpeqd(__m64, __m64);
_m_pcmpeqw MMX intrin.h __m64 _m_pcmpeqw(__m64, __m64);
_m_pcmpgtb MMX intrin.h __m64 _m_pcmpgtb(__m64, __m64);
_m_pcmpgtd MMX intrin.h __m64 _m_pcmpgtd(__m64, __m64);
_m_pcmpgtw MMX intrin.h __m64 _m_pcmpgtw(__m64, __m64);
_m_pextrw SSE intrin.h int _m_pextrw(__m64, int);
_m_pf2id 3DNOW intrin.h __m64 _m_pf2id(__m64);
_m_pf2iw 3DNOWEXT intrin.h __m64 _m_pf2iw(__m64);
_m_pfacc 3DNOW intrin.h __m64 _m_pfacc(__m64, __m64);
_m_pfadd 3DNOW intrin.h __m64 _m_pfadd(__m64, __m64);
_m_pfcmpeq 3DNOW intrin.h __m64 _m_pfcmpeq(__m64, __m64);
_m_pfcmpge 3DNOW intrin.h __m64 _m_pfcmpge(__m64, __m64);
_m_pfcmpgt 3DNOW intrin.h __m64 _m_pfcmpgt(__m64, __m64);
_m_pfmax 3DNOW intrin.h __m64 _m_pfmax(__m64, __m64);
_m_pfmin 3DNOW intrin.h __m64 _m_pfmin(__m64, __m64);
_m_pfmul 3DNOW intrin.h __m64 _m_pfmul(__m64, __m64);
_m_pfnacc 3DNOWEXT intrin.h __m64 _m_pfnacc(__m64, __m64);
_m_pfpnacc 3DNOWEXT intrin.h __m64 _m_pfpnacc(__m64, __m64);
_m_pfrcp 3DNOW intrin.h __m64 _m_pfrcp(__m64);
_m_pfrcpit1 3DNOW intrin.h __m64 _m_pfrcpit1(__m64, __m64);
_m_pfrcpit2 3DNOW intrin.h __m64 _m_pfrcpit2(__m64, __m64);
_m_pfrsqit1 3DNOW intrin.h __m64 _m_pfrsqit1(__m64, __m64);
_m_pfrsqrt 3DNOW intrin.h __m64 _m_pfrsqrt(__m64);
_m_pfsub 3DNOW intrin.h __m64 _m_pfsub(__m64, __m64);
_m_pfsubr 3DNOW intrin.h __m64 _m_pfsubr(__m64, __m64);
_m_pi2fd 3DNOW intrin.h __m64 _m_pi2fd(__m64);
_m_pi2fw 3DNOWEXT intrin.h __m64 _m_pi2fw(__m64);
_m_pinsrw SSE intrin.h __m64 _m_pinsrw(__m64, int, int);
_m_pmaddwd MMX intrin.h __m64 _m_pmaddwd(__m64, __m64);
_m_pmaxsw SSE intrin.h __m64 _m_pmaxsw(__m64, __m64);
_m_pmaxub SSE intrin.h __m64 _m_pmaxub(__m64, __m64);
_m_pminsw SSE intrin.h __m64 _m_pminsw(__m64, __m64);
_m_pminub SSE intrin.h __m64 _m_pminub(__m64, __m64);
_m_pmovmskb SSE intrin.h int _m_pmovmskb(__m64);
_m_pmulhrw 3DNOW intrin.h __m64 _m_pmulhrw(__m64, __m64);
_m_pmulhuw SSE intrin.h __m64 _m_pmulhuw(__m64, __m64);
_m_pmulhw MMX intrin.h __m64 _m_pmulhw(__m64, __m64);
_m_pmullw MMX intrin.h __m64 _m_pmullw(__m64, __m64);
_m_por MMX intrin.h __m64 _m_por(__m64, __m64);
_m_prefetch 3DNOW intrin.h void _m_prefetch(void*);
_m_prefetchw 3DNOW intrin.h void _m_prefetchw(void*);
_m_psadbw SSE intrin.h __m64 _m_psadbw(__m64, __m64);
_m_pshufw SSE intrin.h __m64 _m_pshufw(__m64, int);
_m_pslld MMX intrin.h __m64 _m_pslld(__m64, __m64);
_m_pslldi MMX intrin.h __m64 _m_pslldi(__m64, int);
_m_psllq MMX intrin.h __m64 _m_psllq(__m64, __m64);
_m_psllqi MMX intrin.h __m64 _m_psllqi(__m64, int);
_m_psllw MMX intrin.h __m64 _m_psllw(__m64, __m64);
_m_psllwi MMX intrin.h __m64 _m_psllwi(__m64, int);
_m_psrad MMX intrin.h __m64 _m_psrad(__m64, __m64);
_m_psradi MMX intrin.h __m64 _m_psradi(__m64, int);
_m_psraw MMX intrin.h __m64 _m_psraw(__m64, __m64);
_m_psrawi MMX intrin.h __m64 _m_psrawi(__m64, int);
_m_psrld MMX intrin.h __m64 _m_psrld(__m64, __m64);
_m_psrldi MMX intrin.h __m64 _m_psrldi(__m64, int);
_m_psrlq MMX intrin.h __m64 _m_psrlq(__m64, __m64);
_m_psrlqi MMX intrin.h __m64 _m_psrlqi(__m64, int);
_m_psrlw MMX intrin.h __m64 _m_psrlw(__m64, __m64);
_m_psrlwi MMX intrin.h __m64 _m_psrlwi(__m64, int);
_m_psubb MMX intrin.h __m64 _m_psubb(__m64, __m64);
_m_psubd MMX intrin.h __m64 _m_psubd(__m64, __m64);
_m_psubsb MMX intrin.h __m64 _m_psubsb(__m64, __m64);
_m_psubsw MMX intrin.h __m64 _m_psubsw(__m64, __m64);
_m_psubusb MMX intrin.h __m64 _m_psubusb(__m64, __m64);
_m_psubusw MMX intrin.h __m64 _m_psubusw(__m64, __m64);
_m_psubw MMX intrin.h __m64 _m_psubw(__m64, __m64);
_m_pswapd 3DNOWEXT intrin.h __m64 _m_pswapd(__m64);
_m_punpckhbw MMX intrin.h __m64 _m_punpckhbw(__m64, __m64);
_m_punpckhdq MMX intrin.h __m64 _m_punpckhdq(__m64, __m64);
_m_punpckhwd MMX intrin.h __m64 _m_punpckhwd(__m64, __m64);
_m_punpcklbw MMX intrin.h __m64 _m_punpcklbw(__m64, __m64);
_m_punpckldq MMX intrin.h __m64 _m_punpckldq(__m64, __m64);
_m_punpcklwd MMX intrin.h __m64 _m_punpcklwd(__m64, __m64);
_m_pxor MMX intrin.h __m64 _m_pxor(__m64, __m64);
_m_to_float 3DNOW intrin.h float _m_to_float(__m64);
_m_to_int MMX intrin.h int _m_to_int(__m64);
_mm_abs_epi16 SSSE3 intrin.h __m128i _mm_abs_epi16(__m128i);
_mm_abs_epi32 SSSE3 intrin.h __m128i _mm_abs_epi32(__m128i);
_mm_abs_epi8 SSSE3 intrin.h __m128i _mm_abs_epi8(__m128i);
_mm_abs_pi16 SSSE3 intrin.h __m64 _mm_abs_pi16(__m64);
_mm_abs_pi32 SSSE3 intrin.h __m64 _mm_abs_pi32(__m64);
_mm_abs_pi8 SSSE3 intrin.h __m64 _mm_abs_pi8(__m64);
_mm_add_epi16 SSE2 intrin.h __m128i _mm_add_epi16(__m128i, __m128i);
_mm_add_epi32 SSE2 intrin.h __m128i _mm_add_epi32(__m128i, __m128i);
_mm_add_epi64 SSE2 intrin.h __m128i _mm_add_epi64(__m128i, __m128i);
_mm_add_epi8 SSE2 intrin.h __m128i _mm_add_epi8(__m128i, __m128i);
_mm_add_pd SSE2 intrin.h __m128d _mm_add_pd(__m128d, __m128d);
_mm_add_pi8 MMX mmintrin.h __m64 _mm_add_pi8(__m64, __m64); [Macro]
_mm_add_pi16 MMX mmintrin.h __m64 _mm_add_pi16(__m64, __m64); [Macro]
_mm_add_pi32 MMX mmintrin.h __m64 _mm_add_pi32(__m64, __m64); [Macro]
_mm_add_ps SSE intrin.h __m128 _mm_add_ps(__m128, __m128);
_mm_add_sd SSE2 intrin.h __m128d _mm_add_sd(__m128d, __m128d);
_mm_add_si64 SSE2 intrin.h __m64 _mm_add_si64(__m64, __m64);
_mm_add_ss SSE intrin.h __m128 _mm_add_ss(__m128, __m128);
_mm_adds_epi16 SSE2 intrin.h __m128i _mm_adds_epi16(__m128i, __m128i);
_mm_adds_epi8 SSE2 intrin.h __m128i _mm_adds_epi8(__m128i, __m128i);
_mm_adds_epu16 SSE2 intrin.h __m128i _mm_adds_epu16(__m128i, __m128i);
_mm_adds_epu8 SSE2 intrin.h __m128i _mm_adds_epu8(__m128i, __m128i);
_mm_adds_pi8 MMX mmintrin.h __m64 _mm_adds_pi8(__m64, __m64); [Macro]
_mm_adds_pi16 MMX mmintrin.h __m64 _mm_adds_pi16(__m64, __m64); [Macro]
_mm_adds_pu8 MMX mmintrin.h __m64 _mm_adds_pu8(__m64, __m64); [Macro]
_mm_adds_pu16 MMX mmintrin.h __m64 _mm_adds_pu16(__m64, __m64); [Macro]
_mm_addsub_pd SSE3 intrin.h __m128d _mm_addsub_pd(__m128d, __m128d);
_mm_addsub_ps SSE3 intrin.h __m128 _mm_addsub_ps(__m128, __m128);
_mm_aesdec_si128 AESNI immintrin.h __m128i _mm_aesdec_si128(__m128i, __m128i);
_mm_aesdeclast_si128 AESNI immintrin.h __m128i _mm_aesdeclast_si128(__m128i, __m128i);
_mm_aesenc_si128 AESNI immintrin.h __m128i _mm_aesenc_si128(__m128i, __m128i);
_mm_aesenclast_si128 AESNI immintrin.h __m128i _mm_aesenclast_si128(__m128i, __m128i);
_mm_aesimc_si128 AESNI immintrin.h __m128i _mm_aesimc_si128 (__m128i);
_mm_aeskeygenassist_si128 AESNI immintrin.h __m128i _mm_aeskeygenassist_si128 (__m128i, const int);
_mm_alignr_epi8 SSSE3 intrin.h __m128i _mm_alignr_epi8(__m128i, __m128i, int);
_mm_alignr_pi8 SSSE3 intrin.h __m64 _mm_alignr_pi8(__m64, __m64, int);
_mm_and_pd SSE2 intrin.h __m128d _mm_and_pd(__m128d, __m128d);
_mm_and_ps SSE intrin.h __m128 _mm_and_ps(__m128, __m128);
_mm_and_si64 MMX mmintrin.h __m64 _mm_and_si64(__m64, __m64); [Macro]
_mm_and_si128 SSE2 intrin.h __m128i _mm_and_si128(__m128i, __m128i);
_mm_andnot_pd SSE2 intrin.h __m128d _mm_andnot_pd(__m128d, __m128d);
_mm_andnot_ps SSE intrin.h __m128 _mm_andnot_ps(__m128, __m128);
_mm_andnot_si64 MMX mmintrin.h __m64 _mm_andnot_si64(__m64, __m64); [Macro]
_mm_andnot_si128 SSE2 intrin.h __m128i _mm_andnot_si128(__m128i, __m128i);
_mm_avg_epu16 SSE2 intrin.h __m128i _mm_avg_epu16(__m128i, __m128i);
_mm_avg_epu8 SSE2 intrin.h __m128i _mm_avg_epu8(__m128i, __m128i);
_mm_blend_epi16 SSE41 intrin.h __m128i _mm_blend_epi16 (__m128i, __m128i, const int);
_mm_blend_epi32 AVX2 immintrin.h __m128i _mm_blend_epi32(__m128i, __m128i, const int);
_mm_blend_pd SSE41 intrin.h __m128d _mm_blend_pd (__m128d, __m128d, const int);
_mm_blend_ps SSE41 intrin.h __m128 _mm_blend_ps (__m128, __m128, const int);
_mm_blendv_epi8 SSE41 intrin.h __m128i _mm_blendv_epi8 (__m128i, __m128i, __m128i);
_mm_blendv_pd SSE41 intrin.h __m128d _mm_blendv_pd(__m128d, __m128d, __m128d);
_mm_blendv_ps SSE41 intrin.h __m128 _mm_blendv_ps(__m128, __m128, __m128);
_mm_broadcast_ss AVX immintrin.h __m128 _mm_broadcast_ss(float const *);
_mm_broadcastb_epi8 AVX2 immintrin.h __m128i _mm_broadcastb_epi8(__m128i);
_mm_broadcastd_epi32 AVX2 immintrin.h __m128i _mm_broadcastd_epi32(__m128i);
_mm_broadcastq_epi64 AVX2 immintrin.h __m128i _mm_broadcastq_epi64(__m128i);
_mm_broadcastsd_pd AVX2 immintrin.h __m128d _mm_broadcastsd_pd(__m128d);
_mm_broadcastss_ps AVX2 immintrin.h __m128 _mm_broadcastss_ps(__m128);
_mm_broadcastw_epi16 AVX2 immintrin.h __m128i _mm_broadcastw_epi16(__m128i);
_mm_castpd_ps SSSE3 intrin.h __m128 _mm_castpd_ps(__m128d);
_mm_castpd_si128 SSSE3 intrin.h __m128i _mm_castpd_si128(__m128d);
_mm_castps_pd SSSE3 intrin.h __m128d _mm_castps_pd(__m128);
_mm_castps_si128 SSSE3 intrin.h __m128i _mm_castps_si128(__m128);
_mm_castsi128_pd SSSE3 intrin.h __m128d _mm_castsi128_pd(__m128i);
_mm_castsi128_ps SSSE3 intrin.h __m128 _mm_castsi128_ps(__m128i);
_mm_clflush SSE2 intrin.h void _mm_clflush(void const *);
_mm_clmulepi64_si128 PCLMULQDQ immintrin.h __m128i _mm_clmulepi64_si128 (__m128i, __m128i, const int);
_mm_cmov_si128 XOP ammintrin.h __m128i _mm_cmov_si128(__m128i, __m128i, __m128i);
_mm_cmp_pd AVX immintrin.h __m128d _mm_cmp_pd(__m128d, __m128d, const int);
_mm_cmp_ps AVX immintrin.h __m128 _mm_cmp_ps(__m128, __m128, const int);
_mm_cmp_sd AVX immintrin.h __m128d _mm_cmp_sd(__m128d, __m128d, const int);
_mm_cmp_ss AVX immintrin.h __m128 _mm_cmp_ss(__m128, __m128, const int);
_mm_cmpeq_epi16 SSE2 intrin.h __m128i _mm_cmpeq_epi16(__m128i, __m128i);
_mm_cmpeq_epi32 SSE2 intrin.h __m128i _mm_cmpeq_epi32(__m128i, __m128i);
_mm_cmpeq_epi64 SSE41 intrin.h __m128i _mm_cmpeq_epi64(__m128i, __m128i);
_mm_cmpeq_epi8 SSE2 intrin.h __m128i _mm_cmpeq_epi8(__m128i, __m128i);
_mm_cmpeq_pd SSE2 intrin.h __m128d _mm_cmpeq_pd(__m128d, __m128d);
_mm_cmpeq_pi8 MMX mmintrin.h __m64 _mm_cmpeq_pi8(__m64, __m64); [Macro]
_mm_cmpeq_pi16 MMX mmintrin.h __m64 _mm_cmpeq_pi16(__m64, __m64); [Macro]
_mm_cmpeq_pi32 MMX mmintrin.h __m64 _mm_cmpeq_pi32(__m64, __m64); [Macro]
_mm_cmpeq_ps SSE intrin.h __m128 _mm_cmpeq_ps(__m128, __m128);
_mm_cmpeq_sd SSE2 intrin.h __m128d _mm_cmpeq_sd(__m128d, __m128d);
_mm_cmpeq_ss SSE intrin.h __m128 _mm_cmpeq_ss(__m128, __m128);
_mm_cmpestra SSE42 intrin.h int _mm_cmpestra(__m128i, int, __m128i, int, const int);
_mm_cmpestrc SSE42 intrin.h int _mm_cmpestrc(__m128i, int, __m128i, int, const int);
_mm_cmpestri SSE42 intrin.h int _mm_cmpestri(__m128i, int, __m128i, int, const int);
_mm_cmpestrm SSE42 intrin.h __m128i _mm_cmpestrm(__m128i, int, __m128i, int, const int);
_mm_cmpestro SSE42 intrin.h int _mm_cmpestro(__m128i, int, __m128i, int, const int);
_mm_cmpestrs SSE42 intrin.h int _mm_cmpestrs(__m128i, int, __m128i, int, const int);
_mm_cmpestrz SSE42 intrin.h int _mm_cmpestrz(__m128i, int, __m128i, int, const int);
_mm_cmpge_pd SSE2 intrin.h __m128d _mm_cmpge_pd(__m128d, __m128d);
_mm_cmpge_ps SSE intrin.h __m128 _mm_cmpge_ps(__m128, __m128);
_mm_cmpge_sd SSE2 intrin.h __m128d _mm_cmpge_sd(__m128d, __m128d);
_mm_cmpge_ss SSE intrin.h __m128 _mm_cmpge_ss(__m128, __m128);
_mm_cmpgt_epi16 SSE2 intrin.h __m128i _mm_cmpgt_epi16(__m128i, __m128i);
_mm_cmpgt_epi32 SSE2 intrin.h __m128i _mm_cmpgt_epi32(__m128i, __m128i);
_mm_cmpgt_epi64 SSE42 intrin.h __m128i _mm_cmpgt_epi64(__m128i, __m128i);
_mm_cmpgt_epi8 SSE2 intrin.h __m128i _mm_cmpgt_epi8(__m128i, __m128i);
_mm_cmpgt_pi8 MMX mmintrin.h __m64 _mm_cmpgt_pi8(__m64, __m64); [Macro]
_mm_cmpgt_pi16 MMX mmintrin.h __m64 _mm_cmpgt_pi16(__m64, __m64); [Macro]
_mm_cmpgt_pi32 MMX mmintrin.h __m64 _mm_cmpgt_pi32(__m64, __m64); [Macro]
_mm_cmpgt_pd SSE2 intrin.h __m128d _mm_cmpgt_pd(__m128d, __m128d);
_mm_cmpgt_ps SSE intrin.h __m128 _mm_cmpgt_ps(__m128, __m128);
_mm_cmpgt_sd SSE2 intrin.h __m128d _mm_cmpgt_sd(__m128d, __m128d);
_mm_cmpgt_ss SSE intrin.h __m128 _mm_cmpgt_ss(__m128, __m128);
_mm_cmpistra SSE42 intrin.h int _mm_cmpistra(__m128i, __m128i, const int);
_mm_cmpistrc SSE42 intrin.h int _mm_cmpistrc(__m128i, __m128i, const int);
_mm_cmpistri SSE42 intrin.h int _mm_cmpistri(__m128i, __m128i, const int);
_mm_cmpistrm SSE42 intrin.h __m128i _mm_cmpistrm(__m128i, __m128i, const int);
_mm_cmpistro SSE42 intrin.h int _mm_cmpistro(__m128i, __m128i, const int);
_mm_cmpistrs SSE42 intrin.h int _mm_cmpistrs(__m128i, __m128i, const int);
_mm_cmpistrz SSE42 intrin.h int _mm_cmpistrz(__m128i, __m128i, const int);
_mm_cmple_pd SSE2 intrin.h __m128d _mm_cmple_pd(__m128d, __m128d);
_mm_cmple_ps SSE intrin.h __m128 _mm_cmple_ps(__m128, __m128);
_mm_cmple_sd SSE2 intrin.h __m128d _mm_cmple_sd(__m128d, __m128d);
_mm_cmple_ss SSE intrin.h __m128 _mm_cmple_ss(__m128, __m128);
_mm_cmplt_epi16 SSE2 intrin.h __m128i _mm_cmplt_epi16(__m128i, __m128i);
_mm_cmplt_epi32 SSE2 intrin.h __m128i _mm_cmplt_epi32(__m128i, __m128i);
_mm_cmplt_epi8 SSE2 intrin.h __m128i _mm_cmplt_epi8(__m128i, __m128i);
_mm_cmplt_pd SSE2 intrin.h __m128d _mm_cmplt_pd(__m128d, __m128d);
_mm_cmplt_ps SSE intrin.h __m128 _mm_cmplt_ps(__m128, __m128);
_mm_cmplt_sd SSE2 intrin.h __m128d _mm_cmplt_sd(__m128d, __m128d);
_mm_cmplt_ss SSE intrin.h __m128 _mm_cmplt_ss(__m128, __m128);
_mm_cmpneq_pd SSE2 intrin.h __m128d _mm_cmpneq_pd(__m128d, __m128d);
_mm_cmpneq_ps SSE intrin.h __m128 _mm_cmpneq_ps(__m128, __m128);
_mm_cmpneq_sd SSE2 intrin.h __m128d _mm_cmpneq_sd(__m128d, __m128d);
_mm_cmpneq_ss SSE intrin.h __m128 _mm_cmpneq_ss(__m128, __m128);
_mm_cmpnge_pd SSE2 intrin.h __m128d _mm_cmpnge_pd(__m128d, __m128d);
_mm_cmpnge_ps SSE intrin.h __m128 _mm_cmpnge_ps(__m128, __m128);
_mm_cmpnge_sd SSE2 intrin.h __m128d _mm_cmpnge_sd(__m128d, __m128d);
_mm_cmpnge_ss SSE intrin.h __m128 _mm_cmpnge_ss(__m128, __m128);
_mm_cmpngt_pd SSE2 intrin.h __m128d _mm_cmpngt_pd(__m128d, __m128d);
_mm_cmpngt_ps SSE intrin.h __m128 _mm_cmpngt_ps(__m128, __m128);
_mm_cmpngt_sd SSE2 intrin.h __m128d _mm_cmpngt_sd(__m128d, __m128d);
_mm_cmpngt_ss SSE intrin.h __m128 _mm_cmpngt_ss(__m128, __m128);
_mm_cmpnle_pd SSE2 intrin.h __m128d _mm_cmpnle_pd(__m128d, __m128d);
_mm_cmpnle_ps SSE intrin.h __m128 _mm_cmpnle_ps(__m128, __m128);
_mm_cmpnle_sd SSE2 intrin.h __m128d _mm_cmpnle_sd(__m128d, __m128d);
_mm_cmpnle_ss SSE intrin.h __m128 _mm_cmpnle_ss(__m128, __m128);
_mm_cmpnlt_pd SSE2 intrin.h __m128d _mm_cmpnlt_pd(__m128d, __m128d);
_mm_cmpnlt_ps SSE intrin.h __m128 _mm_cmpnlt_ps(__m128, __m128);
_mm_cmpnlt_sd SSE2 intrin.h __m128d _mm_cmpnlt_sd(__m128d, __m128d);
_mm_cmpnlt_ss SSE intrin.h __m128 _mm_cmpnlt_ss(__m128, __m128);
_mm_cmpord_pd SSE2 intrin.h __m128d _mm_cmpord_pd(__m128d, __m128d);
_mm_cmpord_ps SSE intrin.h __m128 _mm_cmpord_ps(__m128, __m128);
_mm_cmpord_sd SSE2 intrin.h __m128d _mm_cmpord_sd(__m128d, __m128d);
_mm_cmpord_ss SSE intrin.h __m128 _mm_cmpord_ss(__m128, __m128);
_mm_cmpunord_pd SSE2 intrin.h __m128d _mm_cmpunord_pd(__m128d, __m128d);
_mm_cmpunord_ps SSE intrin.h __m128 _mm_cmpunord_ps(__m128, __m128);
_mm_cmpunord_sd SSE2 intrin.h __m128d _mm_cmpunord_sd(__m128d, __m128d);
_mm_cmpunord_ss SSE intrin.h __m128 _mm_cmpunord_ss(__m128, __m128);
_mm_com_epi16 XOP ammintrin.h __m128i _mm_com_epi16(__m128i, __m128i, int);
_mm_com_epi32 XOP ammintrin.h __m128i _mm_com_epi32(__m128i, __m128i, int);
_mm_com_epi64 XOP ammintrin.h __m128i _mm_com_epi64(__m128i, __m128i, int);
_mm_com_epi8 XOP ammintrin.h __m128i _mm_com_epi8(__m128i, __m128i, int);
_mm_com_epu16 XOP ammintrin.h __m128i _mm_com_epu16(__m128i, __m128i, int);
_mm_com_epu32 XOP ammintrin.h __m128i _mm_com_epu32(__m128i, __m128i, int);
_mm_com_epu64 XOP ammintrin.h __m128i _mm_com_epu64(__m128i, __m128i, int);
_mm_com_epu8 XOP ammintrin.h __m128i _mm_com_epu8(__m128i, __m128i, int);
_mm_comieq_sd SSE2 intrin.h int _mm_comieq_sd(__m128d, __m128d);
_mm_comieq_ss SSE intrin.h int _mm_comieq_ss(__m128, __m128);
_mm_comige_sd SSE2 intrin.h int _mm_comige_sd(__m128d, __m128d);
_mm_comige_ss SSE intrin.h int _mm_comige_ss(__m128, __m128);
_mm_comigt_sd SSE2 intrin.h int _mm_comigt_sd(__m128d, __m128d);
_mm_comigt_ss SSE intrin.h int _mm_comigt_ss(__m128, __m128);
_mm_comile_sd SSE2 intrin.h int _mm_comile_sd(__m128d, __m128d);
_mm_comile_ss SSE intrin.h int _mm_comile_ss(__m128, __m128);
_mm_comilt_sd SSE2 intrin.h int _mm_comilt_sd(__m128d, __m128d);
_mm_comilt_ss SSE intrin.h int _mm_comilt_ss(__m128, __m128);
_mm_comineq_sd SSE2 intrin.h int _mm_comineq_sd(__m128d, __m128d);
_mm_comineq_ss SSE intrin.h int _mm_comineq_ss(__m128, __m128);
_mm_crc32_u16 SSE42 intrin.h unsigned int _mm_crc32_u16(unsigned int, unsigned short);
_mm_crc32_u32 SSE42 intrin.h unsigned int _mm_crc32_u32(unsigned int, unsigned int);
_mm_crc32_u8 SSE42 intrin.h unsigned int _mm_crc32_u8(unsigned int, unsigned char);
_mm_cvt_pi2ps SSE intrin.h __m128 _mm_cvt_pi2ps(__m128, __m64);
_mm_cvt_ps2pi SSE intrin.h __m64 _mm_cvt_ps2pi(__m128);
_mm_cvt_si2ss SSE intrin.h __m128 _mm_cvt_si2ss(__m128, int);
_mm_cvt_ss2si SSE intrin.h int _mm_cvt_ss2si(__m128);
_mm_cvtepi16_epi32 SSE41 intrin.h __m128i _mm_cvtepi16_epi32(__m128i);
_mm_cvtepi16_epi64 SSE41 intrin.h __m128i _mm_cvtepi16_epi64(__m128i);
_mm_cvtepi32_epi64 SSE41 intrin.h __m128i _mm_cvtepi32_epi64(__m128i);
_mm_cvtepi32_pd SSE2 intrin.h __m128d _mm_cvtepi32_pd(__m128i);
_mm_cvtepi32_ps SSE2 intrin.h __m128 _mm_cvtepi32_ps(__m128i);
_mm_cvtepi8_epi16 SSE41 intrin.h __m128i _mm_cvtepi8_epi16 (__m128i);
_mm_cvtepi8_epi32 SSE41 intrin.h __m128i _mm_cvtepi8_epi32 (__m128i);
_mm_cvtepi8_epi64 SSE41 intrin.h __m128i _mm_cvtepi8_epi64 (__m128i);
_mm_cvtepu16_epi32 SSE41 intrin.h __m128i _mm_cvtepu16_epi32(__m128i);
_mm_cvtepu16_epi64 SSE41 intrin.h __m128i _mm_cvtepu16_epi64(__m128i);
_mm_cvtepu32_epi64 SSE41 intrin.h __m128i _mm_cvtepu32_epi64(__m128i);
_mm_cvtepu8_epi16 SSE41 intrin.h __m128i _mm_cvtepu8_epi16 (__m128i);
_mm_cvtepu8_epi32 SSE41 intrin.h __m128i _mm_cvtepu8_epi32 (__m128i);
_mm_cvtepu8_epi64 SSE41 intrin.h __m128i _mm_cvtepu8_epi64 (__m128i);
_mm_cvtpd_epi32 SSE2 intrin.h __m128i _mm_cvtpd_epi32(__m128d);
_mm_cvtpd_pi32 SSE2 intrin.h __m64 _mm_cvtpd_pi32(__m128d);
_mm_cvtpd_ps SSE2 intrin.h __m128 _mm_cvtpd_ps(__m128d);
_mm_cvtph_ps F16C immintrin.h __m128 _mm_cvtph_ps(__m128i);
_mm_cvtpi32_pd SSE2 intrin.h __m128d _mm_cvtpi32_pd(__m64);
_mm_cvtps_epi32 SSE2 intrin.h __m128i _mm_cvtps_epi32(__m128);
_mm_cvtps_pd SSE2 intrin.h __m128d _mm_cvtps_pd(__m128);
_mm_cvtps_ph F16C immintrin.h __m128i _mm_cvtps_ph(__m128, const int);
_mm_cvtsd_f64 SSSE3 intrin.h double _mm_cvtsd_f64(__m128d);
_mm_cvtsd_si32 SSE2 intrin.h int _mm_cvtsd_si32(__m128d);
_mm_cvtsd_ss SSE2 intrin.h __m128 _mm_cvtsd_ss(__m128, __m128d);
_mm_cvtsi128_si32 SSE2 intrin.h int _mm_cvtsi128_si32(__m128i);
_mm_cvtsi32_sd SSE2 intrin.h __m128d _mm_cvtsi32_sd(__m128d, int);
_mm_cvtsi32_si128 SSE2 intrin.h __m128i _mm_cvtsi32_si128(int);
_mm_cvtsi32_si64 MMX mmintrin.h __m64 _mm_cvtsi32_si64(int); [Macro]
_mm_cvtsi64_si32 MMX mmintrin.h int _mm_cvtsi64_si32 (__m64); [Macro]
_mm_cvtss_f32 SSSE3 intrin.h float _mm_cvtss_f32(__m128);
_mm_cvtss_sd SSE2 intrin.h __m128d _mm_cvtss_sd(__m128d, __m128);
_mm_cvtt_ps2pi SSE intrin.h __m64 _mm_cvtt_ps2pi(__m128);
_mm_cvtt_ss2si SSE intrin.h int _mm_cvtt_ss2si(__m128);
_mm_cvttpd_epi32 SSE2 intrin.h __m128i _mm_cvttpd_epi32(__m128d);
_mm_cvttpd_pi32 SSE2 intrin.h __m64 _mm_cvttpd_pi32(__m128d);
_mm_cvttps_epi32 SSE2 intrin.h __m128i _mm_cvttps_epi32(__m128);
_mm_cvttsd_si32 SSE2 intrin.h int _mm_cvttsd_si32(__m128d);
_mm_div_pd SSE2 intrin.h __m128d _mm_div_pd(__m128d, __m128d);
_mm_div_ps SSE intrin.h __m128 _mm_div_ps(__m128, __m128);
_mm_div_sd SSE2 intrin.h __m128d _mm_div_sd(__m128d, __m128d);
_mm_div_ss SSE intrin.h __m128 _mm_div_ss(__m128, __m128);
_mm_dp_pd SSE41 intrin.h __m128d _mm_dp_pd(__m128d, __m128d, const int);
_mm_dp_ps SSE41 intrin.h __m128 _mm_dp_ps(__m128, __m128, const int);
_mm_empty MMX mmintrin.h void _mm_empty (void); [Macro]
_mm_extract_epi16 SSE2 intrin.h int _mm_extract_epi16(__m128i, int);
_mm_extract_epi32 SSE41 intrin.h int _mm_extract_epi32(__m128i, const int);
_mm_extract_epi8 SSE41 intrin.h int _mm_extract_epi8 (__m128i, const int);
_mm_extract_ps SSE41 intrin.h int _mm_extract_ps(__m128, const int);
_mm_extract_si64 SSE4a intrin.h __m128i _mm_extract_si64(__m128i, __m128i);
_mm_extracti_si64 SSE4a intrin.h __m128i _mm_extracti_si64(__m128i, int, int);
_mm_fmadd_pd FMA immintrin.h __m128d _mm_fmadd_pd (__m128d, __m128d, __m128d);
_mm_fmadd_ps FMA immintrin.h __m128 _mm_fmadd_ps (__m128, __m128, __m128);
_mm_fmadd_sd FMA immintrin.h __m128d _mm_fmadd_sd (__m128d, __m128d, __m128d);
_mm_fmadd_ss FMA immintrin.h __m128 _mm_fmadd_ss (__m128, __m128, __m128);
_mm_fmaddsub_pd FMA immintrin.h __m128d _mm_fmaddsub_pd (__m128d, __m128d, __m128d);
_mm_fmaddsub_ps FMA immintrin.h __m128 _mm_fmaddsub_ps (__m128, __m128, __m128);
_mm_fmsub_pd FMA immintrin.h __m128d _mm_fmsub_pd (__m128d, __m128d, __m128d);
_mm_fmsub_ps FMA immintrin.h __m128 _mm_fmsub_ps (__m128, __m128, __m128);
_mm_fmsub_sd FMA immintrin.h __m128d _mm_fmsub_sd (__m128d, __m128d, __m128d);
_mm_fmsub_ss FMA immintrin.h __m128 _mm_fmsub_ss (__m128, __m128, __m128);
_mm_fmsubadd_pd FMA immintrin.h __m128d _mm_fmsubadd_pd (__m128d, __m128d, __m128d);
_mm_fmsubadd_ps FMA immintrin.h __m128 _mm_fmsubadd_ps (__m128, __m128, __m128);
_mm_fnmadd_pd FMA immintrin.h __m128d _mm_fnmadd_pd (__m128d, __m128d, __m128d);
_mm_fnmadd_ps FMA immintrin.h __m128 _mm_fnmadd_ps (__m128, __m128, __m128);
_mm_fnmadd_sd FMA immintrin.h __m128d _mm_fnmadd_sd (__m128d, __m128d, __m128d);
_mm_fnmadd_ss FMA immintrin.h __m128 _mm_fnmadd_ss (__m128, __m128, __m128);
_mm_fnmsub_pd FMA immintrin.h __m128d _mm_fnmsub_pd (__m128d, __m128d, __m128d);
_mm_fnmsub_ps FMA immintrin.h __m128 _mm_fnmsub_ps (__m128, __m128, __m128);
_mm_fnmsub_sd FMA immintrin.h __m128d _mm_fnmsub_sd (__m128d, __m128d, __m128d);
_mm_fnmsub_ss FMA immintrin.h __m128 _mm_fnmsub_ss (__m128, __m128, __m128);
_mm_frcz_pd XOP ammintrin.h __m128d _mm_frcz_pd(__m128d);
_mm_frcz_ps XOP ammintrin.h __m128 _mm_frcz_ps(__m128);
_mm_frcz_sd XOP ammintrin.h __m128d _mm_frcz_sd(__m128d, __m128d);
_mm_frcz_ss XOP ammintrin.h __m128 _mm_frcz_ss(__m128, __m128);
_mm_getcsr SSE intrin.h unsigned int _mm_getcsr(void);
_mm_hadd_epi16 SSSE3 intrin.h __m128i _mm_hadd_epi16(__m128i, __m128i);
_mm_hadd_epi32 SSSE3 intrin.h __m128i _mm_hadd_epi32(__m128i, __m128i);
_mm_hadd_pd SSE3 intrin.h __m128d _mm_hadd_pd(__m128d, __m128d);
_mm_hadd_pi16 SSSE3 intrin.h __m64 _mm_hadd_pi16(__m64, __m64);
_mm_hadd_pi32 SSSE3 intrin.h __m64 _mm_hadd_pi32(__m64, __m64);
_mm_hadd_ps SSE3 intrin.h __m128 _mm_hadd_ps(__m128, __m128);
_mm_haddd_epi16 XOP ammintrin.h __m128i _mm_haddd_epi16(__m128i);
_mm_haddd_epi8 XOP ammintrin.h __m128i _mm_haddd_epi8(__m128i);
_mm_haddd_epu16 XOP ammintrin.h __m128i _mm_haddd_epu16(__m128i);
_mm_haddd_epu8 XOP ammintrin.h __m128i _mm_haddd_epu8(__m128i);
_mm_haddq_epi16 XOP ammintrin.h __m128i _mm_haddq_epi16(__m128i);
_mm_haddq_epi32 XOP ammintrin.h __m128i _mm_haddq_epi32(__m128i);
_mm_haddq_epi8 XOP ammintrin.h __m128i _mm_haddq_epi8(__m128i);
_mm_haddq_epu16 XOP ammintrin.h __m128i _mm_haddq_epu16(__m128i);
_mm_haddq_epu32 XOP ammintrin.h __m128i _mm_haddq_epu32(__m128i);
_mm_haddq_epu8 XOP ammintrin.h __m128i _mm_haddq_epu8(__m128i);
_mm_hadds_epi16 SSSE3 intrin.h __m128i _mm_hadds_epi16(__m128i, __m128i);
_mm_hadds_pi16 SSSE3 intrin.h __m64 _mm_hadds_pi16(__m64, __m64);
_mm_haddw_epi8 XOP ammintrin.h __m128i _mm_haddw_epi8(__m128i);
_mm_haddw_epu8 XOP ammintrin.h __m128i _mm_haddw_epu8(__m128i);
_mm_hsub_epi16 SSSE3 intrin.h __m128i _mm_hsub_epi16(__m128i, __m128i);
_mm_hsub_epi32 SSSE3 intrin.h __m128i _mm_hsub_epi32(__m128i, __m128i);
_mm_hsub_pd SSE3 intrin.h __m128d _mm_hsub_pd(__m128d, __m128d);
_mm_hsub_pi16 SSSE3 intrin.h __m64 _mm_hsub_pi16(__m64, __m64);
_mm_hsub_pi32 SSSE3 intrin.h __m64 _mm_hsub_pi32(__m64, __m64);
_mm_hsub_ps SSE3 intrin.h __m128 _mm_hsub_ps(__m128, __m128);
_mm_hsubd_epi16 XOP ammintrin.h __m128i _mm_hsubd_epi16(__m128i);
_mm_hsubq_epi32 XOP ammintrin.h __m128i _mm_hsubq_epi32(__m128i);
_mm_hsubs_epi16 SSSE3 intrin.h __m128i _mm_hsubs_epi16(__m128i, __m128i);
_mm_hsubs_pi16 SSSE3 intrin.h __m64 _mm_hsubs_pi16(__m64, __m64);
_mm_hsubw_epi8 XOP ammintrin.h __m128i _mm_hsubw_epi8(__m128i);
_mm_i32gather_epi32 AVX2 immintrin.h __m128i _mm_i32gather_epi32(int const *, __m128i, const int);
_mm_i32gather_epi64 AVX2 immintrin.h __m128i _mm_i32gather_epi64(__int64 const *, __m128i, const int);
_mm_i32gather_pd AVX2 immintrin.h __m128d _mm_i32gather_pd(double const *, __m128i, const int);
_mm_i32gather_ps AVX2 immintrin.h __m128 _mm_i32gather_ps(float const *, __m128i, const int);
_mm_i64gather_epi32 AVX2 immintrin.h __m128i _mm_i64gather_epi32(int const *, __m128i, const int);
_mm_i64gather_epi64 AVX2 immintrin.h __m128i _mm_i64gather_epi64(__int64 const *, __m128i, const int);
_mm_i64gather_pd AVX2 immintrin.h __m128d _mm_i64gather_pd(double const *, __m128i, const int);
_mm_i64gather_ps AVX2 immintrin.h __m128 _mm_i64gather_ps(float const *, __m128i, const int);
_mm_insert_epi16 SSE2 intrin.h __m128i _mm_insert_epi16(__m128i, int, int);
_mm_insert_epi32 SSE41 intrin.h __m128i _mm_insert_epi32(__m128i, int, const int);
_mm_insert_epi8 SSE41 intrin.h __m128i _mm_insert_epi8 (__m128i, int, const int);
_mm_insert_ps SSE41 intrin.h __m128 _mm_insert_ps(__m128, __m128, const int);
_mm_insert_si64 SSE4a intrin.h __m128i _mm_insert_si64(__m128i, __m128i);
_mm_inserti_si64 SSE4a intrin.h __m128i _mm_inserti_si64(__m128i, __m128i, int, int);
_mm_lddqu_si128 SSE3 intrin.h __m128i _mm_lddqu_si128(__m128i const*);
_mm_lfence SSE2 intrin.h void _mm_lfence(void);
_mm_load_pd SSE2 intrin.h __m128d _mm_load_pd(double*);
_mm_load_ps SSE intrin.h __m128 _mm_load_ps(float*);
_mm_load_ps1 SSE intrin.h __m128 _mm_load_ps1(float*);
_mm_load_sd SSE2 intrin.h __m128d _mm_load_sd(double*);
_mm_load_si128 SSE2 intrin.h __m128i _mm_load_si128(__m128i*);
_mm_load_ss SSE intrin.h __m128 _mm_load_ss(float*);
_mm_load1_pd SSE2 intrin.h __m128d _mm_load1_pd(double*);
_mm_loaddup_pd SSE3 intrin.h __m128d _mm_loaddup_pd(double const*);
_mm_loadh_pd SSE2 intrin.h __m128d _mm_loadh_pd(__m128d, double*);
_mm_loadh_pi SSE intrin.h __m128 _mm_loadh_pi(__m128, __m64*);
_mm_loadl_epi64 SSE2 intrin.h __m128i _mm_loadl_epi64(__m128i*);
_mm_loadl_pd SSE2 intrin.h __m128d _mm_loadl_pd(__m128d, double*);
_mm_loadl_pi SSE intrin.h __m128 _mm_loadl_pi(__m128, __m64*);
_mm_loadr_pd SSE2 intrin.h __m128d _mm_loadr_pd(double*);
_mm_loadr_ps SSE intrin.h __m128 _mm_loadr_ps(float*);
_mm_loadu_pd SSE2 intrin.h __m128d _mm_loadu_pd(double*);
_mm_loadu_ps SSE intrin.h __m128 _mm_loadu_ps(float*);
_mm_loadu_si128 SSE2 intrin.h __m128i _mm_loadu_si128(__m128i*);
_mm_macc_epi16 XOP ammintrin.h __m128i _mm_macc_epi16(__m128i, __m128i, __m128i);
_mm_macc_epi32 XOP ammintrin.h __m128i _mm_macc_epi32(__m128i, __m128i, __m128i);
_mm_macc_pd FMA4 ammintrin.h __m128d _mm_macc_pd(__m128d, __m128d, __m128d);
_mm_macc_ps FMA4 ammintrin.h __m128 _mm_macc_ps(__m128, __m128, __m128);
_mm_macc_sd FMA4 ammintrin.h __m128d _mm_macc_sd(__m128d, __m128d, __m128d);
_mm_macc_ss FMA4 ammintrin.h __m128 _mm_macc_ss(__m128, __m128, __m128);
_mm_maccd_epi16 XOP ammintrin.h __m128i _mm_maccd_epi16(__m128i, __m128i, __m128i);
_mm_macchi_epi32 XOP ammintrin.h __m128i _mm_macchi_epi32(__m128i, __m128i, __m128i);
_mm_macclo_epi32 XOP ammintrin.h __m128i _mm_macclo_epi32(__m128i, __m128i, __m128i);
_mm_maccs_epi16 XOP ammintrin.h __m128i _mm_maccs_epi16(__m128i, __m128i, __m128i);
_mm_maccs_epi32 XOP ammintrin.h __m128i _mm_maccs_epi32(__m128i, __m128i, __m128i);
_mm_maccsd_epi16 XOP ammintrin.h __m128i _mm_maccsd_epi16(__m128i, __m128i, __m128i);
_mm_maccshi_epi32 XOP ammintrin.h __m128i _mm_maccshi_epi32(__m128i, __m128i, __m128i);
_mm_maccslo_epi32 XOP ammintrin.h __m128i _mm_maccslo_epi32(__m128i, __m128i, __m128i);
_mm_madd_epi16 SSE2 intrin.h __m128i _mm_madd_epi16(__m128i, __m128i);
_mm_madd_pi16 MMX mmintrin.h __m64 _mm_madd_pi16(__m64, __m64); [Macro]
_mm_maddd_epi16 XOP ammintrin.h __m128i _mm_maddd_epi16(__m128i, __m128i, __m128i);
_mm_maddsd_epi16 XOP ammintrin.h __m128i _mm_maddsd_epi16(__m128i, __m128i, __m128i);
_mm_maddsub_pd FMA4 ammintrin.h __m128d _mm_maddsub_pd(__m128d, __m128d, __m128d);
_mm_maddsub_ps FMA4 ammintrin.h __m128 _mm_maddsub_ps(__m128, __m128, __m128);
_mm_maddubs_epi16 SSSE3 intrin.h __m128i _mm_maddubs_epi16(__m128i, __m128i);
_mm_maddubs_pi16 SSSE3 intrin.h __m64 _mm_maddubs_pi16(__m64, __m64);
_mm_mask_i32gather_epi32 AVX2 immintrin.h __m128i _mm_mask_i32gather_epi32(__m128i, int const *, __m128i, __m128i, const int);
_mm_mask_i32gather_epi64 AVX2 immintrin.h __m128i _mm_mask_i32gather_epi64(__m128i, __int64 const *, __m128i, __m128i, const int);
_mm_mask_i32gather_pd AVX2 immintrin.h __m128d _mm_mask_i32gather_pd(__m128d, double const *, __m128i, __m128d, const int);
_mm_mask_i32gather_ps AVX2 immintrin.h __m128 _mm_mask_i32gather_ps(__m128, float const *, __m128i, __m128, const int);
_mm_mask_i64gather_epi32 AVX2 immintrin.h __m128i _mm_mask_i64gather_epi32(__m128i, int const *, __m128i, __m128i, const int);
_mm_mask_i64gather_epi64 AVX2 immintrin.h __m128i _mm_mask_i64gather_epi64(__m128i, __int64 const *, __m128i, __m128i, const int);
_mm_mask_i64gather_pd AVX2 immintrin.h __m128d _mm_mask_i64gather_pd(__m128d, double const *, __m128i, __m128d, const int);
_mm_mask_i64gather_ps AVX2 immintrin.h __m128 _mm_mask_i64gather_ps(__m128, float const *, __m128i, __m128, const int);
_mm_maskload_epi32 AVX2 immintrin.h __m128i _mm_maskload_epi32(int const *, __m128i);
_mm_maskload_epi64 AVX2 immintrin.h __m128i _mm_maskload_epi64(__int64 const *, __m128i);
_mm_maskload_pd AVX immintrin.h __m128d _mm_maskload_pd(double const *, __m128i);
_mm_maskload_ps AVX immintrin.h __m128 _mm_maskload_ps(float const *, __m128i);
_mm_maskmoveu_si128 SSE2 intrin.h void _mm_maskmoveu_si128(__m128i, __m128i, char*);
_mm_maskstore_epi32 AVX2 immintrin.h void _mm_maskstore_epi32(int *, __m128i, __m128i);
_mm_maskstore_epi64 AVX2 immintrin.h void _mm_maskstore_epi64(__int64 *, __m128i, __m128i);
_mm_maskstore_pd AVX immintrin.h void _mm_maskstore_pd(double *, __m128i, __m128d);
_mm_maskstore_ps AVX immintrin.h void _mm_maskstore_ps(float *, __m128i, __m128);
_mm_max_epi16 SSE2 intrin.h __m128i _mm_max_epi16(__m128i, __m128i);
_mm_max_epi32 SSE41 intrin.h __m128i _mm_max_epi32(__m128i, __m128i);
_mm_max_epi8 SSE41 intrin.h __m128i _mm_max_epi8 (__m128i, __m128i);
_mm_max_epu16 SSE41 intrin.h __m128i _mm_max_epu16(__m128i, __m128i);
_mm_max_epu32 SSE41 intrin.h __m128i _mm_max_epu32(__m128i, __m128i);
_mm_max_epu8 SSE2 intrin.h __m128i _mm_max_epu8(__m128i, __m128i);
_mm_max_pd SSE2 intrin.h __m128d _mm_max_pd(__m128d, __m128d);
_mm_max_ps SSE intrin.h __m128 _mm_max_ps(__m128, __m128);
_mm_max_sd SSE2 intrin.h __m128d _mm_max_sd(__m128d, __m128d);
_mm_max_ss SSE intrin.h __m128 _mm_max_ss(__m128, __m128);
_mm_mfence SSE2 intrin.h void _mm_mfence(void);
_mm_min_epi16 SSE2 intrin.h __m128i _mm_min_epi16(__m128i, __m128i);
_mm_min_epi32 SSE41 intrin.h __m128i _mm_min_epi32(__m128i, __m128i);
_mm_min_epi8 SSE41 intrin.h __m128i _mm_min_epi8 (__m128i, __m128i);
_mm_min_epu16 SSE41 intrin.h __m128i _mm_min_epu16(__m128i, __m128i);
_mm_min_epu32 SSE41 intrin.h __m128i _mm_min_epu32(__m128i, __m128i);
_mm_min_epu8 SSE2 intrin.h __m128i _mm_min_epu8(__m128i, __m128i);
_mm_min_pd SSE2 intrin.h __m128d _mm_min_pd(__m128d, __m128d);
_mm_min_ps SSE intrin.h __m128 _mm_min_ps(__m128, __m128);
_mm_min_sd SSE2 intrin.h __m128d _mm_min_sd(__m128d, __m128d);
_mm_min_ss SSE intrin.h __m128 _mm_min_ss(__m128, __m128);
_mm_minpos_epu16 SSE41 intrin.h __m128i _mm_minpos_epu16(__m128i);
_mm_monitor SSE3 intrin.h void _mm_monitor(void const*, unsigned int, unsigned int);
_mm_move_epi64 SSE2 intrin.h __m128i _mm_move_epi64(__m128i);
_mm_move_sd SSE2 intrin.h __m128d _mm_move_sd(__m128d, __m128d);
_mm_move_ss SSE intrin.h __m128 _mm_move_ss(__m128, __m128);
_mm_movedup_pd SSE3 intrin.h __m128d _mm_movedup_pd(__m128d);
_mm_movehdup_ps SSE3 intrin.h __m128 _mm_movehdup_ps(__m128);
_mm_movehl_ps SSE intrin.h __m128 _mm_movehl_ps(__m128, __m128);
_mm_moveldup_ps SSE3 intrin.h __m128 _mm_moveldup_ps(__m128);
_mm_movelh_ps SSE intrin.h __m128 _mm_movelh_ps(__m128, __m128);
_mm_movemask_epi8 SSE2 intrin.h int _mm_movemask_epi8(__m128i);
_mm_movemask_pd SSE2 intrin.h int _mm_movemask_pd(__m128d);
_mm_movemask_ps SSE intrin.h int _mm_movemask_ps(__m128);
_mm_movepi64_pi64 SSE2 intrin.h __m64 _mm_movepi64_pi64(__m128i);
_mm_movpi64_epi64 SSE2 intrin.h __m128i _mm_movpi64_epi64(__m64);
_mm_mpsadbw_epu8 SSE41 intrin.h __m128i _mm_mpsadbw_epu8(__m128i, __m128i, const int);
_mm_msub_pd FMA4 ammintrin.h __m128d _mm_msub_pd(__m128d, __m128d, __m128d);
_mm_msub_ps FMA4 ammintrin.h __m128 _mm_msub_ps(__m128, __m128, __m128);
_mm_msub_sd FMA4 ammintrin.h __m128d _mm_msub_sd(__m128d, __m128d, __m128d);
_mm_msub_ss FMA4 ammintrin.h __m128 _mm_msub_ss(__m128, __m128, __m128);
_mm_msubadd_pd FMA4 ammintrin.h __m128d _mm_msubadd_pd(__m128d, __m128d, __m128d);
_mm_msubadd_ps FMA4 ammintrin.h __m128 _mm_msubadd_ps(__m128, __m128, __m128);
_mm_mul_epi32 SSE41 intrin.h __m128i _mm_mul_epi32(__m128i, __m128i);
_mm_mul_epu32 SSE2 intrin.h __m128i _mm_mul_epu32(__m128i, __m128i);
_mm_mul_pd SSE2 intrin.h __m128d _mm_mul_pd(__m128d, __m128d);
_mm_mul_ps SSE intrin.h __m128 _mm_mul_ps(__m128, __m128);
_mm_mul_sd SSE2 intrin.h __m128d _mm_mul_sd(__m128d, __m128d);
_mm_mul_ss SSE intrin.h __m128 _mm_mul_ss(__m128, __m128);
_mm_mul_su32 SSE2 intrin.h __m64 _mm_mul_su32(__m64, __m64);
_mm_mulhi_epi16 SSE2 intrin.h __m128i _mm_mulhi_epi16(__m128i, __m128i);
_mm_mulhi_epu16 SSE2 intrin.h __m128i _mm_mulhi_epu16(__m128i, __m128i);
_mm_mulhi_pi16 MMX mmintrin.h __m64 _mm_mulhi_pi16(__m64, __m64); [Macro]
_mm_mulhrs_epi16 SSSE3 intrin.h __m128i _mm_mulhrs_epi16(__m128i, __m128i);
_mm_mulhrs_pi16 SSSE3 intrin.h __m64 _mm_mulhrs_pi16(__m64, __m64);
_mm_mullo_epi16 SSE2 intrin.h __m128i _mm_mullo_epi16(__m128i, __m128i);
_mm_mullo_epi32 SSE41 intrin.h __m128i _mm_mullo_epi32(__m128i, __m128i);
_mm_mullo_pi16 MMX mmintrin.h __m64 _mm_mullo_pi16(__m64, __m64); [Macro]
_mm_mwait SSE3 intrin.h void _mm_mwait(unsigned int, unsigned int);
_mm_nmacc_pd FMA4 ammintrin.h __m128d _mm_nmacc_pd(__m128d, __m128d, __m128d);
_mm_nmacc_ps FMA4 ammintrin.h __m128 _mm_nmacc_ps(__m128, __m128, __m128);
_mm_nmacc_sd FMA4 ammintrin.h __m128d _mm_nmacc_sd(__m128d, __m128d, __m128d);
_mm_nmacc_ss FMA4 ammintrin.h __m128 _mm_nmacc_ss(__m128, __m128, __m128);
_mm_nmsub_pd FMA4 ammintrin.h __m128d _mm_nmsub_pd(__m128d, __m128d, __m128d);
_mm_nmsub_ps FMA4 ammintrin.h __m128 _mm_nmsub_ps(__m128, __m128, __m128);
_mm_nmsub_sd FMA4 ammintrin.h __m128d _mm_nmsub_sd(__m128d, __m128d, __m128d);
_mm_nmsub_ss FMA4 ammintrin.h __m128 _mm_nmsub_ss(__m128, __m128, __m128);
_mm_or_pd SSE2 intrin.h __m128d _mm_or_pd(__m128d, __m128d);
_mm_or_ps SSE intrin.h __m128 _mm_or_ps(__m128, __m128);
_mm_or_si64 MMX mmintrin.h __m64 _mm_or_si64(__m64, __m64); [Macro]
_mm_or_si128 SSE2 intrin.h __m128i _mm_or_si128(__m128i, __m128i);
_mm_packs_epi16 SSE2 intrin.h __m128i _mm_packs_epi16(__m128i, __m128i);
_mm_packs_epi32 SSE2 intrin.h __m128i _mm_packs_epi32(__m128i, __m128i);
_mm_packs_pi16 MMX mmintrin.h __m64 _mm_packs_pi16 (__m64, __m64); [Macro]
_mm_packs_pi32 MMX mmintrin.h __m64 _mm_packs_pi32 (__m64, __m64); [Macro]
_mm_packs_pu16 MMX mmintrin.h __m64 _mm_packs_pu16 (__m64, __m64); [Macro]
_mm_packus_epi16 SSE2 intrin.h __m128i _mm_packus_epi16(__m128i, __m128i);
_mm_packus_epi32 SSE41 intrin.h __m128i _mm_packus_epi32(__m128i, __m128i);
_mm_pause SSE2 intrin.h void _mm_pause(void);
_mm_perm_epi8 XOP ammintrin.h __m128i _mm_perm_epi8(__m128i, __m128i, __m128i);
_mm_permute_pd AVX immintrin.h __m128d _mm_permute_pd(__m128d, int);
_mm_permute_ps AVX immintrin.h __m128 _mm_permute_ps(__m128, int);
_mm_permute2_pd XOP ammintrin.h __m128d _mm_permute2_pd(__m128d, __m128d, __m128i, int);
_mm_permute2_ps XOP ammintrin.h __m128 _mm_permute2_ps(__m128, __m128, __m128i, int);
_mm_permutevar_pd AVX immintrin.h __m128d _mm_permutevar_pd(__m128d, __m128i);
_mm_permutevar_ps AVX immintrin.h __m128 _mm_permutevar_ps(__m128, __m128i);
_mm_popcnt_u32 POPCNT intrin.h int _mm_popcnt_u32(unsigned int);
_mm_prefetch SSE intrin.h void _mm_prefetch(char*, int);
_mm_rcp_ps SSE intrin.h __m128 _mm_rcp_ps(__m128);
_mm_rcp_ss SSE intrin.h __m128 _mm_rcp_ss(__m128);
_mm_rot_epi16 XOP ammintrin.h __m128i _mm_rot_epi16(__m128i, __m128i);
_mm_rot_epi32 XOP ammintrin.h __m128i _mm_rot_epi32(__m128i, __m128i);
_mm_rot_epi64 XOP ammintrin.h __m128i _mm_rot_epi64(__m128i, __m128i);
_mm_rot_epi8 XOP ammintrin.h __m128i _mm_rot_epi8(__m128i, __m128i);
_mm_roti_epi16 XOP ammintrin.h __m128i _mm_roti_epi16(__m128i, int);
_mm_roti_epi32 XOP ammintrin.h __m128i _mm_roti_epi32(__m128i, int);
_mm_roti_epi64 XOP ammintrin.h __m128i _mm_roti_epi64(__m128i, int);
_mm_roti_epi8 XOP ammintrin.h __m128i _mm_roti_epi8(__m128i, int);
_mm_round_pd SSE41 intrin.h __m128d _mm_round_pd(__m128d, const int);
_mm_round_ps SSE41 intrin.h __m128 _mm_round_ps(__m128, const int);
_mm_round_sd SSE41 intrin.h __m128d _mm_round_sd(__m128d, __m128d, const int);
_mm_round_ss SSE41 intrin.h __m128 _mm_round_ss(__m128, __m128, const int);
_mm_rsqrt_ps SSE intrin.h __m128 _mm_rsqrt_ps(__m128);
_mm_rsqrt_ss SSE intrin.h __m128 _mm_rsqrt_ss(__m128);
_mm_sad_epu8 SSE2 intrin.h __m128i _mm_sad_epu8(__m128i, __m128i);
_mm_set_epi16 SSE2 intrin.h __m128i _mm_set_epi16(short, short, short, short, short, short, short, short);
_mm_set_epi32 SSE2 intrin.h __m128i _mm_set_epi32(int, int, int, int);
_mm_set_epi64 SSE2 intrin.h __m128i _mm_set_epi64(__m64, __m64);
_mm_set_epi8 SSE2 intrin.h __m128i _mm_set_epi8(char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char);
_mm_set_pd SSE2 intrin.h __m128d _mm_set_pd(double, double);
_mm_set_pi16 MMX intrin.h __m64 _mm_set_pi16(short, short, short, short);
_mm_set_pi32 MMX intrin.h __m64 _mm_set_pi32(int, int);
_mm_set_pi8 MMX intrin.h __m64 _mm_set_pi8(char, char, char, char, char, char, char, char);
_mm_set_ps SSE intrin.h __m128 _mm_set_ps(float, float, float, float);
_mm_set_ps1 SSE intrin.h __m128 _mm_set_ps1(float);
_mm_set_sd SSE2 intrin.h __m128d _mm_set_sd(double);
_mm_set_ss SSE intrin.h __m128 _mm_set_ss(float);
_mm_set1_epi16 SSE2 intrin.h __m128i _mm_set1_epi16(short);
_mm_set1_epi32 SSE2 intrin.h __m128i _mm_set1_epi32(int);
_mm_set1_epi64 SSE2 intrin.h __m128i _mm_set1_epi64(__m64);
_mm_set1_epi8 SSE2 intrin.h __m128i _mm_set1_epi8(char);
_mm_set1_pd SSE2 intrin.h __m128d _mm_set1_pd(double);
_mm_set1_pi16 MMX intrin.h __m64 _mm_set1_pi16(short);
_mm_set1_pi32 MMX intrin.h __m64 _mm_set1_pi32(int);
_mm_set1_pi8 MMX intrin.h __m64 _mm_set1_pi8(char);
_mm_setcsr SSE intrin.h void _mm_setcsr(unsigned int);
_mm_setl_epi64 SSE2 intrin.h __m128i _mm_setl_epi64(__m128i);
_mm_setr_epi16 SSE2 intrin.h __m128i _mm_setr_epi16(short, short, short, short, short, short, short, short);
_mm_setr_epi32 SSE2 intrin.h __m128i _mm_setr_epi32(int, int, int, int);
_mm_setr_epi64 SSE2 intrin.h __m128i _mm_setr_epi64(__m64, __m64);
_mm_setr_epi8 SSE2 intrin.h __m128i _mm_setr_epi8(char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char);
_mm_setr_pd SSE2 intrin.h __m128d _mm_setr_pd(double, double);
_mm_setr_pi16 MMX intrin.h __m64 _mm_setr_pi16(short, short, short, short);
_mm_setr_pi32 MMX intrin.h __m64 _mm_setr_pi32(int, int);
_mm_setr_pi8 MMX intrin.h __m64 _mm_setr_pi8(char, char, char, char, char, char, char, char);
_mm_setr_ps SSE intrin.h __m128 _mm_setr_ps(float, float, float, float);
_mm_setzero_pd SSE2 intrin.h __m128d _mm_setzero_pd(void);
_mm_setzero_ps SSE intrin.h __m128 _mm_setzero_ps(void);
_mm_setzero_si128 SSE2 intrin.h __m128i _mm_setzero_si128(void);
_mm_setzero_si64 MMX intrin.h __m64 _mm_setzero_si64(void);
_mm_sfence SSE intrin.h void _mm_sfence(void);
_mm_sha_epi16 XOP ammintrin.h __m128i _mm_sha_epi16(__m128i, __m128i);
_mm_sha_epi32 XOP ammintrin.h __m128i _mm_sha_epi32(__m128i, __m128i);
_mm_sha_epi64 XOP ammintrin.h __m128i _mm_sha_epi64(__m128i, __m128i);
_mm_sha_epi8 XOP ammintrin.h __m128i _mm_sha_epi8(__m128i, __m128i);
_mm_shl_epi16 XOP ammintrin.h __m128i _mm_shl_epi16(__m128i, __m128i);
_mm_shl_epi32 XOP ammintrin.h __m128i _mm_shl_epi32(__m128i, __m128i);
_mm_shl_epi64 XOP ammintrin.h __m128i _mm_shl_epi64(__m128i, __m128i);
_mm_shl_epi8 XOP ammintrin.h __m128i _mm_shl_epi8(__m128i, __m128i);
_mm_shuffle_epi32 SSE2 intrin.h __m128i _mm_shuffle_epi32(__m128i, int);
_mm_shuffle_epi8 SSSE3 intrin.h __m128i _mm_shuffle_epi8(__m128i, __m128i);
_mm_shuffle_pd SSE2 intrin.h __m128d _mm_shuffle_pd(__m128d, __m128d, int);
_mm_shuffle_pi8 SSSE3 intrin.h __m64 _mm_shuffle_pi8(__m64, __m64);
_mm_shuffle_ps SSE intrin.h __m128 _mm_shuffle_ps(__m128, __m128, unsigned int);
_mm_shufflehi_epi16 SSE2 intrin.h __m128i _mm_shufflehi_epi16(__m128i, int);
_mm_shufflelo_epi16 SSE2 intrin.h __m128i _mm_shufflelo_epi16(__m128i, int);
_mm_sign_epi16 SSSE3 intrin.h __m128i _mm_sign_epi16(__m128i, __m128i);
_mm_sign_epi32 SSSE3 intrin.h __m128i _mm_sign_epi32(__m128i, __m128i);
_mm_sign_epi8 SSSE3 intrin.h __m128i _mm_sign_epi8(__m128i, __m128i);
_mm_sign_pi16 SSSE3 intrin.h __m64 _mm_sign_pi16(__m64, __m64);
_mm_sign_pi32 SSSE3 intrin.h __m64 _mm_sign_pi32(__m64, __m64);
_mm_sign_pi8 SSSE3 intrin.h __m64 _mm_sign_pi8(__m64, __m64);
_mm_sll_epi16 SSE2 intrin.h __m128i _mm_sll_epi16(__m128i, __m128i);
_mm_sll_epi32 SSE2 intrin.h __m128i _mm_sll_epi32(__m128i, __m128i);
_mm_sll_epi64 SSE2 intrin.h __m128i _mm_sll_epi64(__m128i, __m128i);
_mm_sll_pi16 MMX mmintrin.h __m64 _mm_sll_pi16(__m64, __m64); [Macro]
_mm_sll_pi32 MMX mmintrin.h __m64 _mm_sll_pi32(__m64, __m64); [Macro]
_mm_sll_si64 MMX mmintrin.h __m64 _mm_sll_si64(__m64, __m64); [Macro]
_mm_slli_epi16 SSE2 intrin.h __m128i _mm_slli_epi16(__m128i, int);
_mm_slli_epi32 SSE2 intrin.h __m128i _mm_slli_epi32(__m128i, int);
_mm_slli_epi64 SSE2 intrin.h __m128i _mm_slli_epi64(__m128i, int);
_mm_slli_pi16 MMX mmintrin.h __m64 _mm_slli_pi16(__m64, int); [Macro]
_mm_slli_pi32 MMX mmintrin.h __m64 _mm_slli_pi32(__m64, int); [Macro]
_mm_slli_si64 MMX mmintrin.h __m64 _mm_slli_si64(__m64, int); [Macro]
_mm_slli_si128 SSE2 intrin.h __m128i _mm_slli_si128(__m128i, int);
_mm_sllv_epi32 AVX2 immintrin.h __m128i _mm_sllv_epi32(__m128i, __m128i);
_mm_sllv_epi64 AVX2 immintrin.h __m128i _mm_sllv_epi64(__m128i, __m128i);
_mm_sqrt_pd SSE2 intrin.h __m128d _mm_sqrt_pd(__m128d);
_mm_sqrt_ps SSE intrin.h __m128 _mm_sqrt_ps(__m128);
_mm_sqrt_sd SSE2 intrin.h __m128d _mm_sqrt_sd(__m128d, __m128d);
_mm_sqrt_ss SSE intrin.h __m128 _mm_sqrt_ss(__m128);
_mm_sra_epi16 SSE2 intrin.h __m128i _mm_sra_epi16(__m128i, __m128i);
_mm_sra_epi32 SSE2 intrin.h __m128i _mm_sra_epi32(__m128i, __m128i);
_mm_sra_pi16 MMX mmintrin.h __m64 _mm_sra_pi16(__m64, __m64); [Macro]
_mm_sra_pi32 MMX mmintrin.h __m64 _mm_sra_pi32(__m64, __m64); [Macro]
_mm_srai_epi16 SSE2 intrin.h __m128i _mm_srai_epi16(__m128i, int);
_mm_srai_epi32 SSE2 intrin.h __m128i _mm_srai_epi32(__m128i, int);
_mm_srai_pi16 MMX mmintrin.h __m64 _mm_srai_pi16(__m64, int); [Macro]
_mm_srai_pi32 MMX mmintrin.h __m64 _mm_srai_pi32(__m64, int); [Macro]
_mm_srav_epi32 AVX2 immintrin.h __m128i _mm_srav_epi32(__m128i, __m128i);
_mm_srl_epi16 SSE2 intrin.h __m128i _mm_srl_epi16(__m128i, __m128i);
_mm_srl_epi32 SSE2 intrin.h __m128i _mm_srl_epi32(__m128i, __m128i);
_mm_srl_epi64 SSE2 intrin.h __m128i _mm_srl_epi64(__m128i, __m128i);
_mm_srl_pi16 MMX mmintrin.h __m64 _mm_srl_pi16(__m64, __m64); [Macro]
_mm_srl_pi32 MMX mmintrin.h __m64 _mm_srl_pi32(__m64, __m64); [Macro]
_mm_srl_si64 MMX mmintrin.h __m64 _mm_srl_si64(__m64, __m64); [Macro]
_mm_srli_epi16 SSE2 intrin.h __m128i _mm_srli_epi16(__m128i, int);
_mm_srli_epi32 SSE2 intrin.h __m128i _mm_srli_epi32(__m128i, int);
_mm_srli_epi64 SSE2 intrin.h __m128i _mm_srli_epi64(__m128i, int);
_mm_srli_pi16 MMX mmintrin.h __m64 _mm_srli_pi16(__m64, int); [Macro]
_mm_srli_pi32 MMX mmintrin.h __m64 _mm_srli_pi32(__m64, int); [Macro]
_mm_srli_si64 MMX mmintrin.h __m64 _mm_srli_si64(__m64, int); [Macro]
_mm_srli_si128 SSE2 intrin.h __m128i _mm_srli_si128(__m128i, int);
_mm_srlv_epi32 AVX2 immintrin.h __m128i _mm_srlv_epi32(__m128i, __m128i);
_mm_srlv_epi64 AVX2 immintrin.h __m128i _mm_srlv_epi64(__m128i, __m128i);
_mm_store_pd SSE2 intrin.h void _mm_store_pd(double*, __m128d);
_mm_store_ps SSE intrin.h void _mm_store_ps(float*, __m128);
_mm_store_ps1 SSE intrin.h void _mm_store_ps1(float*, __m128);
_mm_store_sd SSE2 intrin.h void _mm_store_sd(double*, __m128d);
_mm_store_si128 SSE2 intrin.h void _mm_store_si128(__m128i*, __m128i);
_mm_store_ss SSE intrin.h void _mm_store_ss(float*, __m128);
_mm_store1_pd SSE2 intrin.h void _mm_store1_pd(double*, __m128d);
_mm_storeh_pd SSE2 intrin.h void _mm_storeh_pd(double*, __m128d);
_mm_storeh_pi SSE intrin.h void _mm_storeh_pi(__m64*, __m128);
_mm_storel_epi64 SSE2 intrin.h void _mm_storel_epi64(__m128i*, __m128i);
_mm_storel_pd SSE2 intrin.h void _mm_storel_pd(double*, __m128d);
_mm_storel_pi SSE intrin.h void _mm_storel_pi(__m64*, __m128);
_mm_storer_pd SSE2 intrin.h void _mm_storer_pd(double*, __m128d);
_mm_storer_ps SSE intrin.h void _mm_storer_ps(float*, __m128);
_mm_storeu_pd SSE2 intrin.h void _mm_storeu_pd(double*, __m128d);
_mm_storeu_ps SSE intrin.h void _mm_storeu_ps(float*, __m128);
_mm_storeu_si128 SSE2 intrin.h void _mm_storeu_si128(__m128i*, __m128i);
_mm_stream_load_si128 SSE41 intrin.h __m128i _mm_stream_load_si128(__m128i*);
_mm_stream_pd SSE2 intrin.h void _mm_stream_pd(double*, __m128d);
_mm_stream_pi SSE intrin.h void _mm_stream_pi(__m64*, __m64);
_mm_stream_ps SSE intrin.h void _mm_stream_ps(float*, __m128);
_mm_stream_sd SSE4a intrin.h void _mm_stream_sd(double*, __m128d);
_mm_stream_si128 SSE2 intrin.h void _mm_stream_si128(__m128i*, __m128i);
_mm_stream_si32 SSE2 intrin.h void _mm_stream_si32(int*, int);
_mm_stream_ss SSE4a intrin.h void _mm_stream_ss(float*, __m128);
_mm_sub_epi16 SSE2 intrin.h __m128i _mm_sub_epi16(__m128i, __m128i);
_mm_sub_epi32 SSE2 intrin.h __m128i _mm_sub_epi32(__m128i, __m128i);
_mm_sub_epi64 SSE2 intrin.h __m128i _mm_sub_epi64(__m128i, __m128i);
_mm_sub_epi8 SSE2 intrin.h __m128i _mm_sub_epi8(__m128i, __m128i);
_mm_sub_pd SSE2 intrin.h __m128d _mm_sub_pd(__m128d, __m128d);
_mm_sub_pi8 MMX mmintrin.h __m64 _mm_sub_pi8(__m64, __m64); [Macro]
_mm_sub_pi16 MMX mmintrin.h __m64 _mm_sub_pi16(__m64, __m64); [Macro]
_mm_sub_pi32 MMX mmintrin.h __m64 _mm_sub_pi32(__m64, __m64); [Macro]
_mm_sub_ps SSE intrin.h __m128 _mm_sub_ps(__m128, __m128);
_mm_sub_sd SSE2 intrin.h __m128d _mm_sub_sd(__m128d, __m128d);
_mm_sub_si64 SSE2 intrin.h __m64 _mm_sub_si64(__m64, __m64);
_mm_sub_ss SSE intrin.h __m128 _mm_sub_ss(__m128, __m128);
_mm_subs_epi16 SSE2 intrin.h __m128i _mm_subs_epi16(__m128i, __m128i);
_mm_subs_epi8 SSE2 intrin.h __m128i _mm_subs_epi8(__m128i, __m128i);
_mm_subs_epu16 SSE2 intrin.h __m128i _mm_subs_epu16(__m128i, __m128i);
_mm_subs_epu8 SSE2 intrin.h __m128i _mm_subs_epu8(__m128i, __m128i);
_mm_subs_pi8 MMX mmintrin.h __m64 _mm_subs_pi8(__m64, __m64); [Macro]
_mm_subs_pi16 MMX mmintrin.h __m64 _mm_subs_pi16(__m64, __m64); [Macro]
_mm_subs_pu8 MMX mmintrin.h __m64 _mm_subs_pu8(__m64, __m64); [Macro]
_mm_subs_pu16 MMX mmintrin.h __m64 _mm_subs_pu16(__m64, __m64); [Macro]
_mm_testc_pd AVX immintrin.h int _mm_testc_pd(__m128d, __m128d);
_mm_testc_ps AVX immintrin.h int _mm_testc_ps(__m128, __m128);
_mm_testc_si128 SSE41 intrin.h int _mm_testc_si128(__m128i, __m128i);
_mm_testnzc_pd AVX immintrin.h int _mm_testnzc_pd(__m128d, __m128d);
_mm_testnzc_ps AVX immintrin.h int _mm_testnzc_ps(__m128, __m128);
_mm_testnzc_si128 SSE41 intrin.h int _mm_testnzc_si128(__m128i, __m128i);
_mm_testz_pd AVX immintrin.h int _mm_testz_pd(__m128d, __m128d);
_mm_testz_ps AVX immintrin.h int _mm_testz_ps(__m128, __m128);
_mm_testz_si128 SSE41 intrin.h int _mm_testz_si128(__m128i, __m128i);
_mm_ucomieq_sd SSE2 intrin.h int _mm_ucomieq_sd(__m128d, __m128d);
_mm_ucomieq_ss SSE intrin.h int _mm_ucomieq_ss(__m128, __m128);
_mm_ucomige_sd SSE2 intrin.h int _mm_ucomige_sd(__m128d, __m128d);
_mm_ucomige_ss SSE intrin.h int _mm_ucomige_ss(__m128, __m128);
_mm_ucomigt_sd SSE2 intrin.h int _mm_ucomigt_sd(__m128d, __m128d);
_mm_ucomigt_ss SSE intrin.h int _mm_ucomigt_ss(__m128, __m128);
_mm_ucomile_sd SSE2 intrin.h int _mm_ucomile_sd(__m128d, __m128d);
_mm_ucomile_ss SSE intrin.h int _mm_ucomile_ss(__m128, __m128);
_mm_ucomilt_sd SSE2 intrin.h int _mm_ucomilt_sd(__m128d, __m128d);
_mm_ucomilt_ss SSE intrin.h int _mm_ucomilt_ss(__m128, __m128);
_mm_ucomineq_sd SSE2 intrin.h int _mm_ucomineq_sd(__m128d, __m128d);
_mm_ucomineq_ss SSE intrin.h int _mm_ucomineq_ss(__m128, __m128);
_mm_unpackhi_epi16 SSE2 intrin.h __m128i _mm_unpackhi_epi16(__m128i, __m128i);
_mm_unpackhi_epi32 SSE2 intrin.h __m128i _mm_unpackhi_epi32(__m128i, __m128i);
_mm_unpackhi_epi64 SSE2 intrin.h __m128i _mm_unpackhi_epi64(__m128i, __m128i);
_mm_unpackhi_epi8 SSE2 intrin.h __m128i _mm_unpackhi_epi8(__m128i, __m128i);
_mm_unpackhi_pd SSE2 intrin.h __m128d _mm_unpackhi_pd(__m128d, __m128d);
_mm_unpackhi_pi8 MMX mmintrin.h __m64 _mm_unpackhi_pi8 (__m64, __m64); [Macro]
_mm_unpackhi_pi16 MMX mmintrin.h __m64 _mm_unpackhi_pi16 (__m64, __m64); [Macro]
_mm_unpackhi_pi32 MMX mmintrin.h __m64 _mm_unpackhi_pi32 (__m64, __m64); [Macro]
_mm_unpackhi_ps SSE intrin.h __m128 _mm_unpackhi_ps(__m128, __m128);
_mm_unpacklo_epi16 SSE2 intrin.h __m128i _mm_unpacklo_epi16(__m128i, __m128i);
_mm_unpacklo_epi32 SSE2 intrin.h __m128i _mm_unpacklo_epi32(__m128i, __m128i);
_mm_unpacklo_epi64 SSE2 intrin.h __m128i _mm_unpacklo_epi64(__m128i, __m128i);
_mm_unpacklo_epi8 SSE2 intrin.h __m128i _mm_unpacklo_epi8(__m128i, __m128i);
_mm_unpacklo_pd SSE2 intrin.h __m128d _mm_unpacklo_pd(__m128d, __m128d);
_mm_unpacklo_pi8 MMX mmintrin.h __m64 _mm_unpacklo_pi8 (__m64, __m64); [Macro]
_mm_unpacklo_pi16 MMX mmintrin.h __m64 _mm_unpacklo_pi16 (__m64, __m64); [Macro]
_mm_unpacklo_pi32 MMX mmintrin.h __m64 _mm_unpacklo_pi32 (__m64, __m64); [Macro]
_mm_unpacklo_ps SSE intrin.h __m128 _mm_unpacklo_ps(__m128, __m128);
_mm_xor_pd SSE2 intrin.h __m128d _mm_xor_pd(__m128d, __m128d);
_mm_xor_ps SSE intrin.h __m128 _mm_xor_ps(__m128, __m128);
_mm_xor_si64 MMX mmintrin.h __m64 _mm_xor_si64(__m64, __m64); [Macro]
_mm_xor_si128 SSE2 intrin.h __m128i _mm_xor_si128(__m128i, __m128i);
_mm256_abs_epi16 AVX2 immintrin.h __m256i _mm256_abs_epi16(__m256i);
_mm256_abs_epi32 AVX2 immintrin.h __m256i _mm256_abs_epi32(__m256i);
_mm256_abs_epi8 AVX2 immintrin.h __m256i _mm256_abs_epi8(__m256i);
_mm256_add_epi16 AVX2 immintrin.h __m256i _mm256_add_epi16(__m256i, __m256i);
_mm256_add_epi32 AVX2 immintrin.h __m256i _mm256_add_epi32(__m256i, __m256i);
_mm256_add_epi64 AVX2 immintrin.h __m256i _mm256_add_epi64(__m256i, __m256i);
_mm256_add_epi8 AVX2 immintrin.h __m256i _mm256_add_epi8(__m256i, __m256i);
_mm256_add_pd AVX immintrin.h __m256d _mm256_add_pd(__m256d, __m256d);
_mm256_add_ps AVX immintrin.h __m256 _mm256_add_ps(__m256, __m256);
_mm256_adds_epi16 AVX2 immintrin.h __m256i _mm256_adds_epi16(__m256i, __m256i);
_mm256_adds_epi8 AVX2 immintrin.h __m256i _mm256_adds_epi8(__m256i, __m256i);
_mm256_adds_epu16 AVX2 immintrin.h __m256i _mm256_adds_epu16(__m256i, __m256i);
_mm256_adds_epu8 AVX2 immintrin.h __m256i _mm256_adds_epu8(__m256i, __m256i);
_mm256_addsub_pd AVX immintrin.h __m256d _mm256_addsub_pd(__m256d, __m256d);
_mm256_addsub_ps AVX immintrin.h __m256 _mm256_addsub_ps(__m256, __m256);
_mm256_alignr_epi8 AVX2 immintrin.h __m256i _mm256_alignr_epi8(__m256i, __m256i, const int);
_mm256_and_pd AVX immintrin.h __m256d _mm256_and_pd(__m256d, __m256d);
_mm256_and_ps AVX immintrin.h __m256 _mm256_and_ps(__m256, __m256);
_mm256_and_si256 AVX2 immintrin.h __m256i _mm256_and_si256(__m256i, __m256i);
_mm256_andnot_pd AVX immintrin.h __m256d _mm256_andnot_pd(__m256d, __m256d);
_mm256_andnot_ps AVX immintrin.h __m256 _mm256_andnot_ps(__m256, __m256);
_mm256_andnot_si256 AVX2 immintrin.h __m256i _mm256_andnot_si256(__m256i, __m256i);
_mm256_avg_epu16 AVX2 immintrin.h __m256i _mm256_avg_epu16(__m256i, __m256i);
_mm256_avg_epu8 AVX2 immintrin.h __m256i _mm256_avg_epu8(__m256i, __m256i);
_mm256_blend_epi16 AVX2 immintrin.h __m256i _mm256_blend_epi16(__m256i, __m256i, const int);
_mm256_blend_epi32 AVX2 immintrin.h __m256i _mm256_blend_epi32(__m256i, __m256i, const int);
_mm256_blend_pd AVX immintrin.h __m256d _mm256_blend_pd(__m256d, __m256d, const int);
_mm256_blend_ps AVX immintrin.h __m256 _mm256_blend_ps(__m256, __m256, const int);
_mm256_blendv_epi8 AVX2 immintrin.h __m256i _mm256_blendv_epi8(__m256i, __m256i, __m256i);
_mm256_blendv_pd AVX immintrin.h __m256d _mm256_blendv_pd(__m256d, __m256d, __m256d);
_mm256_blendv_ps AVX immintrin.h __m256 _mm256_blendv_ps(__m256, __m256, __m256);
_mm256_broadcast_pd AVX immintrin.h __m256d _mm256_broadcast_pd(__m128d const *);
_mm256_broadcast_ps AVX immintrin.h __m256 _mm256_broadcast_ps(__m128 const *);
_mm256_broadcast_sd AVX immintrin.h __m256d _mm256_broadcast_sd(double const *);
_mm256_broadcast_ss AVX immintrin.h __m256 _mm256_broadcast_ss(float const *);
_mm256_broadcastb_epi8 AVX2 immintrin.h __m256i _mm256_broadcastb_epi8(__m128i);
_mm256_broadcastd_epi32 AVX2 immintrin.h __m256i _mm256_broadcastd_epi32(__m128i);
_mm256_broadcastq_epi64 AVX2 immintrin.h __m256i _mm256_broadcastq_epi64(__m128i);
_mm256_broadcastsd_pd AVX2 immintrin.h __m256d _mm256_broadcastsd_pd(__m128d);
_mm256_broadcastsi128_si256 AVX2 immintrin.h __m256i _mm256_broadcastsi128_si256(__m128i);
_mm256_broadcastss_ps AVX2 immintrin.h __m256 _mm256_broadcastss_ps(__m128);
_mm256_broadcastw_epi16 AVX2 immintrin.h __m256i _mm256_broadcastw_epi16(__m128i);
_mm256_castpd_ps AVX immintrin.h __m256 _mm256_castpd_ps(__m256d);
_mm256_castpd_si256 AVX immintrin.h __m256i _mm256_castpd_si256(__m256d);
_mm256_castpd128_pd256 AVX immintrin.h __m256d _mm256_castpd128_pd256(__m128d);
_mm256_castpd256_pd128 AVX immintrin.h __m128d _mm256_castpd256_pd128(__m256d);
_mm256_castps_pd AVX immintrin.h __m256d _mm256_castps_pd(__m256);
_mm256_castps_si256 AVX immintrin.h __m256i _mm256_castps_si256(__m256);
_mm256_castps128_ps256 AVX immintrin.h __m256 _mm256_castps128_ps256(__m128);
_mm256_castps256_ps128 AVX immintrin.h __m128 _mm256_castps256_ps128(__m256);
_mm256_castsi128_si256 AVX immintrin.h __m256i _mm256_castsi128_si256(__m128i);
_mm256_castsi256_pd AVX immintrin.h __m256d _mm256_castsi256_pd(__m256i);
_mm256_castsi256_ps AVX immintrin.h __m256 _mm256_castsi256_ps(__m256i);
_mm256_castsi256_si128 AVX immintrin.h __m128i _mm256_castsi256_si128(__m256i);
_mm256_cmov_si256 XOP ammintrin.h __m256i _mm256_cmov_si256(__m256i, __m256i, __m256i);
_mm256_cmp_pd AVX immintrin.h __m256d _mm256_cmp_pd(__m256d, __m256d, const int);
_mm256_cmp_ps AVX immintrin.h __m256 _mm256_cmp_ps(__m256, __m256, const int);
_mm256_cmpeq_epi16 AVX2 immintrin.h __m256i _mm256_cmpeq_epi16(__m256i, __m256i);
_mm256_cmpeq_epi32 AVX2 immintrin.h __m256i _mm256_cmpeq_epi32(__m256i, __m256i);
_mm256_cmpeq_epi64 AVX2 immintrin.h __m256i _mm256_cmpeq_epi64(__m256i, __m256i);
_mm256_cmpeq_epi8 AVX2 immintrin.h __m256i _mm256_cmpeq_epi8(__m256i, __m256i);
_mm256_cmpgt_epi16 AVX2 immintrin.h __m256i _mm256_cmpgt_epi16(__m256i, __m256i);
_mm256_cmpgt_epi32 AVX2 immintrin.h __m256i _mm256_cmpgt_epi32(__m256i, __m256i);
_mm256_cmpgt_epi64 AVX2 immintrin.h __m256i _mm256_cmpgt_epi64(__m256i, __m256i);
_mm256_cmpgt_epi8 AVX2 immintrin.h __m256i _mm256_cmpgt_epi8(__m256i, __m256i);
_mm256_cvtepi16_epi32 AVX2 immintrin.h __m256i _mm256_cvtepi16_epi32(__m128i);
_mm256_cvtepi16_epi64 AVX2 immintrin.h __m256i _mm256_cvtepi16_epi64(__m128i);
_mm256_cvtepi32_epi64 AVX2 immintrin.h __m256i _mm256_cvtepi32_epi64(__m128i);
_mm256_cvtepi32_pd AVX immintrin.h __m256d _mm256_cvtepi32_pd(__m128i);
_mm256_cvtepi32_ps AVX immintrin.h __m256 _mm256_cvtepi32_ps(__m256i);
_mm256_cvtepi8_epi16 AVX2 immintrin.h __m256i _mm256_cvtepi8_epi16(__m128i);
_mm256_cvtepi8_epi32 AVX2 immintrin.h __m256i _mm256_cvtepi8_epi32(__m128i);
_mm256_cvtepi8_epi64 AVX2 immintrin.h __m256i _mm256_cvtepi8_epi64(__m128i);
_mm256_cvtepu16_epi32 AVX2 immintrin.h __m256i _mm256_cvtepu16_epi32(__m128i);
_mm256_cvtepu16_epi64 AVX2 immintrin.h __m256i _mm256_cvtepu16_epi64(__m128i);
_mm256_cvtepu32_epi64 AVX2 immintrin.h __m256i _mm256_cvtepu32_epi64(__m128i);
_mm256_cvtepu8_epi16 AVX2 immintrin.h __m256i _mm256_cvtepu8_epi16(__m128i);
_mm256_cvtepu8_epi32 AVX2 immintrin.h __m256i _mm256_cvtepu8_epi32(__m128i);
_mm256_cvtepu8_epi64 AVX2 immintrin.h __m256i _mm256_cvtepu8_epi64(__m128i);
_mm256_cvtpd_epi32 AVX immintrin.h __m128i _mm256_cvtpd_epi32(__m256d);
_mm256_cvtpd_ps AVX immintrin.h __m128 _mm256_cvtpd_ps(__m256d);
_mm256_cvtph_ps F16C immintrin.h __m256 _mm256_cvtph_ps(__m128i);
_mm256_cvtps_epi32 AVX immintrin.h __m256i _mm256_cvtps_epi32(__m256);
_mm256_cvtps_pd AVX immintrin.h __m256d _mm256_cvtps_pd(__m128);
_mm256_cvtps_ph F16C immintrin.h __m128i _mm256_cvtps_ph(__m256, const int);
_mm256_cvttpd_epi32 AVX immintrin.h __m128i _mm256_cvttpd_epi32(__m256d);
_mm256_cvttps_epi32 AVX immintrin.h __m256i _mm256_cvttps_epi32(__m256);
_mm256_div_pd AVX immintrin.h __m256d _mm256_div_pd(__m256d, __m256d);
_mm256_div_ps AVX immintrin.h __m256 _mm256_div_ps(__m256, __m256);
_mm256_dp_ps AVX immintrin.h __m256 _mm256_dp_ps(__m256, __m256, const int);
_mm256_extractf128_pd AVX immintrin.h __m128d _mm256_extractf128_pd(__m256d, const int);
_mm256_extractf128_ps AVX immintrin.h __m128 _mm256_extractf128_ps(__m256, const int);
_mm256_extractf128_si256 AVX immintrin.h __m128i _mm256_extractf128_si256(__m256i, const int);
_mm256_extracti128_si256 AVX2 immintrin.h __m128i _mm256_extracti128_si256(__m256i, int);
_mm256_fmadd_pd FMA immintrin.h __m256d _mm256_fmadd_pd(__m256d, __m256d, __m256d);
_mm256_fmadd_ps FMA immintrin.h __m256 _mm256_fmadd_ps(__m256, __m256, __m256);
_mm256_fmaddsub_pd FMA immintrin.h __m256d _mm256_fmaddsub_pd(__m256d, __m256d, __m256d);
_mm256_fmaddsub_ps FMA immintrin.h __m256 _mm256_fmaddsub_ps(__m256, __m256, __m256);
_mm256_fmsub_pd FMA immintrin.h __m256d _mm256_fmsub_pd(__m256d, __m256d, __m256d);
_mm256_fmsub_ps FMA immintrin.h __m256 _mm256_fmsub_ps(__m256, __m256, __m256);
_mm256_fmsubadd_pd FMA immintrin.h __m256d _mm256_fmsubadd_pd(__m256d, __m256d, __m256d);
_mm256_fmsubadd_ps FMA immintrin.h __m256 _mm256_fmsubadd_ps(__m256, __m256, __m256);
_mm256_fnmadd_pd FMA immintrin.h __m256d _mm256_fnmadd_pd(__m256d, __m256d, __m256d);
_mm256_fnmadd_ps FMA immintrin.h __m256 _mm256_fnmadd_ps(__m256, __m256, __m256);
_mm256_fnmsub_pd FMA immintrin.h __m256d _mm256_fnmsub_pd(__m256d, __m256d, __m256d);
_mm256_fnmsub_ps FMA immintrin.h __m256 _mm256_fnmsub_ps(__m256, __m256, __m256);
_mm256_frcz_pd XOP ammintrin.h __m256d _mm256_frcz_pd(__m256d);
_mm256_frcz_ps XOP ammintrin.h __m256 _mm256_frcz_ps(__m256);
_mm256_hadd_epi16 AVX2 immintrin.h __m256i _mm256_hadd_epi16(__m256i, __m256i);
_mm256_hadd_epi32 AVX2 immintrin.h __m256i _mm256_hadd_epi32(__m256i, __m256i);
_mm256_hadd_pd AVX immintrin.h __m256d _mm256_hadd_pd(__m256d, __m256d);
_mm256_hadd_ps AVX immintrin.h __m256 _mm256_hadd_ps(__m256, __m256);
_mm256_hadds_epi16 AVX2 immintrin.h __m256i _mm256_hadds_epi16(__m256i, __m256i);
_mm256_hsub_epi16 AVX2 immintrin.h __m256i _mm256_hsub_epi16(__m256i, __m256i);
_mm256_hsub_epi32 AVX2 immintrin.h __m256i _mm256_hsub_epi32(__m256i, __m256i);
_mm256_hsub_pd AVX immintrin.h __m256d _mm256_hsub_pd(__m256d, __m256d);
_mm256_hsub_ps AVX immintrin.h __m256 _mm256_hsub_ps(__m256, __m256);
_mm256_hsubs_epi16 AVX2 immintrin.h __m256i _mm256_hsubs_epi16(__m256i, __m256i);
_mm256_i32gather_epi32 AVX2 immintrin.h __m256i _mm256_i32gather_epi32(int const *, __m256i, const int);
_mm256_i32gather_epi64 AVX2 immintrin.h __m256i _mm256_i32gather_epi64(__int64 const *, __m128i, const int);
_mm256_i32gather_pd AVX2 immintrin.h __m256d _mm256_i32gather_pd(double const *, __m128i, const int);
_mm256_i32gather_ps AVX2 immintrin.h __m256 _mm256_i32gather_ps(float const *, __m256i, const int);
_mm256_i64gather_epi32 AVX2 immintrin.h __m128i _mm256_i64gather_epi32(int const *, __m256i, const int);
_mm256_i64gather_epi64 AVX2 immintrin.h __m256i _mm256_i64gather_epi64(__int64 const *, __m256i, const int);
_mm256_i64gather_pd AVX2 immintrin.h __m256d _mm256_i64gather_pd(double const *, __m256i, const int);
_mm256_i64gather_ps AVX2 immintrin.h __m128 _mm256_i64gather_ps(float const *, __m256i, const int);
_mm256_insertf128_pd AVX immintrin.h __m256d _mm256_insertf128_pd(__m256d, __m128d, int);
_mm256_insertf128_ps AVX immintrin.h __m256 _mm256_insertf128_ps(__m256, __m128, int);
_mm256_insertf128_si256 AVX immintrin.h __m256i _mm256_insertf128_si256(__m256i, __m128i, int);
_mm256_inserti128_si256 AVX2 immintrin.h __m256i _mm256_inserti128_si256(__m256i, __m128i, int);
_mm256_lddqu_si256 AVX immintrin.h __m256i _mm256_lddqu_si256(__m256i *);
_mm256_load_pd AVX immintrin.h __m256d _mm256_load_pd(double const *);
_mm256_load_ps AVX immintrin.h __m256 _mm256_load_ps(float const *);
_mm256_load_si256 AVX immintrin.h __m256i _mm256_load_si256(__m256i *);
_mm256_loadu_pd AVX immintrin.h __m256d _mm256_loadu_pd(double const *);
_mm256_loadu_ps AVX immintrin.h __m256 _mm256_loadu_ps(float const *);
_mm256_loadu_si256 AVX immintrin.h __m256i _mm256_loadu_si256(__m256i *);
_mm256_macc_pd FMA4 ammintrin.h __m256d _mm256_macc_pd(__m256d, __m256d, __m256d);
_mm256_macc_ps FMA4 ammintrin.h __m256 _mm256_macc_ps(__m256, __m256, __m256);
_mm256_madd_epi16 AVX2 immintrin.h __m256i _mm256_madd_epi16(__m256i, __m256i);
_mm256_maddsub_pd FMA4 ammintrin.h __m256d _mm256_maddsub_pd(__m256d, __m256d, __m256d);
_mm256_maddsub_ps FMA4 ammintrin.h __m256 _mm256_maddsub_ps(__m256, __m256, __m256);
_mm256_maddubs_epi16 AVX2 immintrin.h __m256i _mm256_maddubs_epi16(__m256i, __m256i);
_mm256_mask_i32gather_epi32 AVX2 immintrin.h __m256i _mm256_mask_i32gather_epi32(__m256i, int const *, __m256i, __m256i, const int);
_mm256_mask_i32gather_epi64 AVX2 immintrin.h __m256i _mm256_mask_i32gather_epi64(__m256i, __int64 const *, __m128i, __m256i, const int);
_mm256_mask_i32gather_pd AVX2 immintrin.h __m256d _mm256_mask_i32gather_pd(__m256d, double const *, __m128i, __m256d, const int);
_mm256_mask_i32gather_ps AVX2 immintrin.h __m256 _mm256_mask_i32gather_ps(__m256, float const *, __m256i, __m256, const int);
_mm256_mask_i64gather_epi32 AVX2 immintrin.h __m128i _mm256_mask_i64gather_epi32(__m128i, int const *, __m256i, __m128i, const int);
_mm256_mask_i64gather_epi64 AVX2 immintrin.h __m256i _mm256_mask_i64gather_epi64(__m256i, __int64 const *, __m256i, __m256i, const int);
_mm256_mask_i64gather_pd AVX2 immintrin.h __m256d _mm256_mask_i64gather_pd(__m256d, double const *, __m256i, __m256d, const int);
_mm256_mask_i64gather_ps AVX2 immintrin.h __m128 _mm256_mask_i64gather_ps(__m128, float const *, __m256i, __m128, const int);
_mm256_maskload_epi32 AVX2 immintrin.h __m256i _mm256_maskload_epi32(int const *, __m256i);
_mm256_maskload_epi64 AVX2 immintrin.h __m256i _mm256_maskload_epi64(__int64 const *, __m256i);
_mm256_maskload_pd AVX immintrin.h __m256d _mm256_maskload_pd(double const *, __m256i);
_mm256_maskload_ps AVX immintrin.h __m256 _mm256_maskload_ps(float const *, __m256i);
_mm256_maskstore_epi32 AVX2 immintrin.h void _mm256_maskstore_epi32(int *, __m256i, __m256i);
_mm256_maskstore_epi64 AVX2 immintrin.h void _mm256_maskstore_epi64(__int64 *, __m256i, __m256i);
_mm256_maskstore_pd AVX immintrin.h void _mm256_maskstore_pd(double *, __m256i, __m256d);
_mm256_maskstore_ps AVX immintrin.h void _mm256_maskstore_ps(float *, __m256i, __m256);
_mm256_max_epi16 AVX2 immintrin.h __m256i _mm256_max_epi16(__m256i, __m256i);
_mm256_max_epi32 AVX2 immintrin.h __m256i _mm256_max_epi32(__m256i, __m256i);
_mm256_max_epi8 AVX2 immintrin.h __m256i _mm256_max_epi8(__m256i, __m256i);
_mm256_max_epu16 AVX2 immintrin.h __m256i _mm256_max_epu16(__m256i, __m256i);
_mm256_max_epu32 AVX2 immintrin.h __m256i _mm256_max_epu32(__m256i, __m256i);
_mm256_max_epu8 AVX2 immintrin.h __m256i _mm256_max_epu8(__m256i, __m256i);
_mm256_max_pd AVX immintrin.h __m256d _mm256_max_pd(__m256d, __m256d);
_mm256_max_ps AVX immintrin.h __m256 _mm256_max_ps(__m256, __m256);
_mm256_min_epi16 AVX2 immintrin.h __m256i _mm256_min_epi16(__m256i, __m256i);
_mm256_min_epi32 AVX2 immintrin.h __m256i _mm256_min_epi32(__m256i, __m256i);
_mm256_min_epi8 AVX2 immintrin.h __m256i _mm256_min_epi8(__m256i, __m256i);
_mm256_min_epu16 AVX2 immintrin.h __m256i _mm256_min_epu16(__m256i, __m256i);
_mm256_min_epu32 AVX2 immintrin.h __m256i _mm256_min_epu32(__m256i, __m256i);
_mm256_min_epu8 AVX2 immintrin.h __m256i _mm256_min_epu8(__m256i, __m256i);
_mm256_min_pd AVX immintrin.h __m256d _mm256_min_pd(__m256d, __m256d);
_mm256_min_ps AVX immintrin.h __m256 _mm256_min_ps(__m256, __m256);
_mm256_movedup_pd AVX immintrin.h __m256d _mm256_movedup_pd(__m256d);
_mm256_movehdup_ps AVX immintrin.h __m256 _mm256_movehdup_ps(__m256);
_mm256_moveldup_ps AVX immintrin.h __m256 _mm256_moveldup_ps(__m256);
_mm256_movemask_epi8 AVX2 immintrin.h int _mm256_movemask_epi8(__m256i);
_mm256_movemask_pd AVX immintrin.h int _mm256_movemask_pd(__m256d);
_mm256_movemask_ps AVX immintrin.h int _mm256_movemask_ps(__m256);
_mm256_mpsadbw_epu8 AVX2 immintrin.h __m256i _mm256_mpsadbw_epu8(__m256i, __m256i, const int);
_mm256_msub_pd FMA4 ammintrin.h __m256d _mm256_msub_pd(__m256d, __m256d, __m256d);
_mm256_msub_ps FMA4 ammintrin.h __m256 _mm256_msub_ps(__m256, __m256, __m256);
_mm256_msubadd_pd FMA4 ammintrin.h __m256d _mm256_msubadd_pd(__m256d, __m256d, __m256d);
_mm256_msubadd_ps FMA4 ammintrin.h __m256 _mm256_msubadd_ps(__m256, __m256, __m256);
_mm256_mul_epi32 AVX2 immintrin.h __m256i _mm256_mul_epi32(__m256i, __m256i);
_mm256_mul_epu32 AVX2 immintrin.h __m256i _mm256_mul_epu32(__m256i, __m256i);
_mm256_mul_pd AVX immintrin.h __m256d _mm256_mul_pd(__m256d, __m256d);
_mm256_mul_ps AVX immintrin.h __m256 _mm256_mul_ps(__m256, __m256);
_mm256_mulhi_epi16 AVX2 immintrin.h __m256i _mm256_mulhi_epi16(__m256i, __m256i);
_mm256_mulhi_epu16 AVX2 immintrin.h __m256i _mm256_mulhi_epu16(__m256i, __m256i);
_mm256_mulhrs_epi16 AVX2 immintrin.h __m256i _mm256_mulhrs_epi16(__m256i, __m256i);
_mm256_mullo_epi16 AVX2 immintrin.h __m256i _mm256_mullo_epi16(__m256i, __m256i);
_mm256_mullo_epi32 AVX2 immintrin.h __m256i _mm256_mullo_epi32(__m256i, __m256i);
_mm256_nmacc_pd FMA4 ammintrin.h __m256d _mm256_nmacc_pd(__m256d, __m256d, __m256d);
_mm256_nmacc_ps FMA4 ammintrin.h __m256 _mm256_nmacc_ps(__m256, __m256, __m256);
_mm256_nmsub_pd FMA4 ammintrin.h __m256d _mm256_nmsub_pd(__m256d, __m256d, __m256d);
_mm256_nmsub_ps FMA4 ammintrin.h __m256 _mm256_nmsub_ps(__m256, __m256, __m256);
_mm256_or_pd AVX immintrin.h __m256d _mm256_or_pd(__m256d, __m256d);
_mm256_or_ps AVX immintrin.h __m256 _mm256_or_ps(__m256, __m256);
_mm256_or_si256 AVX2 immintrin.h __m256i _mm256_or_si256(__m256i, __m256i);
_mm256_packs_epi16 AVX2 immintrin.h __m256i _mm256_packs_epi16(__m256i, __m256i);
_mm256_packs_epi32 AVX2 immintrin.h __m256i _mm256_packs_epi32(__m256i, __m256i);
_mm256_packus_epi16 AVX2 immintrin.h __m256i _mm256_packus_epi16(__m256i, __m256i);
_mm256_packus_epi32 AVX2 immintrin.h __m256i _mm256_packus_epi32(__m256i, __m256i);
_mm256_permute_pd AVX immintrin.h __m256d _mm256_permute_pd(__m256d, int);
_mm256_permute_ps AVX immintrin.h __m256 _mm256_permute_ps(__m256, int);
_mm256_permute2_pd XOP ammintrin.h __m256d _mm256_permute2_pd(__m256d, __m256d, __m256i, int);
_mm256_permute2_ps XOP ammintrin.h __m256 _mm256_permute2_ps(__m256, __m256, __m256i, int);
_mm256_permute2f128_pd AVX immintrin.h __m256d _mm256_permute2f128_pd(__m256d, __m256d, int);
_mm256_permute2f128_ps AVX immintrin.h __m256 _mm256_permute2f128_ps(__m256, __m256, int);
_mm256_permute2f128_si256 AVX immintrin.h __m256i _mm256_permute2f128_si256(__m256i, __m256i, int);
_mm256_permute2x128_si256 AVX2 immintrin.h __m256i _mm256_permute2x128_si256(__m256i, __m256i, const int);
_mm256_permute4x64_epi64 AVX2 immintrin.h __m256i _mm256_permute4x64_epi64(__m256i, const int);
_mm256_permute4x64_pd AVX2 immintrin.h __m256d _mm256_permute4x64_pd(__m256d, const int);
_mm256_permutevar_pd AVX immintrin.h __m256d _mm256_permutevar_pd(__m256d, __m256i);
_mm256_permutevar_ps AVX immintrin.h __m256 _mm256_permutevar_ps(__m256, __m256i);
_mm256_permutevar8x32_epi32 AVX2 immintrin.h __m256i _mm256_permutevar8x32_epi32(__m256i, __m256i);
_mm256_permutevar8x32_ps AVX2 immintrin.h __m256 _mm256_permutevar8x32_ps(__m256, __m256i);
_mm256_rcp_ps AVX immintrin.h __m256 _mm256_rcp_ps(__m256);
_mm256_round_pd AVX immintrin.h __m256d _mm256_round_pd(__m256d, int);
_mm256_round_ps AVX immintrin.h __m256 _mm256_round_ps(__m256, int);
_mm256_rsqrt_ps AVX immintrin.h __m256 _mm256_rsqrt_ps(__m256);
_mm256_sad_epu8 AVX2 immintrin.h __m256i _mm256_sad_epu8(__m256i, __m256i);
_mm256_set_epi16 AVX immintrin.h __m256i _mm256_set_epi16(short, short, short, short, short, short, short, short, short, short, short, short, short, short, short, short);
_mm256_set_epi32 AVX immintrin.h __m256i _mm256_set_epi32(int, int, int, int, int, int, int, int);
_mm256_set_epi8 AVX immintrin.h __m256i _mm256_set_epi8(char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char);
_mm256_set_pd AVX immintrin.h __m256d _mm256_set_pd(double, double, double, double);
_mm256_set_ps AVX immintrin.h __m256 _mm256_set_ps(float, float, float, float, float, float, float, float);
_mm256_set1_epi16 AVX immintrin.h __m256i _mm256_set1_epi16(short);
_mm256_set1_epi32 AVX immintrin.h __m256i _mm256_set1_epi32(int);
_mm256_set1_epi8 AVX immintrin.h __m256i _mm256_set1_epi8(char);
_mm256_set1_pd AVX immintrin.h __m256d _mm256_set1_pd(double);
_mm256_set1_ps AVX immintrin.h __m256 _mm256_set1_ps(float);
_mm256_setr_epi16 AVX immintrin.h __m256i _mm256_setr_epi16(short, short, short, short, short, short, short, short, short, short, short, short, short, short, short, short);
_mm256_setr_epi32 AVX immintrin.h __m256i _mm256_setr_epi32(int, int, int, int, int, int, int, int);
_mm256_setr_epi8 AVX immintrin.h __m256i _mm256_setr_epi8(char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char, char);
_mm256_setr_pd AVX immintrin.h __m256d _mm256_setr_pd(double, double, double, double);
_mm256_setr_ps AVX immintrin.h __m256 _mm256_setr_ps(float, float, float, float, float, float, float, float);
_mm256_setzero_pd AVX immintrin.h __m256d _mm256_setzero_pd(void);
_mm256_setzero_ps AVX immintrin.h __m256 _mm256_setzero_ps(void);
_mm256_setzero_si256 AVX immintrin.h __m256i _mm256_setzero_si256(void);
_mm256_shuffle_epi32 AVX2 immintrin.h __m256i _mm256_shuffle_epi32(__m256i, const int);
_mm256_shuffle_epi8 AVX2 immintrin.h __m256i _mm256_shuffle_epi8(__m256i, __m256i);
_mm256_shuffle_pd AVX immintrin.h __m256d _mm256_shuffle_pd(__m256d, __m256d, const int);
_mm256_shuffle_ps AVX immintrin.h __m256 _mm256_shuffle_ps(__m256, __m256, const int);
_mm256_shufflehi_epi16 AVX2 immintrin.h __m256i _mm256_shufflehi_epi16(__m256i, const int);
_mm256_shufflelo_epi16 AVX2 immintrin.h __m256i _mm256_shufflelo_epi16(__m256i, const int);
_mm256_sign_epi16 AVX2 immintrin.h __m256i _mm256_sign_epi16(__m256i, __m256i);
_mm256_sign_epi32 AVX2 immintrin.h __m256i _mm256_sign_epi32(__m256i, __m256i);
_mm256_sign_epi8 AVX2 immintrin.h __m256i _mm256_sign_epi8(__m256i, __m256i);
_mm256_sll_epi16 AVX2 immintrin.h __m256i _mm256_sll_epi16(__m256i, __m128i);
_mm256_sll_epi32 AVX2 immintrin.h __m256i _mm256_sll_epi32(__m256i, __m128i);
_mm256_sll_epi64 AVX2 immintrin.h __m256i _mm256_sll_epi64(__m256i, __m128i);
_mm256_slli_epi16 AVX2 immintrin.h __m256i _mm256_slli_epi16(__m256i, int);
_mm256_slli_epi32 AVX2 immintrin.h __m256i _mm256_slli_epi32(__m256i, int);
_mm256_slli_epi64 AVX2 immintrin.h __m256i _mm256_slli_epi64(__m256i, int);
_mm256_slli_si256 AVX2 immintrin.h __m256i _mm256_slli_si256(__m256i, int);
_mm256_sllv_epi32 AVX2 immintrin.h __m256i _mm256_sllv_epi32(__m256i, __m256i);
_mm256_sllv_epi64 AVX2 immintrin.h __m256i _mm256_sllv_epi64(__m256i, __m256i);
_mm256_sqrt_pd AVX immintrin.h __m256d _mm256_sqrt_pd(__m256d);
_mm256_sqrt_ps AVX immintrin.h __m256 _mm256_sqrt_ps(__m256);
_mm256_sra_epi16 AVX2 immintrin.h __m256i _mm256_sra_epi16(__m256i, __m128i);
_mm256_sra_epi32 AVX2 immintrin.h __m256i _mm256_sra_epi32(__m256i, __m128i);
_mm256_srai_epi16 AVX2 immintrin.h __m256i _mm256_srai_epi16(__m256i, int);
_mm256_srai_epi32 AVX2 immintrin.h __m256i _mm256_srai_epi32(__m256i, int);
_mm256_srav_epi32 AVX2 immintrin.h __m256i _mm256_srav_epi32(__m256i, __m256i);
_mm256_srl_epi16 AVX2 immintrin.h __m256i _mm256_srl_epi16(__m256i, __m128i);
_mm256_srl_epi32 AVX2 immintrin.h __m256i _mm256_srl_epi32(__m256i, __m128i);
_mm256_srl_epi64 AVX2 immintrin.h __m256i _mm256_srl_epi64(__m256i, __m128i);
_mm256_srli_epi16 AVX2 immintrin.h __m256i _mm256_srli_epi16(__m256i, int);
_mm256_srli_epi32 AVX2 immintrin.h __m256i _mm256_srli_epi32(__m256i, int);
_mm256_srli_epi64 AVX2 immintrin.h __m256i _mm256_srli_epi64(__m256i, int);
_mm256_srli_si256 AVX2 immintrin.h __m256i _mm256_srli_si256(__m256i, int);
_mm256_srlv_epi32 AVX2 immintrin.h __m256i _mm256_srlv_epi32(__m256i, __m256i);
_mm256_srlv_epi64 AVX2 immintrin.h __m256i _mm256_srlv_epi64(__m256i, __m256i);
_mm256_store_pd AVX immintrin.h void _mm256_store_pd(double *, __m256d);
_mm256_store_ps AVX immintrin.h void _mm256_store_ps(float *, __m256);
_mm256_store_si256 AVX immintrin.h void _mm256_store_si256(__m256i *, __m256i);
_mm256_storeu_pd AVX immintrin.h void _mm256_storeu_pd(double *, __m256d);
_mm256_storeu_ps AVX immintrin.h void _mm256_storeu_ps(float *, __m256);
_mm256_storeu_si256 AVX immintrin.h void _mm256_storeu_si256(__m256i *, __m256i);
_mm256_stream_load_si256 AVX2 immintrin.h __m256i _mm256_stream_load_si256(__m256i const *);
_mm256_stream_pd AVX immintrin.h void _mm256_stream_pd(double *, __m256d);
_mm256_stream_ps AVX immintrin.h void _mm256_stream_ps(float *, __m256);
_mm256_stream_si256 AVX immintrin.h void _mm256_stream_si256(__m256i *, __m256i);
_mm256_sub_epi16 AVX2 immintrin.h __m256i _mm256_sub_epi16(__m256i, __m256i);
_mm256_sub_epi32 AVX2 immintrin.h __m256i _mm256_sub_epi32(__m256i, __m256i);
_mm256_sub_epi64 AVX2 immintrin.h __m256i _mm256_sub_epi64(__m256i, __m256i);
_mm256_sub_epi8 AVX2 immintrin.h __m256i _mm256_sub_epi8(__m256i, __m256i);
_mm256_sub_pd AVX immintrin.h __m256d _mm256_sub_pd(__m256d, __m256d);
_mm256_sub_ps AVX immintrin.h __m256 _mm256_sub_ps(__m256, __m256);
_mm256_subs_epi16 AVX2 immintrin.h __m256i _mm256_subs_epi16(__m256i, __m256i);
_mm256_subs_epi8 AVX2 immintrin.h __m256i _mm256_subs_epi8(__m256i, __m256i);
_mm256_subs_epu16 AVX2 immintrin.h __m256i _mm256_subs_epu16(__m256i, __m256i);
_mm256_subs_epu8 AVX2 immintrin.h __m256i _mm256_subs_epu8(__m256i, __m256i);
_mm256_testc_pd AVX immintrin.h int _mm256_testc_pd(__m256d, __m256d);
_mm256_testc_ps AVX immintrin.h int _mm256_testc_ps(__m256, __m256);
_mm256_testc_si256 AVX immintrin.h int _mm256_testc_si256(__m256i, __m256i);
_mm256_testnzc_pd AVX immintrin.h int _mm256_testnzc_pd(__m256d, __m256d);
_mm256_testnzc_ps AVX immintrin.h int _mm256_testnzc_ps(__m256, __m256);
_mm256_testnzc_si256 AVX immintrin.h int _mm256_testnzc_si256(__m256i, __m256i);
_mm256_testz_pd AVX immintrin.h int _mm256_testz_pd(__m256d, __m256d);
_mm256_testz_ps AVX immintrin.h int _mm256_testz_ps(__m256, __m256);
_mm256_testz_si256 AVX immintrin.h int _mm256_testz_si256(__m256i, __m256i);
_mm256_unpackhi_epi16 AVX2 immintrin.h __m256i _mm256_unpackhi_epi16(__m256i, __m256i);
_mm256_unpackhi_epi32 AVX2 immintrin.h __m256i _mm256_unpackhi_epi32(__m256i, __m256i);
_mm256_unpackhi_epi64 AVX2 immintrin.h __m256i _mm256_unpackhi_epi64(__m256i, __m256i);
_mm256_unpackhi_epi8 AVX2 immintrin.h __m256i _mm256_unpackhi_epi8(__m256i, __m256i);
_mm256_unpackhi_pd AVX immintrin.h __m256d _mm256_unpackhi_pd(__m256d, __m256d);
_mm256_unpackhi_ps AVX immintrin.h __m256 _mm256_unpackhi_ps(__m256, __m256);
_mm256_unpacklo_epi16 AVX2 immintrin.h __m256i _mm256_unpacklo_epi16(__m256i, __m256i);
_mm256_unpacklo_epi32 AVX2 immintrin.h __m256i _mm256_unpacklo_epi32(__m256i, __m256i);
_mm256_unpacklo_epi64 AVX2 immintrin.h __m256i _mm256_unpacklo_epi64(__m256i, __m256i);
_mm256_unpacklo_epi8 AVX2 immintrin.h __m256i _mm256_unpacklo_epi8(__m256i, __m256i);
_mm256_unpacklo_pd AVX immintrin.h __m256d _mm256_unpacklo_pd(__m256d, __m256d);
_mm256_unpacklo_ps AVX immintrin.h __m256 _mm256_unpacklo_ps(__m256, __m256);
_mm256_xor_pd AVX immintrin.h __m256d _mm256_xor_pd(__m256d, __m256d);
_mm256_xor_ps AVX immintrin.h __m256 _mm256_xor_ps(__m256, __m256);
_mm256_xor_si256 AVX2 immintrin.h __m256i _mm256_xor_si256(__m256i, __m256i);
_mm256_zeroall AVX immintrin.h void _mm256_zeroall(void);
_mm256_zeroupper AVX immintrin.h void _mm256_zeroupper(void);
__movsb intrin.h void __movsb(unsigned char *, unsigned char const *, size_t);
__movsd intrin.h void __movsd(unsigned long *, unsigned long const *, size_t);
__movsw intrin.h void __movsw(unsigned short *, unsigned short const *, size_t);
_mulx_u32 BMI immintrin.h unsigned int _mulx_u32(unsigned int, unsigned int, unsigned int*);
__nop intrin.h void __nop(void);
__nvreg_restore_fence intrin.h void __nvreg_restore_fence(void);
__nvreg_save_fence intrin.h void __nvreg_save_fence(void);
__outbyte intrin.h void __outbyte(unsigned short, unsigned char);
__outbytestring intrin.h void __outbytestring(unsigned short, unsigned char *, unsigned long);
__outdword intrin.h void __outdword(unsigned short, unsigned long);
__outdwordstring intrin.h void __outdwordstring(unsigned short, unsigned long *, unsigned long);
__outword intrin.h void __outword(unsigned short, unsigned short);
__outwordstring intrin.h void __outwordstring(unsigned short, unsigned short *, unsigned long);
_pdep_u32 BMI immintrin.h unsigned int _pdep_u32(unsigned int, unsigned int);
_pext_u32 BMI immintrin.h unsigned int _pext_u32(unsigned int, unsigned int);
__popcnt POPCNT intrin.h unsigned int __popcnt(unsigned int);
__popcnt16 POPCNT intrin.h unsigned short __popcnt16(unsigned short);
_rdrand16_step RDRAND immintrin.h int _rdrand16_step(unsigned short *);
_rdrand32_step RDRAND immintrin.h int _rdrand32_step(unsigned int *);
_rdseed16_step RDSEED immintrin.h int _rdseed16_step(unsigned short *);
_rdseed32_step RDSEED immintrin.h int _rdseed32_step(unsigned int *);
__rdtsc intrin.h unsigned __int64 __rdtsc(void);
__rdtscp RDTSCP intrin.h unsigned __int64 __rdtscp(unsigned int*);
_ReadBarrier intrin.h void _ReadBarrier(void);
__readcr0 intrin.h unsigned long __readcr0(void);
__readcr2 intrin.h unsigned long __readcr2(void);
__readcr3 intrin.h unsigned long __readcr3(void);
__readcr4 intrin.h unsigned long __readcr4(void);
__readcr8 intrin.h unsigned long __readcr8(void);
__readdr intrin.h unsigned __readdr(unsigned);
__readeflags intrin.h unsigned __readeflags(void);
__readfsbyte intrin.h unsigned char __readfsbyte(unsigned long);
__readfsdword intrin.h unsigned long __readfsdword(unsigned long);
__readfsword intrin.h unsigned short __readfsword(unsigned long);
__readmsr intrin.h unsigned __int64 __readmsr(unsigned long);
__readpmc intrin.h unsigned __int64 __readpmc(unsigned long);
_ReadWriteBarrier intrin.h void _ReadWriteBarrier(void);
_ReturnAddress intrin.h void * _ReturnAddress(void);
_rorx_u32 BMI immintrin.h unsigned int _rorx_u32(unsigned int, const unsigned int);
_rotl16 intrin.h unsigned short _rotl16(unsigned short, unsigned char);
_rotl8 intrin.h unsigned char _rotl8(unsigned char, unsigned char);
_rotr16 intrin.h unsigned short _rotr16(unsigned short, unsigned char);
_rotr8 intrin.h unsigned char _rotr8(unsigned char, unsigned char);
_rsm intrin.h void _rsm(void);
_sarx_i32 BMI immintrin.h int _sarx_i32(int, unsigned int);
__segmentlimit intrin.h unsigned long __segmentlimit(unsigned long);
_sgdt intrin.h void _sgdt(void*);
_shlx_u32 BMI immintrin.h unsigned int _shlx_u32(unsigned int, unsigned int);
_shrx_u32 BMI immintrin.h unsigned int _shrx_u32(unsigned int, unsigned int);
__sidt intrin.h void __sidt(void*);
__slwpcb LWP ammintrin.h void *__slwpcb(void);
_stac SMAP intrin.h void _stac(void);
_storebe_i16 MOVBE immintrin.h void _storebe_i16(void *, short); [Macro]
_storebe_i32 MOVBE immintrin.h void _storebe_i32(void *, int); [Macro]
_store_be_u16 MOVBE immintrin.h void _store_be_u16(void *, unsigned short); [Macro]
_store_be_u32 MOVBE immintrin.h void _store_be_u32(void *, unsigned int); [Macro]
_Store_HLERelease HLE immintrin.h void _Store_HLERelease(long volatile *, long);
_StorePointer_HLERelease HLE immintrin.h void _StorePointer_HLERelease(void * volatile *, void *);
__stosb intrin.h void __stosb(unsigned char *, unsigned char, size_t);
__stosd intrin.h void __stosd(unsigned long *, unsigned long, size_t);
__stosw intrin.h void __stosw(unsigned short *, unsigned short, size_t);
_subborrow_u16 intrin.h unsigned char _subborrow_u16(unsigned char, unsigned short, unsigned short, unsigned short *);
_subborrow_u32 intrin.h unsigned char _subborrow_u32(unsigned char, unsigned int, unsigned int, unsigned int *);
_subborrow_u8 intrin.h unsigned char _subborrow_u8(unsigned char, unsigned char, unsigned char, unsigned char *);
__svm_clgi intrin.h void __svm_clgi(void);
__svm_invlpga intrin.h void __svm_invlpga(void*, int);
__svm_skinit intrin.h void __svm_skinit(int);
__svm_stgi intrin.h void __svm_stgi(void);
__svm_vmload intrin.h void __svm_vmload(size_t);
__svm_vmrun intrin.h void __svm_vmrun(size_t);
__svm_vmsave intrin.h void __svm_vmsave(size_t);
_t1mskc_u32 ABM ammintrin.h unsigned int _t1mskc_u32(unsigned int);
_tzcnt_u32 BMI ammintrin.h, immintrin.h unsigned int _tzcnt_u32(unsigned int);
_tzmsk_u32 ABM ammintrin.h unsigned int _tzmsk_u32(unsigned int);
__ud2 intrin.h void __ud2(void);
_udiv64 intrin.h unsigned int _udiv64(unsigned __int64, unsigned int, unsigned int *);
__ull_rshift intrin.h unsigned __int64 [pascal/cdecl] __ull_rshift(unsigned __int64, int);
__vmx_off intrin.h void __vmx_off(void);
__vmx_vmptrst intrin.h void __vmx_vmptrst(unsigned __int64 *);
__wbinvd intrin.h void __wbinvd(void);
_WriteBarrier intrin.h void _WriteBarrier(void);
__writecr0 intrin.h void __writecr0(unsigned long);
__writecr3 intrin.h void __writecr3(unsigned long);
__writecr4 intrin.h void __writecr4(unsigned long);
__writecr8 intrin.h void __writecr8(unsigned long);
__writedr intrin.h void __writedr(unsigned, unsigned);
__writeeflags intrin.h void __writeeflags(unsigned);
__writefsbyte intrin.h void __writefsbyte(unsigned long, unsigned char);
__writefsdword intrin.h void __writefsdword(unsigned long, unsigned long);
__writefsword intrin.h void __writefsword(unsigned long, unsigned short);
__writemsr intrin.h void __writemsr(unsigned long, unsigned __int64);
_xabort RTM immintrin.h void _xabort(unsigned int);
_xbegin RTM immintrin.h unsigned _xbegin(void);
_xend RTM immintrin.h void _xend(void);
_xgetbv XSAVE immintrin.h unsigned __int64 _xgetbv(unsigned int);
_xrstor XSAVE immintrin.h void _xrstor(void const*, unsigned __int64);
_xsave XSAVE immintrin.h void _xsave(void*, unsigned __int64);
_xsaveopt XSAVEOPT immintrin.h void _xsaveopt(void*, unsigned __int64);
_xsetbv XSAVE immintrin.h void _xsetbv(unsigned int, unsigned __int64);
_xtest XTEST immintrin.h unsigned char _xtest(void);

See also

Compiler intrinsics
ARM intrinsics
ARM64 intrinsics
x64 (amd64) intrinsics