_mm_maddubs_pi16
Microsoft Specific
Emits the Supplemental Streaming SIMD Extensions 3 (SSSE3) instruction pmaddubsw. This instruction multiplies and adds integers.
__m64 _mm_maddubs_pi16(
__m64 a,
__m64 b
);
Parameters
[in] a
A 64-bit parameter that contains eight 8-bit unsigned integers.
[in] b
A 64-bit parameter that contains eight 8-bit signed integers.
Return value
A 64-bit result that contains four 16-bit signed integers, where each result element represents the saturated sum of adjacent SIMD products. This can be expressed with the following equations:
r0 := SATURATE_16((a0 * b0) + (a1 * b1))
r1 := SATURATE_16((a2 * b2) + (a3 * b3))
r2 := SATURATE_16((a4 * b4) + (a5 * b5))
r3 := SATURATE_16((a6 * b6) + (a7 * b7))
Requirements
Intrinsic | Architecture |
---|---|
_mm_maddubs_pi16 | x86, x64 |
Header file <tmmintrin.h>
Remarks
r0-r3 are the sequentially ordered 16-bit components of return value r. r0 indicates the least significant 16 bits.
a0-a7 and b0-b7 are the sequentially ordered 8-bit components of parameters a and b, respectively. a0 and b0 are the least significant 8 bits. Parameter a contains unsigned bytes. Parameter b contains signed bytes.
SATURATE_16(x) is ((x > 32767) ? 32767 : ((x < -32768) ? -32768 : x))
Before you use this intrinsic, software must ensure that the underlying processor supports the instruction.
Example
#include <stdio.h>
#include <tmmintrin.h>
/*
 * Example: compares _mm_maddubs_pi16 against a scalar reference.
 *
 * For each of the four 16-bit result lanes, the reference multiplies two
 * adjacent unsigned bytes of a by the matching signed bytes of b, adds the
 * two products, and clamps the sum to [-32768, 32767]. The expected and
 * actual values are then printed side by side.
 */
int main ()
{
    /* Input bytes in element order: a holds unsigned bytes, b signed bytes. */
    static const unsigned char a_bytes[8] = { 1, 1, 255, 255, 2, 16, 0, 3 };
    static const signed char b_bytes[8] = { 127, -127, 127, 127, -4, 2, -128, -15 };
    __m64 a, b, expected;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int lo = 2 * lane;      /* even byte feeding this result lane */
        int hi = lo + 1;        /* odd byte feeding this result lane */
        int sum;

        a.m64_u8[lo] = a_bytes[lo];
        b.m64_i8[lo] = b_bytes[lo];
        a.m64_u8[hi] = a_bytes[hi];
        b.m64_i8[hi] = b_bytes[hi];

        /* Sum of the two adjacent products, computed in int so it cannot wrap. */
        sum = (a.m64_u8[lo] * b.m64_i8[lo]) + (a.m64_u8[hi] * b.m64_i8[hi]);

        /* Same clamp as SATURATE_16 in the Remarks section. */
        if (sum > 32767) {
            sum = 32767;
        } else if (sum < -32768) {
            sum = -32768;
        }
        expected.m64_i16[lane] = sum;
    }

    __m64 actual = _mm_maddubs_pi16(a, b);

    printf_s("Res0 should be %d: %d\nRes1 should be %d: %d\n",
             expected.m64_i16[0], actual.m64_i16[0],
             expected.m64_i16[1], actual.m64_i16[1]);
    printf_s("Res2 should be %d: %d\nRes3 should be %d: %d\n",
             expected.m64_i16[2], actual.m64_i16[2],
             expected.m64_i16[3], actual.m64_i16[3]);

    /* NOTE(review): kept as the last MMX-related call, as in the original;
       presumably resets MMX state after __m64 use - confirm against the
       _mm_empty documentation. */
    _mm_empty();
    return 0;
}
Res0 should be 0: 0
Res1 should be 32767: 32767
Res2 should be 24: 24
Res3 should be -45: -45