_mm_maddubs_epi16
Microsoft Specific
Emits the Supplemental Streaming SIMD Extensions 3 (SSSE3) instruction pmaddubsw. This instruction multiplies and adds integers.
/* Multiplies each unsigned byte of a by the corresponding signed byte of b,
   then adds adjacent pairs of products with signed 16-bit saturation,
   returning eight 16-bit signed results. */
__m128i _mm_maddubs_epi16(
__m128i a, /* [in] sixteen 8-bit unsigned integers */
__m128i b  /* [in] sixteen 8-bit signed integers */
);
Parameters
[in] a
A 128-bit parameter that contains sixteen 8-bit unsigned integers.
[in] b
A 128-bit parameter that contains sixteen 8-bit signed integers.
Return value
A 128-bit result that contains eight 16-bit signed integers, where each result element represents the saturated sum of adjacent SIMD products. This can be expressed with the following equations:
r0 := SATURATE_16((a0 * b0) + (a1 * b1))
r1 := SATURATE_16((a2 * b2) + (a3 * b3))
...
r7 := SATURATE_16((a14 * b14) + (a15 * b15))
Requirements
| Intrinsic | Architecture |
|---|---|
| _mm_maddubs_epi16 | x86, x64 |
Header file <tmmintrin.h>
Remarks
r0-r7 are the sequentially ordered 16-bit components of return value r. r0 indicates the least significant 16 bits.
a0-a15 and b0-b15 are the sequentially ordered 8-bit components of parameters a and b, respectively. a0 and b0 are the least significant 8 bits. Parameter a contains unsigned bytes. Parameter b contains signed bytes.
SATURATE_16(x) is ((x > 32767) ? 32767 : ((x < -32768) ? -32768 : x))
Before you use this intrinsic, software must ensure that the underlying processor supports the instruction.
Example
#include <stdio.h>
#include <tmmintrin.h>
int main ()
{
    /* Operand bytes, listed from element 0 upward. Entry i of each table is
       stored in a.m128i_u8[i] and b.m128i_i8[i] respectively. */
    static const unsigned char a_bytes[16] = {
        1, 1, 1, 2, 10, 12, 255, 255, 0, 20, 10, 11, 12, 13, 14, 15
    };
    static const signed char b_bytes[16] = {
        32, -32, 2, 4, -128, 12, -128, -128, 100, 20, 10, 11, 12, 13, 14, 15
    };

    __m128i a, b, final;

    /* Fill both operands and compute the scalar reference result: each 16-bit
       lane is the sum of two adjacent byte products, clamped to the signed
       16-bit range [-32768, 32767]. */
    for (int lane = 0; lane < 8; lane++)
    {
        int lo = 2 * lane;
        int hi = lo + 1;
        int sum;

        a.m128i_u8[lo] = a_bytes[lo];
        b.m128i_i8[lo] = b_bytes[lo];
        a.m128i_u8[hi] = a_bytes[hi];
        b.m128i_i8[hi] = b_bytes[hi];

        sum = (a.m128i_u8[lo] * b.m128i_i8[lo]) + (a.m128i_u8[hi] * b.m128i_i8[hi]);
        if (sum > 32767)
        {
            sum = 32767;
        }
        else if (sum < -32768)
        {
            sum = -32768;
        }
        final.m128i_i16[lane] = sum;
    }

    /* Result produced by the intrinsic, printed next to the reference values. */
    __m128i res = _mm_maddubs_epi16(a, b);

    printf_s("Res0 should be %d: %d\nRes1 should be %d: %d\n",
             final.m128i_i16[0], res.m128i_i16[0], final.m128i_i16[1], res.m128i_i16[1]);
    printf_s("Res2 should be %d: %d\nRes3 should be %d: %d\n",
             final.m128i_i16[2], res.m128i_i16[2], final.m128i_i16[3], res.m128i_i16[3]);
    printf_s("Res4 should be %d: %d\nRes5 should be %d: %d\n",
             final.m128i_i16[4], res.m128i_i16[4], final.m128i_i16[5], res.m128i_i16[5]);
    printf_s("Res6 should be %d: %d\nRes7 should be %d: %d\n",
             final.m128i_i16[6], res.m128i_i16[6], final.m128i_i16[7], res.m128i_i16[7]);

    return 0;
}
Res0 should be 0: 0
Res1 should be 10: 10
Res2 should be -1136: -1136
Res3 should be -32768: -32768
Res4 should be 400: 400
Res5 should be 221: 221
Res6 should be 313: 313
Res7 should be 421: 421