__m64_pmpy2l, __m64_pmpy2r
Microsoft Specific
Emit the two forms of the IPF Parallel Multiply (pmpy2) instruction, which performs multiplication in parallel on multiple integers.
// Left form: multiplies a[1]*b[1] and a[3]*b[3].
__m64 __m64_pmpy2l(__m64 a, __m64 b);
// Right form: multiplies a[0]*b[0] and a[2]*b[2].
__m64 __m64_pmpy2r(__m64 a, __m64 b);
Parameters
[in] a
An __m64 union containing an array of four 16-bit signed integers.
[in] b
An __m64 union containing an array of four 16-bit signed integers.
Return Value
An __m64 union containing two 32-bit signed integers representing the results of the multiplication.
Requirements
| Intrinsic | Architecture |
|---|---|
| __m64_pmpy2l | IPF |
| __m64_pmpy2r | IPF |
Header file <intrin.h>
Remarks
The __m64_pmpy2l (left) intrinsic multiplies a[1]*b[1] and a[3]*b[3]. The __m64_pmpy2r (right) intrinsic multiplies a[0]*b[0] and a[2]*b[2]. In both cases, the result is two 32-bit signed integers, which are placed in elements 0 and 1 of the result.
Example
// pmpy2.cpp
// processor: IPF
#include <stdio.h>
#include <intrin.h>
#pragma intrinsic(__m64_pmpy2l, __m64_pmpy2r)
// Prints the first four 16-bit elements of ia on one line,
// each right-aligned in an 8-character field.
void print16(__int16* ia)
{
    printf_s("{ %8d, %8d, %8d, %8d }\n",
             ia[0], ia[1], ia[2], ia[3]);
}
// Prints the first two 32-bit elements of ia on one line,
// each right-aligned in an 8-character field.
void print32(__int32* ia)
{
    printf_s("{ %8d, %8d }\n",
             ia[0], ia[1]);
}
// Demonstrates both pmpy2 forms on the same pair of four-element inputs:
// prints the inputs, then the result of the right form followed by the
// result of the left form.
int main()
{
    __int16 a[4] = { -1, 1, -2, 6000 };
    __int16 b[4] = { -1, 2, 10, 15000 };
    __m64 m, n, result;

    printf_s("a: \n");
    print16(a);
    printf_s("b: \n");
    print16(b);

    // Copy the inputs into the 16-bit view of each __m64 operand.
    for (int i = 0; i < 4; i++)
    {
        m.m64_i16[i] = a[i];
        n.m64_i16[i] = b[i];
    }

    // Right form: products of elements 0 and 2.
    printf_s("__m64_pmpy2r computes a[0]*b[0] and a[2]*b[2] : ");
    result = __m64_pmpy2r(m, n);
    print32(result.m64_i32);

    // Left form: products of elements 1 and 3.
    printf_s("__m64_pmpy2l computes a[1]*b[1] and a[3]*b[3] : ");
    result = __m64_pmpy2l(m, n);
    print32(result.m64_i32);

    return 0;
}
a: 
{       -1,        1,       -2,     6000 }
b: 
{       -1,        2,       10,    15000 }
__m64_pmpy2r computes a[0]*b[0] and a[2]*b[2] : {        1,      -20 }
__m64_pmpy2l computes a[1]*b[1] and a[3]*b[3] : {        2, 90000000 }