SH-4 Inline Assembly Code Examples (Windows CE 5.0)
The following sections provide examples of SH-4 inline assembly code used in common situations.
Pass Address of Floating Point Parameter
The following code example shows how to pass the addresses of float arguments to an __asm call and then how to load each argument into a register within the __asm statement:
#include <stdio.h>
void __asm(const char *, ...);
// Compute x+y+z --> result
//
// Adds three single-precision values with SH-4 inline assembly.
// The addresses of x, y, z and result are supplied as the trailing __asm
// arguments; according to the notes on the argument list they arrive in
// r4, r5, r6 and r7. The assembly dereferences r4..r6 to load the operands
// and stores the sum through r7.
// Returns the value the assembly wrote into result.
float add_trig(float x, float y, float z)
{
// Never assigned in C; it is written by the final fmov.s through the
// pointer passed as the last __asm argument.
float result;
__asm(
"fmov.s @r4, fr0 ; copy x to fr0 \n"
"fmov.s @r5, fr1 ; copy y to fr1 \n"
"fmov.s @r6, fr2 ; copy z to fr2 \n"
// The second operand of fadd is the destination: fr1 holds x+y after the
// first add, and fr2 holds x+y+z after the second.
"fadd fr0, fr1 ; compute x+y\n"
"fadd fr1, fr2 ; compute x+y+z \n"
"fmov.s fr2, @r7 ; store fr2 into result addr\n",
&x, // pointer to x passed in r4
&y, // pointer to y passed in r5
&z, // pointer to z passed in r6
&result); // pointer to result passed in r7
return result;
}
// Entry point for the float example: adds 1.0, 2.0 and 3.0 with add_trig
// and prints the sum. Returns 0 to report success to the host environment.
int main(void)
{
    float retval = add_trig(1.0f, 2.0f, 3.0f);

    // retval is promoted to double for the variadic call, which matches %f.
    printf("%f\n", retval);
    return 0;
}
Access Double Parameters from Integer Registers
The following code example shows how to access double parameters from integer registers and local argument stack space using inline assembly:
#include <stdio.h>
void __asm(const char *, ...);
// Compute x+y+z --> result
//
// Adds three double-precision values with SH-4 inline assembly.
// x, y, z and the address of result are supplied as the trailing __asm
// arguments. According to the notes on the argument list, x arrives in
// r4/r5, y in r6/r7, z on the stack at @(16,sp)/@(20,sp), and the result
// address at @(24,sp).
// Each 32-bit half is moved through fpul into the fr pair backing dr4, dr6
// and dr8; the two fadd instructions run with the pr bit of fpscr toggled,
// and the sum is stored back as two 32-bit halves.
// Returns the value the assembly wrote into result.
double add_trig(double x, double y, double z)
{
// Never assigned in C; it is written by the two final fmov.s stores.
double result;
__asm(
// x -> fr4 (hi) / fr5 (lw), used below as dr4.
"lds r4, fpul ; load lw-part of x to fpul \n"
"fsts fpul, fr5 ; copy lw-part of x to fr5 \n"
"lds r5, fpul ; load hi-part of x to fpul \n"
"fsts fpul, fr4 ; copy hi-part of x to fr4 \n"
// y -> fr6 (hi) / fr7 (lw), used below as dr6.
"lds r6, fpul ; load lw-part of y to fpul \n"
"fsts fpul, fr7 ; copy lw-part of y to fr7 \n"
"lds r7, fpul ; load hi-part of y to fpul \n"
"fsts fpul, fr6 ; copy hi-part of y to fr6 \n"
// z comes from the stack, so each half goes via r0 before fpul.
// z -> fr8 (hi) / fr9 (lw), used below as dr8.
"mov.l @(16,sp),r0 ; load lw-part of z to r0 \n"
"lds r0, fpul ; \n"
"fsts fpul, fr9 ; copy lw-part of z to fr9 \n"
"mov.l @(20,sp),r0 ; load hi-part of z to r0 \n"
"lds r0, fpul ; \n"
"fsts fpul, fr8 ; copy hi-part of z to fr8 \n"
// r0 = 8 << 16 (0x00080000), the fpscr mask used for the pr bit.
"mov #8, r0 ; prepare to mask the pr bit \n"
"shll16 r0 ; \n"
"sts fpscr,r1 ; \n"
// NOTE(review): xor toggles the bit, so "turn-on" holds only if pr is
// clear on entry -- confirm against the calling convention.
"xor r0, r1 ; toggle pr bit \n"
"lds r1, fpscr ; turn-on pr bit \n"
// The second operand of fadd is the destination: dr6 = x+y, dr8 = x+y+z.
"fadd dr4, dr6 ; compute x+y \n"
"fadd dr6, dr8 ; compute x+y+z \n"
// The same xor applied again puts fpscr back to the value read above.
"xor r0, r1 ; toggle pr bit \n"
"lds r1, fpscr ; turn-off pr bit\n"
// The hi half is stored at result+4; "@-r0" then steps r0 back by 4 so the
// lw half lands at result+0.
"mov.l @(24,sp), r0 ; load result address\n"
"add #4, r0 ; increment result addr by 4\n"
"fmov.s fr8, @r0 ; store hi-part into result addr\n"
"fmov.s fr9, @-r0 ; store lw-part into result addr\n",
x, // passed in r4 and r5
y, // passed in r6 and r7
z, // passed in @(16,sp) and @(20,sp)
&result); // passed in @(24,sp)
return result;
}
// Entry point for the double example: adds 1.0, 2.0 and 3.0 with add_trig
// and prints the sum. Returns 0 to report success to the host environment.
int main(void)
{
    double retval = add_trig(1.0, 2.0, 3.0);

    printf("%g\n", retval);
    return 0;
}
Compute Inner Product of Vectors
The following code example shows how to compute the inner product of two four-element vectors:
#include <stdio.h>
void __asm(const char *, ...);
//
// Compute an inner product of v1 and v2 vectors
//
// v1 and v2 must each point to at least four floats: the assembly performs
// four post-increment loads from each pointer.
// The loads fill fr0..fr3 from v1 and fr4..fr7 from v2, fipr combines the
// two groups, and the value then held in fr7 is stored through the third
// __asm argument.
// Returns the float the assembly stored into retval[0].
//
float dot(float *v1, float *v2)
{
// One-element array so that its name can be passed directly as the
// destination address; it is written by the final fmov.s.
float retval[1];
__asm(
"fmov.s @r4+, fr0 ; Load v1 vector into fr0..fr3\n"
"fmov.s @r4+, fr1\n"
"fmov.s @r4+, fr2\n"
"fmov.s @r4+, fr3\n"
"fmov.s @r5+, fr4 ; Load v2 vector into fr4..fr7\n"
"fmov.s @r5+, fr5\n"
"fmov.s @r5+, fr6\n"
"fmov.s @r5+, fr7\n"
// The result is read back from fr7, which overwrites the last v2 element
// loaded above.
"fipr fv0, fv4 ; Do the operation\n"
"fmov.s fr7, @r6 ; Store the return value\n",
v1, // passed in R4
v2, // passed in R5
retval); // passed in R6
return retval[0];
}
// Entry point for the inner-product example: computes dot(v1, v2) for two
// identical four-element vectors and prints the result.
// Returns 0 to report success to the host environment.
int main(void)
{
    // Float-suffixed literals avoid an implicit double-to-float conversion.
    float v1[4] = {1.0f, 2.0f, 3.0f, 4.0f};
    float v2[4] = {1.0f, 2.0f, 3.0f, 4.0f};
    float retval = dot(v1, v2);

    printf("retval=%f\n", retval);
    return 0;
}
// range
See Also
Send Feedback on this topic to the authors