LIBS = -lm
OBJS = main.o fx32_vector.o fr32_vector_add.o fx_vector_test.o fr32_vector_add_svv.o \
fr32_vector_sub.o fr32_vector_sub_svv.o fr32_vector_mul.o fr32_vector_mul_svv.o \
- fr32_vector_complex_mul.o fr32_vector_neg.S
+ fr32_vector_complex_mul.o fr32_vector_neg.S fr32_vector_fill.S
all: a.out
--- /dev/null
+/*
+* 32bit vector negate implementation.
+*
+* function prototype
+* void fr32_vector_neg(
+* const fract32 a[],
+* fract32 b[],
+* int count);
+*
+* parameters
+* FP+16 R2 count
+* FP+12 R1 fr32 b[] (output)
+* FP+ 8 R0 const fr32 a[]
+*
+* return
+* none
+*
+* side effect
+* b[] : receives the negated data
+*
+
+* register layout
+* P1 : count : loop counter's initial value
+* I0 : a
+* I1 : b
+* R0 : a[i]
+* R1 : -a[i]
+*/
+
+ .text
+ .align 4
+ .global _fr32_vector_neg;
+ .type _fr32_vector_neg, STT_FUNC;
+
+_fr32_vector_neg:
+ link 0;
+
+ /* Nothing to do when count <= 0. Without this guard p1 = count-1 */
+ /* becomes negative and is loaded into LC0 as a huge trip count, */
+ /* and the store after the loop would still write b[0]. */
+ cc = r2 <= 0;
+ if cc jump .Lfr32_vector_neg_done;
+
+ /* Set up registers */
+ i0 = r0; // I0 = a : read pointer
+ i1 = r1; // I1 = b : write pointer
+ p1 = R2; // P1 = count
+ p1 += -1; // loop handles count-1 elements, the last one is stored after the loop
+
+ /* outer loop */
+ /* The loop is software pipelined: the load of the next a[] element */
+ /* is issued in parallel with the store of the current result. */
+ r0 = [i0++]; // prime the pipeline with a[0]
+
+ // NOTE(review): count == 1 loads LC0 with 0; presumably the loop body is skipped then - confirm against the hardware loop documentation
+ loop count lc0 = p1; // Todo : can be 3 parallel instruction. But seems to be simulator bug in 2013RC1RC
+ loop_begin count;
+ r1 = - r0(s) ; // saturating negate of the element loaded previously
+ r0 = [i0++] || [i1++] = r1
+ loop_end count;
+ r1 = - r0(s) ; // drain the pipeline : negate the last loaded element
+ [i1++] = r1; // and store it
+ /* end of outer loop */
+
+
+.Lfr32_vector_neg_done:
+ unlink;
+ rts;
+ .size _fr32_vector_neg, .-_fr32_vector_neg
/*
-* 32bit vector negate implementation.
+* 32bit vector fill implementation.
*
* function prototype
-* void fr32_vector_neg(
-* const fract32 a[],
+* void fr32_vector_fill(
+* const fract32 a,
* fract32 b[],
* int count);
*
* parameters
* FP+16 R2 count
* FP+12 R1 fr32 b[] (output)
-* FP+ 8 R0 const fr32 a[]
+* FP+ 8 R0 const fr32 a
*
* return
* none
* register layout
* P1 : count : loop counter's initial value
-* I0 : a
* I1 : b
-* R0 : a[i]
-* R1 : -a[i]
+* R0 : a
*/
.text
.align 4
- .global _fr32_vector_neg;
- .type _fr32_vector_neg, STT_FUNC;
+ .global _fr32_vector_fill;
+ .type _fr32_vector_fill, STT_FUNC;
-_fr32_vector_neg:
+_fr32_vector_fill: // writes the scalar passed in R0 to b[] through I1, LC0 is loaded with count
link 0;
/* Set up registers */
- i0 = r0; // load a
i1 = r1; // load b
p1 = R2; // load count
- p1 += -1;
/* outer loop */
- r0 = [i0++];
- loop count lc0 = p1; // Todo : can be 3 parallel instruction. But seems to be simulator bug in 2013RC1RC
+ loop count lc0 = p1; // LC0 = count. NOTE(review): count <= 0 is not guarded here - confirm how the hardware loop treats a zero or negative count
loop_begin count;
- r1 = - r0(s) ;
- r0 = [i0++] || [i1++] = r1
+ [i1++] = r0; // *b++ = a
loop_end count;
- r1 = - r0(s) ;
- [i1++] = r1;
/* end of outer loop */
unlink;
rts;
- .size _fr32_vector_neg, .-_fr32_vector_neg
+ .size _fr32_vector_fill, .-_fr32_vector_fill
#undef TAPS_08
#undef NUMSAMPLE_08
+/*
+ * Basic test of vector fill : every requested element is set to a scalar value.
+ */
+#define NUMSAMPLE_09 4
+
+
+
+fract32 desired_09[NUMSAMPLE_09] = // expected buffer contents checked by test_09_fr32_vector_fill()
+ {
+ 0x3,
+ 0x3,
+ 0x3,
+ 0x00000000, // must stay 0 : the test fills only NUMSAMPLE_09-1 elements, so this slot verifies the count parameter
+ };
+
+void test_09_fr32_vector_fill()
+{
+ fract32 result[NUMSAMPLE_09];
+ int idx;
+
+ // Reset the whole buffer first (clearBuffer is defined elsewhere; presumably it zeroes it),
+ // so that a fill running past the requested count shows up in the last slot.
+ clearBuffer( result, NUMSAMPLE_09);
+
+ // Fill with the scalar 3, asking for one element fewer than the buffer holds
+ // in order to exercise the count parameter.
+ fr32_vector_fill( 3, result, NUMSAMPLE_09-1);
+
+ // Compare every slot, including the untouched last one, with the expected data.
+ idx = 0;
+ while ( idx < NUMSAMPLE_09 )
+ {
+  if ( result[idx] != desired_09[idx] )
+  {
+   // report only the first mismatch and give up
+   printf( "test_09 NG :output[%2d] = 0x%08X but should be 0x%08X\n", idx, result[idx], desired_09[idx] );
+   return;
+  }
+  idx++;
+ }
+
+ printf ("test_09 OK\n");
+}
+
+#undef TAPS_09
+#undef NUMSAMPLE_09
+
void test_06_fr32_vector_mul_svv();
void test_07_fr32_vector_complex_mul();
void test_08_fr32_vector_neg();
+void test_09_fr32_vector_fill();
#endif /* FX_VECTOR_TEST_H_ */
test_06_fr32_vector_mul_svv();
test_07_fr32_vector_complex_mul();
test_08_fr32_vector_neg();
+ test_09_fr32_vector_fill();