# Libraries appended to the end of the link command under `all`.
LIBS = -lm
# Object files handed to the link command. The old list named two .S
# sources directly; the new list names their .o files instead, and the new
# clean recipe removes exactly this list.
OBJS = main.o fr32_vector_add.o fx_vector_test.o fr32_vector_add_svv.o \
fr32_vector_sub.o fr32_vector_sub_svv.o fr32_vector_mul.o fr32_vector_mul_svv.o \
- fr32_vector_complex_mul.o fr32_vector_neg.S fr32_vector_fill.S
+ fr32_vector_complex_mul.o fr32_vector_neg.o fr32_vector_fill.o
# Default goal: links all objects.
# NOTE(review): the link recipe is attached to `all`, and no rule that
# builds a.out from $(OBJS) is visible in this hunk; the command also has
# no -o, so the output name is whatever $(CC) defaults to. Confirm that
# a.out (and its dependency on $(OBJS)) is declared elsewhere.
# NOTE(review): neither `all` nor `clean` is declared .PHONY in the
# visible part of the file.
all: a.out
$(CC) $(LDFLAGS) $(OBJS) $(LIBS)
# Remove build products: previously every *.o in the directory, now only
# the objects listed in OBJS, plus a.out.
clean:
- rm -f *.o a.out
+ rm -f $(OBJS) a.out
# Suffix rule: compile one .c file into the matching .o.
# NOTE(review): flags come from CCFLAGS rather than the conventional
# CFLAGS, and no rule for the .S sources is visible here -- presumably the
# assembly objects rely on a rule defined elsewhere or built into make.
.c.o:
$(CC) $(CCFLAGS) -c $<
/*
-* 32bit vector negate implementation.
+* 32bit vector fill implementation.
+* Stores the scalar a into successive elements of b[], one store per
+* loop iteration; the loop count is count, used unmodified.
+* NOTE(review): count is not checked before it is used as the loop
+* count, so the behavior for count <= 0 depends on the hardware loop
+* semantics -- confirm against the caller and the processor reference.
*
* function prototype
-* void fr32_vector_neg(
-* const fract32 a[],
+* void fr32_vector_fill(
+* const fract32 a,
* fract32 b[],
* int count);
*
* parameters
* FP+16 R2 count
* FP+12 R1 fr32 b[] (output: written through I1, not const)
-* FP+ 8 R0 const fr32 a[]
+* FP+ 8 R0 const fr32 a
*
* return
* none
* register layout
* P1 : count : loop counter's initial value
-* I0 : a
* I1 : b
-* R0 : a[i]
-* R1 : -a[i]
+* R0 : a
*/
.text
.align 4
- .global _fr32_vector_neg;
- .type _fr32_vector_neg, STT_FUNC;
+ .global _fr32_vector_fill;
+ .type _fr32_vector_fill, STT_FUNC;
-_fr32_vector_neg:
+_fr32_vector_fill:
link 0;
/* Set up registers */
- i0 = r0; // load a
i1 = r1; // I1 = b, the store pointer
p1 = R2; // P1 = count, the loop counter's initial value
- p1 += -1;
/* element loop (the only loop in this routine) */
- r0 = [i0++];
- loop count lc0 = p1; // Todo : can be 3 parallel instruction. But seems to be simulator bug in 2013RC1RC
+ loop count lc0 = p1; // LC0 = count: one iteration per element
loop_begin count;
- r1 = - r0(s) ;
- r0 = [i0++] || [i1++] = r1
+ [i1++] = r0; // b[i] = a (R0 still holds the scalar argument); advance I1
loop_end count;
- r1 = - r0(s) ;
- [i1++] = r1;
/* end of element loop */
unlink;
rts;
- .size _fr32_vector_neg, .-_fr32_vector_neg
+ .size _fr32_vector_fill, .-_fr32_vector_fill
/*
-* 32bit vector fill implementation.
+* 32bit vector negate implementation.
+* Writes the negation of each a[i] to b[i]. The negation uses the (s)
+* option, presumably saturating -- confirm.
+* Structure: a[0] is loaded before the loop; each of the count-1 loop
+* iterations negates the element already in R0, then stores the result
+* while loading the next element; the last element is negated and
+* stored after the loop.
+* NOTE(review): count is decremented without a check. The load before
+* the loop and the store after it are unconditional, so a[0] is read
+* and b[0] is written even when count <= 0. With count == 1 the loop
+* count is 0; what the hardware loop does then should be confirmed
+* against the processor reference.
*
* function prototype
-* void fr32_vector_fill(
-* const fract32 a,
+* void fr32_vector_neg(
+* const fract32 a[],
* fract32 b[],
* int count);
*
* parameters
* FP+16 R2 count
* FP+12 R1 fr32 b[] (output: written through I1, not const)
-* FP+ 8 R0 const fr32 a
+* FP+ 8 R0 const fr32 a[]
*
* return
* none
* register layout
* P1 : count : loop counter's initial value
+* I0 : a
* I1 : b
-* R0 : a
+* R0 : a[i]
+* R1 : -a[i]
*/
.text
.align 4
- .global _fr32_vector_fill;
- .type _fr32_vector_fill, STT_FUNC;
+ .global _fr32_vector_neg;
+ .type _fr32_vector_neg, STT_FUNC;
-_fr32_vector_fill:
+_fr32_vector_neg:
link 0;
/* Set up registers */
+ i0 = r0; // I0 = a, the load pointer
i1 = r1; // I1 = b, the store pointer
p1 = R2; // P1 = count
+ p1 += -1; // loop runs count-1 times; the last element is handled after the loop
/* element loop (the only loop in this routine) */
+ r0 = [i0++]; // preload a[0] so the loop body can start with the negate
- loop count lc0 = p1;
+ loop count lc0 = p1; // Todo : can be 3 parallel instruction. But seems to be simulator bug in 2013RC1RC
loop_begin count;
- [i1++] = r0;
+ r1 = - r0(s) ; // R1 = -a[i] for the element loaded previously
+ // Load the next a[] element while storing the negated current one.
+ // NOTE(review): this line has no terminating ';' unlike every other
+ // instruction in the file -- confirm the assembler accepts it.
+ r0 = [i0++] || [i1++] = r1
loop_end count;
+ r1 = - r0(s) ; // negate the last element loaded
+ [i1++] = r1; // and store it
/* end of element loop */
unlink;
rts;
- .size _fr32_vector_fill, .-_fr32_vector_fill
+ .size _fr32_vector_neg, .-_fr32_vector_neg
+