Initial support for vector addition on ARM32.

author Eric Holk <eholk@chromium.org>

Tue, 26 Jan 2016 18:10:39 +0000 (10:10 -0800)

committer Eric Holk <eholk@chromium.org>

Tue, 26 Jan 2016 18:10:39 +0000 (10:10 -0800)
author Eric Holk <eholk@chromium.org>
Tue, 26 Jan 2016 18:10:39 +0000 (10:10 -0800)
committer Eric Holk <eholk@chromium.org>
Tue, 26 Jan 2016 18:10:39 +0000 (10:10 -0800)
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp

index 5f4991f..03b65dd 100644 (file)
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -603,16 +603,17 @@ template <> void InstARM32Vadd::emitIAS(const Cfg *Func) const {
    switch (Dest->getType()) {
    default:
      // TODO(kschimpf) Figure if more cases are needed.
-    Asm->setNeedsTextFixup();
+    emitUsingTextFixup(Func);
      break;
    case IceType_f32:
      Asm->vadds(getDest(), getSrc(0), getSrc(1), CondARM32::AL);
+    assert(!Asm->needsTextFixup());
      break;
    case IceType_f64:
      Asm->vaddd(getDest(), getSrc(0), getSrc(1), CondARM32::AL);
+    assert(!Asm->needsTextFixup());
      break;
    }
-  assert(!Asm->needsTextFixup());
  }
  
  template <> void InstARM32Vdiv::emitIAS(const Cfg *Func) const {
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp

index 2d998ed..5e7bdfd 100644 (file)
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -2798,11 +2798,17 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
    }
  
    if (isVectorType(DestTy)) {
-    UnimplementedLoweringError(this, Instr);
-    return;
+    switch (Instr->getOp()) {
+    default:
+      UnimplementedLoweringError(this, Instr);
+      return;
+    // Explicitly whitelist vector instructions we have implemented/enabled.
+    case InstArithmetic::Fadd:
+    case InstArithmetic::Add:
+      break;
+    }
    }
  
-  // DestTy is a non-i64 scalar.
    Variable *T = makeReg(DestTy);
  
    // * Handle div/rem separately. They require a non-legalized Src1 to inspect
@@ -2900,6 +2906,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
      return;
    case InstArithmetic::Add: {
      if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
+      assert(!isVectorType(DestTy));
        Variable *Src0R = legalizeToReg(Src0);
        Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
        Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
@@ -2911,6 +2918,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
      if (Srcs.hasConstOperand()) {
        if (!Srcs.immediateIsFlexEncodable() &&
            Srcs.negatedImmediateIsFlexEncodable()) {
+        assert(!isVectorType(DestTy));
          Variable *Src0R = Srcs.src0R(this);
          Operand *Src1F = Srcs.negatedSrc1F(this);
          if (!Srcs.swappedOperands()) {
@@ -2923,8 +2931,13 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
        }
      }
      Variable *Src0R = Srcs.src0R(this);
-    Operand *Src1RF = Srcs.src1RF(this);
-    _add(T, Src0R, Src1RF);
+    if (isVectorType(DestTy)) {
+      Variable *Src1R = legalizeToReg(Src1);
+      _vadd(T, Src0R, Src1R);
+    } else {
+      Operand *Src1RF = Srcs.src1RF(this);
+      _add(T, Src0R, Src1RF);
+    }
      _mov(Dest, T);
      return;
    }
diff --git a/tests_lit/assembler/arm32/add-vec.ll b/tests_lit/assembler/arm32/add-vec.ll

new file mode 100644 (file)

index 0000000..c434190
--- /dev/null
+++ b/tests_lit/assembler/arm32/add-vec.ll
@@ -0,0 +1,85 @@
+; Show that we know how to translate vadd vector instructions.
+
+; REQUIRES: allow_dump
+
+; Compile using standalone assembler.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
+; RUN:   -reg-use q10,q11 \
+; RUN:   | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 \
+; RUN:   -reg-use q10,q11 \
+; RUN:   | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
+; RUN:   -reg-use q10,q11 \
+; RUN:   | FileCheck %s --check-prefix=IASM
+
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 \
+; RUN:   -reg-use q10,q11 \
+; RUN:   | FileCheck %s --check-prefix=DIS
+
+define internal <4 x float> @testVaddFloat4(<4 x float> %v1, <4 x float> %v2) {
+; ASM-LABEL: testVaddFloat4:
+; DIS-LABEL: 00000000 <testVaddFloat4>:
+; IASM-LABEL: testVaddFloat4:
+
+entry:
+  %res = fadd <4 x float> %v1, %v2
+
+; ASM:     vadd.f32        q10, q10, q11
+; DIS:   8:       f2444de6
+; IASM:     vadd.f32
+
+  ret <4 x float> %res
+}
+
+define internal <4 x i32> @testVadd4i32(<4 x i32> %v1, <4 x i32> %v2) {
+; ASM-LABEL: testVadd4i32:
+; DIS-LABEL: 00000020 <testVadd4i32>:
+; IASM-LABEL: testVadd4i32:
+
+entry:
+  %res = add <4 x i32> %v1, %v2
+
+; ASM:     vadd.i32        q10, q10, q11
+; DIS:   28:       f26448e6
+; IASM:     vadd.i32
+
+  ret <4 x i32> %res
+}
+
+define internal <8 x i16> @testVadd8i16(<8 x i16> %v1, <8 x i16> %v2) {
+; ASM-LABEL: testVadd8i16:
+; DIS-LABEL: 00000040 <testVadd8i16>:
+; IASM-LABEL: testVadd8i16:
+
+entry:
+  %res = add <8 x i16> %v1, %v2
+
+; ASM:     vadd.i16        q10, q10, q11
+; DIS:   48:       f25448e6
+; IASM:     vadd.i16
+
+  ret <8 x i16> %res
+}
+
+define internal <16 x i8> @testVadd16i8(<16 x i8> %v1, <16 x i8> %v2) {
+; ASM-LABEL: testVadd16i8:
+; DIS-LABEL: 00000060 <testVadd16i8>:
+; IASM-LABEL: testVadd16i8:
+
+entry:
+  %res = add <16 x i8> %v1, %v2
+
+; ASM:     vadd.i8        q10, q10, q11
+; DIS:   68:       f24448e6
+; IASM:     vadd.i8
+
+  ret <16 x i8> %res
+}
diff --git a/tests_lit/assembler/arm32/vldr-vector.ll b/tests_lit/assembler/arm32/vldr-vector.ll

new file mode 100644 (file)

index 0000000..5bd0a04
--- /dev/null
+++ b/tests_lit/assembler/arm32/vldr-vector.ll
@@ -0,0 +1,47 @@
+; Show that we know how to translate vector load instructions.
+
+; REQUIRES: allow_dump
+
+; Compile using standalone assembler.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=IASM
+
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 \
+; RUN:   | FileCheck %s --check-prefix=DIS
+
+define internal <4 x float> @testDerefFloat4(<4 x float> *%p) {
+; ASM-LABEL: testDerefFloat4:
+; DIS-LABEL: 00000000 <testDerefFloat4>:
+; IASM-LABEL: testDerefFloat4:
+
+entry:
+  %ret = load <4 x float>, <4 x float>* %p, align 4
+; ASM:     vld1.64     q0, [r0]
+; DIS:   0:       f4200acf
+
+  ret <4 x float> %ret
+}
+
+define internal <4 x i32> @testDeref4i32(<4 x i32> *%p) {
+; ASM-LABEL: testDeref4i32:
+; DIS-LABEL: 00000010 <testDeref4i32>:
+; IASM-LABEL: testDeref4i32:
+
+entry:
+  %ret = load <4 x i32>, <4 x i32>* %p, align 4
+; ASM:     vld1.64     q0, [r0]
+; DIS:   10:       f4200acf
+
+  ret <4 x i32> %ret
+}
author	Eric Holk <eholk@chromium.org>
	Tue, 26 Jan 2016 18:10:39 +0000 (10:10 -0800)
committer	Eric Holk <eholk@chromium.org>
	Tue, 26 Jan 2016 18:10:39 +0000 (10:10 -0800)
src/IceInstARM32.cpp		patch | blob | history
src/IceTargetLoweringARM32.cpp		patch | blob | history
tests_lit/assembler/arm32/add-vec.ll	[new file with mode: 0644]	patch | blob
tests_lit/assembler/arm32/vldr-vector.ll	[new file with mode: 0644]	patch | blob