Add VMLA (floating point) to the integrated ARM assembler.

author Karl Schimpf <kschimpf@google.com>

Tue, 26 Jan 2016 20:25:43 +0000 (12:25 -0800)

committer Karl Schimpf <kschimpf@google.com>

Tue, 26 Jan 2016 20:25:43 +0000 (12:25 -0800)
author Karl Schimpf <kschimpf@google.com>
Tue, 26 Jan 2016 20:25:43 +0000 (12:25 -0800)
committer Karl Schimpf <kschimpf@google.com>
Tue, 26 Jan 2016 20:25:43 +0000 (12:25 -0800)
diff --git a/src/DartARM32/assembler_arm.cc b/src/DartARM32/assembler_arm.cc

index 26983f9..2867ec4 100644 (file)
--- a/src/DartARM32/assembler_arm.cc
+++ b/src/DartARM32/assembler_arm.cc
@@ -977,42 +977,42 @@ void Assembler::vaddd(DRegister dd, DRegister dn, DRegister dm,
    EmitVFPddd(cond, B21 | B20, dd, dn, dm);
  }
  
-// Moved to Arm32::AssemblerARM32::vmuls()
+// Moved to Arm32::AssemblerARM32::vsubs()
  void Assembler::vsubs(SRegister sd, SRegister sn, SRegister sm,
                        Condition cond) {
    EmitVFPsss(cond, B21 | B20 | B6, sd, sn, sm);
  }
  
-// Moved to Arm32::AssemblerARM32::vmuld()
+// Moved to Arm32::AssemblerARM32::vsubd()
  void Assembler::vsubd(DRegister dd, DRegister dn, DRegister dm,
                        Condition cond) {
    EmitVFPddd(cond, B21 | B20 | B6, dd, dn, dm);
  }
-#endif
  
-#if 0
+// Moved to Arm32::AssemblerARM32::vmuls()
  void Assembler::vmuls(SRegister sd, SRegister sn, SRegister sm,
                        Condition cond) {
    EmitVFPsss(cond, B21, sd, sn, sm);
  }
  
-
+// Moved to Arm32::AssemblerARM32::vmuld()
  void Assembler::vmuld(DRegister dd, DRegister dn, DRegister dm,
                        Condition cond) {
    EmitVFPddd(cond, B21, dd, dn, dm);
  }
-#endif
  
+// Moved to Arm32::AssemblerARM32::vmlas()
  void Assembler::vmlas(SRegister sd, SRegister sn, SRegister sm,
                        Condition cond) {
    EmitVFPsss(cond, 0, sd, sn, sm);
  }
  
-
+// Moved to Arm32::AssemblerARM32::vmlad()
  void Assembler::vmlad(DRegister dd, DRegister dn, DRegister dm,
                        Condition cond) {
    EmitVFPddd(cond, 0, dd, dn, dm);
  }
+#endif
  
  
  void Assembler::vmlss(SRegister sd, SRegister sn, SRegister sm,
diff --git a/src/DartARM32/assembler_arm.h b/src/DartARM32/assembler_arm.h

index db9ae94..da42a1e 100644 (file)
--- a/src/DartARM32/assembler_arm.h
+++ b/src/DartARM32/assembler_arm.h
@@ -686,8 +686,12 @@ class Assembler : public ValueObject {
    void vmulqs(QRegister qd, QRegister qn, QRegister qm);
    void vshlqi(OperandSize sz, QRegister qd, QRegister qm, QRegister qn);
    void vshlqu(OperandSize sz, QRegister qd, QRegister qm, QRegister qn);
+#if 0
+  // Moved to Arm32::AssemblerARM32::vmlas()
    void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
+  // Moved to Arm32::AssemblerARM32::vmlad()
    void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
+#endif
    void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
    void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
  #if 0
diff --git a/src/IceAssemblerARM32.cpp b/src/IceAssemblerARM32.cpp

index abd4716..c9282fe 100644 (file)
--- a/src/IceAssemblerARM32.cpp
+++ b/src/IceAssemblerARM32.cpp
@@ -1045,6 +1045,15 @@ void AssemblerARM32::emitVFPddd(CondARM32::Cond Cond, IValueT Opcode,
    emitInst(Encoding);
  }
  
+void AssemblerARM32::emitVFPddd(CondARM32::Cond Cond, IValueT Opcode,
+                                const Operand *OpDd, const Operand *OpDn,
+                                const Operand *OpDm, const char *InstName) {
+  IValueT Dd = encodeDRegister(OpDd, "Dd", InstName);
+  IValueT Dn = encodeDRegister(OpDn, "Dn", InstName);
+  IValueT Dm = encodeDRegister(OpDm, "Dm", InstName);
+  emitVFPddd(Cond, Opcode, Dd, Dn, Dm);
+}
+
  void AssemblerARM32::emitVFPsss(CondARM32::Cond Cond, IValueT Opcode,
                                  IValueT Sd, IValueT Sn, IValueT Sm) {
    assert(Sd < RegARM32::getNumSRegs());
@@ -1060,6 +1069,15 @@ void AssemblerARM32::emitVFPsss(CondARM32::Cond Cond, IValueT Opcode,
    emitInst(Encoding);
  }
  
+void AssemblerARM32::emitVFPsss(CondARM32::Cond Cond, IValueT Opcode,
+                                const Operand *OpSd, const Operand *OpSn,
+                                const Operand *OpSm, const char *InstName) {
+  const IValueT Sd = encodeSRegister(OpSd, "Sd", InstName);
+  const IValueT Sn = encodeSRegister(OpSn, "Sn", InstName);
+  const IValueT Sm = encodeSRegister(OpSm, "Sm", InstName);
+  emitVFPsss(Cond, Opcode, Sd, Sn, Sm);
+}
+
  void AssemblerARM32::adc(const Operand *OpRd, const Operand *OpRn,
                           const Operand *OpSrc1, bool SetFlags,
                           CondARM32::Cond Cond) {
@@ -2075,11 +2093,8 @@ void AssemblerARM32::vadds(const Operand *OpSd, const Operand *OpSn,
    // cccc11100D11nnnndddd101sN0M0mmmm where cccc=Cond, s=0, ddddD=Rd, nnnnN=Rn,
    // and mmmmM=Rm.
    constexpr const char *Vadds = "vadds";
-  IValueT Sd = encodeSRegister(OpSd, "Sd", Vadds);
-  IValueT Sn = encodeSRegister(OpSn, "Sn", Vadds);
-  IValueT Sm = encodeSRegister(OpSm, "Sm", Vadds);
    constexpr IValueT VaddsOpcode = B21 | B20;
-  emitVFPsss(Cond, VaddsOpcode, Sd, Sn, Sm);
+  emitVFPsss(Cond, VaddsOpcode, OpSd, OpSn, OpSm, Vadds);
  }
  
  void AssemblerARM32::vaddd(const Operand *OpDd, const Operand *OpDn,
@@ -2090,11 +2105,8 @@ void AssemblerARM32::vaddd(const Operand *OpDd, const Operand *OpDn,
    // cccc11100D11nnnndddd101sN0M0mmmm where cccc=Cond, s=1, Ddddd=Rd, Nnnnn=Rn,
    // and Mmmmm=Rm.
    constexpr const char *Vaddd = "vaddd";
-  IValueT Dd = encodeDRegister(OpDd, "Dd", Vaddd);
-  IValueT Dn = encodeDRegister(OpDn, "Dn", Vaddd);
-  IValueT Dm = encodeDRegister(OpDm, "Dm", Vaddd);
    constexpr IValueT VadddOpcode = B21 | B20;
-  emitVFPddd(Cond, VadddOpcode, Dd, Dn, Dm);
+  emitVFPddd(Cond, VadddOpcode, OpDd, OpDn, OpDm, Vaddd);
  }
  
  void AssemblerARM32::vcmpd(const Operand *OpDd, const Operand *OpDm,
@@ -2305,11 +2317,8 @@ void AssemblerARM32::vdivs(const Operand *OpSd, const Operand *OpSn,
    // cccc11101D00nnnndddd101sN0M0mmmm where cccc=Cond, s=0, ddddD=Rd, nnnnN=Rn,
    // and mmmmM=Rm.
    constexpr const char *Vdivs = "vdivs";
-  IValueT Sd = encodeSRegister(OpSd, "Sd", Vdivs);
-  IValueT Sn = encodeSRegister(OpSn, "Sn", Vdivs);
-  IValueT Sm = encodeSRegister(OpSm, "Sm", Vdivs);
    constexpr IValueT VdivsOpcode = B23;
-  emitVFPsss(Cond, VdivsOpcode, Sd, Sn, Sm);
+  emitVFPsss(Cond, VdivsOpcode, OpSd, OpSn, OpSm, Vdivs);
  }
  
  void AssemblerARM32::vdivd(const Operand *OpDd, const Operand *OpDn,
@@ -2320,11 +2329,8 @@ void AssemblerARM32::vdivd(const Operand *OpDd, const Operand *OpDn,
    // cccc11101D00nnnndddd101sN0M0mmmm where cccc=Cond, s=1, Ddddd=Rd, Nnnnn=Rn,
    // and Mmmmm=Rm.
    constexpr const char *Vdivd = "vdivd";
-  IValueT Dd = encodeDRegister(OpDd, "Dd", Vdivd);
-  IValueT Dn = encodeDRegister(OpDn, "Dn", Vdivd);
-  IValueT Dm = encodeDRegister(OpDm, "Dm", Vdivd);
    constexpr IValueT VdivdOpcode = B23;
-  emitVFPddd(Cond, VdivdOpcode, Dd, Dn, Dm);
+  emitVFPddd(Cond, VdivdOpcode, OpDd, OpDn, OpDm, Vdivd);
  }
  
  void AssemblerARM32::veord(const Operand *OpDd, const Operand *OpDn,
@@ -2444,6 +2450,30 @@ void AssemblerARM32::vmovsr(const Operand *OpSn, const Operand *OpRt,
    emitInst(Encoding);
  }
  
+void AssemblerARM32::vmlad(const Operand *OpDd, const Operand *OpDn,
+                           const Operand *OpDm, CondARM32::Cond Cond) {
+  // VMLA, VMLS (floating-point), ARM section A8.8.337, encoding A2:
+  //   vmla<c>.f64 <Dd>, <Dn>, <Dm>
+  //
+  // cccc11100D00nnnndddd1011N0M0mmmm where cccc=Cond, Ddddd=Dd, Nnnnn=Dn, and
+  // Mmmmm=Dm
+  constexpr const char *Vmlad = "vmlad";
+  constexpr IValueT VmladOpcode = 0;
+  emitVFPddd(Cond, VmladOpcode, OpDd, OpDn, OpDm, Vmlad);
+}
+
+void AssemblerARM32::vmlas(const Operand *OpSd, const Operand *OpSn,
+                           const Operand *OpSm, CondARM32::Cond Cond) {
+  // VMLA, VMLS (floating-point), ARM section A8.8.337, encoding A2:
+  //   vmla<c>.f32 <Sd>, <Sn>, <Sm>
+  //
+  // cccc11100D00nnnndddd1010N0M0mmmm where cccc=Cond, ddddD=Sd, nnnnN=Sn, and
+  // mmmmM=Sm
+  constexpr const char *Vmlas = "vmlas";
+  constexpr IValueT VmlasOpcode = 0;
+  emitVFPsss(Cond, VmlasOpcode, OpSd, OpSn, OpSm, Vmlas);
+}
+
  void AssemblerARM32::vmrsAPSR_nzcv(CondARM32::Cond Cond) {
    // MVRS - ARM section A*.8.348, encoding A1:
    //   vmrs<c> APSR_nzcv, FPSCR
@@ -2465,11 +2495,8 @@ void AssemblerARM32::vmuls(const Operand *OpSd, const Operand *OpSn,
    // cccc11100D10nnnndddd101sN0M0mmmm where cccc=Cond, s=0, ddddD=Rd, nnnnN=Rn,
    // and mmmmM=Rm.
    constexpr const char *Vmuls = "vmuls";
-  IValueT Sd = encodeSRegister(OpSd, "Sd", Vmuls);
-  IValueT Sn = encodeSRegister(OpSn, "Sn", Vmuls);
-  IValueT Sm = encodeSRegister(OpSm, "Sm", Vmuls);
    constexpr IValueT VmulsOpcode = B21;
-  emitVFPsss(Cond, VmulsOpcode, Sd, Sn, Sm);
+  emitVFPsss(Cond, VmulsOpcode, OpSd, OpSn, OpSm, Vmuls);
  }
  
  void AssemblerARM32::vmuld(const Operand *OpDd, const Operand *OpDn,
@@ -2480,11 +2507,8 @@ void AssemblerARM32::vmuld(const Operand *OpDd, const Operand *OpDn,
    // cccc11100D10nnnndddd101sN0M0mmmm where cccc=Cond, s=1, Ddddd=Rd, Nnnnn=Rn,
    // and Mmmmm=Rm.
    constexpr const char *Vmuld = "vmuld";
-  IValueT Dd = encodeDRegister(OpDd, "Dd", Vmuld);
-  IValueT Dn = encodeDRegister(OpDn, "Dn", Vmuld);
-  IValueT Dm = encodeDRegister(OpDm, "Dm", Vmuld);
    constexpr IValueT VmuldOpcode = B21;
-  emitVFPddd(Cond, VmuldOpcode, Dd, Dn, Dm);
+  emitVFPddd(Cond, VmuldOpcode, OpDd, OpDn, OpDm, Vmuld);
  }
  
  void AssemblerARM32::vstrd(const Operand *OpDd, const Operand *OpAddress,
@@ -2538,11 +2562,8 @@ void AssemblerARM32::vsubs(const Operand *OpSd, const Operand *OpSn,
    // cccc11100D11nnnndddd101sN1M0mmmm where cccc=Cond, s=0, ddddD=Rd, nnnnN=Rn,
    // and mmmmM=Rm.
    constexpr const char *Vsubs = "vsubs";
-  IValueT Sd = encodeSRegister(OpSd, "Sd", Vsubs);
-  IValueT Sn = encodeSRegister(OpSn, "Sn", Vsubs);
-  IValueT Sm = encodeSRegister(OpSm, "Sm", Vsubs);
    constexpr IValueT VsubsOpcode = B21 | B20 | B6;
-  emitVFPsss(Cond, VsubsOpcode, Sd, Sn, Sm);
+  emitVFPsss(Cond, VsubsOpcode, OpSd, OpSn, OpSm, Vsubs);
  }
  
  void AssemblerARM32::vsubd(const Operand *OpDd, const Operand *OpDn,
@@ -2553,11 +2574,8 @@ void AssemblerARM32::vsubd(const Operand *OpDd, const Operand *OpDn,
    // cccc11100D11nnnndddd101sN1M0mmmm where cccc=Cond, s=1, Ddddd=Rd, Nnnnn=Rn,
    // and Mmmmm=Rm.
    constexpr const char *Vsubd = "vsubd";
-  IValueT Dd = encodeDRegister(OpDd, "Dd", Vsubd);
-  IValueT Dn = encodeDRegister(OpDn, "Dn", Vsubd);
-  IValueT Dm = encodeDRegister(OpDm, "Dm", Vsubd);
    constexpr IValueT VsubdOpcode = B21 | B20 | B6;
-  emitVFPddd(Cond, VsubdOpcode, Dd, Dn, Dm);
+  emitVFPddd(Cond, VsubdOpcode, OpDd, OpDn, OpDm, Vsubd);
  }
  
  void AssemblerARM32::emitVStackOp(CondARM32::Cond Cond, IValueT Opcode,
diff --git a/src/IceAssemblerARM32.h b/src/IceAssemblerARM32.h

index 8290dc7..239154f 100644 (file)
--- a/src/IceAssemblerARM32.h
+++ b/src/IceAssemblerARM32.h
@@ -390,6 +390,12 @@ public:
  
    void vmovsr(const Operand *OpSn, const Operand *OpRt, CondARM32::Cond Cond);
  
+  void vmlad(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
+             CondARM32::Cond Cond);
+
+  void vmlas(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm,
+             CondARM32::Cond Cond);
+
    // Uses APSR_nzcv as register
    void vmrsAPSR_nzcv(CondARM32::Cond Cond);
  
@@ -585,12 +591,20 @@ private:
                   const Operand *OpSrc, const char *MovName);
  
    // Emit VFP instruction with 3 D registers.
+  void emitVFPddd(CondARM32::Cond Cond, IValueT Opcode, const Operand *OpDd,
+                  const Operand *OpDn, const Operand *OpDm,
+                  const char *InstName);
+
    void emitVFPddd(CondARM32::Cond Cond, IValueT Opcode, IValueT Dd, IValueT Dn,
                    IValueT Dm);
  
    // Emit VFP instruction with 3 S registers.
    void emitVFPsss(CondARM32::Cond Cond, IValueT Opcode, IValueT Sd, IValueT Sn,
                    IValueT Sm);
+
+  void emitVFPsss(CondARM32::Cond Cond, IValueT Opcode, const Operand *OpSd,
+                  const Operand *OpSn, const Operand *OpSm,
+                  const char *InstName);
  };
  
  } // end of namespace ARM32
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp

index f6e4ae5..a8d0efa 100644 (file)
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -642,6 +642,27 @@ template <> void InstARM32Veor::emitIAS(const Cfg *Func) const {
    assert(!Asm->needsTextFixup());
  }
  
+template <> void InstARM32Vmla::emitIAS(const Cfg *Func) const {
+  // Note: Dest == getSrc(0) for four address FP instructions.
+  assert(getSrcSize() == 3);
+  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+  const Variable *Dest = getDest();
+  switch (Dest->getType()) {
+  default:
+    // TODO(kschimpf) Figure out how vector operations apply.
+    emitUsingTextFixup(Func);
+    break;
+  case IceType_f32:
+    Asm->vmlas(getDest(), getSrc(1), getSrc(2), CondARM32::AL);
+    assert(!Asm->needsTextFixup());
+    break;
+  case IceType_f64:
+    Asm->vmlad(getDest(), getSrc(1), getSrc(2), CondARM32::AL);
+    assert(!Asm->needsTextFixup());
+    break;
+  }
+}
+
  template <> void InstARM32Vsub::emitIAS(const Cfg *Func) const {
    auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
    const Variable *Dest = getDest();
diff --git a/tests_lit/assembler/arm32/vmla.ll b/tests_lit/assembler/arm32/vmla.ll

new file mode 100644 (file)

index 0000000..04797ee
--- /dev/null
+++ b/tests_lit/assembler/arm32/vmla.ll
@@ -0,0 +1,56 @@
+; Show that we can take advantage of the vmla instruction for floating point
+; operations during optimization.
+
+; Note that we use -O2 to force the result of the fmul to be (immediately)
+; available for the fadd. When using -Om1, the merge of fmul and fadd does not
+; happen due to intervening register spill code.
+
+; REQUIRES: allow_dump
+
+; Compile using standalone assembler.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
+; RUN:   -reg-use=s20,s21,s22,d20,d21,d22 \
+; RUN:   | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 -reg-use=s20,s21,s22,d20,d21,d22 \
+; RUN:   | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
+; RUN:   -reg-use=s20,s21,s22,d20,d21,d22 \
+; RUN:   | FileCheck %s --check-prefix=IASM
+
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN:   --args -O2 -reg-use=s20,s21,s22,d20,d21,d22 \
+; RUN:   | FileCheck %s --check-prefix=DIS
+
+define internal float @mulAddFloat(float %f1, float %f2) {
+; ASM-LABEL: mulAddFloat:
+; DIS-LABEL: 00000000 <mulAddFloat>:
+
+  %v1 = fmul float %f1, 1.5
+  %v2 = fadd float %f2, %v1
+
+; ASM:  vmla.f32        s21, s20, s22
+; DIS:   10:    ee4aaa0b
+; IASM-NOT: vmla
+
+  ret float %v2
+}
+
+define internal double @mulAddDouble(double %f1, double %f2) {
+; ASM-LABEL: mulAddDouble:
+; DIS-LABEL: 00000020 <mulAddDouble>:
+
+  %v1 = fmul double %f1, 1.5
+  %v2 = fadd double %f2, %v1
+
+; ASM:  vmla.f64        d21, d20, d22
+; DIS:   2c:    ee445ba6
+; IASM-NOT: vmla
+
+  ret double %v2
+}
author	Karl Schimpf <kschimpf@google.com>
	Tue, 26 Jan 2016 20:25:43 +0000 (12:25 -0800)
committer	Karl Schimpf <kschimpf@google.com>
	Tue, 26 Jan 2016 20:25:43 +0000 (12:25 -0800)
src/DartARM32/assembler_arm.cc		patch \| blob \| history
src/DartARM32/assembler_arm.h		patch \| blob \| history
src/IceAssemblerARM32.cpp		patch \| blob \| history
src/IceAssemblerARM32.h		patch \| blob \| history
src/IceInstARM32.cpp		patch \| blob \| history
tests_lit/assembler/arm32/vmla.ll	[new file with mode: 0644]	patch \| blob