Condition cond) {
EmitVFPddd(cond, 0, dd, dn, dm);
}
-#endif
-
+// Moved to Arm32::AssemblerARM32::vmlss()
void Assembler::vmlss(SRegister sd, SRegister sn, SRegister sm,
Condition cond) {
EmitVFPsss(cond, B6, sd, sn, sm);
}
-
+// Moved to Arm32::AssemblerARM32::vmlsd()
void Assembler::vmlsd(DRegister dd, DRegister dn, DRegister dm,
Condition cond) {
EmitVFPddd(cond, B6, dd, dn, dm);
}
-#if 0
// Moved to Arm32::AssemblerARM32::vdivs()
void Assembler::vdivs(SRegister sd, SRegister sn, SRegister sm,
Condition cond) {
--- a/src/DartARM32/assembler_arm.h
+++ b/src/DartARM32/assembler_arm.h
void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
// Moved to Arm32::AssemblerARM32::vmlad()
void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
-#endif
+ // Moved to Arm32::AssemblerARM32::vmlss()
void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
+ // Moved to Arm32::AssemblerARM32::vmlsd()
void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
-#if 0
// Moved to Arm32::AssemblerARM32::vdivs()
void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
// Moved to Arm32::AssemblerARM32::vdivd()
--- a/src/IceAssemblerARM32.cpp
+++ b/src/IceAssemblerARM32.cpp
emitVFPsss(Cond, VmlasOpcode, OpSd, OpSn, OpSm, Vmlas);
}
+void AssemblerARM32::vmlsd(const Operand *OpDd, const Operand *OpDn,
+ const Operand *OpDm, CondARM32::Cond Cond) {
+ // VMLA, VMLS (floating-point), ARM section A8.8.337, encoding A2:
+ // vmls<c>.f64 <Dd>, <Dn>, <Dm>
+ //
+ // cccc11100d00nnnndddd1011n1M0mmmm where cccc=Cond, Ddddd=Dd, Nnnnn=Dn, and
+ // Mmmmm=Dm
+  constexpr const char *Vmlsd = "vmlsd";
+  constexpr IValueT VmlsdOpcode = B6;
+  emitVFPddd(Cond, VmlsdOpcode, OpDd, OpDn, OpDm, Vmlsd);
+}
+
+void AssemblerARM32::vmlss(const Operand *OpSd, const Operand *OpSn,
+ const Operand *OpSm, CondARM32::Cond Cond) {
+ // VMLA, VMLS (floating-point), ARM section A8.8.337, encoding A2:
+ // vmls<c>.f32 <Sd>, <Sn>, <Sm>
+ //
+ // cccc11100d00nnnndddd1010n1M0mmmm where cccc=Cond, ddddD=Sd, nnnnN=Sn, and
+ // mmmmM=Sm
+  constexpr const char *Vmlss = "vmlss";
+  constexpr IValueT VmlssOpcode = B6;
+  emitVFPsss(Cond, VmlssOpcode, OpSd, OpSn, OpSm, Vmlss);
+}
+
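The encoding comments above can be cross-checked against the two words the new lit test at the end of this patch expects the disassembler to print (ee4aaa4b and ee445be6). The following standalone sketch packs the fields exactly as the comments describe; it deliberately bypasses Subzero's emitVFPsss/emitVFPddd helpers, so read it as an illustration of the bit layout, not of the emitter internals.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t AL = 0xE;     // cond field for the AL condition
  const uint32_t Op = 1u << 6; // B6 distinguishes VMLS from VMLA

  // vmls.f32 s21, s20, s22 -> cccc11100d00nnnndddd1010n1M0mmmm.
  // For S registers the LOW bit of the register number supplies d/n/M.
  uint32_t F32 = (AL << 28) | (0xEu << 24) | ((21u & 1) << 22) |
                 ((20u >> 1) << 16) | ((21u >> 1) << 12) | (0xAu << 8) |
                 ((20u & 1) << 7) | Op | ((22u & 1) << 5) | (22u >> 1);
  assert(F32 == 0xEE4AAA4B); // word expected by the f32 DIS check below

  // vmls.f64 d21, d20, d22 -> cccc11100d00nnnndddd1011n1M0mmmm.
  // For D registers the HIGH bit of the register number supplies d/n/M.
  uint32_t F64 = (AL << 28) | (0xEu << 24) | ((21u >> 4) << 22) |
                 ((20u & 0xF) << 16) | ((21u & 0xF) << 12) | (0xBu << 8) |
                 ((20u >> 4) << 7) | Op | ((22u >> 4) << 5) | (22u & 0xF);
  assert(F64 == 0xEE445BE6); // word expected by the f64 DIS check below

  return 0;
}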
void AssemblerARM32::vmrsAPSR_nzcv(CondARM32::Cond Cond) {
  // VMRS - ARM section A8.8.348, encoding A1:
// vmrs<c> APSR_nzcv, FPSCR
--- a/src/IceAssemblerARM32.h
+++ b/src/IceAssemblerARM32.h
void vmlas(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm,
CondARM32::Cond Cond);
+ void vmlsd(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
+ CondARM32::Cond Cond);
+
+ void vmlss(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm,
+ CondARM32::Cond Cond);
+
// Uses APSR_nzcv as register
void vmrsAPSR_nzcv(CondARM32::Cond Cond);
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
default:
// TODO(kschimpf) Figure out how vector operations apply.
emitUsingTextFixup(Func);
- break;
+ return;
case IceType_f32:
Asm->vmlas(getDest(), getSrc(1), getSrc(2), CondARM32::AL);
assert(!Asm->needsTextFixup());
- break;
+ return;
case IceType_f64:
Asm->vmlad(getDest(), getSrc(1), getSrc(2), CondARM32::AL);
assert(!Asm->needsTextFixup());
- break;
+ return;
+ }
+}
+
+template <> void InstARM32Vmls::emitIAS(const Cfg *Func) const {
+  // Note: Dest == getSrc(0) for the four-address FP instructions.
+ assert(getSrcSize() == 3);
+ auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+ const Variable *Dest = getDest();
+ switch (Dest->getType()) {
+ default:
+ // TODO(kschimpf) Figure out how vector operations apply.
+ emitUsingTextFixup(Func);
+ return;
+ case IceType_f32:
+ Asm->vmlss(getDest(), getSrc(1), getSrc(2), CondARM32::AL);
+ assert(!Asm->needsTextFixup());
+ return;
+ case IceType_f64:
+ Asm->vmlsd(getDest(), getSrc(1), getSrc(2), CondARM32::AL);
+ assert(!Asm->needsTextFixup());
+ return;
}
}
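VMLS reads its destination as an accumulator: the f32 form computes Sd = Sd - Sn * Sm, and the f64 form the analogue, which is why the lowering passes getDest() as the first register operand and leans on the Dest == getSrc(0) invariant noted in the comment above. A minimal scalar model (hypothetical names, not Subzero API) makes the read-modify-write explicit:

// Acc stands in for the register that is both getDest() and getSrc(0);
// getSrc(1) and getSrc(2) supply the multiplicands.
static float vmlsF32(float Acc, float Sn, float Sm) { return Acc - Sn * Sm; }
static double vmlsF64(double Acc, double Dn, double Dm) { return Acc - Dn * Dm; }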
--- /dev/null
+; Show that we can take advantage of the vmls instruction for floating point
+; operations during optimization.
+
+; Note that we use -O2 to force the result of the fmul to be
+; (immediately) available for the fsub. When using -Om1, the merge of
+; fmul and fsub does not happen.
+
+; REQUIRES: allow_dump
+
+; Compile using standalone assembler.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
+; RUN: -reg-use=s20,s21,s22,d20,d21,d22 \
+; RUN: | FileCheck %s --check-prefix=ASM
+
+; Show bytes in assembled standalone code.
+; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
+; RUN: --args -O2 -reg-use=s20,s21,s22,d20,d21,d22 \
+; RUN: | FileCheck %s --check-prefix=DIS
+
+; Compile using integrated assembler.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
+; RUN: -reg-use=s20,s21,s22,d20,d21,d22 \
+; RUN: | FileCheck %s --check-prefix=IASM
+
+; Show bytes in assembled integrated code.
+; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
+; RUN: --args -O2 -reg-use=s20,s21,s22,d20,d21,d22 \
+; RUN: | FileCheck %s --check-prefix=DIS
+
+define internal float @mulSubFloat(float %f1, float %f2) {
+; ASM-LABEL: mulSubFloat:
+; DIS-LABEL: 00000000 <mulSubFloat>:
+
+ %v1 = fmul float %f1, 1.5
+ %v2 = fsub float %f2, %v1
+
+; ASM: vmls.f32 s21, s20, s22
+; DIS: 10: ee4aaa4b
+; IASM-NOT: vmls.f32
+
+ ret float %v2
+}
+
+define internal double @mulSubDouble(double %f1, double %f2) {
+; ASM-LABEL: mulSubDouble:
+; DIS-LABEL: 00000020 <mulSubDouble>:
+
+ %v1 = fmul double %f1, 1.5
+ %v2 = fsub double %f2, %v1
+
+; ASM: vmls.f64 d21, d20, d22
+; DIS: 2c: ee445be6
+; IASM-NOT: vmls.f64
+
+ ret double %v2
+}
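Reading the checks together with vmls's accumulate-and-subtract semantics: vmls.f32 s21, s20, s22 computes s21 = s21 - s20 * s22, which suggests %f2 was allocated to s21 and is updated in place to yield %v2 (likewise d21 in the f64 case). A plain reference model of the two functions, assuming VFP's unfused multiply-then-subtract agrees with ordinary C++ arithmetic under the default rounding mode:

// Reference model of the two test functions: each is one multiply
// feeding one subtract, the exact shape the vmls merge targets.
float mulSubFloatRef(float f1, float f2) { return f2 - f1 * 1.5f; }
double mulSubDoubleRef(double f1, double f2) { return f2 - f1 * 1.5; }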