From: Casey Dahlin Date: Wed, 28 Jun 2017 20:58:58 +0000 (-0700) Subject: Support saturating vector add and subtract on ARM32. X-Git-Tag: android-x86-7.1-r3~363 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=b40560b18e145d9523d7ad2d19a30861811d169f;p=android-x86%2Fexternal-swiftshader.git Support saturating vector add and subtract on ARM32. Bug b/37495545 Change-Id: I1d025f894bb7cf08dcaafd950605781633596ab3 Reviewed-on: https://chromium-review.googlesource.com/689098 Reviewed-by: Jim Stichnoth Reviewed-on: https://swiftshader-review.googlesource.com/12748 Reviewed-by: Nicolas Capens Tested-by: Nicolas Capens --- diff --git a/third_party/subzero/src/IceAssemblerARM32.cpp b/third_party/subzero/src/IceAssemblerARM32.cpp index 7751cb98e..2f1fa3cb0 100644 --- a/third_party/subzero/src/IceAssemblerARM32.cpp +++ b/third_party/subzero/src/IceAssemblerARM32.cpp @@ -3381,6 +3381,62 @@ void AssemblerARM32::vsubd(const Operand *OpDd, const Operand *OpDn, emitVFPddd(Cond, VsubdOpcode, OpDd, OpDn, OpDm, Vsubd); } +void AssemblerARM32::vqaddqi(Type ElmtTy, const Operand *OpQd, + const Operand *OpQm, const Operand *OpQn) { + // VQADD (integer) - ARM section A8.6.369, encoding A1: + // vqadd.s<size> {<Qd>,} <Qn>, <Qm> + // + // 111100100Dssnnn0ddd00000N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm, + // size is 8, 16, 32, or 64. + assert(isScalarIntegerType(ElmtTy) && + "vqaddqi expects vector with integer element type"); + constexpr const char *Vqaddqi = "vqaddqi"; + constexpr IValueT VqaddqiOpcode = B4; + emitSIMDqqq(VqaddqiOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqaddqi); +} + +void AssemblerARM32::vqaddqu(Type ElmtTy, const Operand *OpQd, + const Operand *OpQm, const Operand *OpQn) { + // VQADD (integer) - ARM section A8.6.369, encoding A1: + // vqadd.u<size> {<Qd>,} <Qn>, <Qm> + // + // 111100110Dssnnn0ddd00000N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm, + // size is 8, 16, 32, or 64. 
+ assert(isScalarIntegerType(ElmtTy) && + "vqaddqu expects vector with integer element type"); + constexpr const char *Vqaddqu = "vqaddqu"; + constexpr IValueT VqaddquOpcode = B24 | B4; + emitSIMDqqq(VqaddquOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqaddqu); +} + +void AssemblerARM32::vqsubqi(Type ElmtTy, const Operand *OpQd, + const Operand *OpQm, const Operand *OpQn) { + // VQSUB (integer) - ARM section A8.6.369, encoding A1: + // vqsub.s<size> {<Qd>,} <Qn>, <Qm> + // + // 111100100Dssnnn0ddd00010N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm, + // size is 8, 16, 32, or 64. + assert(isScalarIntegerType(ElmtTy) && + "vqsubqi expects vector with integer element type"); + constexpr const char *Vqsubqi = "vqsubqi"; + constexpr IValueT VqsubqiOpcode = B9 | B4; + emitSIMDqqq(VqsubqiOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqsubqi); +} + +void AssemblerARM32::vqsubqu(Type ElmtTy, const Operand *OpQd, + const Operand *OpQm, const Operand *OpQn) { + // VQSUB (integer) - ARM section A8.6.369, encoding A1: + // vqsub.u<size> {<Qd>,} <Qn>, <Qm> + // + // 111100110Dssnnn0ddd00010N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm, + // size is 8, 16, 32, or 64. + assert(isScalarIntegerType(ElmtTy) && + "vqsubqu expects vector with integer element type"); + constexpr const char *Vqsubqu = "vqsubqu"; + constexpr IValueT VqsubquOpcode = B24 | B9 | B4; + emitSIMDqqq(VqsubquOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqsubqu); +} + void AssemblerARM32::vsubqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm, const Operand *OpQn) { // VSUB (integer) - ARM section A8.8.414, encoding A1: diff --git a/third_party/subzero/src/IceAssemblerARM32.h b/third_party/subzero/src/IceAssemblerARM32.h index 2e376a984..a7e848120 100644 --- a/third_party/subzero/src/IceAssemblerARM32.h +++ b/third_party/subzero/src/IceAssemblerARM32.h @@ -591,6 +591,18 @@ public: void vsubqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm, const Operand *OpQn); + // Integer vector saturating subtract. 
+ void vqsubqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm, + const Operand *OpQn); + void vqsubqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm, + const Operand *OpQn); + + // Integer vector saturating add. + void vqaddqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm, + const Operand *OpQn); + void vqaddqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm, + const Operand *OpQn); + // Float vector subtract void vsubqf(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn); diff --git a/third_party/subzero/src/IceInstARM32.cpp b/third_party/subzero/src/IceInstARM32.cpp index c121f7aef..043f4a6da 100644 --- a/third_party/subzero/src/IceInstARM32.cpp +++ b/third_party/subzero/src/IceInstARM32.cpp @@ -1037,6 +1037,56 @@ template <> void InstARM32Vsub::emitIAS(const Cfg *Func) const { assert(!Asm->needsTextFixup()); } +template <> void InstARM32Vqadd::emitIAS(const Cfg *Func) const { + auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); + const Variable *Dest = getDest(); + Type DestTy = Dest->getType(); + switch (DestTy) { + default: + llvm::report_fatal_error("Vqadd not defined on type " + + typeStdString(DestTy)); + case IceType_v16i8: + case IceType_v8i16: + case IceType_v4i32: + switch (Sign) { + case InstARM32::FS_None: // defaults to unsigned. + case InstARM32::FS_Unsigned: + Asm->vqaddqu(typeElementType(DestTy), Dest, getSrc(0), getSrc(1)); + break; + case InstARM32::FS_Signed: + Asm->vqaddqi(typeElementType(DestTy), Dest, getSrc(0), getSrc(1)); + break; + } + break; + } + assert(!Asm->needsTextFixup()); +} + +template <> void InstARM32Vqsub::emitIAS(const Cfg *Func) const { + auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); + const Variable *Dest = getDest(); + Type DestTy = Dest->getType(); + switch (DestTy) { + default: + llvm::report_fatal_error("Vqsub not defined on type " + + typeStdString(DestTy)); + case IceType_v16i8: + case IceType_v8i16: + case IceType_v4i32: + switch (Sign) { + case InstARM32::FS_None: // defaults to unsigned. 
+ case InstARM32::FS_Unsigned: + Asm->vqsubqu(typeElementType(DestTy), Dest, getSrc(0), getSrc(1)); + break; + case InstARM32::FS_Signed: + Asm->vqsubqi(typeElementType(DestTy), Dest, getSrc(0), getSrc(1)); + break; + } + break; + } + assert(!Asm->needsTextFixup()); +} + template <> void InstARM32Vmul::emitIAS(const Cfg *Func) const { auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); const Variable *Dest = getDest(); @@ -1639,6 +1689,10 @@ template <> const char *InstARM32UnaryopFP<InstARM32::Vneg>::Opcode = "vneg"; template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshl>::Opcode = "vshl"; template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshr>::Opcode = "vshr"; template <> const char *InstARM32Vsub::Opcode = "vsub"; +template <> +const char *InstARM32ThreeAddrFP<InstARM32::Vqadd>::Opcode = "vqadd"; +template <> +const char *InstARM32ThreeAddrFP<InstARM32::Vqsub>::Opcode = "vqsub"; // Four-addr ops template <> const char *InstARM32Mla::Opcode = "mla"; template <> const char *InstARM32Mls::Opcode = "mls"; @@ -3110,6 +3164,8 @@ template class InstARM32UnaryopSignAwareFP<InstARM32::Vneg>; template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>; template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>; template class InstARM32ThreeAddrFP; +template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>; +template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>; template class InstARM32LoadBase; template class InstARM32LoadBase; diff --git a/third_party/subzero/src/IceInstARM32.h b/third_party/subzero/src/IceInstARM32.h index 89f894dae..e1344dc1b 100644 --- a/third_party/subzero/src/IceInstARM32.h +++ b/third_party/subzero/src/IceInstARM32.h @@ -442,6 +442,8 @@ public: Vmvn, Vneg, Vorr, + Vqadd, + Vqsub, Vshl, Vshr, Vsqrt, @@ -1016,6 +1018,8 @@ using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>; using InstARM32Vmvn = InstARM32UnaryopFP<InstARM32::Vmvn>; using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>; using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>; +using InstARM32Vqadd = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>; +using InstARM32Vqsub = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>; using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>; using InstARM32Vshr = 
InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>; using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>; diff --git a/third_party/subzero/src/IceTargetLoweringARM32.cpp b/third_party/subzero/src/IceTargetLoweringARM32.cpp index ace80235f..65dca3a19 100644 --- a/third_party/subzero/src/IceTargetLoweringARM32.cpp +++ b/third_party/subzero/src/IceTargetLoweringARM32.cpp @@ -5320,12 +5320,14 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { case Intrinsics::Trap: _trap(); return; - case Intrinsics::AddSaturateSigned: { - UnimplementedLoweringError(this, Instr); - return; - } + case Intrinsics::AddSaturateSigned: case Intrinsics::AddSaturateUnsigned: { - UnimplementedLoweringError(this, Instr); + bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned); + Variable *Src0 = legalizeToReg(Instr->getArg(0)); + Variable *Src1 = legalizeToReg(Instr->getArg(1)); + Variable *T = makeReg(DestTy); + _vqadd(T, Src0, Src1, Unsigned); + _mov(Dest, T); return; } case Intrinsics::LoadSubVector: { @@ -5360,12 +5362,14 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { UnimplementedLoweringError(this, Instr); return; } - case Intrinsics::SubtractSaturateSigned: { - UnimplementedLoweringError(this, Instr); - return; - } + case Intrinsics::SubtractSaturateSigned: case Intrinsics::SubtractSaturateUnsigned: { - UnimplementedLoweringError(this, Instr); + bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned); + Variable *Src0 = legalizeToReg(Instr->getArg(0)); + Variable *Src1 = legalizeToReg(Instr->getArg(1)); + Variable *T = makeReg(DestTy); + _vqsub(T, Src0, Src1, Unsigned); + _mov(Dest, T); return; } case Intrinsics::VectorPackSigned: { diff --git a/third_party/subzero/src/IceTargetLoweringARM32.h b/third_party/subzero/src/IceTargetLoweringARM32.h index f6cd66bac..be848ede3 100644 --- a/third_party/subzero/src/IceTargetLoweringARM32.h +++ b/third_party/subzero/src/IceTargetLoweringARM32.h @@ -910,6 +910,14 @@ protected: void _vorr(Variable *Dest, Variable *Src0, 
Variable *Src1) { Context.insert<InstARM32Vorr>(Dest, Src0, Src1); } + void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) { + Context.insert<InstARM32Vqadd>(Dest, Src0, Src1) + ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed); + } + void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) { + Context.insert<InstARM32Vqsub>(Dest, Src0, Src1) + ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed); + } InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) { return Context.insert<InstARM32Vshl>(Dest, Src0, Src1); }