Support saturating vector add and subtract on ARM32.

author Casey Dahlin <sadmac@google.com>

Wed, 28 Jun 2017 20:58:58 +0000 (13:58 -0700)

committer Nicolas Capens <nicolascapens@google.com>

Fri, 29 Sep 2017 00:27:49 +0000 (00:27 +0000)
author Casey Dahlin <sadmac@google.com>
Wed, 28 Jun 2017 20:58:58 +0000 (13:58 -0700)
committer Nicolas Capens <nicolascapens@google.com>
Fri, 29 Sep 2017 00:27:49 +0000 (00:27 +0000)
diff --git a/third_party/subzero/src/IceAssemblerARM32.cpp b/third_party/subzero/src/IceAssemblerARM32.cpp

index 7751cb9..2f1fa3c 100644 (file)
--- a/third_party/subzero/src/IceAssemblerARM32.cpp
+++ b/third_party/subzero/src/IceAssemblerARM32.cpp
@@ -3381,6 +3381,62 @@ void AssemblerARM32::vsubd(const Operand *OpDd, const Operand *OpDn,
    emitVFPddd(Cond, VsubdOpcode, OpDd, OpDn, OpDm, Vsubd);
  }
  
+void AssemblerARM32::vqaddqi(Type ElmtTy, const Operand *OpQd,
+                             const Operand *OpQm, const Operand *OpQn) {
+  // VQADD (integer) - ARM section A8.6.369, encoding A1:
+  //   vqadd<c><q>.s<size> {<Qd>,} <Qn>, <Qm>
+  //
+  // 111100100Dssnnn0ddd00000N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
+  // size is 8, 16, 32, or 64.
+  assert(isScalarIntegerType(ElmtTy) &&
+         "vqaddqi expects vector with integer element type");
+  constexpr const char *Vqaddqi = "vqaddqi";
+  constexpr IValueT VqaddqiOpcode = B4;
+  emitSIMDqqq(VqaddqiOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqaddqi);
+}
+
+void AssemblerARM32::vqaddqu(Type ElmtTy, const Operand *OpQd,
+                             const Operand *OpQm, const Operand *OpQn) {
+  // VQADD (integer) - ARM section A8.6.369, encoding A1:
+  //   vqadd<c><q>.u<size> {<Qd>,} <Qn>, <Qm>
+  //
+  // 111100110Dssnnn0ddd00000N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
+  // size is 8, 16, 32, or 64.
+  assert(isScalarIntegerType(ElmtTy) &&
+         "vqaddqu expects vector with integer element type");
+  constexpr const char *Vqaddqu = "vqaddqu";
+  constexpr IValueT VqaddquOpcode = B24 | B4;
+  emitSIMDqqq(VqaddquOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqaddqu);
+}
+
+void AssemblerARM32::vqsubqi(Type ElmtTy, const Operand *OpQd,
+                             const Operand *OpQm, const Operand *OpQn) {
+  // VQSUB (integer) - ARM section A8.6.369, encoding A1:
+  //   vqsub<c><q>.s<size> {<Qd>,} <Qn>, <Qm>
+  //
+  // 111100100Dssnnn0ddd00010N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
+  // size is 8, 16, 32, or 64.
+  assert(isScalarIntegerType(ElmtTy) &&
+         "vqsubqi expects vector with integer element type");
+  constexpr const char *Vqsubqi = "vqsubqi";
+  constexpr IValueT VqsubqiOpcode = B9 | B4;
+  emitSIMDqqq(VqsubqiOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqsubqi);
+}
+
+void AssemblerARM32::vqsubqu(Type ElmtTy, const Operand *OpQd,
+                             const Operand *OpQm, const Operand *OpQn) {
+  // VQSUB (integer) - ARM section A8.6.369, encoding A1:
+  //   vqsub<c><q>.u<size> {<Qd>,} <Qn>, <Qm>
+  //
+  // 111100110Dssnnn0ddd00010N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
+  // size is 8, 16, 32, or 64.
+  assert(isScalarIntegerType(ElmtTy) &&
+         "vqsubqu expects vector with integer element type");
+  constexpr const char *Vqsubqu = "vqsubqu";
+  constexpr IValueT VqsubquOpcode = B24 | B9 | B4;
+  emitSIMDqqq(VqsubquOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqsubqu);
+}
+
  void AssemblerARM32::vsubqi(Type ElmtTy, const Operand *OpQd,
                              const Operand *OpQm, const Operand *OpQn) {
    // VSUB (integer) - ARM section A8.8.414, encoding A1:
diff --git a/third_party/subzero/src/IceAssemblerARM32.h b/third_party/subzero/src/IceAssemblerARM32.h

index 2e376a9..a7e8481 100644 (file)
--- a/third_party/subzero/src/IceAssemblerARM32.h
+++ b/third_party/subzero/src/IceAssemblerARM32.h
@@ -591,6 +591,18 @@ public:
    void vsubqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
                const Operand *OpQn);
  
+  // Integer vector saturating subtract.
+  void vqsubqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
+               const Operand *OpQn);
+  void vqsubqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
+               const Operand *OpQn);
+
+  // Integer vector saturating add.
+  void vqaddqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
+               const Operand *OpQn);
+  void vqaddqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
+               const Operand *OpQn);
+
    // Float vector subtract
    void vsubqf(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
  
diff --git a/third_party/subzero/src/IceInstARM32.cpp b/third_party/subzero/src/IceInstARM32.cpp

index c121f7a..043f4a6 100644 (file)
--- a/third_party/subzero/src/IceInstARM32.cpp
+++ b/third_party/subzero/src/IceInstARM32.cpp
@@ -1037,6 +1037,56 @@ template <> void InstARM32Vsub::emitIAS(const Cfg *Func) const {
    assert(!Asm->needsTextFixup());
  }
  
+template <> void InstARM32Vqadd::emitIAS(const Cfg *Func) const {
+  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+  const Variable *Dest = getDest();
+  Type DestTy = Dest->getType();
+  switch (DestTy) {
+  default:
+    llvm::report_fatal_error("Vqadd not defined on type " +
+                             typeStdString(DestTy));
+  case IceType_v16i8:
+  case IceType_v8i16:
+  case IceType_v4i32:
+    switch (Sign) {
+    case InstARM32::FS_None: // defaults to unsigned.
+    case InstARM32::FS_Unsigned:
+      Asm->vqaddqu(typeElementType(DestTy), Dest, getSrc(0), getSrc(1));
+      break;
+    case InstARM32::FS_Signed:
+      Asm->vqaddqi(typeElementType(DestTy), Dest, getSrc(0), getSrc(1));
+      break;
+    }
+    break;
+  }
+  assert(!Asm->needsTextFixup());
+}
+
+template <> void InstARM32Vqsub::emitIAS(const Cfg *Func) const {
+  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
+  const Variable *Dest = getDest();
+  Type DestTy = Dest->getType();
+  switch (DestTy) {
+  default:
+    llvm::report_fatal_error("Vqsub not defined on type " +
+                             typeStdString(DestTy));
+  case IceType_v16i8:
+  case IceType_v8i16:
+  case IceType_v4i32:
+    switch (Sign) {
+    case InstARM32::FS_None: // defaults to unsigned.
+    case InstARM32::FS_Unsigned:
+      Asm->vqsubqu(typeElementType(DestTy), Dest, getSrc(0), getSrc(1));
+      break;
+    case InstARM32::FS_Signed:
+      Asm->vqsubqi(typeElementType(DestTy), Dest, getSrc(0), getSrc(1));
+      break;
+    }
+    break;
+  }
+  assert(!Asm->needsTextFixup());
+}
+
  template <> void InstARM32Vmul::emitIAS(const Cfg *Func) const {
    auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
    const Variable *Dest = getDest();
@@ -1639,6 +1689,10 @@ template <> const char *InstARM32UnaryopFP<InstARM32::Vneg>::Opcode = "vneg";
  template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshl>::Opcode = "vshl";
  template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshr>::Opcode = "vshr";
  template <> const char *InstARM32Vsub::Opcode = "vsub";
+template <>
+const char *InstARM32ThreeAddrFP<InstARM32::Vqadd>::Opcode = "vqadd";
+template <>
+const char *InstARM32ThreeAddrFP<InstARM32::Vqsub>::Opcode = "vqsub";
  // Four-addr ops
  template <> const char *InstARM32Mla::Opcode = "mla";
  template <> const char *InstARM32Mls::Opcode = "mls";
@@ -3110,6 +3164,8 @@ template class InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
  template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
  template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
  template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
+template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>;
+template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>;
  
  template class InstARM32LoadBase<InstARM32::Ldr>;
  template class InstARM32LoadBase<InstARM32::Ldrex>;
diff --git a/third_party/subzero/src/IceInstARM32.h b/third_party/subzero/src/IceInstARM32.h

index 89f894d..e1344dc 100644 (file)
--- a/third_party/subzero/src/IceInstARM32.h
+++ b/third_party/subzero/src/IceInstARM32.h
@@ -442,6 +442,8 @@ public:
      Vmvn,
      Vneg,
      Vorr,
+    Vqadd,
+    Vqsub,
      Vshl,
      Vshr,
      Vsqrt,
@@ -1016,6 +1018,8 @@ using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
  using InstARM32Vmvn = InstARM32UnaryopFP<InstARM32::Vmvn>;
  using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
  using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
+using InstARM32Vqadd = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>;
+using InstARM32Vqsub = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>;
  using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
  using InstARM32Vshr = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
  using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
diff --git a/third_party/subzero/src/IceTargetLoweringARM32.cpp b/third_party/subzero/src/IceTargetLoweringARM32.cpp

index ace8023..65dca3a 100644 (file)
--- a/third_party/subzero/src/IceTargetLoweringARM32.cpp
+++ b/third_party/subzero/src/IceTargetLoweringARM32.cpp
@@ -5320,12 +5320,14 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
    case Intrinsics::Trap:
      _trap();
      return;
-  case Intrinsics::AddSaturateSigned: {
-    UnimplementedLoweringError(this, Instr);
-    return;
-  }
+  case Intrinsics::AddSaturateSigned:
    case Intrinsics::AddSaturateUnsigned: {
-    UnimplementedLoweringError(this, Instr);
+    bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned);
+    Variable *Src0 = legalizeToReg(Instr->getArg(0));
+    Variable *Src1 = legalizeToReg(Instr->getArg(1));
+    Variable *T = makeReg(DestTy);
+    _vqadd(T, Src0, Src1, Unsigned);
+    _mov(Dest, T);
      return;
    }
    case Intrinsics::LoadSubVector: {
@@ -5360,12 +5362,14 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
      UnimplementedLoweringError(this, Instr);
      return;
    }
-  case Intrinsics::SubtractSaturateSigned: {
-    UnimplementedLoweringError(this, Instr);
-    return;
-  }
+  case Intrinsics::SubtractSaturateSigned:
    case Intrinsics::SubtractSaturateUnsigned: {
-    UnimplementedLoweringError(this, Instr);
+    bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned);
+    Variable *Src0 = legalizeToReg(Instr->getArg(0));
+    Variable *Src1 = legalizeToReg(Instr->getArg(1));
+    Variable *T = makeReg(DestTy);
+    _vqsub(T, Src0, Src1, Unsigned);
+    _mov(Dest, T);
      return;
    }
    case Intrinsics::VectorPackSigned: {
diff --git a/third_party/subzero/src/IceTargetLoweringARM32.h b/third_party/subzero/src/IceTargetLoweringARM32.h

index f6cd66b..be848ed 100644 (file)
--- a/third_party/subzero/src/IceTargetLoweringARM32.h
+++ b/third_party/subzero/src/IceTargetLoweringARM32.h
@@ -910,6 +910,14 @@ protected:
    void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
      Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
    }
+  void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
+    Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
+        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
+  }
+  void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
+    Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
+        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
+  }
    InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
      return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
    }
author	Casey Dahlin <sadmac@google.com>
	Wed, 28 Jun 2017 20:58:58 +0000 (13:58 -0700)
committer	Nicolas Capens <nicolascapens@google.com>
	Fri, 29 Sep 2017 00:27:49 +0000 (00:27 +0000)
third_party/subzero/src/IceAssemblerARM32.cpp		patch \| blob \| history
third_party/subzero/src/IceAssemblerARM32.h		patch \| blob \| history
third_party/subzero/src/IceInstARM32.cpp		patch \| blob \| history
third_party/subzero/src/IceInstARM32.h		patch \| blob \| history
third_party/subzero/src/IceTargetLoweringARM32.cpp		patch \| blob \| history
third_party/subzero/src/IceTargetLoweringARM32.h		patch \| blob \| history