Subzero ARM32: Lower shift and zext, sext, and trunc.

author Jan Voung <jvoung@chromium.org>

Fri, 5 Jun 2015 00:02:31 +0000 (17:02 -0700)

committer Jan Voung <jvoung@chromium.org>

Fri, 5 Jun 2015 00:02:31 +0000 (17:02 -0700)
author Jan Voung <jvoung@chromium.org>
Fri, 5 Jun 2015 00:02:31 +0000 (17:02 -0700)
committer Jan Voung <jvoung@chromium.org>
Fri, 5 Jun 2015 00:02:31 +0000 (17:02 -0700)
diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp

index 4faef29..c62918b 100644 (file)
--- a/src/IceInstARM32.cpp
+++ b/src/IceInstARM32.cpp
@@ -77,6 +77,24 @@ CondARM32::Cond InstARM32::getOppositeCondition(CondARM32::Cond Cond) {
    return InstARM32CondAttributes[Cond].Opposite;
  }
  
+void InstARM32Pred::emitUnaryopGPR(const char *Opcode,
+                                   const InstARM32Pred *Inst, const Cfg *Func) {
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(Inst->getSrcSize() == 1); // Unary op: exactly one source operand.
+  Type SrcTy = Inst->getSrc(0)->getType();
+  Type DestTy = Inst->getDest()->getType();
+  Str << "\t" << Opcode;
+  // Sxt and Uxt take a width suffix derived from the source type (their
+  // source and dest types differ). Other unary ops have matching source
+  // and dest types, so the bare opcode is emitted with no suffix.
+  if (SrcTy != DestTy)
+    Str << getWidthString(SrcTy);
+  Str << "\t"; // NOTE(review): no predicate suffix is printed here - confirm.
+  Inst->getDest()->emit(Func);
+  Str << ", ";
+  Inst->getSrc(0)->emit(Func);
+}
+
  void InstARM32Pred::emitTwoAddr(const char *Opcode, const InstARM32Pred *Inst,
                                  const Cfg *Func) {
    if (!ALLOW_DUMP)
@@ -305,17 +323,22 @@ template <> const char *InstARM32Movt::Opcode = "movt";
  // Unary ops
  template <> const char *InstARM32Movw::Opcode = "movw";
  template <> const char *InstARM32Mvn::Opcode = "mvn";
+template <> const char *InstARM32Sxt::Opcode = "sxt"; // still requires b/h
+template <> const char *InstARM32Uxt::Opcode = "uxt"; // still requires b/h
  // Mov-like ops
  template <> const char *InstARM32Mov::Opcode = "mov";
  // Three-addr ops
  template <> const char *InstARM32Adc::Opcode = "adc";
  template <> const char *InstARM32Add::Opcode = "add";
  template <> const char *InstARM32And::Opcode = "and";
+template <> const char *InstARM32Asr::Opcode = "asr";
  template <> const char *InstARM32Bic::Opcode = "bic";
  template <> const char *InstARM32Eor::Opcode = "eor";
  template <> const char *InstARM32Lsl::Opcode = "lsl";
+template <> const char *InstARM32Lsr::Opcode = "lsr";
  template <> const char *InstARM32Mul::Opcode = "mul";
  template <> const char *InstARM32Orr::Opcode = "orr";
+template <> const char *InstARM32Rsb::Opcode = "rsb";
  template <> const char *InstARM32Sbc::Opcode = "sbc";
  template <> const char *InstARM32Sub::Opcode = "sub";
  
@@ -685,7 +708,8 @@ void InstARM32Str::dump(const Cfg *Func) const {
    if (!ALLOW_DUMP)
      return;
    Ostream &Str = Func->getContext()->getStrDump();
-  dumpOpcodePred(Str, "str", getDest()->getType());
+  Type Ty = getSrc(0)->getType();
+  dumpOpcodePred(Str, "str", Ty);
    Str << " ";
    getSrc(1)->dump(Func);
    Str << ", ";
diff --git a/src/IceInstARM32.h b/src/IceInstARM32.h

index 3f5f6bd..1ba5d4e 100644 (file)
--- a/src/IceInstARM32.h
+++ b/src/IceInstARM32.h
@@ -259,6 +259,7 @@ public:
      Adc,
      Add,
      And,
+    Asr,
      Bic,
      Br,
      Call,
@@ -266,6 +267,7 @@ public:
      Eor,
      Ldr,
      Lsl,
+    Lsr,
      Mla,
      Mov,
      Movt,
@@ -276,10 +278,13 @@ public:
      Pop,
      Push,
      Ret,
+    Rsb,
      Sbc,
      Str,
      Sub,
-    Umull
+    Sxt,
+    Umull,
+    Uxt
    };
  
    static const char *getWidthString(Type Ty);
@@ -314,6 +319,8 @@ public:
    void dumpOpcodePred(Ostream &Str, const char *Opcode, Type Ty) const;
  
    // Shared emit routines for common forms of instructions.
+  static void emitUnaryopGPR(const char *Opcode, const InstARM32Pred *Inst,
+                             const Cfg *Func);
    static void emitTwoAddr(const char *Opcode, const InstARM32Pred *Inst,
                            const Cfg *Func);
    static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Inst,
@@ -345,12 +352,7 @@ public:
    void emit(const Cfg *Func) const override {
      if (!ALLOW_DUMP)
        return;
-    Ostream &Str = Func->getContext()->getStrEmit();
-    assert(getSrcSize() == 1);
-    Str << "\t" << Opcode << "\t";
-    getDest()->emit(Func);
-    Str << ", ";
-    getSrc(0)->emit(Func);
+    emitUnaryopGPR(Opcode, this, Func);
    }
    void emitIAS(const Cfg *Func) const override {
      (void)Func;
@@ -521,11 +523,14 @@ private:
  typedef InstARM32ThreeAddrGPR<InstARM32::Adc> InstARM32Adc;
  typedef InstARM32ThreeAddrGPR<InstARM32::Add> InstARM32Add;
  typedef InstARM32ThreeAddrGPR<InstARM32::And> InstARM32And;
+typedef InstARM32ThreeAddrGPR<InstARM32::Asr> InstARM32Asr;
  typedef InstARM32ThreeAddrGPR<InstARM32::Bic> InstARM32Bic;
  typedef InstARM32ThreeAddrGPR<InstARM32::Eor> InstARM32Eor;
  typedef InstARM32ThreeAddrGPR<InstARM32::Lsl> InstARM32Lsl;
+typedef InstARM32ThreeAddrGPR<InstARM32::Lsr> InstARM32Lsr;
  typedef InstARM32ThreeAddrGPR<InstARM32::Mul> InstARM32Mul;
  typedef InstARM32ThreeAddrGPR<InstARM32::Orr> InstARM32Orr;
+typedef InstARM32ThreeAddrGPR<InstARM32::Rsb> InstARM32Rsb;
  typedef InstARM32ThreeAddrGPR<InstARM32::Sbc> InstARM32Sbc;
  typedef InstARM32ThreeAddrGPR<InstARM32::Sub> InstARM32Sub;
  // Move instruction (variable <- flex). This is more of a pseudo-inst.
@@ -537,6 +542,11 @@ typedef InstARM32Movlike<InstARM32::Mov> InstARM32Mov;
  typedef InstARM32TwoAddrGPR<InstARM32::Movt> InstARM32Movt;
  typedef InstARM32UnaryopGPR<InstARM32::Movw> InstARM32Movw;
  typedef InstARM32UnaryopGPR<InstARM32::Mvn> InstARM32Mvn;
+// Technically, the uxt{b,h} and sxt{b,h} instructions have a rotation
+// operand as well (rotate source by 8, 16, 24 bits prior to extending),
+// but we aren't using that for now, so just model as a Unaryop.
+typedef InstARM32UnaryopGPR<InstARM32::Sxt> InstARM32Sxt;
+typedef InstARM32UnaryopGPR<InstARM32::Uxt> InstARM32Uxt;
  
  // Direct branch instruction.
  class InstARM32Br : public InstARM32Pred {
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp

index 37f5e19..8f7c331 100644 (file)
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -1069,9 +1069,90 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
        _mov(DestLo, T_Lo);
        _mov(DestHi, T_Hi);
      } break;
-    case InstArithmetic::Shl:
+    case InstArithmetic::Shl: {
+      // a=b<<c ==>
+      // GCC 4.8 does:
+      // sub t_c1, c.lo, #32
+      // lsl t_hi, b.hi, c.lo
+      // orr t_hi, t_hi, b.lo, lsl t_c1
+      // rsb t_c2, c.lo, #32
+      // orr t_hi, t_hi, b.lo, lsr t_c2
+      // lsl t_lo, b.lo, c.lo
+      // a.lo = t_lo
+      // a.hi = t_hi
+      // Can be strength-reduced for constant-shifts, but we don't do
+      // that for now.
+      // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
+      // On ARM, shifts only take the lower 8 bits of the shift register,
+      // and saturate to the range 0-32, so the negative value will
+      // saturate to 32.
+      Variable *T_Hi = makeReg(IceType_i32);
+      Variable *Src1RLo = legalizeToVar(Src1Lo);
+      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
+      Variable *T_C1 = makeReg(IceType_i32);
+      Variable *T_C2 = makeReg(IceType_i32);
+      _sub(T_C1, Src1RLo, ThirtyTwo);
+      _lsl(T_Hi, Src0RHi, Src1RLo);
+      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
+                                                   OperandARM32::LSL, T_C1));
+      _rsb(T_C2, Src1RLo, ThirtyTwo);
+      _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
+                                                   OperandARM32::LSR, T_C2));
+      _mov(DestHi, T_Hi);
+      Variable *T_Lo = makeReg(IceType_i32);
+      // _mov seems to sometimes have better register preferencing than lsl.
+      // Otherwise mov w/ lsl shifted register is a pseudo-instruction
+      // that maps to lsl.
+      _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
+                                             OperandARM32::LSL, Src1RLo));
+      _mov(DestLo, T_Lo);
+    } break;
      case InstArithmetic::Lshr:
-    case InstArithmetic::Ashr:
+    // a=b>>c (unsigned) ==>
+    // GCC 4.8 does:
+    // rsb t_c1, c.lo, #32
+    // lsr t_lo, b.lo, c.lo
+    // orr t_lo, t_lo, b.hi, lsl t_c1
+    // sub t_c2, c.lo, #32
+    // orr t_lo, t_lo, b.hi, lsr t_c2
+    // lsr t_hi, b.hi, c.lo
+    // a.lo = t_lo
+    // a.hi = t_hi
+    case InstArithmetic::Ashr: {
+      // a=b>>c (signed) ==> ...
+      // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
+      // and the next orr should be conditioned on PLUS. The last two
+      // right shifts should also be arithmetic.
+      bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
+      Variable *T_Lo = makeReg(IceType_i32);
+      Variable *Src1RLo = legalizeToVar(Src1Lo);
+      Constant *ThirtyTwo = Ctx->getConstantInt32(32);
+      Variable *T_C1 = makeReg(IceType_i32);
+      Variable *T_C2 = makeReg(IceType_i32);
+      _rsb(T_C1, Src1RLo, ThirtyTwo);
+      _lsr(T_Lo, Src0RLo, Src1RLo);
+      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
+                                                   OperandARM32::LSL, T_C1));
+      OperandARM32::ShiftKind RShiftKind;
+      CondARM32::Cond Pred;
+      if (IsAshr) {
+        _subs(T_C2, Src1RLo, ThirtyTwo);
+        RShiftKind = OperandARM32::ASR;
+        Pred = CondARM32::PL;
+      } else {
+        _sub(T_C2, Src1RLo, ThirtyTwo);
+        RShiftKind = OperandARM32::LSR;
+        Pred = CondARM32::AL;
+      }
+      _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
+                                                   RShiftKind, T_C2),
+           Pred);
+      _mov(DestLo, T_Lo);
+      Variable *T_Hi = makeReg(IceType_i32);
+      _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
+                                             RShiftKind, Src1RLo));
+      _mov(DestHi, T_Hi);
+    } break;
      case InstArithmetic::Udiv:
      case InstArithmetic::Sdiv:
      case InstArithmetic::Urem:
@@ -1122,13 +1203,16 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
        _mov(Dest, T);
      } break;
      case InstArithmetic::Shl:
-      UnimplementedError(Func->getContext()->getFlags());
+      _lsl(T, Src0R, Src1);
+      _mov(Dest, T);
        break;
      case InstArithmetic::Lshr:
-      UnimplementedError(Func->getContext()->getFlags());
+      _lsr(T, Src0R, Src1);
+      _mov(Dest, T);
        break;
      case InstArithmetic::Ashr:
-      UnimplementedError(Func->getContext()->getFlags());
+      _asr(T, Src0R, Src1);
+      _mov(Dest, T);
        break;
      case InstArithmetic::Udiv:
        UnimplementedError(Func->getContext()->getFlags());
@@ -1311,20 +1395,123 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
  
  void TargetARM32::lowerCast(const InstCast *Inst) {
    InstCast::OpKind CastKind = Inst->getCastKind();
+  Variable *Dest = Inst->getDest();
+  Operand *Src0 = Inst->getSrc(0);
    switch (CastKind) {
    default:
      Func->setError("Cast type not supported");
      return;
    case InstCast::Sext: {
-    UnimplementedError(Func->getContext()->getFlags());
+    if (isVectorType(Dest->getType())) {
+      UnimplementedError(Func->getContext()->getFlags());
+    } else if (Dest->getType() == IceType_i64) {
+      // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
+      Constant *ShiftAmt = Ctx->getConstantInt32(31);
+      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+      Variable *T_Lo = makeReg(DestLo->getType());
+      if (Src0->getType() == IceType_i32) {
+        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
+        _mov(T_Lo, Src0RF);
+      } else if (Src0->getType() == IceType_i1) {
+        Variable *Src0R = legalizeToVar(Src0);
+        _lsl(T_Lo, Src0R, ShiftAmt);
+        _asr(T_Lo, T_Lo, ShiftAmt);
+      } else {
+        Variable *Src0R = legalizeToVar(Src0);
+        _sxt(T_Lo, Src0R);
+      }
+      _mov(DestLo, T_Lo);
+      Variable *T_Hi = makeReg(DestHi->getType());
+      if (Src0->getType() != IceType_i1) {
+        _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
+                                               OperandARM32::ASR, ShiftAmt));
+      } else {
+        // For i1, the asr instruction is already done above.
+        _mov(T_Hi, T_Lo);
+      }
+      _mov(DestHi, T_Hi);
+    } else if (Src0->getType() == IceType_i1) {
+      // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
+      // lsl t1, src_reg, 31
+      // asr t1, t1, 31
+      // dst = t1
+      Variable *Src0R = legalizeToVar(Src0);
+      Constant *ShiftAmt = Ctx->getConstantInt32(31);
+      Variable *T = makeReg(Dest->getType());
+      _lsl(T, Src0R, ShiftAmt);
+      _asr(T, T, ShiftAmt);
+      _mov(Dest, T);
+    } else {
+      // t1 = sxt src; dst = t1
+      Variable *Src0R = legalizeToVar(Src0);
+      Variable *T = makeReg(Dest->getType());
+      _sxt(T, Src0R);
+      _mov(Dest, T);
+    }
      break;
    }
    case InstCast::Zext: {
-    UnimplementedError(Func->getContext()->getFlags());
+    if (isVectorType(Dest->getType())) {
+      UnimplementedError(Func->getContext()->getFlags());
+    } else if (Dest->getType() == IceType_i64) {
+      // t1=uxtb src; dst.lo=t1; dst.hi=0
+      Constant *Zero = Ctx->getConstantZero(IceType_i32);
+      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+      Variable *T_Lo = makeReg(DestLo->getType());
+      // i32 and i1 can just take up the whole register.
+      // i32 doesn't need uxt, while i1 will have an and mask later anyway.
+      if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
+        Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
+        _mov(T_Lo, Src0RF);
+      } else {
+        Variable *Src0R = legalizeToVar(Src0);
+        _uxt(T_Lo, Src0R);
+      }
+      if (Src0->getType() == IceType_i1) {
+        Constant *One = Ctx->getConstantInt32(1);
+        _and(T_Lo, T_Lo, One);
+      }
+      _mov(DestLo, T_Lo);
+      Variable *T_Hi = makeReg(DestLo->getType());
+      _mov(T_Hi, Zero);
+      _mov(DestHi, T_Hi);
+    } else if (Src0->getType() == IceType_i1) {
+      // t = Src0; t &= 1; Dest = t
+      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
+      Constant *One = Ctx->getConstantInt32(1);
+      Variable *T = makeReg(Dest->getType());
+      // Just use _mov instead of _uxt since all registers are 32-bit.
+      // _uxt requires the source to be a register so could have required
+      // a _mov from legalize anyway.
+      _mov(T, Src0RF);
+      _and(T, T, One);
+      _mov(Dest, T);
+    } else {
+      // t1 = uxt src; dst = t1
+      Variable *Src0R = legalizeToVar(Src0);
+      Variable *T = makeReg(Dest->getType());
+      _uxt(T, Src0R);
+      _mov(Dest, T);
+    }
      break;
    }
    case InstCast::Trunc: {
-    UnimplementedError(Func->getContext()->getFlags());
+    if (isVectorType(Dest->getType())) {
+      UnimplementedError(Func->getContext()->getFlags());
+    } else {
+      Operand *Src0 = Inst->getSrc(0);
+      if (Src0->getType() == IceType_i64)
+        Src0 = loOperand(Src0);
+      Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
+      // t1 = trunc Src0RF; Dest = t1
+      Variable *T = makeReg(Dest->getType());
+      _mov(T, Src0RF);
+      if (Dest->getType() == IceType_i1)
+        _and(T, T, Ctx->getConstantInt1(1));
+      _mov(Dest, T);
+    }
      break;
    }
    case InstCast::Fptrunc:
@@ -1348,6 +1535,12 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
      break;
    }
    case InstCast::Bitcast: {
+    Operand *Src0 = Inst->getSrc(0);
+    if (Dest->getType() == Src0->getType()) {
+      InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
+      lowerAssign(Assign);
+      return;
+    }
      UnimplementedError(Func->getContext()->getFlags());
      break;
    }
@@ -1469,20 +1662,20 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
    //
    // We'll go with the LLVM way for now, since it's shorter and has just as
    // few dependencies.
-  int32_t ShiftAmount = 32 - getScalarIntBitWidth(Src0->getType());
-  assert(ShiftAmount >= 0);
+  int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
+  assert(ShiftAmt >= 0);
    Constant *ShiftConst = nullptr;
    Variable *Src0R = nullptr;
    Variable *T = makeReg(IceType_i32);
-  if (ShiftAmount) {
-    ShiftConst = Ctx->getConstantInt32(ShiftAmount);
+  if (ShiftAmt) {
+    ShiftConst = Ctx->getConstantInt32(ShiftAmt);
      Src0R = makeReg(IceType_i32);
      _lsl(Src0R, legalizeToVar(Src0), ShiftConst);
    } else {
      Src0R = legalizeToVar(Src0);
    }
    _mov(T, Zero);
-  if (ShiftAmount) {
+  if (ShiftAmt) {
      Variable *Src1R = legalizeToVar(Src1);
      OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
          Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h

index 8d282d7..53c9895 100644 (file)
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -153,6 +153,10 @@ protected:
              CondARM32::Cond Pred = CondARM32::AL) {
      Context.insert(InstARM32And::create(Func, Dest, Src0, Src1, Pred));
    }
+  void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
+            CondARM32::Cond Pred = CondARM32::AL) { // arithmetic shift right
+    Context.insert(InstARM32Asr::create(Func, Dest, Src0, Src1, Pred));
+  }
    void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
              CondARM32::Cond Pred = CondARM32::AL) {
      Context.insert(InstARM32Bic::create(Func, Dest, Src0, Src1, Pred));
@@ -181,6 +185,10 @@ protected:
              CondARM32::Cond Pred = CondARM32::AL) {
      Context.insert(InstARM32Lsl::create(Func, Dest, Src0, Src1, Pred));
    }
+  void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
+            CondARM32::Cond Pred = CondARM32::AL) { // logical shift right
+    Context.insert(InstARM32Lsr::create(Func, Dest, Src0, Src1, Pred));
+  }
    void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
              CondARM32::Cond Pred = CondARM32::AL) {
      Context.insert(InstARM32Mla::create(Func, Dest, Src0, Src1, Acc, Pred));
@@ -232,6 +240,10 @@ protected:
      for (Variable *Dest : Dests)
        Context.insert(InstFakeDef::create(Func, Dest));
    }
+  void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
+            CondARM32::Cond Pred = CondARM32::AL) { // rsb: Dest = Src1 - Src0
+    Context.insert(InstARM32Rsb::create(Func, Dest, Src0, Src1, Pred));
+  }
    void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
              CondARM32::Cond Pred = CondARM32::AL) {
      Context.insert(InstARM32Sbc::create(Func, Dest, Src0, Src1, Pred));
@@ -256,6 +268,10 @@ protected:
      Context.insert(
          InstARM32Sub::create(Func, Dest, Src0, Src1, Pred, SetFlags));
    }
+  void _sxt(Variable *Dest, Variable *Src0,
+            CondARM32::Cond Pred = CondARM32::AL) { // sign-extend Src0
+    Context.insert(InstARM32Sxt::create(Func, Dest, Src0, Pred));
+  }
    void _ret(Variable *LR, Variable *Src0 = nullptr) {
      Context.insert(InstARM32Ret::create(Func, LR, Src0));
    }
@@ -267,6 +283,10 @@ protected:
      // Note that the def is not predicated.
      Context.insert(InstFakeDef::create(Func, DestHi, DestLo));
    }
+  void _uxt(Variable *Dest, Variable *Src0,
+            CondARM32::Cond Pred = CondARM32::AL) { // zero-extend Src0
+    Context.insert(InstARM32Uxt::create(Func, Dest, Src0, Pred));
+  }
  
    bool UsesFramePointer;
    bool NeedsStackAlignment;
diff --git a/tests_lit/llvm2ice_tests/64bit.pnacl.ll b/tests_lit/llvm2ice_tests/64bit.pnacl.ll

index 8346f0e..3e3eb04 100644 (file)
--- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -340,6 +340,14 @@ entry:
  ; OPTM1: test {{.*}},0x20
  ; OPTM1: je
  
+; ARM32-LABEL: shl64BitSigned
+; ARM32: sub [[REG3:r.*]], [[REG2:r.*]], #32
+; ARM32: lsl [[REG1:r.*]], {{r.*}}, [[REG2]]
+; ARM32: orr [[REG1]], [[REG1]], [[REG0:r.*]], lsl [[REG3]]
+; ARM32: rsb [[REG4:r.*]], [[REG2]], #32
+; ARM32: orr [[REG1]], [[REG1]], [[REG0]], lsr [[REG4]]
+; ARM32: lsl {{.*}}, [[REG0]], [[REG2]]
+
  define internal i32 @shl64BitSignedTrunc(i64 %a, i64 %b) {
  entry:
    %shl = shl i64 %a, %b
@@ -358,6 +366,9 @@ entry:
  ; OPTM1: test {{.*}},0x20
  ; OPTM1: je
  
+; ARM32-LABEL: shl64BitSignedTrunc
+; ARM32: lsl r
+
  define internal i64 @shl64BitUnsigned(i64 %a, i64 %b) {
  entry:
    %shl = shl i64 %a, %b
@@ -375,6 +386,14 @@ entry:
  ; OPTM1: test {{.*}},0x20
  ; OPTM1: je
  
+; ARM32-LABEL: shl64BitUnsigned
+; ARM32: sub
+; ARM32: lsl
+; ARM32: orr
+; ARM32: rsb
+; ARM32: orr
+; ARM32: lsl
+
  define internal i64 @shr64BitSigned(i64 %a, i64 %b) {
  entry:
    %shr = ashr i64 %a, %b
@@ -394,6 +413,14 @@ entry:
  ; OPTM1: je
  ; OPTM1: sar {{.*}},0x1f
  
+; ARM32-LABEL: shr64BitSigned
+; ARM32: rsb
+; ARM32: lsr
+; ARM32: orr
+; ARM32: subs
+; ARM32: orrpl
+; ARM32: asr
+
  define internal i32 @shr64BitSignedTrunc(i64 %a, i64 %b) {
  entry:
    %shr = ashr i64 %a, %b
@@ -413,6 +440,13 @@ entry:
  ; OPTM1: je
  ; OPTM1: sar {{.*}},0x1f
  
+; ARM32-LABEL: shr64BitSignedTrunc
+; ARM32: rsb
+; ARM32: lsr
+; ARM32: orr
+; ARM32: subs
+; ARM32: orrpl
+
  define internal i64 @shr64BitUnsigned(i64 %a, i64 %b) {
  entry:
    %shr = lshr i64 %a, %b
@@ -430,6 +464,14 @@ entry:
  ; OPTM1: test {{.*}},0x20
  ; OPTM1: je
  
+; ARM32-LABEL: shr64BitUnsigned
+; ARM32: rsb
+; ARM32: lsr
+; ARM32: orr
+; ARM32: sub
+; ARM32: orr
+; ARM32: lsr
+
  define internal i32 @shr64BitUnsignedTrunc(i64 %a, i64 %b) {
  entry:
    %shr = lshr i64 %a, %b
@@ -448,6 +490,13 @@ entry:
  ; OPTM1: test {{.*}},0x20
  ; OPTM1: je
  
+; ARM32-LABEL: shr64BitUnsignedTrunc
+; ARM32: rsb
+; ARM32: lsr
+; ARM32: orr
+; ARM32: sub
+; ARM32: orr
+
  define internal i64 @and64BitSigned(i64 %a, i64 %b) {
  entry:
    %and = and i64 %b, %a
@@ -550,17 +599,20 @@ entry:
  ; ARM32: eor
  ; ARM32: eor
  
-define internal i32 @trunc64To32Signed(i64 %a) {
+define internal i32 @trunc64To32Signed(i64 %padding, i64 %a) {
  entry:
    %conv = trunc i64 %a to i32
    ret i32 %conv
  }
  ; CHECK-LABEL: trunc64To32Signed
-; CHECK: mov     eax,DWORD PTR [esp+0x4]
+; CHECK: mov     eax,DWORD PTR [esp+0xc]
  ;
  ; OPTM1-LABEL: trunc64To32Signed
  ; OPTM1: mov     eax,DWORD PTR [esp+
  
+; ARM32-LABEL: trunc64To32Signed
+; ARM32: mov r0, r2
+
  define internal i32 @trunc64To16Signed(i64 %a) {
  entry:
    %conv = trunc i64 %a to i16
@@ -575,6 +627,9 @@ entry:
  ; OPTM1:      mov     eax,DWORD PTR [esp+
  ; OPTM1: movsx  eax,
  
+; ARM32-LABEL: trunc64To16Signed
+; ARM32: sxth r0, r0
+
  define internal i32 @trunc64To8Signed(i64 %a) {
  entry:
    %conv = trunc i64 %a to i8
@@ -589,6 +644,9 @@ entry:
  ; OPTM1:      mov     eax,DWORD PTR [esp+
  ; OPTM1: movsx  eax,
  
+; ARM32-LABEL: trunc64To8Signed
+; ARM32: sxtb r0, r0
+
  define internal i32 @trunc64To32SignedConst() {
  entry:
    %conv = trunc i64 12345678901234 to i32
@@ -600,6 +658,10 @@ entry:
  ; OPTM1-LABEL: trunc64To32SignedConst
  ; OPTM1: mov eax,0x73ce2ff2
  
+; ARM32-LABEL: trunc64To32SignedConst
+; ARM32: movw r0, #12274 ; 0x2ff2
+; ARM32: movt r0, #29646 ; 0x73ce
+
  define internal i32 @trunc64To16SignedConst() {
  entry:
    %conv = trunc i64 12345678901234 to i16
@@ -614,17 +676,25 @@ entry:
  ; OPTM1: mov eax,0x73ce2ff2
  ; OPTM1: movsx eax,
  
-define internal i32 @trunc64To32Unsigned(i64 %a) {
+; ARM32-LABEL: trunc64To16SignedConst
+; ARM32: movw r0, #12274 ; 0x2ff2
+; ARM32: movt r0, #29646 ; 0x73ce
+; ARM32: sxth r0, r0
+
+define internal i32 @trunc64To32Unsigned(i64 %padding, i64 %a) {
  entry:
    %conv = trunc i64 %a to i32
    ret i32 %conv
  }
  ; CHECK-LABEL: trunc64To32Unsigned
-; CHECK: mov     eax,DWORD PTR [esp+0x4]
+; CHECK: mov     eax,DWORD PTR [esp+0xc]
  ;
  ; OPTM1-LABEL: trunc64To32Unsigned
  ; OPTM1: mov     eax,DWORD PTR [esp+
  
+; ARM32-LABEL: trunc64To32Unsigned
+; ARM32: mov r0, r2
+
  define internal i32 @trunc64To16Unsigned(i64 %a) {
  entry:
    %conv = trunc i64 %a to i16
@@ -639,6 +709,9 @@ entry:
  ; OPTM1:      mov     eax,DWORD PTR [esp+
  ; OPTM1: movzx  eax,
  
+; ARM32-LABEL: trunc64To16Unsigned
+; ARM32: uxth
+
  define internal i32 @trunc64To8Unsigned(i64 %a) {
  entry:
    %conv = trunc i64 %a to i8
@@ -653,6 +726,9 @@ entry:
  ; OPTM1: mov    eax,DWORD PTR [esp+
  ; OPTM1: movzx  eax,
  
+; ARM32-LABEL: trunc64To8Unsigned
+; ARM32: uxtb
+
  define internal i32 @trunc64To1(i64 %a) {
  entry:
  ;  %tobool = icmp ne i64 %a, 0
@@ -670,6 +746,10 @@ entry:
  ; OPTM1:      and     eax,0x1
  ; OPTM1:      and     eax,0x1
  
+; ARM32-LABEL: trunc64To1
+; ARM32: and r0, r0, #1
+; ARM32: and r0, r0, #1
+
  define internal i64 @sext32To64(i32 %a) {
  entry:
    %conv = sext i32 %a to i64
@@ -683,6 +763,9 @@ entry:
  ; OPTM1: mov
  ; OPTM1: sar {{.*}},0x1f
  
+; ARM32-LABEL: sext32To64
+; ARM32: asr {{.*}}, #31
+
  define internal i64 @sext16To64(i32 %a) {
  entry:
    %a.arg_trunc = trunc i32 %a to i16
@@ -697,6 +780,10 @@ entry:
  ; OPTM1: movsx
  ; OPTM1: sar {{.*}},0x1f
  
+; ARM32-LABEL: sext16To64
+; ARM32: sxth
+; ARM32: asr {{.*}}, #31
+
  define internal i64 @sext8To64(i32 %a) {
  entry:
    %a.arg_trunc = trunc i32 %a to i8
@@ -711,6 +798,10 @@ entry:
  ; OPTM1: movsx
  ; OPTM1: sar {{.*}},0x1f
  
+; ARM32-LABEL: sext8To64
+; ARM32: sxtb
+; ARM32: asr {{.*}}, #31
+
  define internal i64 @sext1To64(i32 %a) {
  entry:
    %a.arg_trunc = trunc i32 %a to i1
@@ -727,6 +818,10 @@ entry:
  ; OPTM1: shl {{.*}},0x1f
  ; OPTM1: sar {{.*}},0x1f
  
+; ARM32-LABEL: sext1To64
+; ARM32: lsl {{.*}}, #31
+; ARM32: asr {{.*}}, #31
+
  define internal i64 @zext32To64(i32 %a) {
  entry:
    %conv = zext i32 %a to i64
@@ -740,6 +835,9 @@ entry:
  ; OPTM1: mov
  ; OPTM1: mov {{.*}},0x0
  
+; ARM32-LABEL: zext32To64
+; ARM32: mov {{.*}}, #0
+
  define internal i64 @zext16To64(i32 %a) {
  entry:
    %a.arg_trunc = trunc i32 %a to i16
@@ -754,6 +852,10 @@ entry:
  ; OPTM1: movzx
  ; OPTM1: mov {{.*}},0x0
  
+; ARM32-LABEL: zext16To64
+; ARM32: uxth
+; ARM32: mov {{.*}}, #0
+
  define internal i64 @zext8To64(i32 %a) {
  entry:
    %a.arg_trunc = trunc i32 %a to i8
@@ -768,6 +870,10 @@ entry:
  ; OPTM1: movzx
  ; OPTM1: mov {{.*}},0x0
  
+; ARM32-LABEL: zext8To64
+; ARM32: uxtb
+; ARM32: mov {{.*}}, #0
+
  define internal i64 @zext1To64(i32 %a) {
  entry:
    %a.arg_trunc = trunc i32 %a to i1
@@ -782,6 +888,10 @@ entry:
  ; OPTM1: and {{.*}},0x1
  ; OPTM1: mov {{.*}},0x0
  
+; ARM32-LABEL: zext1To64
+; ARM32: and {{.*}}, #1
+; ARM32: mov {{.*}}, #0
+
  define internal void @icmpEq64(i64 %a, i64 %b, i64 %c, i64 %d) {
  entry:
    %cmp = icmp eq i64 %a, %b
@@ -1114,6 +1224,10 @@ entry:
  ; OPTM1: jne
  ; OPTM1: je
  
+; ARM32-LABEL: icmpEq64Bool
+; ARM32: moveq
+; ARM32: movne
+
  define internal i32 @icmpNe64Bool(i64 %a, i64 %b) {
  entry:
    %cmp = icmp ne i64 %a, %b
@@ -1128,6 +1242,10 @@ entry:
  ; OPTM1: jne
  ; OPTM1: jne
  
+; ARM32-LABEL: icmpNe64Bool
+; ARM32: movne
+; ARM32: moveq
+
  define internal i32 @icmpSgt64Bool(i64 %a, i64 %b) {
  entry:
    %cmp = icmp sgt i64 %a, %b
@@ -1148,6 +1266,12 @@ entry:
  ; OPTM1: cmp
  ; OPTM1: ja
  
+; ARM32-LABEL: icmpSgt64Bool
+; ARM32: cmp
+; ARM32: sbcs
+; ARM32: movlt
+; ARM32: movge
+
  define internal i32 @icmpUgt64Bool(i64 %a, i64 %b) {
  entry:
    %cmp = icmp ugt i64 %a, %b
@@ -1168,6 +1292,12 @@ entry:
  ; OPTM1: cmp
  ; OPTM1: ja
  
+; ARM32-LABEL: icmpUgt64Bool
+; ARM32: cmp
+; ARM32: cmpeq
+; ARM32: movhi
+; ARM32: movls
+
  define internal i32 @icmpSge64Bool(i64 %a, i64 %b) {
  entry:
    %cmp = icmp sge i64 %a, %b
@@ -1188,6 +1318,12 @@ entry:
  ; OPTM1: cmp
  ; OPTM1: jae
  
+; ARM32-LABEL: icmpSge64Bool
+; ARM32: cmp
+; ARM32: sbcs
+; ARM32: movge
+; ARM32: movlt
+
  define internal i32 @icmpUge64Bool(i64 %a, i64 %b) {
  entry:
    %cmp = icmp uge i64 %a, %b
@@ -1208,6 +1344,12 @@ entry:
  ; OPTM1: cmp
  ; OPTM1: jae
  
+; ARM32-LABEL: icmpUge64Bool
+; ARM32: cmp
+; ARM32: cmpeq
+; ARM32: movcs
+; ARM32: movcc
+
  define internal i32 @icmpSlt64Bool(i64 %a, i64 %b) {
  entry:
    %cmp = icmp slt i64 %a, %b
@@ -1228,6 +1370,12 @@ entry:
  ; OPTM1: cmp
  ; OPTM1: jb
  
+; ARM32-LABEL: icmpSlt64Bool
+; ARM32: cmp
+; ARM32: sbcs
+; ARM32: movlt
+; ARM32: movge
+
  define internal i32 @icmpUlt64Bool(i64 %a, i64 %b) {
  entry:
    %cmp = icmp ult i64 %a, %b
@@ -1248,6 +1396,12 @@ entry:
  ; OPTM1: cmp
  ; OPTM1: jb
  
+; ARM32-LABEL: icmpUlt64Bool
+; ARM32: cmp
+; ARM32: cmpeq
+; ARM32: movcc
+; ARM32: movcs
+
  define internal i32 @icmpSle64Bool(i64 %a, i64 %b) {
  entry:
    %cmp = icmp sle i64 %a, %b
@@ -1268,6 +1422,12 @@ entry:
  ; OPTM1: cmp
  ; OPTM1: jbe
  
+; ARM32-LABEL: icmpSle64Bool
+; ARM32: cmp
+; ARM32: sbcs
+; ARM32: movge
+; ARM32: movlt
+
  define internal i32 @icmpUle64Bool(i64 %a, i64 %b) {
  entry:
    %cmp = icmp ule i64 %a, %b
@@ -1288,6 +1448,12 @@ entry:
  ; OPTM1: cmp
  ; OPTM1: jbe
  
+; ARM32-LABEL: icmpUle64Bool
+; ARM32: cmp
+; ARM32: cmpeq
+; ARM32: movls
+; ARM32: movhi
+
  define internal i64 @load64(i32 %a) {
  entry:
    %__1 = inttoptr i32 %a to i64*
diff --git a/tests_lit/llvm2ice_tests/convert.ll b/tests_lit/llvm2ice_tests/convert.ll

index 1676b5c..1539b42 100644 (file)
--- a/tests_lit/llvm2ice_tests/convert.ll
+++ b/tests_lit/llvm2ice_tests/convert.ll
@@ -1,7 +1,23 @@
  ; Simple test of signed and unsigned integer conversions.
  
-; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 | FileCheck %s
-; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 | FileCheck %s
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN:   --target x8632 -i %s --args -O2 \
+; RUN:   | %if --need=target_X8632 --command FileCheck %s
+
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN:   --target x8632 -i %s --args -Om1 \
+; RUN:   | %if --need=target_X8632 --command FileCheck %s
+
+; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
+; once enough infrastructure is in. Also, switch to --filetype=obj
+; when possible.
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
+
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
  
  @i8v = internal global [1 x i8] zeroinitializer, align 1
  @i16v = internal global [2 x i8] zeroinitializer, align 2
@@ -38,6 +54,21 @@ entry:
  ; CHECK-DAG: ds:0x0,{{.*}}i64v
  ; CHECK-DAG: ds:0x4,{{.*}}i64v
  
+; ARM32-LABEL: from_int8
+; ARM32: movw {{.*}}i8v
+; ARM32: ldrb
+; ARM32: sxtb
+; ARM32: movw {{.*}}i16v
+; ARM32: strh
+; ARM32: sxtb
+; ARM32: movw {{.*}}i32v
+; ARM32: str r
+; ARM32: sxtb
+; ARM32: asr
+; ARM32: movw {{.*}}i64v
+; ARM32-DAG: str r{{.*}}, [r{{[0-9]+}}]
+; ARM32-DAG: str r{{.*}}, [{{.*}}, #4]
+
  define void @from_int16() {
  entry:
    %__0 = bitcast [2 x i8]* @i16v to i16*
@@ -62,6 +93,19 @@ entry:
  ; CHECK: sar {{.*}},0x1f
  ; CHECK: 0x0,{{.*}}i64v
  
+; ARM32-LABEL: from_int16
+; ARM32: movw {{.*}}i16v
+; ARM32: ldrh
+; ARM32: movw {{.*}}i8v
+; ARM32: strb
+; ARM32: sxth
+; ARM32: movw {{.*}}i32v
+; ARM32: str r
+; ARM32: sxth
+; ARM32: asr
+; ARM32: movw {{.*}}i64v
+; ARM32: str r
+
  define void @from_int32() {
  entry:
    %__0 = bitcast [4 x i8]* @i32v to i32*
@@ -84,6 +128,17 @@ entry:
  ; CHECK: sar {{.*}},0x1f
  ; CHECK: 0x0,{{.*}} i64v
  
+; ARM32-LABEL: from_int32
+; ARM32: movw {{.*}}i32v
+; ARM32: ldr r
+; ARM32: movw {{.*}}i8v
+; ARM32: strb
+; ARM32: movw {{.*}}i16v
+; ARM32: strh
+; ARM32: asr
+; ARM32: movw {{.*}}i64v
+; ARM32: str r
+
  define void @from_int64() {
  entry:
    %__0 = bitcast [8 x i8]* @i64v to i64*
@@ -105,6 +160,15 @@ entry:
  ; CHECK: 0x0,{{.*}} i16v
  ; CHECK: 0x0,{{.*}} i32v
  
+; ARM32-LABEL: from_int64
+; ARM32: movw {{.*}}i64v
+; ARM32: ldr r
+; ARM32: movw {{.*}}i8v
+; ARM32: strb
+; ARM32: movw {{.*}}i16v
+; ARM32: strh
+; ARM32: movw {{.*}}i32v
+; ARM32: str r
  
  define void @from_uint8() {
  entry:
@@ -131,6 +195,20 @@ entry:
  ; CHECK: mov {{.*}},0x0
  ; CHECK: 0x0,{{.*}} i64v
  
+; ARM32-LABEL: from_uint8
+; ARM32: movw {{.*}}u8v
+; ARM32: ldrb
+; ARM32: uxtb
+; ARM32: movw {{.*}}i16v
+; ARM32: strh
+; ARM32: uxtb
+; ARM32: movw {{.*}}i32v
+; ARM32: str r
+; ARM32: uxtb
+; ARM32: mov {{.*}}, #0
+; ARM32: movw {{.*}}i64v
+; ARM32: str r
+
  define void @from_uint16() {
  entry:
    %__0 = bitcast [2 x i8]* @u16v to i16*
@@ -155,6 +233,19 @@ entry:
  ; CHECK: mov {{.*}},0x0
  ; CHECK: 0x0,{{.*}} i64v
  
+; ARM32-LABEL: from_uint16
+; ARM32: movw {{.*}}u16v
+; ARM32: ldrh
+; ARM32: movw {{.*}}i8v
+; ARM32: strb
+; ARM32: uxth
+; ARM32: movw {{.*}}i32v
+; ARM32: str r
+; ARM32: uxth
+; ARM32: mov {{.*}}, #0
+; ARM32: movw {{.*}}i64v
+; ARM32: str r
+
  define void @from_uint32() {
  entry:
    %__0 = bitcast [4 x i8]* @u32v to i32*
@@ -177,6 +268,17 @@ entry:
  ; CHECK: mov {{.*}},0x0
  ; CHECK: 0x0,{{.*}} i64v
  
+; ARM32-LABEL: from_uint32
+; ARM32: movw {{.*}}u32v
+; ARM32: ldr r
+; ARM32: movw {{.*}}i8v
+; ARM32: strb
+; ARM32: movw {{.*}}i16v
+; ARM32: strh
+; ARM32: mov {{.*}}, #0
+; ARM32: movw {{.*}}i64v
+; ARM32: str r
+
  define void @from_uint64() {
  entry:
    %__0 = bitcast [8 x i8]* @u64v to i64*
@@ -197,3 +299,13 @@ entry:
  ; CHECK: 0x0,{{.*}} i8v
  ; CHECK: 0x0,{{.*}} i16v
  ; CHECK: 0x0,{{.*}} i32v
+
+; ARM32-LABEL: from_uint64
+; ARM32: movw {{.*}}u64v
+; ARM32: ldr r
+; ARM32: movw {{.*}}i8v
+; ARM32: strb
+; ARM32: movw {{.*}}i16v
+; ARM32: strh
+; ARM32: movw {{.*}}i32v
+; ARM32: str r
diff --git a/tests_lit/llvm2ice_tests/shift.ll b/tests_lit/llvm2ice_tests/shift.ll

index 1fd77a5..ecf5fee 100644 (file)
--- a/tests_lit/llvm2ice_tests/shift.ll
+++ b/tests_lit/llvm2ice_tests/shift.ll
@@ -1,10 +1,25 @@
  ; This is a test of C-level conversion operations that clang lowers
  ; into pairs of shifts.
  
-; RUN: %p2i -i %s --filetype=obj --disassemble --no-local-syms --args -O2 \
-; RUN:   | FileCheck %s
-; RUN: %p2i -i %s --filetype=obj --disassemble --no-local-syms --args -Om1 \
-; RUN:   | FileCheck %s
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN:   --target x8632 -i %s --args -O2 \
+; RUN:   | %if --need=target_X8632 --command FileCheck %s
+
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN:   --target x8632 -i %s --args -Om1 \
+; RUN:   | %if --need=target_X8632 --command FileCheck %s
+
+; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
+; once enough infrastructure is in. Also, switch to --filetype=obj
+; when possible.
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
+
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
+
  
  @i1 = internal global [4 x i8] zeroinitializer, align 4
  @i2 = internal global [4 x i8] zeroinitializer, align 4
@@ -24,16 +39,24 @@ entry:
  ; CHECK: shl {{.*}},0x18
  ; CHECK: sar {{.*}},0x18
  
+; ARM32-LABEL: conv1
+; ARM32: lsl {{.*}}, #24
+; ARM32: asr {{.*}}, #24
+
  define void @conv2() {
  entry:
    %__0 = bitcast [4 x i8]* @u1 to i32*
    %v0 = load i32, i32* %__0, align 1
    %sext1 = shl i32 %v0, 16
-  %v1 = ashr i32 %sext1, 16
+  %v1 = lshr i32 %sext1, 16
    %__4 = bitcast [4 x i8]* @i2 to i32*
    store i32 %v1, i32* %__4, align 1
    ret void
  }
  ; CHECK-LABEL: conv2
  ; CHECK: shl {{.*}},0x10
-; CHECK: sar {{.*}},0x10
+; CHECK: shr {{.*}},0x10
+
+; ARM32-LABEL: conv2
+; ARM32: lsl {{.*}}, #16
+; ARM32: lsr {{.*}}, #16
diff --git a/tests_lit/llvm2ice_tests/test_i1.ll b/tests_lit/llvm2ice_tests/test_i1.ll

index 65083b6..6563e90 100644 (file)
--- a/tests_lit/llvm2ice_tests/test_i1.ll
+++ b/tests_lit/llvm2ice_tests/test_i1.ll
@@ -1,7 +1,24 @@
  ; Tests various aspects of i1 related lowering.
  
-; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s
-; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 | FileCheck %s
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN:   --target x8632 -i %s --args -O2 \
+; RUN:   | %if --need=target_X8632 --command FileCheck %s
+
+; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
+; RUN:   --target x8632 -i %s --args -Om1 \
+; RUN:   | %if --need=target_X8632 --command FileCheck %s
+
+; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
+; once enough infrastructure is in. Also, switch to --filetype=obj
+; when possible.
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
+; RUN: %if --need=target_ARM32 --command %p2i --filetype=asm --assemble \
+; RUN:   --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
+; RUN:   | %if --need=target_ARM32 --command FileCheck --check-prefix ARM32 %s
+
+; TODO(jvoung): test this.
  
  ; Test that and with true uses immediate 1, not -1.
  define internal i32 @testAndTrue(i32 %arg) {
@@ -13,6 +30,8 @@ entry:
  }
  ; CHECK-LABEL: testAndTrue
  ; CHECK: and {{.*}},0x1
+; ARM32-LABEL: testAndTrue
+; ARM32: and {{.*}}, #1
  
  ; Test that or with true uses immediate 1, not -1.
  define internal i32 @testOrTrue(i32 %arg) {
@@ -24,6 +43,8 @@ entry:
  }
  ; CHECK-LABEL: testOrTrue
  ; CHECK: or {{.*}},0x1
+; ARM32-LABEL: testOrTrue
+; ARM32: orr {{.*}}, #1
  
  ; Test that xor with true uses immediate 1, not -1.
  define internal i32 @testXorTrue(i32 %arg) {
@@ -35,6 +56,8 @@ entry:
  }
  ; CHECK-LABEL: testXorTrue
  ; CHECK: xor {{.*}},0x1
+; ARM32-LABEL: testXorTrue
+; ARM32: eor {{.*}}, #1
  
  ; Test that trunc to i1 masks correctly.
  define internal i32 @testTrunc(i32 %arg) {
@@ -45,6 +68,8 @@ entry:
  }
  ; CHECK-LABEL: testTrunc
  ; CHECK: and {{.*}},0x1
+; ARM32-LABEL: testTrunc
+; ARM32: and {{.*}}, #1
  
  ; Test zext to i8.
  define internal i32 @testZextI8(i32 %arg) {
@@ -59,6 +84,9 @@ entry:
  ; CHECK: and {{.*}},0x1
  ; match the zext i1 instruction (NOTE: no mov need between i1 and i8).
  ; CHECK: and {{.*}},0x1
+; ARM32-LABEL: testZextI8
+; ARM32: and {{.*}}, #1
+; ARM32: and {{.*}}, #1
  
  ; Test zext to i16.
  define internal i32 @testZextI16(i32 %arg) {
@@ -75,6 +103,12 @@ entry:
  ; CHECK: movzx [[REG:e.*]],{{[a-d]l|BYTE PTR}}
  ; CHECK: and [[REG]],0x1
  
+; ARM32-LABEL: testZextI16
+; match the trunc instruction
+; ARM32: and {{.*}}, #1
+; match the zext (no need to uxt into a reg if src is already in a reg)
+; ARM32: and {{.*}}, #1
+
  ; Test zext to i32.
  define internal i32 @testZextI32(i32 %arg) {
  entry:
@@ -88,6 +122,9 @@ entry:
  ; match the zext i1 instruction
  ; CHECK: movzx
  ; CHECK: and {{.*}},0x1
+; ARM32-LABEL: testZextI32
+; ARM32: and {{.*}}, #1
+; ARM32: and {{.*}}, #1
  
  ; Test zext to i64.
  define internal i64 @testZextI64(i32 %arg) {
@@ -103,6 +140,10 @@ entry:
  ; CHECK: movzx
  ; CHECK: and {{.*}},0x1
  ; CHECK: mov {{.*}},0x0
+; ARM32-LABEL: testZextI64
+; ARM32: and {{.*}}, #1
+; ARM32: and {{.*}}, #1
+; ARM32: mov {{.*}}, #0
  
  ; Test sext to i8.
  define internal i32 @testSextI8(i32 %arg) {
@@ -118,6 +159,14 @@ entry:
  ; match the sext i1 instruction
  ; CHECK: shl [[REG:.*]],0x7
  ; CHECK-NEXT: sar [[REG]],0x7
+;
+; ARM shifts by 31 (within a full 32-bit reg), since there aren't any byte regs.
+; ARM32-LABEL: testSextI8
+; match the trunc instruction
+; ARM32: and {{.*}}, #1
+; match the sext i1 instruction
+; ARM32: lsl {{.*}}, #31
+; ARM32-NEXT: asr {{.*}}, #31
  
  ; Test sext to i16.
  define internal i32 @testSextI16(i32 %arg) {
@@ -135,6 +184,11 @@ entry:
  ; CHECK-NEXT: shl [[REG]],0xf
  ; CHECK-NEXT: sar [[REG]],0xf
  
+; ARM32-LABEL: testSextI16
+; ARM32: and {{.*}}, #1
+; ARM32: lsl {{.*}}, #31
+; ARM32-NEXT: asr {{.*}}, #31
+
  ; Test sext to i32.
  define internal i32 @testSextI32(i32 %arg) {
  entry:
@@ -150,6 +204,11 @@ entry:
  ; CHECK-NEXT: shl [[REG]],0x1f
  ; CHECK-NEXT: sar [[REG]],0x1f
  
+; ARM32-LABEL: testSextI32
+; ARM32: and {{.*}}, #1
+; ARM32: lsl {{.*}}, #31
+; ARM32-NEXT: asr {{.*}}, #31
+
  ; Test sext to i64.
  define internal i64 @testSextI64(i32 %arg) {
  entry:
@@ -165,6 +224,40 @@ entry:
  ; CHECK-NEXT: shl [[REG]],0x1f
  ; CHECK-NEXT: sar [[REG]],0x1f
  
+; ARM32-LABEL: testSextI64
+; ARM32: and {{.*}}, #1
+; ARM32: lsl {{.*}}, #31
+; ARM32-NEXT: asr [[REG:r.*]], {{.*}}, #31
+; ARM32-NEXT: {{(mov|str).*}} [[REG]]
+
+; Kind of like sext i1 to i32, but with an immediate source. On ARM,
+; sxtb cannot take an immediate operand, so make sure it's using a reg.
+; If we had optimized constants, this could just be mov dst, 0xffffffff
+; or mvn dst, #0.
+define internal i32 @testSextTrue() {
+  %result = sext i1 true to i32
+  ret i32 %result
+}
+; CHECK-LABEL: testSextTrue
+; CHECK: movzx
+; CHECK-NEXT: shl
+; CHECK-NEXT: sar
+; ARM32-LABEL: testSextTrue
+; ARM32: mov{{.*}}, #1
+; ARM32: lsl
+; ARM32: asr
+
+define internal i32 @testZextTrue() {
+  %result = zext i1 true to i32
+  ret i32 %result
+}
+; CHECK-LABEL: testZextTrue
+; CHECK: movzx
+; CHECK: and {{.*}},0x1
+; ARM32-LABEL: testZextTrue
+; ARM32: mov{{.*}}, #1
+; ARM32: and {{.*}}, #1
+
  ; Test fptosi float to i1.
  define internal i32 @testFptosiFloat(float %arg) {
  entry:
author	Jan Voung <jvoung@chromium.org>
	Fri, 5 Jun 2015 00:02:31 +0000 (17:02 -0700)
committer	Jan Voung <jvoung@chromium.org>
	Fri, 5 Jun 2015 00:02:31 +0000 (17:02 -0700)
src/IceInstARM32.cpp		patch \| blob \| history
src/IceInstARM32.h		patch \| blob \| history
src/IceTargetLoweringARM32.cpp		patch \| blob \| history
src/IceTargetLoweringARM32.h		patch \| blob \| history
tests_lit/llvm2ice_tests/64bit.pnacl.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/convert.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/shift.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/test_i1.ll		patch \| blob \| history