Add option to force filetype=asm for testing

author David Sehr <sehr@chromium.org>

Wed, 13 Jan 2016 22:17:37 +0000 (14:17 -0800)

committer David Sehr <sehr@chromium.org>

Wed, 13 Jan 2016 22:17:37 +0000 (14:17 -0800)
author David Sehr <sehr@chromium.org>
Wed, 13 Jan 2016 22:17:37 +0000 (14:17 -0800)
committer David Sehr <sehr@chromium.org>
Wed, 13 Jan 2016 22:17:37 +0000 (14:17 -0800)
diff --git a/Makefile.standalone b/Makefile.standalone

index a9a9795..1f3a90d 100644 (file)
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -127,6 +127,19 @@ ifdef MSAN
    LD_EXTRA += -fsanitize=memory
  endif
  
+ifdef FORCEASM
+  FORCEASM_FLAG = --filetype=asm
+  # With --filetype=asm and --sandbox, the llvm-mc assembler emits the lock and
+  # 16-bit prefixes in the "wrong" order, causing the validator to reject the
+  # resulting nexe.  So we just disable those tests for now.
+  FORCEASM_XTEST_EXCLUDES = -e x8632,sandbox,test_sync_atomic
+  FORCEASM_LIT_PARAM = --param=FORCEASM
+else
+  FORCEASM_FLAG =
+  FORCEASM_XTEST_EXCLUDES =
+  FORCEASM_LIT_PARAM =
+endif
+
  SB_OBJDIR := $(OBJDIR)+Sandboxed
  
  $(info -----------------------------------------------)
@@ -397,7 +410,7 @@ runtime.is.built: $(RT_SRC) pydir/build-runtime.py
  
  check-lit: $(OBJDIR)/pnacl-sz make_symlink
         PNACL_BIN_PATH=$(PNACL_BIN_PATH) \
-       $(LLVM_SRC_PATH)/utils/lit/lit.py -sv tests_lit
+       $(LLVM_SRC_PATH)/utils/lit/lit.py -sv tests_lit $(FORCEASM_LIT_PARAM)
  
  ifdef MINIMAL
  check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime
@@ -410,6 +423,8 @@ check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime
         # TODO(jpp): reenable the x86-64 tests.
         ./pydir/crosstest_generator.py -v --lit \
            --toolchain-root $(TOOLCHAIN_ROOT) \
+          $(FORCEASM_FLAG) \
+          $(FORCEASM_XTEST_EXCLUDES) \
            -i x8632,native,sse2 \
            -i x8632,native,sse4.1,test_vector_ops \
            -i x8632,sandbox,sse4.1,Om1 \
@@ -475,7 +490,7 @@ check-presubmit presubmit:
  # Check the x86 assembler unit tests.
         +make -f Makefile.standalone \
            DEBUG=1 CHECK_X86_ASM=1 check-unit
-# Run spec2k for x86-32.
+# Run lit tests, cross tests, unit tests, and spec2k/x86-32.
         +make -f Makefile.standalone \
            check check-spec
  # Build spec2k under -Om1/x86-32, to check for liveness errors.
@@ -484,6 +499,9 @@ check-presubmit presubmit:
  # Run spec2k for x86-32 without advanced phi lowering.
         +make -f Makefile.standalone \
            SPECFLAGS='--sz=--phi-edge-split=0' check-spec
+# Run cross tests and lit tests to validate filetype=asm output.
+       +make -f Makefile.standalone \
+          FORCEASM=1 check-xtest check-lit
  # Build spec2k for arm32.
         +make -f Makefile.standalone \
            TARGET=arm32 SPECBUILDONLY=true check-spec
diff --git a/pydir/crosstest_generator.py b/pydir/crosstest_generator.py

index 31a1cf7..e478e1c 100755 (executable)
--- a/pydir/crosstest_generator.py
+++ b/pydir/crosstest_generator.py
@@ -107,6 +107,8 @@ def main():
                             '{root}/toolchain/linux_x86/pnacl_newlib_raw/bin'
                           ).format(root=root),
                           help='Path to toolchain binaries.')
+  argparser.add_argument('--filetype', default=None, dest='filetype',
+                         help='File type override, one of {asm, iasm, obj}.')
    args = argparser.parse_args()
  
    # Run from the crosstest directory to make it easy to grab inputs.
@@ -152,6 +154,8 @@ def main():
                  attr=attr)
                extra = (tests.get(test, 'flags').split(' ')
                         if tests.has_option(test, 'flags') else [])
+              if args.filetype:
+                extra += ['--filetype={ftype}'.format(ftype=args.filetype)]
                # Generate the compile command.
                cmp_cmd = (
                  ['{path}/crosstest.py'.format(path=pypath),
diff --git a/pydir/run-pnacl-sz.py b/pydir/run-pnacl-sz.py

index b7434ad..d75acc5 100755 (executable)
--- a/pydir/run-pnacl-sz.py
+++ b/pydir/run-pnacl-sz.py
@@ -40,6 +40,8 @@ def main():
          formatter_class=argparse.ArgumentDefaultsHelpFormatter)
      argparser.add_argument('--input', '-i', required=True,
                             help='LLVM source file to compile')
+    argparser.add_argument('--output', '-o', required=False,
+                           help='Output file to write')
      argparser.add_argument('--insts', required=False,
                             action='store_true',
                             help='Stop after translating to ' +
@@ -77,10 +79,12 @@ def main():
                             help='Add a disassembler flag')
      argparser.add_argument('--filetype', default='iasm', dest='filetype',
                             choices=['obj', 'asm', 'iasm'],
-                           help='Output file type.  Default %(default)s.')
+                           help='Output file type.  Default %(default)s')
+    argparser.add_argument('--forceasm', required=False, action='store_true',
+                           help='Force --filetype=asm')
      argparser.add_argument('--target', default='x8632', dest='target',
                             choices=['x8632','arm32','mips32'],
-                           help='Target architecture.  Default %(default)s.')
+                           help='Target architecture.  Default %(default)s')
      argparser.add_argument('--echo-cmd', required=False,
                             action='store_true',
                             help='Trace command that generates ICE instructions')
@@ -92,7 +96,7 @@ def main():
                             default=[],
                             help='Remaining arguments are passed to pnacl-sz')
      argparser.add_argument('--sandbox', required=False, action='store_true',
-                           help='Sandboxes the generated code.')
+                           help='Sandboxes the generated code')
  
      args = argparser.parse_args()
      pnacl_bin_path = args.pnacl_bin_path
@@ -111,6 +115,16 @@ def main():
      if args.llvm and args.tbc:
        raise RuntimeError("Can't specify both '--tbc' and '--llvm'")
  
+    if args.forceasm:
+      if args.filetype == 'asm':
+        pass
+      elif args.filetype == 'iasm':
+        # TODO(sehr) implement forceasm for iasm.
+        pass
+      elif args.filetype == 'obj':
+        args.filetype = 'asm'
+        args.assemble = True
+
      cmd = []
      if args.tbc:
        cmd = [os.path.join(pnacl_bin_path, 'pnacl-bcfuzz'), llfile,
@@ -141,34 +155,54 @@ def main():
      else:
        cmd += ['--build-on-read=1']
      cmd += ['--filetype=' + args.filetype]
+    script_name = os.path.basename(sys.argv[0])
+    for _, arg in enumerate(args.args):
+      # Redirecting the output file needs to be done through the script
+      # because forceasm may introduce a new temporary file between pnacl-sz
+      # and llvm-mc.  Similar issues could occur when setting filetype, target,
+      # or sandbox through --args.  Filter and report an error.
+      if re.search('^-?-(o|output|filetype|target|sandbox)(=.+)?$', arg):
+        preferred_option = '--output' if re.search('^-?-o(=.+)?$', arg) else arg
+        print 'Option should be set using:'
+        print '    %s ... %s ... --args' % (script_name, preferred_option)
+        print 'rather than:'
+        print '    %s ... --args %s ...' % (script_name, arg)
+        exit(1)
+    asm_temp = None
+    output_file_name = None
+    keep_output_file = False
+    if args.output:
+      output_file_name = args.output
+      keep_output_file = True
      cmd += args.args
      if args.llvm_source:
        cmd += [llfile]
-    asm_temp = None
      if args.assemble or args.disassemble:
-      # On windows we may need to close the file first before it can be
-      # re-opened by the other tools, so don't do delete-on-close,
-      # and instead manually delete.
-      asm_temp = tempfile.NamedTemporaryFile(delete=False)
-      asm_temp.close()
+      if not output_file_name:
+        # On windows we may need to close the file first before it can be
+        # re-opened by the other tools, so don't do delete-on-close,
+        # and instead manually delete.
+        asm_temp = tempfile.NamedTemporaryFile(delete=False)
+        asm_temp.close()
+        output_file_name = asm_temp.name
      if args.assemble and args.filetype != 'obj':
        cmd += (['|', os.path.join(pnacl_bin_path, 'llvm-mc')] +
                TargetAssemblerFlags(args.target, args.sandbox) +
-              ['-filetype=obj', '-o', asm_temp.name])
-    elif asm_temp:
-      cmd += ['-o', asm_temp.name]
+              ['-filetype=obj', '-o', output_file_name])
+    elif output_file_name:
+      cmd += ['-o', output_file_name]
      if args.disassemble:
        # Show wide instruction encodings, disassemble, and show relocs.
        cmd += (['&&', os.path.join(pnacl_bin_path, 'le32-nacl-objdump')] +
                args.dis_flags +
                ['-w', '-d', '-r'] + TargetDisassemblerFlags(args.target) +
-              [asm_temp.name])
+              [output_file_name])
  
      stdout_result = shellcmd(cmd, echo=args.echo_cmd)
      if not args.echo_cmd:
        sys.stdout.write(stdout_result)
-    if asm_temp:
-      os.remove(asm_temp.name)
+    if asm_temp and not keep_output_file:
+      os.remove(output_file_name)
  
  if __name__ == '__main__':
      main()
diff --git a/runtime/szrt_ll.ll b/runtime/szrt_ll.ll

index 1ecfffe..04e3fb5 100644 (file)
--- a/runtime/szrt_ll.ll
+++ b/runtime/szrt_ll.ll
@@ -26,16 +26,18 @@ entry:
    ret <4 x i32> %0
  }
  
-define i8 @__Sz_bitcast_8xi1_i8(<8 x i1> %a) {
+define i32 @__Sz_bitcast_8xi1_i8(<8 x i1> %a) {
  entry:
    %0 = bitcast <8 x i1> %a to i8
-  ret i8 %0
+  %ret = zext i8 %0 to i32
+  ret i32 %ret
  }
  
-define i16 @__Sz_bitcast_16xi1_i16(<16 x i1> %a) {
+define i32 @__Sz_bitcast_16xi1_i16(<16 x i1> %a) {
  entry:
    %0 = bitcast <16 x i1> %a to i16
-  ret i16 %0
+  %ret = zext i16 %0 to i32
+  ret i32 %ret
  }
  
  define <8 x i1> @__Sz_bitcast_i8_8xi1(i8 %a) {
diff --git a/src/IceInstX86Base.h b/src/IceInstX86Base.h

index c4a9fe4..2b7ac3f 100644 (file)
--- a/src/IceInstX86Base.h
+++ b/src/IceInstX86Base.h
@@ -823,26 +823,24 @@ template <typename TraitsType> struct InstImpl {
        if (!BuildDefs::dump())
          return;
        this->validateVectorAddrMode();
+      const Type DestTy = ArithmeticTypeOverride == IceType_void
+                              ? this->getDest()->getType()
+                              : ArithmeticTypeOverride;
+      const char *SuffixString = "";
        switch (Suffix) {
        case InstX86Base::SseSuffix::None:
-        this->emitTwoAddress(Func, Opcode);
          break;
-      case InstX86Base::SseSuffix::Packed: {
-        const Type DestTy = this->getDest()->getType();
-        this->emitTwoAddress(Func, this->Opcode,
-                             Traits::TypeAttributes[DestTy].PdPsString);
-      } break;
-      case InstX86Base::SseSuffix::Scalar: {
-        const Type DestTy = this->getDest()->getType();
-        this->emitTwoAddress(Func, this->Opcode,
-                             Traits::TypeAttributes[DestTy].SdSsString);
-      } break;
-      case InstX86Base::SseSuffix::Integral: {
-        const Type DestTy = this->getDest()->getType();
-        this->emitTwoAddress(Func, this->Opcode,
-                             Traits::TypeAttributes[DestTy].PackString);
-      } break;
+      case InstX86Base::SseSuffix::Packed:
+        SuffixString = Traits::TypeAttributes[DestTy].PdPsString;
+        break;
+      case InstX86Base::SseSuffix::Scalar:
+        SuffixString = Traits::TypeAttributes[DestTy].SdSsString;
+        break;
+      case InstX86Base::SseSuffix::Integral:
+        SuffixString = Traits::TypeAttributes[DestTy].PackString;
+        break;
        }
+      this->emitTwoAddress(Func, Opcode, SuffixString);
      }
      void emitIAS(const Cfg *Func) const override {
        this->validateVectorAddrMode();
@@ -865,12 +863,15 @@ template <typename TraitsType> struct InstImpl {
      }
  
    protected:
-    InstX86BaseBinopXmm(Cfg *Func, Variable *Dest, Operand *Source)
-        : InstX86Base(Func, K, 2, Dest) {
+    InstX86BaseBinopXmm(Cfg *Func, Variable *Dest, Operand *Source,
+                        Type ArithmeticTypeOverride = IceType_void)
+        : InstX86Base(Func, K, 2, Dest),
+          ArithmeticTypeOverride(ArithmeticTypeOverride) {
        this->addSource(Dest);
        this->addSource(Source);
      }
  
+    const Type ArithmeticTypeOverride;
      static const char *Opcode;
      static const XmmEmitterRegOp Emitter;
    };
@@ -1914,16 +1915,25 @@ template <typename TraitsType> struct InstImpl {
        : public InstX86BaseBinopXmm<InstX86Base::Pcmpeq, true,
                                     InstX86Base::SseSuffix::Integral> {
    public:
-    static InstX86Pcmpeq *create(Cfg *Func, Variable *Dest, Operand *Source) {
+    static InstX86Pcmpeq *create(Cfg *Func, Variable *Dest, Operand *Source,
+                                 Type ArithmeticTypeOverride = IceType_void) {
+      const Type Ty = ArithmeticTypeOverride == IceType_void
+                          ? Dest->getType()
+                          : ArithmeticTypeOverride;
+      (void)Ty;
+      assert((Ty != IceType_f64 && Ty != IceType_i64) ||
+             InstX86Base::getTarget(Func)->getInstructionSet() >=
+                 Traits::SSE4_1);
        return new (Func->allocate<InstX86Pcmpeq>())
-          InstX86Pcmpeq(Func, Dest, Source);
+          InstX86Pcmpeq(Func, Dest, Source, ArithmeticTypeOverride);
      }
  
    private:
-    InstX86Pcmpeq(Cfg *Func, Variable *Dest, Operand *Source)
+    InstX86Pcmpeq(Cfg *Func, Variable *Dest, Operand *Source,
+                  Type ArithmeticTypeOverride)
          : InstX86BaseBinopXmm<InstX86Base::Pcmpeq, true,
-                              InstX86Base::SseSuffix::Integral>(Func, Dest,
-                                                                Source) {}
+                              InstX86Base::SseSuffix::Integral>(
+              Func, Dest, Source, ArithmeticTypeOverride) {}
    };
  
    class InstX86Pcmpgt
@@ -1931,6 +1941,9 @@ template <typename TraitsType> struct InstImpl {
                                     InstX86Base::SseSuffix::Integral> {
    public:
      static InstX86Pcmpgt *create(Cfg *Func, Variable *Dest, Operand *Source) {
+      assert(Dest->getType() != IceType_f64 ||
+             InstX86Base::getTarget(Func)->getInstructionSet() >=
+                 Traits::SSE4_1);
        return new (Func->allocate<InstX86Pcmpgt>())
            InstX86Pcmpgt(Func, Dest, Source);
      }
diff --git a/src/IceRegAlloc.cpp b/src/IceRegAlloc.cpp

index c25ec83..38ccaa0 100644 (file)
--- a/src/IceRegAlloc.cpp
+++ b/src/IceRegAlloc.cpp
@@ -904,8 +904,8 @@ void LinearScan::scan(const llvm::SmallBitVector &RegMaskFull,
        Ostream &Str = Ctx->getStrDump();
        for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
          if (Iter.RegMask[i]) {
-          Str << Target->getRegName(i, IceType_i32) << "(U=" << RegUses[i]
-              << ",F=" << Iter.Free[i]
+          Str << Target->getRegName(i, Iter.Cur->getType())
+              << "(U=" << RegUses[i] << ",F=" << Iter.Free[i]
                << ",P=" << Iter.PrecoloredUnhandledMask[i] << ") ";
          }
        }
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp

index 8e78228..4a64b4a 100644 (file)
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -219,16 +219,17 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
    Variable *ReturnReg = nullptr;
    Variable *ReturnRegHi = nullptr;
    if (Dest) {
-    switch (Dest->getType()) {
+    const Type DestTy = Dest->getType();
+    switch (DestTy) {
      case IceType_NUM:
      case IceType_void:
-      llvm::report_fatal_error("Invalid Call dest type");
-      break;
      case IceType_i1:
      case IceType_i8:
      case IceType_i16:
+      llvm::report_fatal_error("Invalid Call dest type");
+      break;
      case IceType_i32:
-      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
+      ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax);
        break;
      case IceType_i64:
        ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
@@ -246,7 +247,7 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
      case IceType_v8i16:
      case IceType_v4i32:
      case IceType_v4f32:
-      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
+      ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0);
        break;
      }
    }
@@ -303,10 +304,11 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
        _mov(DestLo, ReturnReg);
        _mov(DestHi, ReturnRegHi);
      } else {
-      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
-             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
-             isVectorType(Dest->getType()));
-      if (isVectorType(Dest->getType())) {
+      const Type DestTy = Dest->getType();
+      assert(DestTy == IceType_i32 || DestTy == IceType_i16 ||
+             DestTy == IceType_i8 || DestTy == IceType_i1 ||
+             isVectorType(DestTy));
+      if (isVectorType(DestTy)) {
          _movp(Dest, ReturnReg);
        } else {
          _mov(Dest, ReturnReg);
@@ -352,19 +354,21 @@ void TargetX8632::lowerRet(const InstRet *Inst) {
    Variable *Reg = nullptr;
    if (Inst->hasRetValue()) {
      Operand *Src0 = legalize(Inst->getRetValue());
+    const Type Src0Ty = Src0->getType();
      // TODO(jpp): this is not needed.
-    if (Src0->getType() == IceType_i64) {
+    if (Src0Ty == IceType_i64) {
        Variable *eax =
            legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
        Variable *edx =
            legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
        Reg = eax;
        Context.insert<InstFakeUse>(edx);
-    } else if (isScalarFloatingType(Src0->getType())) {
+    } else if (isScalarFloatingType(Src0Ty)) {
        _fld(Src0);
-    } else if (isVectorType(Src0->getType())) {
+    } else if (isVectorType(Src0Ty)) {
        Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
      } else {
+      assert(Src0Ty == IceType_i32);
        _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
      }
    }
diff --git a/src/IceTargetLoweringX8632Traits.h b/src/IceTargetLoweringX8632Traits.h

index c912160..0216d18 100644 (file)
--- a/src/IceTargetLoweringX8632Traits.h
+++ b/src/IceTargetLoweringX8632Traits.h
@@ -367,8 +367,19 @@ struct TargetX8632Traits {
      assert(RegNum != Variable::NoRegister);
      // TODO(stichnot): Rewrite this as a table lookup from a table computed in a
      // TargetLowering static initializer.
+    // [abcd]h registers are not convertible to their ?l, ?x, and e?x versions.
+    switch (RegNum) {
+    default:
+      break;
+    case RegisterSet::Reg_ah:
+    case RegisterSet::Reg_bh:
+    case RegisterSet::Reg_ch:
+    case RegisterSet::Reg_dh:
+      assert(isByteSizedType(Ty));
+      return RegNum;
+    }
      RegNum = getBaseReg(RegNum);
-    if (Ty == IceType_i8 || Ty == IceType_i1) {
+    if (isByteSizedType(Ty)) {
        switch (RegNum) {
        default:
          assert(0);
diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp

index 0616ba1..7e63780 100644 (file)
--- a/src/IceTargetLoweringX8664.cpp
+++ b/src/IceTargetLoweringX8664.cpp
@@ -412,13 +412,13 @@ void TargetX8664::lowerRet(const InstRet *Inst) {
    Variable *Reg = nullptr;
    if (Inst->hasRetValue()) {
      Operand *Src0 = legalize(Inst->getRetValue());
-    if (isVectorType(Src0->getType()) ||
-        isScalarFloatingType(Src0->getType())) {
+    const Type Src0Ty = Src0->getType();
+    if (isVectorType(Src0Ty) || isScalarFloatingType(Src0Ty)) {
        Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
      } else {
-      assert(isScalarIntegerType(Src0->getType()));
-      _mov(Reg, Src0, Traits::getGprForType(Src0->getType(),
-                                            Traits::RegisterSet::Reg_rax));
+      assert(Src0Ty == IceType_i32 || Src0Ty == IceType_i64);
+      _mov(Reg, Src0,
+           Traits::getGprForType(Src0Ty, Traits::RegisterSet::Reg_rax));
      }
    }
    // Add a ret instruction even if sandboxing is enabled, because addEpilog
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h

index f8a0578..f52838f 100644 (file)
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -565,8 +565,10 @@ protected:
    void _pblendvb(Variable *Dest, Operand *Src0, Operand *Src1) {
      Context.insert<typename Traits::Insts::Pblendvb>(Dest, Src0, Src1);
    }
-  void _pcmpeq(Variable *Dest, Operand *Src0) {
-    Context.insert<typename Traits::Insts::Pcmpeq>(Dest, Src0);
+  void _pcmpeq(Variable *Dest, Operand *Src0,
+               Type ArithmeticTypeOverride = IceType_void) {
+    Context.insert<typename Traits::Insts::Pcmpeq>(Dest, Src0,
+                                                   ArithmeticTypeOverride);
    }
    void _pcmpgt(Variable *Dest, Operand *Src0) {
      Context.insert<typename Traits::Insts::Pcmpgt>(Dest, Src0);
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h

index 2b94df0..0d4e6bb 100644 (file)
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -1135,8 +1135,8 @@ bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0,
        return false;
      }
    }
-  // Lea optimization only works for i16 and i32 types, not i8.
-  if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
+  // Lea optimization only works for i32 type, not i1/i8/i16/i64.
+  if (Ty != IceType_i32 && (Count3 || Count5 || Count9))
      return false;
    // Limit the number of lea/shl operations for a single multiply, to a
    // somewhat arbitrary choice of 3.
@@ -5590,6 +5590,7 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
      Variable *Dest = Cast->getDest();
      const Type DestTy = Dest->getType();
      const char *HelperName = nullptr;
+    Variable *CallDest = Dest;
      switch (CastKind) {
      default:
        return;
@@ -5655,10 +5656,12 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
        case IceType_i8:
          assert(Src0->getType() == IceType_v8i1);
          HelperName = H_bitcast_8xi1_i8;
+        CallDest = Func->makeVariable(IceType_i32);
          break;
        case IceType_i16:
          assert(Src0->getType() == IceType_v16i1);
          HelperName = H_bitcast_16xi1_i16;
+        CallDest = Func->makeVariable(IceType_i32);
          break;
        case IceType_v8i1: {
          assert(Src0->getType() == IceType_i8);
@@ -5680,10 +5683,14 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
      } break;
      }
      constexpr SizeT MaxSrcs = 1;
-    InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
+    InstCall *Call = makeHelperCall(HelperName, CallDest, MaxSrcs);
      Call->addArg(Src0);
      StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
      Context.insert(Call);
+    // The PNaCl ABI disallows i8/i16 return types, so truncate the helper call
+    // result to the appropriate type as necessary.
+    if (CallDest->getType() != Dest->getType())
+      Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
      Cast->setDeleted();
    } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) {
      std::vector<Type> ArgTypes;
@@ -5841,7 +5848,14 @@ Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty,
    Variable *MinusOnes = makeReg(Ty, RegNum);
    // Insert a FakeDef so the live range of MinusOnes is not overestimated.
    Context.insert<InstFakeDef>(MinusOnes);
-  _pcmpeq(MinusOnes, MinusOnes);
+  if (Ty == IceType_f64)
+    // Making a vector of minus ones of type f64 is currently only used for the
+    // fabs intrinsic.  Creating this mask with the f64 type would emit pcmpeqq,
+    // which requires SSE 4.1.  Since we're just creating a mask, pcmpeqd does
+    // the same job and only requires SSE2.
+    _pcmpeq(MinusOnes, MinusOnes, IceType_f32);
+  else
+    _pcmpeq(MinusOnes, MinusOnes);
    return MinusOnes;
  }
  
diff --git a/tests_lit/assembler/x86/jump_encodings.ll b/tests_lit/assembler/x86/jump_encodings.ll

index e2f0e3b..e7d444e 100644 (file)
--- a/tests_lit/assembler/x86/jump_encodings.ll
+++ b/tests_lit/assembler/x86/jump_encodings.ll
@@ -140,15 +140,16 @@ next3:
    call void @llvm.nacl.atomic.store.i32(i32 %val, i32* %ptr, i32 6)
    br label %next1
  }
-; Forward branches for non-local labels currently use the fully relaxed
-; form to avoid needing a relaxation pass.
+; Note: forward branches for non-local labels in Subzero currently use the fully
+; relaxed form (4-byte offset) to avoid needing a relaxation pass.  When we use
+; llvm-mc, it performs the relaxation pass and uses a 1-byte offset.
  ; CHECK-LABEL: test_near_forward
-; CHECK:       8: {{.*}}            cmp
-; CHECK-NEXT:  b: 0f 82 05 00 00 00 jb 16
-; CHECK-NEXT: 11: {{.*}}            mov DWORD PTR
-; CHECK-NEXT: 13: {{.*}}            mfence
-; CHECK-NEXT: 16: {{.*}}            mov DWORD PTR
-; CHECK:      1b: eb eb             jmp 8
+; CHECK:      [[BACKLABEL:[0-9a-f]+]]: {{.*}} cmp
+; CHECK-NEXT: {{.*}} jb [[FORWARDLABEL:[0-9a-f]+]]
+; CHECK-NEXT: {{.*}} mov DWORD PTR
+; CHECK-NEXT: {{.*}} mfence
+; CHECK-NEXT: [[FORWARDLABEL]]: {{.*}} mov DWORD PTR
+; CHECK:      {{.*}} jmp [[BACKLABEL]]
  
  
  ; Unlike forward branches to cfg nodes, "local" forward branches
diff --git a/tests_lit/assembler/x86/opcode_register_encodings.ll b/tests_lit/assembler/x86/opcode_register_encodings.ll

index d926e8f..5aefe4a 100644 (file)
--- a/tests_lit/assembler/x86/opcode_register_encodings.ll
+++ b/tests_lit/assembler/x86/opcode_register_encodings.ll
@@ -2,8 +2,8 @@
  ; those for pmull vary more wildly depending on operand size (rather than
  ; follow a usual pattern).
  
-; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 -mattr=sse4.1 \
-; RUN:  -sandbox | FileCheck %s
+; RUN: %p2i --filetype=obj --disassemble --sandbox -i %s --args -O2 \
+; RUN:  -mattr=sse4.1 | FileCheck %s
  
  define internal <8 x i16> @test_mul_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
  entry:
@@ -161,7 +161,7 @@ entry:
    ret i32 %v
  }
  ; CHECK-LABEL: test_nacl_read_tp_more_addressing
-; CHECK: 65 8b 05 00 00 00 00  mov eax,DWORD PTR gs:0x0
+; CHECK: mov eax,{{(DWORD PTR )?}}gs:0x0
  ; CHECK: 8b 04 00              mov eax,DWORD PTR [eax+eax*1]
  ; CHECK: 65 8b 0d 00 00 00 00  mov ecx,DWORD PTR gs:0x0
  ; CHECK: 89 51 80              mov DWORD PTR [ecx-0x80],edx
diff --git a/tests_lit/lit.cfg b/tests_lit/lit.cfg

index 3090e60..b49ee84 100644 (file)
--- a/tests_lit/lit.cfg
+++ b/tests_lit/lit.cfg
@@ -89,6 +89,8 @@ iflc2i_atts_cmd = if_atts + [if_cond_flag('allow_llvm_ir_as_input'
  pnacl_sz_cmd = [os.path.join(pydir, 'run-pnacl-sz.py'),
                  '--pnacl-sz', pnacl_sz_tool,
                  '--pnacl-bin-path', pnaclbinpath]
+if 'FORCEASM' in lit_config.params:
+  pnacl_sz_cmd += ['--forceasm']
  
  # Run commands only if corresponding build attributes apply, including
  # for each compiler setup.
diff --git a/tests_lit/llvm2ice_tests/8bit.pnacl.ll b/tests_lit/llvm2ice_tests/8bit.pnacl.ll

index 8f8f515..6d5781b 100644 (file)
--- a/tests_lit/llvm2ice_tests/8bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/8bit.pnacl.ll
@@ -353,7 +353,7 @@ entry:
    ret i32 %ret_ext
  }
  ; CHECK-LABEL: load_i8_global
-; CHECK: mov {{[a-d]l}},BYTE PTR
+; CHECK: mov {{[a-d]l}},{{(BYTE PTR)?}}
  
  define void @store_i8(i32 %addr_arg, i32 %val) {
  entry:
diff --git a/tests_lit/llvm2ice_tests/address-mode-global.ll b/tests_lit/llvm2ice_tests/address-mode-global.ll

index a8f6d4d..a592648 100644 (file)
--- a/tests_lit/llvm2ice_tests/address-mode-global.ll
+++ b/tests_lit/llvm2ice_tests/address-mode-global.ll
@@ -17,8 +17,8 @@ entry:
    %result = add i32 %addr_lo.load, %addr_hi.load
    ret i32 %result
  ; CHECK-LABEL: load_global_direct
-; CHECK-NEXT: mov eax,DWORD PTR ds:0x0{{.*}}bytes
-; CHECK-NEXT: add eax,DWORD PTR ds:0x4{{.*}}bytes
+; CHECK-NEXT: mov eax,{{(DWORD PTR )?}}ds:0x0{{.*}}{{bytes|.bss}}
+; CHECK-NEXT: add eax,DWORD PTR ds:0x4{{.*}}{{bytes|.bss}}
  }
  
  define internal i32 @load_global_indexed(i32 %arg) {
diff --git a/tests_lit/llvm2ice_tests/bitcast.ll b/tests_lit/llvm2ice_tests/bitcast.ll

index 0519cfb..e61a165 100644 (file)
--- a/tests_lit/llvm2ice_tests/bitcast.ll
+++ b/tests_lit/llvm2ice_tests/bitcast.ll
@@ -49,8 +49,8 @@ entry:
    ret i64 %v0
  }
  ; CHECK-LABEL: cast_d2ll_const
-; CHECK: mov e{{..}},DWORD PTR ds:0x0 {{.*}} .L$double$0012345678901234
-; CHECK: mov e{{..}},DWORD PTR ds:0x4 {{.*}} .L$double$0012345678901234
+; CHECK: mov e{{..}},{{(DWORD PTR )?}}ds:0x0 {{.*}} .L$double$0012345678901234
+; CHECK: mov e{{..}},{{(DWORD PTR )?}}ds:0x4 {{.*}} .L$double$0012345678901234
  ; ARM32-LABEL: cast_d2ll_const
  ; ARM32-DAG: movw [[ADDR:r[0-9]+]], #:lower16:.L$
  ; ARM32-DAG: movt [[ADDR]], #:upper16:.L$
diff --git a/tests_lit/llvm2ice_tests/contract.ll b/tests_lit/llvm2ice_tests/contract.ll

index 05afb46..499ab69 100644 (file)
--- a/tests_lit/llvm2ice_tests/contract.ll
+++ b/tests_lit/llvm2ice_tests/contract.ll
@@ -15,4 +15,3 @@ label2:
  
  ; CHECK-LABEL: SimpleBranch
  ; CHECK-NEXT: jmp 0 <SimpleBranch>
-; CHECK-NEXT: hlt
diff --git a/tests_lit/llvm2ice_tests/convert.ll b/tests_lit/llvm2ice_tests/convert.ll

index 5c8e5d2..da01218 100644 (file)
--- a/tests_lit/llvm2ice_tests/convert.ll
+++ b/tests_lit/llvm2ice_tests/convert.ll
@@ -48,15 +48,15 @@ entry:
    ret void
  }
  ; CHECK-LABEL: from_int8
-; CHECK: mov {{.*}},BYTE PTR
-; CHECK: movsx e{{.*}},{{[a-d]l|BYTE PTR}}
-; CHECK: mov WORD PTR
+; CHECK: mov {{.*}},{{(BYTE PTR)?}}
+; CHECK: movsx {{.*}},{{[a-d]l|BYTE PTR}}
+; CHECK: mov {{(WORD PTR)?}}
  ; CHECK: movsx
-; CHECK: mov DWORD PTR
+; CHECK: mov {{(DWORD PTR)?}}
  ; CHECK: movsx
  ; CHECK: sar {{.*}},0x1f
-; CHECK-DAG: ds:0x0,{{.*}}i64v
-; CHECK-DAG: ds:0x4,{{.*}}i64v
+; CHECK-DAG: ds:0x{{.}},{{.*}}{{i64v|.bss}}
+; CHECK-DAG: ds:0x{{.}},{{.*}}{{i64v|.bss}}
  
  ; ARM32-LABEL: from_int8
  ; ARM32: movw {{.*}}i8v
@@ -89,13 +89,13 @@ entry:
    ret void
  }
  ; CHECK-LABEL: from_int16
-; CHECK: mov {{.*}},WORD PTR
-; CHECK: 0x0 {{.*}}i16v
+; CHECK: mov {{.*}},{{(WORD PTR)?}}
+; CHECK: 0x{{.}} {{.*}}{{i16v|.bss}}
  ; CHECK: movsx e{{.*}},{{.*x|[ds]i|bp|WORD PTR}}
-; CHECK: 0x0,{{.*}}i32v
+; CHECK: 0x{{.}},{{.*}}{{i32v|.bss}}
  ; CHECK: movsx e{{.*}},{{.*x|[ds]i|bp|WORD PTR}}
  ; CHECK: sar {{.*}},0x1f
-; CHECK: 0x0,{{.*}}i64v
+; CHECK: 0x{{.}},{{.*}}{{i64v|.bss}}
  
  ; ARM32-LABEL: from_int16
  ; ARM32: movw {{.*}}i16v
@@ -126,11 +126,11 @@ entry:
    ret void
  }
  ; CHECK-LABEL: from_int32
-; CHECK: 0x0 {{.*}} i32v
-; CHECK: 0x0,{{.*}} i8v
-; CHECK: 0x0,{{.*}} i16v
+; CHECK: 0x{{.}} {{.*}} {{i32v|.bss}}
+; CHECK: 0x{{.}},{{.*}} {{i8v|.bss}}
+; CHECK: 0x{{.}},{{.*}} {{i16v|.bss}}
  ; CHECK: sar {{.*}},0x1f
-; CHECK: 0x0,{{.*}} i64v
+; CHECK: 0x{{.}},{{.*}} {{i64v|.bss}}
  
  ; ARM32-LABEL: from_int32
  ; ARM32: movw {{.*}}i32v
@@ -159,10 +159,10 @@ entry:
    ret void
  }
  ; CHECK-LABEL: from_int64
-; CHECK: 0x0 {{.*}} i64v
-; CHECK: 0x0,{{.*}} i8v
-; CHECK: 0x0,{{.*}} i16v
-; CHECK: 0x0,{{.*}} i32v
+; CHECK: 0x{{.}} {{.*}} {{i64v|.bss}}
+; CHECK: 0x{{.}},{{.*}} {{i8v|.bss}}
+; CHECK: 0x{{.}},{{.*}} {{i16v|.bss}}
+; CHECK: 0x{{.}},{{.*}} {{i32v|.bss}}
  
  ; ARM32-LABEL: from_int64
  ; ARM32: movw {{.*}}i64v
@@ -190,14 +190,14 @@ entry:
    ret void
  }
  ; CHECK-LABEL: from_uint8
-; CHECK: 0x0 {{.*}} u8v
-; CHECK: movzx e{{.*}},{{[a-d]l|BYTE PTR}}
-; CHECK: 0x0,{{.*}} i16v
+; CHECK: 0x{{.*}} {{.*}} {{u8v|.bss}}
+; CHECK: movzx {{.*}},{{[a-d]l|BYTE PTR}}
+; CHECK: 0x{{.}},{{.*}} {{i16v|.bss}}
  ; CHECK: movzx
-; CHECK: 0x0,{{.*}} i32v
+; CHECK: 0x{{.}},{{.*}} {{i32v|.bss}}
  ; CHECK: movzx
  ; CHECK: mov {{.*}},0x0
-; CHECK: 0x0,{{.*}} i64v
+; CHECK: 0x{{.}},{{.*}} {{i64v|.bss}}
  
  ; ARM32-LABEL: from_uint8
  ; ARM32: movw {{.*}}u8v
@@ -229,13 +229,13 @@ entry:
    ret void
  }
  ; CHECK-LABEL: from_uint16
-; CHECK: 0x0 {{.*}} u16v
-; CHECK: 0x0,{{.*}} i8v
+; CHECK: 0x{{.*}} {{.*}} {{u16v|.bss}}
+; CHECK: 0x{{.}},{{.*}} {{i8v|.bss}}
  ; CHECK: movzx e{{.*}},{{.*x|[ds]i|bp|WORD PTR}}
-; CHECK: 0x0,{{.*}} i32v
+; CHECK: 0x{{.}},{{.*}} {{i32v|.bss}}
  ; CHECK: movzx e{{.*}},{{.*x|[ds]i|bp|WORD PTR}}
  ; CHECK: mov {{.*}},0x0
-; CHECK: 0x0,{{.*}} i64v
+; CHECK: 0x{{.}},{{.*}} {{i64v|.bss}}
  
  ; ARM32-LABEL: from_uint16
  ; ARM32: movw {{.*}}u16v
@@ -266,11 +266,11 @@ entry:
    ret void
  }
  ; CHECK-LABEL: from_uint32
-; CHECK: 0x0 {{.*}} u32v
-; CHECK: 0x0,{{.*}} i8v
-; CHECK: 0x0,{{.*}} i16v
+; CHECK: 0x{{.*}} {{.*}} {{u32v|.bss}}
+; CHECK: 0x{{.}},{{.*}} {{i8v|.bss}}
+; CHECK: 0x{{.}},{{.*}} {{i16v|.bss}}
  ; CHECK: mov {{.*}},0x0
-; CHECK: 0x0,{{.*}} i64v
+; CHECK: 0x{{.}},{{.*}} {{i64v|.bss}}
  
  ; ARM32-LABEL: from_uint32
  ; ARM32: movw {{.*}}u32v
@@ -299,10 +299,10 @@ entry:
    ret void
  }
  ; CHECK-LABEL: from_uint64
-; CHECK: 0x0 {{.*}} u64v
-; CHECK: 0x0,{{.*}} i8v
-; CHECK: 0x0,{{.*}} i16v
-; CHECK: 0x0,{{.*}} i32v
+; CHECK: 0x{{.*}} {{.*}} {{u64v|.bss}}
+; CHECK: 0x{{.}},{{.*}} {{i8v|.bss}}
+; CHECK: 0x{{.}},{{.*}} {{i16v|.bss}}
+; CHECK: 0x{{.}},{{.*}} {{i32v|.bss}}
  
  ; ARM32-LABEL: from_uint64
  ; ARM32: movw {{.*}}u64v
diff --git a/tests_lit/llvm2ice_tests/elf_container.ll b/tests_lit/llvm2ice_tests/elf_container.ll

index 734b5fc..9464b2e 100644 (file)
--- a/tests_lit/llvm2ice_tests/elf_container.ll
+++ b/tests_lit/llvm2ice_tests/elf_container.ll
@@ -3,7 +3,7 @@
  
  ; For the integrated ELF writer, we can't pipe the output because we need
  ; to seek backward and patch up the file headers. So, use a temporary file.
-; RUN: %p2i -i %s --filetype=obj --args -O2 --verbose none -o %t \
+; RUN: %p2i -i %s --filetype=obj --output %t --args -O2 --verbose none \
  ; RUN:   -allow-externally-defined-symbols \
  ; RUN:   && llvm-readobj -file-headers -sections -section-data \
  ; RUN:       -relocations -symbols %t | FileCheck %s
@@ -17,7 +17,7 @@
  ; RUN:   | %if --need=allow_dump --command FileCheck %s
  
  ; Add a run that shows relocations in code inline.
-; RUN: %p2i -i %s --filetype=obj --args -O2 --verbose none -o %t \
+; RUN: %p2i -i %s --filetype=obj --output %t --args -O2 --verbose none \
  ; RUN:   -allow-externally-defined-symbols \
  ; RUN:   && le32-nacl-objdump -w -d -r -Mintel %t \
  ; RUN:   | FileCheck --check-prefix=TEXT-RELOCS %s
@@ -82,7 +82,7 @@ entry:
    ret void
  }
  ; TEXT-RELOCS-LABEL: test_memcpy
-; TEXT-RELOCS: mov {{.*}} R_386_32 bytes
+; TEXT-RELOCS: mov {{.*}} R_386_32 {{bytes|.data}}
  
  define internal void @test_memset(i32 %iptr_dst, i32 %wide_val, i32 %len) {
  entry:
@@ -113,14 +113,14 @@ define internal i32 @test_ret_fp() {
    ret i32 %r
  }
  ; TEXT-RELOCS-LABEL: test_ret_fp
-; TEXT-RELOCS-NEXT: mov {{.*}} R_386_32 returnFloatConst
+; TEXT-RELOCS-NEXT: mov {{.*}} R_386_32 {{returnFloatConst|.text}}
  
  define internal i32 @test_ret_global_pointer() {
    %r = ptrtoint [7 x i8]* @bytes to i32
    ret i32 %r
  }
  ; TEXT-RELOCS-LABEL: test_ret_global_pointer
-; TEXT-RELOCS-NEXT: mov {{.*}} R_386_32 bytes
+; TEXT-RELOCS-NEXT: mov {{.*}} R_386_32 {{bytes|.data}}
  
  ; Test defining a non-internal function.
  define void @_start(i32) {
diff --git a/tests_lit/llvm2ice_tests/elf_function_sections.ll b/tests_lit/llvm2ice_tests/elf_function_sections.ll

index 4f38e33..804ba1d 100644 (file)
--- a/tests_lit/llvm2ice_tests/elf_function_sections.ll
+++ b/tests_lit/llvm2ice_tests/elf_function_sections.ll
@@ -1,6 +1,6 @@
  ; Tests filetype=obj with -ffunction-sections.
  
-; RUN: %p2i -i %s --filetype=obj --args -O2 -o %t -ffunction-sections && \
+; RUN: %p2i -i %s --filetype=obj --output %t --args -O2 -ffunction-sections && \
  ; RUN:   llvm-readobj -file-headers -sections -section-data \
  ; RUN:     -relocations -symbols %t | FileCheck %s
  
diff --git a/tests_lit/llvm2ice_tests/elf_nodata.ll b/tests_lit/llvm2ice_tests/elf_nodata.ll

index 2ea1edd..f286044 100644 (file)
--- a/tests_lit/llvm2ice_tests/elf_nodata.ll
+++ b/tests_lit/llvm2ice_tests/elf_nodata.ll
@@ -1,7 +1,7 @@
  ; Tests that we generate an ELF container correctly when there
  ; is no data section.
  
-; RUN: %p2i -i %s --filetype=obj --args -O2 -o %t \
+; RUN: %p2i -i %s --filetype=obj --output %t --args -O2 \
  ; RUN:   && llvm-readobj -file-headers -sections -section-data \
  ; RUN:       -relocations -symbols %t | FileCheck %s
  
diff --git a/tests_lit/llvm2ice_tests/loop-nest-depth.ll b/tests_lit/llvm2ice_tests/loop-nest-depth.ll

index a899e50..63c9670 100644 (file)
--- a/tests_lit/llvm2ice_tests/loop-nest-depth.ll
+++ b/tests_lit/llvm2ice_tests/loop-nest-depth.ll
@@ -2,9 +2,9 @@
  
  ; REQUIRES: allow_dump
  
-; Single threaded so that the dumps used for checking happen in order
+; Single threaded so that the dumps used for checking happen in order.
  ; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 --verbose=loop \
-; RUN: --threads=0 | FileCheck %s
+; RUN:     -log=%t --threads=0 && FileCheck %s < %t
  
  define internal void @test_single_loop(i32 %a32) {
  entry:
diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll b/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll

index 76a7e19..cf37696 100644 (file)
--- a/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
@@ -43,14 +43,14 @@ entry:
  ; CHECK: mov DWORD PTR {{.*}},0x3e7
  ;    atomic store (w/ its own mfence)
  ; The load + sub are optimized into one everywhere.
-; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a
-; CHECK: mov DWORD PTR
+; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_a)|(.bss)}}
+; CHECK: mov {{(DWORD PTR)?}}
  ; CHECK: mfence
-; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b
-; CHECK: mov DWORD PTR
-; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c
+; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_b)|(.bss)}}
+; CHECK: mov {{(DWORD PTR)?}}
+; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_c)|(.bss)}}
  ; CHECK: mfence
-; CHECK: mov DWORD PTR
+; CHECK: mov {{(DWORD PTR)?}}
  
  ; Test with the fence moved up a bit.
  define internal i32 @test_fused_load_sub_b() {
@@ -81,16 +81,16 @@ entry:
  ;    alloca store
  ; CHECK: mov DWORD PTR {{.*}},0x3e7
  ;    atomic store (w/ its own mfence)
-; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a
-; CHECK: mov DWORD PTR
+; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_a)|(.bss)}}
+; CHECK: mov {{(DWORD PTR)?}}
  ; CHECK: mfence
-; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b
-; CHECK: mov DWORD PTR
+; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_b)|(.bss)}}
+; CHECK: mov {{(DWORD PTR)?}}
  ; CHECK: mfence
  ; Load + sub can still be optimized into one instruction
  ; because it is not separated by a fence.
-; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c
-; CHECK: mov DWORD PTR
+; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_c)|(.bss)}}
+; CHECK: mov {{(DWORD PTR)?}}
  
  ; Test with the fence splitting a load/sub.
  define internal i32 @test_fused_load_sub_c() {
@@ -121,19 +121,19 @@ entry:
  ;    alloca store
  ; CHECK: mov DWORD PTR {{.*}},0x3e7
  ;    atomic store (w/ its own mfence)
-; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a
-; CHECK: mov DWORD PTR
+; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_a)|(.bss)}}
+; CHECK: mov {{(DWORD PTR)?}}
  ; CHECK: mfence
  ; This load + sub pair is no longer optimized into one instruction,
  ; though perhaps that should be legal as long as
  ; the load stays on the same side of the fence.
-; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_b
+; CHECK: mov {{.*}},{{(DWORD PTR )?}}{{.*}}{{(g32_b)|(.bss)}}
  ; CHECK: mfence
  ; CHECK: mov {{.*}},0x1
  ; CHECK: sub
-; CHECK: mov DWORD PTR
-; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c
-; CHECK: mov DWORD PTR
+; CHECK: mov {{(DWORD PTR)?}}
+; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_c)|(.bss)}}
+; CHECK: mov {{(DWORD PTR)?}}
  
  
  ; Test where a bunch of i8 loads could have been fused into one
@@ -171,7 +171,7 @@ entry:
    ret i32 %b1234
  }
  ; CHECK-LABEL: could_have_fused_loads
-; CHECK: mov {{.*}},BYTE PTR
+; CHECK: mov {{.*}},{{(BYTE PTR)?}}
  ; CHECK: mov {{.*}},BYTE PTR
  ; CHECK: mov {{.*}},BYTE PTR
  ; CHECK: mfence
@@ -195,8 +195,8 @@ branch2:
  }
  ; CHECK-LABEL: could_have_hoisted_loads
  ; CHECK: jne {{.*}}
-; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d
+; CHECK: mov {{.*}},{{(DWORD PTR )?}}{{.*}}{{(g32_d)|(.bss)}}
  ; CHECK: ret
  ; CHECK: mfence
-; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d
+; CHECK: mov {{.*}},{{(DWORD PTR )?}}{{.*}}{{(g32_d)|(.bss)}}
  ; CHECK: ret
diff --git a/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll

index 45ee8b3..12d4267 100644 (file)
--- a/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll
@@ -46,10 +46,10 @@ declare void @llvm.nacl.atomic.fence(i32)
  declare void @llvm.nacl.atomic.fence.all()
  declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*)
  
-@Global8 = internal global [1 x i8] zeroinitializer, align 1
-@Global16 = internal global [2 x i8] zeroinitializer, align 2
-@Global32 = internal global [4 x i8] zeroinitializer, align 4
-@Global64 = internal global [8 x i8] zeroinitializer, align 8
+@SzGlobal8 = internal global [1 x i8] zeroinitializer, align 1
+@SzGlobal16 = internal global [2 x i8] zeroinitializer, align 2
+@SzGlobal32 = internal global [4 x i8] zeroinitializer, align 4
+@SzGlobal64 = internal global [8 x i8] zeroinitializer, align 8
  
  ; NOTE: The LLC equivalents of 16-bit atomic operations are expanded
  ; as 32-bit operations. For Subzero, assume that real 16-bit operations
@@ -350,7 +350,7 @@ entry:
  ; Same test as above, but with a global address to test FakeUse issues.
  define internal i64 @test_atomic_rmw_add_64_global(i64 %v) {
  entry:
-  %ptr = bitcast [8 x i8]* @Global64 to i64*
+  %ptr = bitcast [8 x i8]* @SzGlobal64 to i64*
    %a = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %v, i32 6)
    ret i64 %a
  }
@@ -595,15 +595,15 @@ entry:
  define internal i32 @test_atomic_rmw_or_8_global(i32 %v) {
  entry:
    %trunc = trunc i32 %v to i8
-  %ptr = bitcast [1 x i8]* @Global8 to i8*
+  %ptr = bitcast [1 x i8]* @SzGlobal8 to i8*
    %a = call i8 @llvm.nacl.atomic.rmw.i8(i32 3, i8* %ptr, i8 %trunc, i32 6)
    %a_ext = zext i8 %a to i32
    ret i32 %a_ext
  }
  ; CHECK-LABEL: test_atomic_rmw_or_8_global
  ; ARM32-LABEL: test_atomic_rmw_or_8_global
-; ARM32: movw [[PTR:r[0-9]+]], #:lower16:Global8
-; ARM32: movt [[PTR]], #:upper16:Global8
+; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal8
+; ARM32: movt [[PTR]], #:upper16:SzGlobal8
  ; ARM32: dmb
  ; ARM32: ldrexb r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
  ; ARM32: orr
@@ -636,15 +636,15 @@ entry:
  define internal i32 @test_atomic_rmw_or_16_global(i32 %v) {
  entry:
    %trunc = trunc i32 %v to i16
-  %ptr = bitcast [2 x i8]* @Global16 to i16*
+  %ptr = bitcast [2 x i8]* @SzGlobal16 to i16*
    %a = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %trunc, i32 6)
    %a_ext = zext i16 %a to i32
    ret i32 %a_ext
  }
  ; CHECK-LABEL: test_atomic_rmw_or_16_global
  ; ARM32-LABEL: test_atomic_rmw_or_16_global
-; ARM32: movw [[PTR:r[0-9]+]], #:lower16:Global16
-; ARM32: movt [[PTR]], #:upper16:Global16
+; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal16
+; ARM32: movt [[PTR]], #:upper16:SzGlobal16
  ; ARM32: dmb
  ; ARM32: ldrexh r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
  ; ARM32: orr
@@ -674,14 +674,14 @@ entry:
  ; Same test as above, but with a global address to test FakeUse issues.
  define internal i32 @test_atomic_rmw_or_32_global(i32 %v) {
  entry:
-  %ptr = bitcast [4 x i8]* @Global32 to i32*
+  %ptr = bitcast [4 x i8]* @SzGlobal32 to i32*
    %a = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %v, i32 6)
    ret i32 %a
  }
  ; CHECK-LABEL: test_atomic_rmw_or_32_global
  ; ARM32-LABEL: test_atomic_rmw_or_32_global
-; ARM32: movw [[PTR:r[0-9]+]], #:lower16:Global32
-; ARM32: movt [[PTR]], #:upper16:Global32
+; ARM32: movw [[PTR:r[0-9]+]], #:lower16:SzGlobal32
+; ARM32: movt [[PTR]], #:upper16:SzGlobal32
  ; ARM32: dmb
  ; ARM32: ldrex r{{[0-9]+}}, {{[[]}}[[PTR]]{{[]]}}
  ; ARM32: orr
@@ -1393,7 +1393,7 @@ entry:
    br label %body
  body:
    %i = phi i32 [ 1, %entry ], [ %i_plus_1, %body ]
-  %g = bitcast [4 x i8]* @Global32 to i32*
+  %g = bitcast [4 x i8]* @SzGlobal32 to i32*
    %unused = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %g, i32 %i, i32 6)
    %i_plus_1 = add i32 %i, 1
    %cmp = icmp eq i32 %i_plus_1, 1001
@@ -1414,7 +1414,7 @@ entry:
    br label %body
  body:
    %i = phi i32 [ 1, %entry ], [ %i_plus_1, %body ]
-  %g = bitcast [4 x i8]* @Global32 to i32*
+  %g = bitcast [4 x i8]* @SzGlobal32 to i32*
    %unused = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %g, i32 %i, i32 6)
    %i_plus_1 = add i32 %i, 1
    %cmp = icmp eq i32 %i_plus_1, 1001
@@ -1435,7 +1435,7 @@ entry:
    br label %body
  body:
    %i = phi i32 [ 1, %entry ], [ %i_plus_1, %body ]
-  %g = bitcast [4 x i8]* @Global32 to i32*
+  %g = bitcast [4 x i8]* @SzGlobal32 to i32*
    %unused = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %g, i32 %i, i32 %i, i32 6, i32 6)
    %i_plus_1 = add i32 %i, 1
    %cmp = icmp eq i32 %i_plus_1, 1001
@@ -1456,7 +1456,7 @@ entry:
    br label %body
  body:
    %i = phi i32 [ 1, %entry ], [ %i_plus_1, %body ]
-  %g = bitcast [8 x i8]* @Global64 to i64*
+  %g = bitcast [8 x i8]* @SzGlobal64 to i64*
    %i_64 = zext i32 %i to i64
    %unused = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %g, i64 %i_64, i64 %i_64, i32 6, i32 6)
    %i_plus_1 = add i32 %i, 1
diff --git a/tests_lit/llvm2ice_tests/nacl-mem-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-mem-intrinsics.ll

index 95f5807..7fb65f1 100644 (file)
--- a/tests_lit/llvm2ice_tests/nacl-mem-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-mem-intrinsics.ll
@@ -1,13 +1,13 @@
  ; This tests the NaCl intrinsics memset, memcpy and memmove.
  
  ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
-; RUN:   --target x8632 -i %s --args -O2 -sandbox \
+; RUN:   --target x8632 --sandbox -i %s --args -O2 \
  ; RUN:   | %if --need=target_X8632 --command FileCheck %s
  ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
-; RUN:   --target x8632 -i %s --args -Om1 --fmem-intrin-opt -sandbox \
+; RUN:   --target x8632 --sandbox -i %s --args -Om1 --fmem-intrin-opt \
  ; RUN:   | %if --need=target_X8632 --command FileCheck %s
  ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
-; RUN:   --target x8632 -i %s --args -Om1 -sandbox \
+; RUN:   --target x8632 --sandbox -i %s --args -Om1 \
  ; RUN:   | %if --need=target_X8632 --command FileCheck --check-prefix OM1 %s
  
  ; RUN: %if --need=target_ARM32 --need=allow_dump \
diff --git a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll

index 433bd0a..371db88 100644 (file)
--- a/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
+++ b/tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll
@@ -1,11 +1,11 @@
  ; This tests the NaCl intrinsics not related to atomic operations.
  
  ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
-; RUN:   --target x8632 -i %s --args -O2 -sandbox \
+; RUN:   --target x8632 --sandbox -i %s --args -O2 \
  ; RUN:   -allow-externally-defined-symbols \
  ; RUN:   | %if --need=target_X8632 --command FileCheck %s
  ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
-; RUN:   --target x8632 -i %s --args -Om1 -sandbox \
+; RUN:   --target x8632 --sandbox -i %s --args -Om1 \
  ; RUN:   -allow-externally-defined-symbols \
  ; RUN:   | %if --need=target_X8632 --command FileCheck %s
  
@@ -13,7 +13,7 @@
  ; share the same "CHECK" prefix). This separate run helps check that
  ; some code is optimized out.
  ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
-; RUN:   --target x8632 -i %s --args -O2 -sandbox \
+; RUN:   --target x8632 --sandbox -i %s --args -O2 \
  ; RUN:   -allow-externally-defined-symbols \
  ; RUN:   | %if --need=target_X8632 \
  ; RUN:   --command FileCheck --check-prefix=CHECKO2REM %s
@@ -62,9 +62,9 @@ entry:
    ret i32 %__1
  }
  ; CHECK-LABEL: test_nacl_read_tp
-; CHECK: mov e{{.*}},DWORD PTR gs:0x0
+; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
  ; CHECKO2REM-LABEL: test_nacl_read_tp
-; CHECKO2REM: mov e{{.*}},DWORD PTR gs:0x0
+; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
  ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp
  ; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp
  
@@ -86,11 +86,11 @@ entry:
    ret i32 %v
  }
  ; CHECK-LABEL: test_nacl_read_tp_more_addressing
-; CHECK: mov e{{.*}},DWORD PTR gs:0x0
-; CHECK: mov e{{.*}},DWORD PTR gs:0x0
+; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
+; CHECK: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
  ; CHECKO2REM-LABEL: test_nacl_read_tp_more_addressing
-; CHECKO2REM: mov e{{.*}},DWORD PTR gs:0x0
-; CHECKO2REM: mov e{{.*}},DWORD PTR gs:0x0
+; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
+; CHECKO2REM: mov e{{.*}},{{(DWORD PTR )?}}gs:0x0
  ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_more_addressing
  ; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp
  ; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp
diff --git a/tests_lit/llvm2ice_tests/nonsfi.ll b/tests_lit/llvm2ice_tests/nonsfi.ll

index bf40f16..5b80c42 100644 (file)
--- a/tests_lit/llvm2ice_tests/nonsfi.ll
+++ b/tests_lit/llvm2ice_tests/nonsfi.ll
@@ -1,6 +1,8 @@
  ; RUN: %p2i -i %s --filetype=obj --assemble --disassemble --args -O2 -nonsfi=1 \
+; RUN:   --ffunction-sections \
  ; RUN:   | FileCheck --check-prefix=NONSFI %s
  ; RUN: %p2i -i %s --filetype=obj --assemble --disassemble --args -O2 -nonsfi=0 \
+; RUN:   --ffunction-sections \
  ; RUN:   | FileCheck --check-prefix=DEFAULT %s
  
  @G1 = internal global [4 x i8] zeroinitializer, align 4
@@ -8,12 +10,14 @@
  
  define internal void @testCallRegular() {
  entry:
-  call void @testCallRegular()
+  ; Make a call to a *different* function, plus use -ffunction-sections, to
+  ; force an appropriately-named relocation.
+  call i32 @testLoadBasic()
    ret void
  }
  ; Expect a simple direct call from within testCallRegular.
  ; NONSFI-LABEL: testCallRegular
-; NONSFI: call {{.*}} R_386_PC32 testCallRegular
+; NONSFI: call {{.*}} R_386_PC32 {{.*}}testLoadBasic
  ; DEFAULT-LABEL: testCallRegular
  
  define internal double @testCallBuiltin(double %val) {
@@ -34,7 +38,7 @@ entry:
  }
  ; Expect a load with a R_386_GOTOFF relocation.
  ; NONSFI-LABEL: testLoadBasic
-; NONSFI: mov {{.*}} R_386_GOTOFF G1
+; NONSFI: mov {{.*}} R_386_GOTOFF {{G1|.bss}}
  ; DEFAULT-LABEL: testLoadBasic
  
  define internal i32 @testLoadFixedOffset() {
@@ -47,7 +51,7 @@ entry:
  }
  ; Expect a load with a R_386_GOTOFF relocation plus an immediate offset.
  ; NONSFI-LABEL: testLoadFixedOffset
-; NONSFI: mov {{.*}}+0x4] {{.*}} R_386_GOTOFF G1
+; NONSFI: mov {{.*}}+0x4] {{.*}} R_386_GOTOFF {{G1|.bss}}
  ; DEFAULT-LABEL: testLoadFixedOffset
  
  define internal i32 @testLoadIndexed(i32 %idx) {
@@ -63,7 +67,7 @@ entry:
  ; Expect a load with a R_386_GOTOFF relocation plus an immediate offset, plus a
  ; scaled index register.
  ; NONSFI-LABEL: testLoadIndexed
-; NONSFI: mov {{.*}}*4+0xc] {{.*}} R_386_GOTOFF G1
+; NONSFI: mov {{.*}}*4+0xc] {{.*}} R_386_GOTOFF {{G1|.bss}}
  ; DEFAULT-LABEL: testLoadIndexed
  
  define internal i32 @testLoadIndexedBase(i32 %base, i32 %idx) {
@@ -80,11 +84,11 @@ entry:
  ; Expect a load with a R_386_GOTOFF relocation plus an immediate offset, but
  ; without the scaled index.
  ; NONSFI-LABEL: testLoadIndexedBase
-; NONSFI: mov {{.*}}*1+0xc] {{.*}} R_386_GOTOFF G1
+; NONSFI: mov {{.*}}*1+0xc] {{.*}} R_386_GOTOFF {{G1|.bss}}
  ; By contrast, without -nonsfi, expect a load with a *R_386_32* relocation plus
  ; an immediate offset, and *with* the scaled index.
  ; DEFAULT-LABEL: testLoadIndexedBase
-; DEFAULT: mov {{.*}},DWORD PTR [{{.*}}+{{.*}}*4+0xc] {{.*}} R_386_32 G1
+; DEFAULT: mov {{.*}},DWORD PTR [{{.*}}+{{.*}}*4+0xc] {{.*}} R_386_32 {{G1|.bss}}
  
  define internal i32 @testLoadOpt() {
  entry:
@@ -97,8 +101,8 @@ entry:
  }
  ; Expect a load-folding optimization with a R_386_GOTOFF relocation.
  ; NONSFI-LABEL: testLoadOpt
-; NONSFI: mov [[REG:e..]],{{.*}}+0x0] {{.*}} R_386_GOTOFF G1
-; NONSFI-NEXT: add [[REG]],{{.*}}+0x0] {{.*}} R_386_GOTOFF G2
+; NONSFI: mov [[REG:e..]],{{.*}}+0x0] {{.*}} R_386_GOTOFF {{G1|.bss}}
+; NONSFI-NEXT: add [[REG]],{{.*}}+0x{{0|4}}] {{.*}} R_386_GOTOFF {{G2|.bss}}
  ; DEFAULT-LABEL: testLoadOpt
  
  define internal void @testRMW() {
@@ -111,5 +115,5 @@ entry:
  }
  ; Expect an RMW optimization with a R_386_GOTOFF relocation.
  ; NONSFI-LABEL: testRMW
-; NONSFI: add DWORD PTR {{.*}}+0x0],0x4d2 {{.*}} R_386_GOTOFF G1
+; NONSFI: add DWORD PTR {{.*}}+0x0],0x4d2 {{.*}} R_386_GOTOFF {{G1|.bss}}
  ; DEFAULT-LABEL: testRMW
diff --git a/tests_lit/llvm2ice_tests/nop-insertion.ll b/tests_lit/llvm2ice_tests/nop-insertion.ll

index c68dd0b..bfefdcc 100644 (file)
--- a/tests_lit/llvm2ice_tests/nop-insertion.ll
+++ b/tests_lit/llvm2ice_tests/nop-insertion.ll
@@ -14,7 +14,7 @@
  ; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
  ; RUN:    -nop-insertion-percentage=50 -max-nops-per-instruction=2 \
  ; RUN:    | FileCheck %s --check-prefix=MAXNOPS2
-; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion -sandbox\
+; RUN: %p2i -i %s --filetype=asm --sandbox -a -sz-seed=1 -nop-insertion \
  ; RUN:    -nop-insertion-percentage=50 -max-nops-per-instruction=1 \
  ; RUN:    | FileCheck %s --check-prefix=SANDBOX50
  
diff --git a/tests_lit/llvm2ice_tests/phi_invalid.test b/tests_lit/llvm2ice_tests/phi_invalid.test

index d8323db..665f09c 100644 (file)
--- a/tests_lit/llvm2ice_tests/phi_invalid.test
+++ b/tests_lit/llvm2ice_tests/phi_invalid.test
@@ -2,8 +2,8 @@
  ; https://code.google.com/p/nativeclient/issues/detail?id=4304
  
  RUN: %p2i --expect-fail --tbc -i %p/Input/phi-invalid.tbc --insts 2>&1 \
-RUN:        --filetype=obj --args -o /dev/null \
-RUN:        -allow-externally-defined-symbols \
+RUN:        --filetype=obj --output /dev/null \
+RUN:        --args -allow-externally-defined-symbols \
  RUN:        | FileCheck --check-prefix=BADPHI %s
  
  ; BADPHI: Phi error:
diff --git a/tests_lit/llvm2ice_tests/randomize-pool-immediate-basic.ll b/tests_lit/llvm2ice_tests/randomize-pool-immediate-basic.ll

index a5d2eb4..d8ebefe 100644 (file)
--- a/tests_lit/llvm2ice_tests/randomize-pool-immediate-basic.ll
+++ b/tests_lit/llvm2ice_tests/randomize-pool-immediate-basic.ll
@@ -33,7 +33,7 @@ entry:
  ; BLINDINGOM1-NEXT: lea [[REG]],{{[[]}}[[REG]]-0x669c41aa{{[]]}}
  
  ; POOLING-LABEL: add_arg_plus_200000
-; POOLING: mov e{{[a-z]*}},DWORD PTR ds:0x0 {{[0-9a-f]*}}: R_386_32 .L$i32$00030d40
+; POOLING: mov e{{[a-z]*}},{{(DWORD PTR )?}}ds:0x0 {{[0-9a-f]*}}: R_386_32 .L$i32$00030d40
  }
  
  define internal float @load_arg_plus_200000(float* %arg) {
@@ -50,7 +50,7 @@ entry:
  ; BLINDINGOM1: lea [[REG:e[a-z]*]],{{[[]}}{{e[a-z]*}}-0x69ea41a7{{[]]}}
  
  ; POOLING-LABEL: load_arg_plus_200000
-; POOLING: mov e{{[a-z]*}},DWORD PTR ds:0x0 {{[0-9a-f]*}}: R_386_32 .L$i32$00030d40
+; POOLING: mov e{{[a-z]*}},{{(DWORD PTR )?}}ds:0x0 {{[0-9a-f]*}}: R_386_32 .L$i32$00030d40
  }
  
  define internal i64 @add_arg_plus_64bits(i32 %arg) {
@@ -70,7 +70,7 @@ entry:
  ; BLINDINGOM1-NEXT: lea [[RLO]],{{[[]}}[[RLO]]-0x6d3841a8{{[]]}}
  
  ; POOLING-LABEL: add_arg_plus_64bits
-; POOLING: mov e{{[a-z]*}},DWORD PTR ds:0x0 {{[0-9a-f]*}}: R_386_32 .L$i32$f46b0400
+; POOLING: mov e{{[a-z]*}},{{(DWORD PTR )?}}ds:0x0 {{[0-9a-f]*}}: R_386_32 .L$i32$f46b0400
  }
  
  define internal i64 @load_arg_plus_64bits(i64* %arg) {
@@ -90,7 +90,7 @@ entry:
  ; BLINDINGOM1-NEXT: lea e{{[a-z]*}},{{[[]}}e{{[a-z]*}}-0x708641a9{{[]]}}
  
  ; POOLING-LABEL: load_arg_plus_64bits
-; POOLING: mov e{{[a-z]x}},DWORD PTR ds:0x0 {{[0-9a-f]*}}: R_386_32 .L$i32$00000004
+; POOLING: mov e{{[a-z]x}},{{(DWORD PTR )?}}ds:0x0 {{[0-9a-f]*}}: R_386_32 .L$i32$00000004
  }
  
  define internal i32 @add_const_8bits(i32 %a) {
@@ -109,7 +109,7 @@ entry:
  ; BLINDINGOM1-NEXT: e{{[a-z]*}},{{[[]}}e{{[a-z]*}}-0x73d441aa{{[]]}}
  
  ; POOLING-LABEL: add_const_8bits
-; POOLING: mov {{[a-z]l}},BYTE PTR ds:0x0 {{[0-9a-f]*}}: R_386_32 .L$i8$0000007b
+; POOLING: mov {{[a-z]l}},{{(BYTE PTR )?}}ds:0x0 {{[0-9a-f]*}}: R_386_32 .L$i8$0000007b
  }
  
  define internal i32 @add_const_16bits(i32 %a) {
@@ -128,6 +128,6 @@ entry:
  ; BLINDINGOM1-NEXT: e{{[a-z]*}},{{[[]}}e{{[a-z]*}}-0x772241a7{{[]]}}
  
  ; POOLING-LABEL: add_const_16bits
-; POOLING: mov {{[a-z]x}},WORD PTR ds:0x0 {{[0-9a-f]*}}: R_386_32 .L$i16$00007ffe
+; POOLING: mov {{[a-z]x}},{{(WORD PTR )?}}ds:0x0 {{[0-9a-f]*}}: R_386_32 .L$i16$00007ffe
  
  }
diff --git a/tests_lit/llvm2ice_tests/square.ll b/tests_lit/llvm2ice_tests/square.ll

index 50fa9e2..137310a 100644 (file)
--- a/tests_lit/llvm2ice_tests/square.ll
+++ b/tests_lit/llvm2ice_tests/square.ll
@@ -33,18 +33,22 @@ entry:
  ; CHECK-LABEL: Square_i32
  ; CHECK: imul [[REG:e..]],[[REG]]
  
-define internal i16 @Square_i16(i16 %a) {
+define internal i32 @Square_i16(i32 %a) {
  entry:
-  %result = mul i16 %a, %a
-  ret i16 %result
+  %a.16 = trunc i32 %a to i16
+  %result = mul i16 %a.16, %a.16
+  %result.i32 = sext i16 %result to i32
+  ret i32 %result.i32
  }
  ; CHECK-LABEL: Square_i16
  ; CHECK: imul [[REG:..]],[[REG]]
  
-define internal i8 @Square_i8(i8 %a) {
+define internal i32 @Square_i8(i32 %a) {
  entry:
-  %result = mul i8 %a, %a
-  ret i8 %result
+  %a.8 = trunc i32 %a to i8
+  %result = mul i8 %a.8, %a.8
+  %result.i32 = sext i8 %result to i32
+  ret i32 %result.i32
  }
  ; CHECK-LABEL: Square_i8
  ; CHECK: imul al
diff --git a/tests_lit/llvm2ice_tests/strength-reduce.ll b/tests_lit/llvm2ice_tests/strength-reduce.ll

index 50ca6e8..09188f7 100644 (file)
--- a/tests_lit/llvm2ice_tests/strength-reduce.ll
+++ b/tests_lit/llvm2ice_tests/strength-reduce.ll
@@ -43,25 +43,29 @@ define internal i32 @mul_i32_arg_m45(i32 %arg) {
  ; CHECK-DAG: lea [[REG]],{{\[}}[[REG]]+[[REG]]*4]
  ; CHECK: neg [[REG]]
  
-define internal i16 @mul_i16_arg_18(i16 %arg) {
-  %result = mul i16 %arg, 18
-  ret i16 %result
+define internal i32 @mul_i16_arg_18(i32 %arg) {
+  %arg.16 = trunc i32 %arg to i16
+  %result = mul i16 %arg.16, 18
+  %result.i32 = zext i16 %result to i32
+  ret i32 %result.i32
  }
-; Disassembly will look like "lea ax,[eax+eax*8]".
  ; CHECK-LABEL: mul_i16_arg_18
-; CHECK-DAG: lea [[REG:..]],{{\[}}e[[REG]]+e[[REG]]*8]
-; CHECK-DAG: shl [[REG]],1
+; CHECK: imul
  
-define internal i8 @mul_i8_arg_16(i8 %arg) {
-  %result = mul i8 %arg, 16
-  ret i8 %result
+define internal i32 @mul_i8_arg_16(i32 %arg) {
+  %arg.8 = trunc i32 %arg to i8
+  %result = mul i8 %arg.8, 16
+  %result.i32 = zext i8 %result to i32
+  ret i32 %result.i32
  }
  ; CHECK-LABEL: mul_i8_arg_16
  ; CHECK: shl {{.*}},0x4
  
-define internal i8 @mul_i8_arg_18(i8 %arg) {
-  %result = mul i8 %arg, 18
-  ret i8 %result
+define internal i32 @mul_i8_arg_18(i32 %arg) {
+  %arg.8 = trunc i32 %arg to i8
+  %result = mul i8 %arg.8, 18
+  %result.i32 = zext i8 %result to i32
+  ret i32 %result.i32
  }
  ; CHECK-LABEL: mul_i8_arg_18
  ; CHECK: imul
diff --git a/tests_lit/llvm2ice_tests/test_i1.ll b/tests_lit/llvm2ice_tests/test_i1.ll

index 3dcd5c1..514d1ed 100644 (file)
--- a/tests_lit/llvm2ice_tests/test_i1.ll
+++ b/tests_lit/llvm2ice_tests/test_i1.ll
@@ -169,7 +169,7 @@ entry:
  ; match the trunc instruction
  ; CHECK: and {{.*}},0x1
  ; match the sext i1 instruction
-; CHECK: movzx e[[REG:.*]],{{[a-d]l|BYTE PTR}}
+; CHECK: movzx {{e*}}[[REG:.*]],{{[a-d]l|BYTE PTR}}
  ; CHECK-NEXT: shl [[REG]],0xf
  ; CHECK-NEXT: sar [[REG]],0xf
  
diff --git a/tests_lit/llvm2ice_tests/vector-bitcast.ll b/tests_lit/llvm2ice_tests/vector-bitcast.ll

index 3283402..cadd2b6 100644 (file)
--- a/tests_lit/llvm2ice_tests/vector-bitcast.ll
+++ b/tests_lit/llvm2ice_tests/vector-bitcast.ll
@@ -150,10 +150,11 @@ entry:
  ; CHECK-NEXT: ret
  }
  
-define internal i8 @test_bitcast_v8i1_to_i8(<8 x i1> %arg) {
+define internal i32 @test_bitcast_v8i1_to_i8(<8 x i1> %arg) {
  entry:
    %res = bitcast <8 x i1> %arg to i8
-  ret i8 %res
+  %res.i32 = zext i8 %res to i32
+  ret i32 %res.i32
  
  ; CHECK-LABEL: test_bitcast_v8i1_to_i8
  ; CHECK: call {{.*}} R_{{.*}} __Sz_bitcast_8xi1_i8
@@ -162,10 +163,11 @@ entry:
  ; OPMT1: call -4
  }
  
-define internal i16 @test_bitcast_v16i1_to_i16(<16 x i1> %arg) {
+define internal i32 @test_bitcast_v16i1_to_i16(<16 x i1> %arg) {
  entry:
    %res = bitcast <16 x i1> %arg to i16
-  ret i16 %res
+  %res.i32 = zext i16 %res to i32
+  ret i32 %res.i32
  
  ; CHECK-LABEL: test_bitcast_v16i1_to_i16
  ; CHECK: call {{.*}} R_{{.*}} __Sz_bitcast_16xi1_i16
author	David Sehr <sehr@chromium.org>
	Wed, 13 Jan 2016 22:17:37 +0000 (14:17 -0800)
committer	David Sehr <sehr@chromium.org>
	Wed, 13 Jan 2016 22:17:37 +0000 (14:17 -0800)
Makefile.standalone		patch \| blob \| history
pydir/crosstest_generator.py		patch \| blob \| history
pydir/run-pnacl-sz.py		patch \| blob \| history
runtime/szrt_ll.ll		patch \| blob \| history
src/IceInstX86Base.h		patch \| blob \| history
src/IceRegAlloc.cpp		patch \| blob \| history
src/IceTargetLoweringX8632.cpp		patch \| blob \| history
src/IceTargetLoweringX8632Traits.h		patch \| blob \| history
src/IceTargetLoweringX8664.cpp		patch \| blob \| history
src/IceTargetLoweringX86Base.h		patch \| blob \| history
src/IceTargetLoweringX86BaseImpl.h		patch \| blob \| history
tests_lit/assembler/x86/jump_encodings.ll		patch \| blob \| history
tests_lit/assembler/x86/opcode_register_encodings.ll		patch \| blob \| history
tests_lit/lit.cfg		patch \| blob \| history
tests_lit/llvm2ice_tests/8bit.pnacl.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/address-mode-global.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/bitcast.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/contract.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/convert.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/elf_container.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/elf_function_sections.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/elf_nodata.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/loop-nest-depth.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/nacl-mem-intrinsics.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/nacl-other-intrinsics.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/nonsfi.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/nop-insertion.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/phi_invalid.test		patch \| blob \| history
tests_lit/llvm2ice_tests/randomize-pool-immediate-basic.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/square.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/strength-reduce.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/test_i1.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/vector-bitcast.ll		patch \| blob \| history