Subzero: Align the stack at the point of function calls.

author Matt Wala <wala@chromium.org>

Tue, 12 Aug 2014 02:56:19 +0000 (19:56 -0700)

committer Matt Wala <wala@chromium.org>

Tue, 12 Aug 2014 02:56:19 +0000 (19:56 -0700)
author Matt Wala <wala@chromium.org>
Tue, 12 Aug 2014 02:56:19 +0000 (19:56 -0700)
committer Matt Wala <wala@chromium.org>
Tue, 12 Aug 2014 02:56:19 +0000 (19:56 -0700)
diff --git a/crosstest/crosstest.py b/crosstest/crosstest.py

index 9b64399..89124d6 100755 (executable)
--- a/crosstest/crosstest.py
+++ b/crosstest/crosstest.py
@@ -134,7 +134,6 @@ if __name__ == '__main__':
              objs.append(bitcode)
  
      linker = 'clang' if os.path.splitext(args.driver)[1] == '.c' else 'clang++'
-    # TODO: Remove -mstackrealign after Subzero supports stack alignment.
-    shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32',
-              '-mstackrealign', args.driver] + objs +
+    shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32', args.driver] +
+             objs +
               ['-lm', '-lpthread', '-o', os.path.join(args.dir, args.output)])
diff --git a/crosstest/runtests.sh b/crosstest/runtests.sh

index 0b79f48..eedb3b3 100755 (executable)
--- a/crosstest/runtests.sh
+++ b/crosstest/runtests.sh
@@ -54,6 +54,14 @@ for optlevel in ${OPTLEVELS} ; do
              --output=test_bitmanip_O${optlevel}_${attribute}
  
          ./crosstest.py -O${optlevel} --mattr ${attribute} \
+            --prefix=Subzero_ --target=x8632 \
+            --dir="${OUTDIR}" \
+            --llvm-bin-path="${LLVM_BIN_PATH}" \
+            --test=test_calling_conv.cpp \
+            --driver=test_calling_conv_main.cpp \
+            --output=test_calling_conv_O${optlevel}_${attribute}
+
+        ./crosstest.py -O${optlevel} --mattr ${attribute} \
              --prefix=Subzero_ \
              --target=x8632 \
              --dir="${OUTDIR}" \
@@ -137,6 +145,7 @@ for optlevel in ${OPTLEVELS} ; do
          "${OUTDIR}"/mem_intrin_O${optlevel}_${attribute}
          "${OUTDIR}"/test_arith_O${optlevel}_${attribute}
          "${OUTDIR}"/test_bitmanip_O${optlevel}_${attribute}
+        "${OUTDIR}"/test_calling_conv_O${optlevel}_${attribute}
          "${OUTDIR}"/test_cast_O${optlevel}_${attribute}
          "${OUTDIR}"/test_fcmp_O${optlevel}_${attribute}
          "${OUTDIR}"/test_global_O${optlevel}_${attribute}
diff --git a/crosstest/test_calling_conv.cpp b/crosstest/test_calling_conv.cpp

new file mode 100644 (file)

index 0000000..57bfa6f
--- /dev/null
+++ b/crosstest/test_calling_conv.cpp
@@ -0,0 +1,95 @@
+//===- subzero/crosstest/test_calling_conv.cpp - Implementation for tests -===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the test functions used to check that Subzero
+// generates code compatible with the calling convention used by
+// llc. "Caller" functions test the handling of out-args, and "callee"
+// functions test the handling of in-args.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstring>
+
+#include "test_calling_conv.h"
+
+#define CALL_AS_TYPE(Ty, Func) (reinterpret_cast<Ty *>(Func))
+
+void caller_i(void) {
+  int arg1 = 0x12345678;
+  CALL_AS_TYPE(callee_i_Ty, Callee)(arg1);
+}
+
+void caller_vvvvv(void) {
+  v4si32 arg1 = {0, 1, 2, 3};
+  v4si32 arg2 = {4, 5, 6, 7};
+  v4si32 arg3 = {8, 9, 10, 11};
+  v4si32 arg4 = {12, 13, 14, 15};
+  v4si32 arg5 = {16, 17, 18, 19};
+
+  CALL_AS_TYPE(callee_vvvvv_Ty, Callee)(arg1, arg2, arg3, arg4, arg5);
+}
+
+void caller_vlvlivfvdviv(void) {
+  v4f32 arg1 = {0, 1, 2, 3};
+  int64_t arg2 = 4;
+  v4f32 arg3 = {6, 7, 8, 9};
+  int64_t arg4 = 10;
+  int arg5 = 11;
+  v4f32 arg6 = {12, 13, 14, 15};
+  float arg7 = 16;
+  v4f32 arg8 = {17, 18, 19, 20};
+  double arg9 = 21;
+  v4f32 arg10 = {22, 23, 24, 25};
+  int arg11 = 26;
+  v4f32 arg12 = {27, 28, 29, 30};
+
+  CALL_AS_TYPE(callee_vlvlivfvdviv_Ty, Callee)(arg1, arg2, arg3, arg4, arg5,
+                                               arg6, arg7, arg8, arg9, arg10,
+                                               arg11, arg12);
+}
+
+#define HANDLE_ARG(ARGNUM)                                                     \
+  case ARGNUM:                                                                 \
+    memcpy(&Buf[0], &arg##ARGNUM, sizeof(arg##ARGNUM));                        \
+    break;
+
+void __attribute__((noinline)) callee_i(int arg1) {
+  switch (ArgNum) { HANDLE_ARG(1); }
+}
+
+void __attribute__((noinline))
+callee_vvvvv(v4si32 arg1, v4si32 arg2, v4si32 arg3, v4si32 arg4, v4si32 arg5) {
+  switch (ArgNum) {
+    HANDLE_ARG(1);
+    HANDLE_ARG(2);
+    HANDLE_ARG(3);
+    HANDLE_ARG(4);
+    HANDLE_ARG(5);
+  }
+}
+
+void __attribute__((noinline))
+callee_vlvlivfvdviv(v4f32 arg1, int64_t arg2, v4f32 arg3, int64_t arg4, int arg5,
+                    v4f32 arg6, float arg7, v4f32 arg8, double arg9, v4f32 arg10,
+                    int arg11, v4f32 arg12) {
+  switch (ArgNum) {
+    HANDLE_ARG(1);
+    HANDLE_ARG(2);
+    HANDLE_ARG(3);
+    HANDLE_ARG(4);
+    HANDLE_ARG(5);
+    HANDLE_ARG(6);
+    HANDLE_ARG(7);
+    HANDLE_ARG(8);
+    HANDLE_ARG(9);
+    HANDLE_ARG(10);
+    HANDLE_ARG(11);
+    HANDLE_ARG(12);
+  }
+}
diff --git a/crosstest/test_calling_conv.def b/crosstest/test_calling_conv.def

new file mode 100644 (file)

index 0000000..4c6c7f9
--- /dev/null
+++ b/crosstest/test_calling_conv.def
@@ -0,0 +1,26 @@
+//===- subzero/crosstest/test_calling_conv.def - testing macros -*- C++ -*-===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines macros for testing the calling convention.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TEST_CALLING_CONV_DEF
+#define TEST_CALLING_CONV_DEF
+
+#define STR(x) (#x)
+
+#define TEST_FUNC_TABLE                            \
+/* caller,             callee,           argc */   \
+X(caller_i,            callee_i,              1)   \
+X(caller_vvvvv,        callee_vvvvv,          5)   \
+X(caller_vlvlivfvdviv, callee_vlvlivfvdviv,  12)   \
+// #define X(caller, callee, argc)
+
+#endif // TEST_CALLING_CONV_DEF
diff --git a/crosstest/test_calling_conv.h b/crosstest/test_calling_conv.h

new file mode 100644 (file)

index 0000000..cd4a9d9
--- /dev/null
+++ b/crosstest/test_calling_conv.h
@@ -0,0 +1,36 @@
+//===- subzero/crosstest/test_calling_conv.h - Test prototypes --*- C++ -*-===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the function prototypes for crosstesting the calling
+// convention.
+//
+//===----------------------------------------------------------------------===//
+
+#include "test_calling_conv.def"
+#include "vectors.h"
+
+typedef void (*CalleePtrTy)();
+extern CalleePtrTy Callee;
+extern size_t ArgNum;
+extern char *Buf;
+
+void caller_i();
+void caller_alloca_i();
+typedef void callee_i_Ty(int);
+callee_i_Ty callee_i;
+callee_i_Ty callee_alloca_i;
+
+void caller_vvvvv();
+typedef void (callee_vvvvv_Ty)(v4si32, v4si32, v4si32, v4si32, v4si32);
+callee_vvvvv_Ty callee_vvvvv;
+
+void caller_vlvlivfvdviv();
+typedef void(callee_vlvlivfvdviv_Ty)(v4f32, int64_t, v4f32, int64_t, int, v4f32,
+                                    float, v4f32, double, v4f32, int, v4f32);
+callee_vlvlivfvdviv_Ty callee_vlvlivfvdviv;
diff --git a/crosstest/test_calling_conv_main.cpp b/crosstest/test_calling_conv_main.cpp

new file mode 100644 (file)

index 0000000..b5f2135
--- /dev/null
+++ b/crosstest/test_calling_conv_main.cpp
@@ -0,0 +1,177 @@
+//===- subzero/crosstest/test_calling_conv_main.cpp - Driver for tests ----===//
+//
+//                        The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the driver for cross testing the compatibility of
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+/* crosstest.py --test=test_calling_conv.cpp               \
+   --driver=test_calling_conv_main.cpp --prefix=Subzero_   \
+   --output=test_calling_conv */
+
+#include <cstring>
+#include <iostream>
+#include <sstream>
+
+#include "test_calling_conv.h"
+
+namespace Subzero_ {
+#include "test_calling_conv.h"
+}
+
+// The crosstest code consists of caller / callee function pairs.
+//
+// The caller function initializes a list of arguments and calls the
+// function located at Callee.
+//
+// The callee function writes the argument numbered ArgNum into the
+// location pointed to by Buf.
+//
+// testCaller() tests that caller functions, as compiled by Subzero and
+// llc, pass arguments to the callee in the same way.  The Caller() and
+// Subzero_Caller() functions both call the same callee (which has been
+// compiled by llc).  The result in the global buffer is compared to
+// check that it is the same value after the calls by both callers.
+//
+// testCallee() runs the same kind of test, except that the functions
+// Callee() and Subzero_Callee() are being tested to ensure that both
+// functions receive arguments from the caller in the same way.  The
+// caller is compiled by llc.
+
+size_t ArgNum, Subzero_ArgNum;
+CalleePtrTy Callee, Subzero_Callee;
+char *Buf, *Subzero_Buf;
+
+const static size_t BUF_SIZE = 16;
+
+std::string bufAsString(const char Buf[BUF_SIZE]) {
+  std::ostringstream OS;
+  for (size_t i = 0; i < BUF_SIZE; ++i) {
+    if (i > 0)
+      OS << " ";
+    OS << (unsigned)(unsigned char)Buf[i]; // avoid sign-extending 0x80..0xff
+  }
+  return OS.str();
+}
+
+void testCaller(size_t &TotalTests, size_t &Passes, size_t &Failures) {
+  static struct {
+    const char *CallerName, *CalleeName;
+    size_t Args;
+    void (*Caller)(void);
+    void (*Subzero_Caller)(void);
+    CalleePtrTy Callee;
+  } Funcs[] = {
+#define X(caller, callee, argc)                                                \
+  {                                                                            \
+    STR(caller), STR(callee), argc, &caller, &Subzero_::caller,                \
+        reinterpret_cast<CalleePtrTy>(&callee),                                \
+  }                                                                            \
+  ,
+    TEST_FUNC_TABLE
+#undef X
+  };
+
+  const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
+
+  for (size_t f = 0; f < NumFuncs; ++f) {
+    char BufLlc[BUF_SIZE], BufSz[BUF_SIZE];
+    Callee = Subzero_Callee = Funcs[f].Callee;
+
+    for (size_t i = 0; i < Funcs[f].Args; ++i) {
+      memset(BufLlc, 0xff, sizeof(BufLlc));
+      memset(BufSz, 0xff, sizeof(BufSz));
+
+      ArgNum = Subzero_ArgNum = i + 1; // callee switch cases are 1-based
+
+      Buf = BufLlc;
+      Funcs[f].Caller();
+
+      Buf = BufSz;
+      Funcs[f].Subzero_Caller();
+
+      ++TotalTests;
+      if (!memcmp(BufLlc, BufSz, sizeof(BufLlc))) {
+        ++Passes;
+      } else {
+        ++Failures;
+        std::cout << "testCaller(Caller=" << Funcs[f].CallerName
+                  << ", Callee=" << Funcs[f].CalleeName << ", ArgNum=" << ArgNum
+                  << ")\nsz =" << bufAsString(BufSz)
+                  << "\nllc=" << bufAsString(BufLlc) << "\n";
+      }
+    }
+  }
+}
+
+void testCallee(size_t &TotalTests, size_t &Passes, size_t &Failures) {
+  static struct {
+    const char *CallerName, *CalleeName;
+    size_t Args;
+    void (*Caller)(void);
+    CalleePtrTy Callee, Subzero_Callee;
+  } Funcs[] = {
+#define X(caller, callee, argc)                                                \
+  {                                                                            \
+    STR(caller), STR(callee), argc, &caller,                                   \
+        reinterpret_cast<CalleePtrTy>(&callee),                                \
+        reinterpret_cast<CalleePtrTy>(&Subzero_::callee)                       \
+  }                                                                            \
+  ,
+    TEST_FUNC_TABLE
+#undef X
+  };
+
+  const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
+
+  for (size_t f = 0; f < NumFuncs; ++f) {
+    char BufLlc[BUF_SIZE], BufSz[BUF_SIZE];
+    Buf = BufLlc;
+    Subzero_Buf = BufSz;
+
+    for (size_t i = 0; i < Funcs[f].Args; ++i) {
+      memset(BufLlc, 0xff, sizeof(BufLlc));
+      memset(BufSz, 0xff, sizeof(BufSz));
+
+      ArgNum = Subzero_ArgNum = i + 1; // callee switch cases are 1-based
+
+      Callee = Funcs[f].Callee;
+      Funcs[f].Caller();
+
+      Callee = Funcs[f].Subzero_Callee;
+      Funcs[f].Caller();
+
+      ++TotalTests;
+      if (!memcmp(BufLlc, BufSz, sizeof(BufLlc))) {
+        ++Passes;
+      } else {
+        ++Failures;
+        std::cout << "testCallee(Caller=" << Funcs[f].CallerName
+                  << ", Callee=" << Funcs[f].CalleeName << ", ArgNum=" << ArgNum
+                  << ")\nsz =" << bufAsString(BufSz)
+                  << "\nllc=" << bufAsString(BufLlc) << "\n";
+      }
+    }
+  }
+}
+
+int main(int argc, char *argv[]) {
+  size_t TotalTests = 0;
+  size_t Passes = 0;
+  size_t Failures = 0;
+
+  testCaller(TotalTests, Passes, Failures);
+  testCallee(TotalTests, Passes, Failures);
+
+  std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
+            << " Failures=" << Failures << "\n";
+
+  return Failures;
+}
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp

index 03cdb95..1f7508b 100644 (file)
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -92,6 +92,9 @@ OperandX8632Mem::OperandX8632Mem(Cfg *Func, Type Ty, Variable *Base,
    }
  }
  
+InstX8632AdjustStack::InstX8632AdjustStack(Cfg *Func, SizeT Amount)
+    : InstX8632(Func, InstX8632::Adjuststack, 0, NULL), Amount(Amount) {}
+
  InstX8632Mul::InstX8632Mul(Cfg *Func, Variable *Dest, Variable *Source1,
                             Operand *Source2)
      : InstX8632(Func, InstX8632::Mul, 2, Dest) {
@@ -226,6 +229,12 @@ InstX8632Movp::InstX8632Movp(Cfg *Func, Variable *Dest, Operand *Source)
    addSource(Source);
  }
  
+InstX8632StoreP::InstX8632StoreP(Cfg *Func, Operand *Value, OperandX8632 *Mem)
+    : InstX8632(Func, InstX8632::StoreP, 2, NULL) {
+  addSource(Value);
+  addSource(Mem);
+}
+
  InstX8632StoreQ::InstX8632StoreQ(Cfg *Func, Operand *Value, OperandX8632 *Mem)
      : InstX8632(Func, InstX8632::StoreQ, 2, NULL) {
    addSource(Value);
@@ -933,6 +942,24 @@ void InstX8632Store::dump(const Cfg *Func) const {
    getSrc(0)->dump(Func);
  }
  
+void InstX8632StoreP::emit(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrEmit();
+  assert(getSrcSize() == 2);
+  Str << "\tmovups\t";
+  getSrc(1)->emit(Func);
+  Str << ", ";
+  getSrc(0)->emit(Func);
+  Str << "\n";
+}
+
+void InstX8632StoreP::dump(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrDump();
+  Str << "storep." << getSrc(0)->getType() << " ";
+  getSrc(1)->dump(Func);
+  Str << ", ";
+  getSrc(0)->dump(Func);
+}
+
  void InstX8632StoreQ::emit(const Cfg *Func) const {
    Ostream &Str = Func->getContext()->getStrEmit();
    assert(getSrcSize() == 2);
@@ -1236,6 +1263,17 @@ void InstX8632Pop::dump(const Cfg *Func) const {
    Str << " = pop." << getDest()->getType() << " ";
  }
  
+void InstX8632AdjustStack::emit(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrEmit();
+  Str << "\tsub\tesp, " << Amount << "\n";
+  Func->getTarget()->updateStackAdjustment(Amount);
+}
+
+void InstX8632AdjustStack::dump(const Cfg *Func) const {
+  Ostream &Str = Func->getContext()->getStrDump();
+  Str << "esp = sub.i32 esp, " << Amount;
+}
+
  void InstX8632Push::emit(const Cfg *Func) const {
    Ostream &Str = Func->getContext()->getStrEmit();
    assert(getSrcSize() == 1);
diff --git a/src/IceInstX8632.h b/src/IceInstX8632.h

index bc9ac53..a2fd7dc 100644 (file)
--- a/src/IceInstX8632.h
+++ b/src/IceInstX8632.h
@@ -137,6 +137,7 @@ public:
      Add,
      Addps,
      Addss,
+    Adjuststack,
      And,
      Blendvps,
      Br,
@@ -204,6 +205,7 @@ public:
      Shufps,
      Sqrtss,
      Store,
+    StoreP,
      StoreQ,
      Sub,
      Subps,
@@ -340,6 +342,26 @@ private:
    InstX8632Label *Label; // Intra-block branch target
  };
  
+// AdjustStack instruction - subtracts esp by the given amount and
+// updates the stack offset during code emission.
+class InstX8632AdjustStack : public InstX8632 {
+public:
+  static InstX8632AdjustStack *create(Cfg *Func, SizeT Amount) {
+    return new (Func->allocate<InstX8632AdjustStack>())
+        InstX8632AdjustStack(Func, Amount);
+  }
+  virtual void emit(const Cfg *Func) const;
+  virtual void dump(const Cfg *Func) const;
+  static bool classof(const Inst *Inst) { return isClassof(Inst, Adjuststack); }
+
+private:
+  InstX8632AdjustStack(Cfg *Func, SizeT Amount);
+  InstX8632AdjustStack(const InstX8632AdjustStack &) LLVM_DELETED_FUNCTION;
+  InstX8632AdjustStack &operator=(const InstX8632AdjustStack &)
+      LLVM_DELETED_FUNCTION;
+  SizeT Amount;
+};
+
  // Call instruction.  Arguments should have already been pushed.
  class InstX8632Call : public InstX8632 {
  public:
@@ -960,6 +982,23 @@ private:
    virtual ~InstX8632Movp() {}
  };
  
+class InstX8632StoreP : public InstX8632 {
+public:
+  static InstX8632StoreP *create(Cfg *Func, Operand *Value, OperandX8632 *Mem) {
+    return new (Func->allocate<InstX8632StoreP>())
+        InstX8632StoreP(Func, Value, Mem);
+  }
+  virtual void emit(const Cfg *Func) const;
+  virtual void dump(const Cfg *Func) const;
+  static bool classof(const Inst *Inst) { return isClassof(Inst, StoreP); }
+
+private:
+  InstX8632StoreP(Cfg *Func, Operand *Value, OperandX8632 *Mem);
+  InstX8632StoreP(const InstX8632StoreP &) LLVM_DELETED_FUNCTION;
+  InstX8632StoreP &operator=(const InstX8632StoreP &) LLVM_DELETED_FUNCTION;
+  virtual ~InstX8632StoreP() {}
+};
+
  // This is essentially a "movq" instruction with an OperandX8632Mem
  // operand instead of Variable as the destination.  It's important
  // for liveness that there is no Dest operand.
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp

index de033a5..ebc0bae 100644 (file)
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -121,9 +121,21 @@ Type getInVectorElementType(Type Ty) {
  }
  
  // The maximum number of arguments to pass in XMM registers
-const unsigned X86_MAX_XMM_ARGS = 4;
+const uint32_t X86_MAX_XMM_ARGS = 4;
  // The number of bits in a byte
-const unsigned X86_CHAR_BIT = 8;
+const uint32_t X86_CHAR_BIT = 8;
+// Stack alignment
+const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
+// Size of the return address on the stack
+const uint32_t X86_RET_IP_SIZE_BYTES = 4;
+
+// Value is a size in bytes.  Return Value rounded up to a multiple of
+// the stack alignment (Value itself if it is already aligned).
+uint32_t applyStackAlignment(uint32_t Value) {
+  // power of 2
+  assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
+  return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES;
+}
  
  // Instruction set options
  namespace cl = ::llvm::cl;
@@ -248,8 +260,8 @@ void __attribute__((unused)) xMacroIntegrityCheck() {
  
  TargetX8632::TargetX8632(Cfg *Func)
      : TargetLowering(Func), InstructionSet(CLInstructionSet),
-      IsEbpBasedFrame(false), FrameSizeLocals(0), LocalsSizeBytes(0),
-      NextLabelNumber(0), ComputedLiveRanges(false),
+      IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
+      LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
        PhysicalRegisters(VarList(Reg_NUM)) {
    // TODO: Don't initialize IntegerRegisters and friends every time.
    // Instead, initialize in some sort of static initializer for the
@@ -543,6 +555,9 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
      finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
      return;
    }
+  if (isVectorType(Ty)) {
+    InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
+  }
    Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
    InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
    if (Arg->hasReg()) {
@@ -570,7 +585,6 @@ void TargetX8632::addProlog(CfgNode *Node) {
    // or B.
    const bool SimpleCoalescing = true;
    size_t InArgsSizeBytes = 0;
-  size_t RetIpSizeBytes = 4;
    size_t PreservedRegsSizeBytes = 0;
    LocalsSizeBytes = 0;
    Context.init(Node);
@@ -657,6 +671,13 @@ void TargetX8632::addProlog(CfgNode *Node) {
      _mov(ebp, esp);
    }
  
+  if (NeedsStackAlignment) {
+    uint32_t StackSize = applyStackAlignment(
+        X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes);
+    LocalsSizeBytes =
+        StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes;
+  }
+
    // Generate "sub esp, LocalsSizeBytes"
    if (LocalsSizeBytes)
      _sub(getPhysicalRegister(Reg_esp),
@@ -668,7 +689,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
    // for those that were register-allocated.  Args are pushed right to
    // left, so Arg[0] is closest to the stack/frame pointer.
    Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
-  size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
+  size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
    if (!IsEbpBasedFrame)
      BasicFrameOffset += LocalsSizeBytes;
  
@@ -959,12 +980,42 @@ llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
  
  void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
    IsEbpBasedFrame = true;
-  // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
-  // the number of adjustments of esp, etc.
+  // Conservatively require the stack to be aligned.  Some stack
+  // adjustment operations implemented below assume that the stack is
+  // aligned before the alloca.  All the alloca code ensures that the
+  // stack alignment is preserved after the alloca.  The stack alignment
+  // restriction can be relaxed in some cases.
+  NeedsStackAlignment = true;
+
+  // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc.
    Variable *esp = getPhysicalRegister(Reg_esp);
    Operand *TotalSize = legalize(Inst->getSizeInBytes());
    Variable *Dest = Inst->getDest();
-  _sub(esp, TotalSize);
+  uint32_t AlignmentParam = Inst->getAlignInBytes();
+
+  // LLVM enforces power of 2 alignment.
+  assert((AlignmentParam & (AlignmentParam - 1)) == 0);
+  assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
+
+  uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
+  if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
+    _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));
+  }
+  if (ConstantInteger *ConstantTotalSize =
+          llvm::dyn_cast<ConstantInteger>(TotalSize)) {
+    uint32_t Value = ConstantTotalSize->getValue();
+    // Round Value up to the next highest multiple of the alignment.
+    Value = (Value + Alignment - 1) & -Alignment;
+    _sub(esp, Ctx->getConstantInt(IceType_i32, Value));
+  } else {
+    // Non-constant sizes need to be adjusted to the next highest
+    // multiple of the required alignment at runtime.
+    Variable *T = makeReg(IceType_i32);
+    _mov(T, TotalSize);
+    _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1));
+    _and(T, Ctx->getConstantInt(IceType_i32, -Alignment));
+    _sub(esp, T);
+  }
    _mov(Dest, esp);
  }
  
@@ -1544,51 +1595,78 @@ void TargetX8632::lowerBr(const InstBr *Inst) {
  }
  
  void TargetX8632::lowerCall(const InstCall *Instr) {
+  // x86-32 calling convention:
+  //
+  // * At the point before the call, the stack must be aligned to 16
+  // bytes.
+  //
+  // * The first four arguments of vector type, regardless of their
+  // position relative to the other arguments in the argument list, are
+  // placed in registers xmm0 - xmm3.
+  //
+  // * Other arguments are pushed onto the stack in right-to-left order,
+  // such that the left-most argument ends up on the top of the stack at
+  // the lowest memory address.
+  //
+  // * Stack arguments of vector type are aligned to start at the next
+  // highest multiple of 16 bytes.  Other stack arguments are aligned to
+  // 4 bytes.
+  //
+  // This intends to match the section "IA-32 Function Calling
+  // Convention" of the document "OS X ABI Function Call Guide" by
+  // Apple.
+  NeedsStackAlignment = true;
+
+  OperandList XmmArgs;
+  OperandList StackArgs, StackArgLocations;
+  uint32_t ParameterAreaSizeBytes = 0;
+
    // Classify each argument operand according to the location where the
    // argument is passed.
-  OperandList XmmArgs;
-  OperandList StackArgs;
    for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
      Operand *Arg = Instr->getArg(i);
-    if (isVectorType(Arg->getType()) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
+    Type Ty = Arg->getType();
+    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
+    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 ||
+           Ty == IceType_f64 || isVectorType(Ty));
+    if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
        XmmArgs.push_back(Arg);
      } else {
        StackArgs.push_back(Arg);
+      if (isVectorType(Arg->getType())) {
+        ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
+      }
+      Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
+      Constant *Loc = Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes);
+      StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
+      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
      }
    }
-  // For stack arguments, generate a sequence of push instructions,
-  // pushing right to left, keeping track of stack offsets in case a
-  // push involves a stack operand and we are using an esp-based frame.
-  uint32_t StackOffset = 0;
-  // TODO: Consolidate the stack adjustment for function calls by
-  // reserving enough space for the arguments only once.
+
+  // Adjust the parameter area so that the stack is aligned.  It is
+  // assumed that the stack is already aligned at the start of the
+  // calling sequence.
+  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
+
+  // Subtract the appropriate amount for the argument area.  This also
+  // takes care of setting the stack adjustment during emission.
    //
    // TODO: If for some reason the call instruction gets dead-code
    // eliminated after lowering, we would need to ensure that the
-  // pre-call push instructions and the post-call esp adjustment get
-  // eliminated as well.
-  for (OperandList::reverse_iterator I = StackArgs.rbegin(),
-           E = StackArgs.rend(); I != E; ++I) {
-    Operand *Arg = legalize(*I);
-    if (Arg->getType() == IceType_i64) {
-      _push(hiOperand(Arg));
-      _push(loOperand(Arg));
-    } else if (Arg->getType() == IceType_f64 || isVectorType(Arg->getType())) {
-      // If the Arg turns out to be a memory operand, more than one push
-      // instruction is required.  This ends up being somewhat clumsy in
-      // the current IR, so we use a workaround.  Force the operand into
-      // a (xmm) register, and then push the register.  An xmm register
-      // push is actually not possible in x86, but the Push instruction
-      // emitter handles this by decrementing the stack pointer and
-      // directly writing the xmm register value.
-      _push(legalize(Arg, Legal_Reg));
-    } else {
-      // Otherwise PNaCl requires parameter types to be at least 32-bits.
-      assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32);
-      _push(Arg);
-    }
-    StackOffset += typeWidthInBytesOnStack(Arg->getType());
+  // pre-call and post-call esp adjustments get eliminated as well.
+  if (ParameterAreaSizeBytes) {
+    _adjust_stack(ParameterAreaSizeBytes);
    }
+
+  // Copy arguments that are passed on the stack to the appropriate
+  // stack locations.
+  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
+    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+    // TODO: Consider calling postLower() here to reduce the register
+    // pressure associated with using too many infinite weight
+    // temporaries when lowering the call sequence in -Om1 mode.
+  }
+
    // Copy arguments to be passed in registers to the appropriate
    // registers.
    // TODO: Investigate the impact of lowering arguments passed in
@@ -1652,10 +1730,11 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
    if (ReturnRegHi)
      Context.insert(InstFakeDef::create(Func, ReturnRegHi));
  
-  // Add the appropriate offset to esp.
-  if (StackOffset) {
+  // Add the appropriate offset to esp.  The call instruction takes care
+  // of resetting the stack offset during emission.
+  if (ParameterAreaSizeBytes) {
      Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
-    _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
+    _add(esp, Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes));
    }
  
    // Insert a register-kill pseudo instruction.
@@ -2134,9 +2213,9 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
    } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
      // Use pshufd and movd/movss.
      //
-    // ALIGNHACK: Force vector operands to registers in instructions that
-    // require aligned memory operands until support for stack alignment
-    // is implemented.
+    // ALIGNHACK: Force vector operands to registers in instructions
+    // that require aligned memory operands until support for data
+    // alignment is implemented.
  #define ALIGN_HACK(Vect) legalizeToVar((Vect))
      Operand *SourceVectRM =
          legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
@@ -2221,8 +2300,8 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
        Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
        Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
  
-      // ALIGNHACK: Without support for stack alignment, both operands to
-      // cmpps need to be forced into registers.  Once support for stack
+      // ALIGNHACK: Without support for data alignment, both operands to
+      // cmpps need to be forced into registers.  Once support for data
        // alignment is implemented, remove LEGAL_HACK.
  #define LEGAL_HACK(Vect) legalizeToVar((Vect))
        switch (Condition) {
@@ -2362,8 +2441,8 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
      }
  
      // TODO: ALIGNHACK: Both operands to compare instructions need to be
-    // in registers until stack alignment support is implemented.  Once
-    // there is support for stack alignment, LEGAL_HACK can be removed.
+    // in registers until data alignment support is implemented.  Once
+    // there is support for data alignment, LEGAL_HACK can be removed.
  #define LEGAL_HACK(Vect) legalizeToVar((Vect))
      Variable *T = makeReg(Ty);
      switch (Condition) {
@@ -2583,9 +2662,9 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
      Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
      Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);
  
-    // ALIGNHACK: Force vector operands to registers in instructions that
-    // require aligned memory operands until support for stack alignment
-    // is implemented.
+    // ALIGNHACK: Force vector operands to registers in instructions
+    // that require aligned memory operands until support for data
+    // alignment is implemented.
  #define ALIGN_HACK(Vect) legalizeToVar((Vect))
      if (Index == 1) {
        SourceVectRM = ALIGN_HACK(SourceVectRM);
@@ -2873,7 +2952,8 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
    }
    case Intrinsics::Memset: {
      // The value operand needs to be extended to a stack slot size
-    // because "push" only works for a specific operand size.
+    // because the PNaCl ABI requires arguments to be at least 32 bits
+    // wide.
      Operand *ValOp = Instr->getArg(1);
      assert(ValOp->getType() == IceType_i8);
      Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode());
@@ -3560,9 +3640,9 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
      Variable *T = makeReg(SrcTy);
      Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
      Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
-    // ALIGNHACK: Until stack alignment support is implemented, vector
+    // ALIGNHACK: Until data alignment support is implemented, vector
      // instructions need to have vector operands in registers.  Once
-    // there is support for stack alignment, LEGAL_HACK can be removed.
+    // there is support for data alignment, LEGAL_HACK can be removed.
  #define LEGAL_HACK(Vect) legalizeToVar((Vect))
      if (InstructionSet >= SSE4_1) {
        // TODO(wala): If the condition operand is a constant, use blendps
@@ -3657,13 +3737,16 @@ void TargetX8632::lowerStore(const InstStore *Inst) {
    Operand *Value = Inst->getData();
    Operand *Addr = Inst->getAddr();
    OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
+  Type Ty = NewAddr->getType();
  
-  if (NewAddr->getType() == IceType_i64) {
+  if (Ty == IceType_i64) {
      Value = legalize(Value);
      Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
      Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
      _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
      _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
+  } else if (isVectorType(Ty)) {
+    _storep(legalizeToVar(Value), NewAddr);
    } else {
      Value = legalize(Value, Legal_Reg | Legal_Imm, true);
      _store(Value, NewAddr);
@@ -4039,9 +4122,9 @@ void TargetX8632::postLower() {
          llvm::SmallBitVector AvailableTypedRegisters =
              AvailableRegisters & getRegisterSetForType(Var->getType());
          if (!AvailableTypedRegisters.any()) {
-          // This is a hack in case we run out of physical registers
-          // due to an excessive number of "push" instructions from
-          // lowering a call.
+          // This is a hack in case we run out of physical registers due
+          // to an excessively long code sequence, as might happen when
+          // lowering arguments in lowerCall().
            AvailableRegisters = WhiteList;
            AvailableTypedRegisters =
                AvailableRegisters & getRegisterSetForType(Var->getType());
diff --git a/src/IceTargetLoweringX8632.h b/src/IceTargetLoweringX8632.h

index 2b189ad..6d209dc 100644 (file)
--- a/src/IceTargetLoweringX8632.h
+++ b/src/IceTargetLoweringX8632.h
@@ -188,6 +188,9 @@ protected:
    void _add(Variable *Dest, Operand *Src0) {
      Context.insert(InstX8632Add::create(Func, Dest, Src0));
    }
+  void _adjust_stack(int32_t Amount) {
+    Context.insert(InstX8632AdjustStack::create(Func, Amount));
+  }
    void _addps(Variable *Dest, Operand *Src0) {
      Context.insert(InstX8632Addps::create(Func, Dest, Src0));
    }
@@ -412,6 +415,9 @@ protected:
    void _store(Operand *Value, OperandX8632 *Mem) {
      Context.insert(InstX8632Store::create(Func, Value, Mem));
    }
+  void _storep(Operand *Value, OperandX8632 *Mem) {
+    Context.insert(InstX8632StoreP::create(Func, Value, Mem));
+  }
    void _storeq(Operand *Value, OperandX8632 *Mem) {
      Context.insert(InstX8632StoreQ::create(Func, Value, Mem));
    }
@@ -450,6 +456,7 @@ protected:
  
    const X86InstructionSet InstructionSet;
    bool IsEbpBasedFrame;
+  bool NeedsStackAlignment;
    size_t FrameSizeLocals;
    size_t LocalsSizeBytes;
    llvm::SmallBitVector TypeToRegisterSet[IceType_NUM];
diff --git a/tests_lit/llvm2ice_tests/64bit.pnacl.ll b/tests_lit/llvm2ice_tests/64bit.pnacl.ll

index 9d13095..86a0a27 100644 (file)
--- a/tests_lit/llvm2ice_tests/64bit.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/64bit.pnacl.ll
@@ -33,40 +33,50 @@ entry:
    ret i32 %add3
  }
  ; CHECK: pass64BitArg:
-; CHECK:      push    123
-; CHECK-NEXT: push
-; CHECK-NEXT: push
-; CHECK-NEXT: call    ignore64BitArgNoInline
-; CHECK:      push
-; CHECK-NEXT: push
-; CHECK-NEXT: push    123
-; CHECK-NEXT: push
-; CHECK-NEXT: push
-; CHECK-NEXT: call    ignore64BitArgNoInline
-; CHECK:      push
-; CHECK-NEXT: push
-; CHECK-NEXT: push    123
-; CHECK-NEXT: push
-; CHECK-NEXT: push
-; CHECK-NEXT: call    ignore64BitArgNoInline
+; CHECK:      sub     esp
+; CHECK:      mov     dword ptr [esp+4]
+; CHECK:      mov     dword ptr [esp]
+; CHECK:      mov     dword ptr [esp+8], 123
+; CHECK:      mov     dword ptr [esp+16]
+; CHECK:      mov     dword ptr [esp+12]
+; CHECK:      call    ignore64BitArgNoInline
+; CHECK:      sub     esp
+; CHECK:      mov     dword ptr [esp+4]
+; CHECK:      mov     dword ptr [esp]
+; CHECK:      mov     dword ptr [esp+8], 123
+; CHECK:      mov     dword ptr [esp+16]
+; CHECK:      mov     dword ptr [esp+12]
+; CHECK:      call    ignore64BitArgNoInline
+; CHECK:      sub     esp
+; CHECK:      mov     dword ptr [esp+4]
+; CHECK:      mov     dword ptr [esp]
+; CHECK:      mov     dword ptr [esp+8], 123
+; CHECK:      mov     dword ptr [esp+16]
+; CHECK:      mov     dword ptr [esp+12]
+; CHECK:      call    ignore64BitArgNoInline
  ;
  ; OPTM1: pass64BitArg:
-; OPTM1:      push    123
-; OPTM1-NEXT: push
-; OPTM1-NEXT: push
-; OPTM1-NEXT: call    ignore64BitArgNoInline
-; OPTM1:      push
-; OPTM1-NEXT: push
-; OPTM1-NEXT: push    123
-; OPTM1-NEXT: push
-; OPTM1-NEXT: push
-; OPTM1-NEXT: call    ignore64BitArgNoInline
-; OPTM1:      push
-; OPTM1-NEXT: push
-; OPTM1-NEXT: push    123
-; OPTM1-NEXT: push
-; OPTM1-NEXT: push
-; OPTM1-NEXT: call    ignore64BitArgNoInline
+; OPTM1:      sub     esp
+; OPTM1:      mov     dword ptr [esp+4]
+; OPTM1:      mov     dword ptr [esp]
+; OPTM1:      mov     dword ptr [esp+8], 123
+; OPTM1:      mov     dword ptr [esp+16]
+; OPTM1:      mov     dword ptr [esp+12]
+; OPTM1:      call    ignore64BitArgNoInline
+; OPTM1:      sub     esp
+; OPTM1:      mov     dword ptr [esp+4]
+; OPTM1:      mov     dword ptr [esp]
+; OPTM1:      mov     dword ptr [esp+8], 123
+; OPTM1:      mov     dword ptr [esp+16]
+; OPTM1:      mov     dword ptr [esp+12]
+; OPTM1:      call    ignore64BitArgNoInline
+; OPTM1:      sub     esp
+; OPTM1:      mov     dword ptr [esp+4]
+; OPTM1:      mov     dword ptr [esp]
+; OPTM1:      mov     dword ptr [esp+8], 123
+; OPTM1:      mov     dword ptr [esp+16]
+; OPTM1:      mov     dword ptr [esp+12]
+; OPTM1:      call    ignore64BitArgNoInline
  
  declare i32 @ignore64BitArgNoInline(i64, i32, i64)
  
@@ -76,19 +86,21 @@ entry:
    ret i32 %call
  }
  ; CHECK: pass64BitConstArg:
-; CHECK:      push    3735928559
-; CHECK-NEXT: push    305419896
-; CHECK-NEXT: push    123
-; CHECK-NEXT: push    ecx
-; CHECK-NEXT: push    eax
+; CHECK:      sub     esp
+; CHECK:      mov     dword ptr [esp+4]
+; CHECK-NEXT: mov     dword ptr [esp]
+; CHECK-NEXT: mov     dword ptr [esp+8], 123
+; CHECK-NEXT: mov     dword ptr [esp+16], 3735928559
+; CHECK-NEXT: mov     dword ptr [esp+12], 305419896
  ; CHECK-NEXT: call    ignore64BitArgNoInline
  ;
  ; OPTM1: pass64BitConstArg:
-; OPTM1:      push    3735928559
-; OPTM1-NEXT: push    305419896
-; OPTM1-NEXT: push    123
-; OPTM1-NEXT: push    dword ptr [
-; OPTM1-NEXT: push    dword ptr [
+; OPTM1:      sub     esp
+; OPTM1:      mov     dword ptr [esp+4]
+; OPTM1-NEXT: mov     dword ptr [esp]
+; OPTM1-NEXT: mov     dword ptr [esp+8], 123
+; OPTM1-NEXT: mov     dword ptr [esp+16], 3735928559
+; OPTM1-NEXT: mov     dword ptr [esp+12], 305419896
  ; OPTM1-NEXT: call    ignore64BitArgNoInline
  
  define internal i64 @return64BitArg(i64 %a) {
@@ -240,14 +252,14 @@ entry:
    ret i64 %div
  }
  ; CHECK-LABEL: div64BitSignedConst:
-; CHECK: push 2874
-; CHECK: push 1942892530
+; CHECK: mov     dword ptr [esp+12], 2874
+; CHECK: mov     dword ptr [esp+8],  1942892530
  ; CHECK: call    __divdi3
  ; CHECK: ret
  ;
  ; OPTM1-LABEL: div64BitSignedConst:
-; OPTM1: push 2874
-; OPTM1: push 1942892530
+; OPTM1: mov     dword ptr [esp+12], 2874
+; OPTM1: mov     dword ptr [esp+8],  1942892530
  ; OPTM1: call    __divdi3
  ; OPTM1: ret
  
diff --git a/tests_lit/llvm2ice_tests/alloc.ll b/tests_lit/llvm2ice_tests/alloc.ll

index 2b92a87..a1c0821 100644 (file)
--- a/tests_lit/llvm2ice_tests/alloc.ll
+++ b/tests_lit/llvm2ice_tests/alloc.ll
@@ -1,8 +1,7 @@
-; This is a basic test of the alloca instruction - one test for alloca
-; of a fixed size, and one test for variable size.
+; This is a basic test of the alloca instruction.
  
  ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
-; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
+; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
  ; RUN: %llvm2ice -O2 --verbose none %s \
  ; RUN:               | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj
  ; RUN: %llvm2ice -Om1 --verbose none %s \
@@ -12,45 +11,95 @@
  ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
  ; RUN:                           | FileCheck --check-prefix=DUMP %s
  
-define void @fixed_400(i32 %n) {
+define void @fixed_416_align_16(i32 %n) {
  entry:
-  %array = alloca i8, i32 400, align 16
+  %array = alloca i8, i32 416, align 16
    %__2 = ptrtoint i8* %array to i32
    call void @f1(i32 %__2)
    ret void
  }
-; CHECK: fixed_400:
-; CHECK:      sub     esp, 400
-; CHECK-NEXT: mov     eax, esp
-; CHECK-NEXT: push    eax
-; CHECK-NEXT: call    f1
-;
-; OPTM1: fixed_400:
-; OPTM1:      sub     esp, 400
-; OPTM1-NEXT: mov     {{.*}}, esp
-; OPTM1:      push
-; OPTM1-NEXT: call    f1
+; CHECK-LABEL: fixed_416_align_16:
+; CHECK:      sub     esp, 416
+; CHECK:      sub     esp, 16
+; CHECK:      mov     dword ptr [esp], eax
+; CHECK:      call    f1
+
+define void @fixed_416_align_32(i32 %n) {
+entry:
+  %array = alloca i8, i32 400, align 32
+  %__2 = ptrtoint i8* %array to i32
+  call void @f1(i32 %__2)
+  ret void
+}
+; CHECK-LABEL: fixed_416_align_32:
+; CHECK:      and     esp, 4294967264
+; CHECK:      sub     esp, 416
+; CHECK:      sub     esp, 16
+; CHECK:      mov     dword ptr [esp], eax
+; CHECK:      call    f1
+
+define void @fixed_351_align_16(i32 %n) {
+entry:
+  %array = alloca i8, i32 351, align 16
+  %__2 = ptrtoint i8* %array to i32
+  call void @f1(i32 %__2)
+  ret void
+}
+; CHECK-LABEL: fixed_351_align_16:
+; CHECK:      sub     esp, 352
+; CHECK:      sub     esp, 16
+; CHECK:      mov     dword ptr [esp], eax
+; CHECK:      call    f1
+
+define void @fixed_351_align_32(i32 %n) {
+entry:
+  %array = alloca i8, i32 351, align 32
+  %__2 = ptrtoint i8* %array to i32
+  call void @f1(i32 %__2)
+  ret void
+}
+; CHECK-LABEL: fixed_351_align_32:
+; CHECK:      and     esp, 4294967264
+; CHECK:      sub     esp, 352
+; CHECK:      sub     esp, 16
+; CHECK:      mov     dword ptr [esp], eax
+; CHECK:      call    f1
  
  declare void @f1(i32)
  
-define void @variable_n(i32 %n) {
+define void @variable_n_align_16(i32 %n) {
  entry:
    %array = alloca i8, i32 %n, align 16
    %__2 = ptrtoint i8* %array to i32
    call void @f2(i32 %__2)
    ret void
  }
-; CHECK: variable_n:
+; CHECK-LABEL: variable_n_align_16:
  ; CHECK:      mov     eax, dword ptr [ebp+8]
-; CHECK-NEXT: sub     esp, eax
-; CHECK-NEXT: mov     eax, esp
-; CHECK-NEXT: push    eax
-; CHECK-NEXT: call    f2
-;
-; OPTM1: variable_n:
-; OPTM1:      mov     {{.*}}, esp
-; OPTM1:      push
-; OPTM1-NEXT: call    f2
+; CHECK:      add     eax, 15
+; CHECK:      and     eax, 4294967280
+; CHECK:      sub     esp, eax
+; CHECK:      sub     esp, 16
+; CHECK:      mov     dword ptr [esp], eax
+; CHECK:      call    f2
+
+define void @variable_n_align_32(i32 %n) {
+entry:
+  %array = alloca i8, i32 %n, align 32
+  %__2 = ptrtoint i8* %array to i32
+  call void @f2(i32 %__2)
+  ret void
+}
+; In -O2, the order of the CHECK-DAG lines in the output is switched.
+; CHECK-LABEL: variable_n_align_32:
+; CHECK-DAG:  and     esp, 4294967264
+; CHECK-DAG:  mov     eax, dword ptr [ebp+8]
+; CHECK:      add     eax, 31
+; CHECK:      and     eax, 4294967264
+; CHECK:      sub     esp, eax
+; CHECK:      sub     esp, 16
+; CHECK:      mov     dword ptr [esp], eax
+; CHECK:      call    f2
  
  declare void @f2(i32)
  
diff --git a/tests_lit/llvm2ice_tests/ebp_args.ll b/tests_lit/llvm2ice_tests/ebp_args.ll

index e7acd50..6a33b2b 100644 (file)
--- a/tests_lit/llvm2ice_tests/ebp_args.ll
+++ b/tests_lit/llvm2ice_tests/ebp_args.ll
@@ -22,18 +22,23 @@ entry:
  ; lowering code changes.
  
  ; CHECK: memcpy_helper:
-; CHECK: push     ebp
-; CHECK: mov      ebp, esp
-; CHECK: sub      esp, 20
-; CHECK: mov      eax, dword ptr [ebp+12]
-; CHECK: mov      dword ptr [ebp-4], eax
-; CHECK: sub      esp, 128
-; CHECK: mov      dword ptr [ebp-8], esp
-; CHECK: mov      eax, dword ptr [ebp-8]
-; CHECK: mov      dword ptr [ebp-12], eax
-; CHECK: movzx    eax, byte ptr [ebp-4]
-; CHECK: mov      dword ptr [ebp-16], eax
-; CHECK: push     dword ptr [ebp-16]
-; CHECK: push     dword ptr [ebp-12]
-; CHECK: push     dword ptr [ebp+8]
-; CHECK: call     memcpy_helper2
+; CHECK:  push     ebx
+; CHECK:  push     ebp
+; CHECK:  mov      ebp, esp
+; CHECK:  sub      esp, 20
+; CHECK:  mov      eax, dword ptr [ebp+16]
+; CHECK:  mov      dword ptr [ebp-4], eax
+; CHECK:  sub      esp, 128
+; CHECK:  mov      dword ptr [ebp-8], esp
+; CHECK:  mov      eax, dword ptr [ebp-8]
+; CHECK:  mov      dword ptr [ebp-12], eax
+; CHECK:  movzx    eax, byte ptr [ebp-4]
+; CHECK:  mov      dword ptr [ebp-16], eax
+; CHECK:  sub      esp, 16
+; CHECK:  mov      ecx, dword ptr [ebp+12]
+; CHECK:  mov      dword ptr [esp], ecx
+; CHECK:  mov      edx, dword ptr [ebp-12]
+; CHECK:  mov      dword ptr [esp+4], edx
+; CHECK:  mov      ebx, dword ptr [ebp-16]
+; CHECK:  mov      dword ptr [esp+8], ebx
+; CHECK:  call     memcpy_helper2
diff --git a/tests_lit/llvm2ice_tests/fp.pnacl.ll b/tests_lit/llvm2ice_tests/fp.pnacl.ll

index a73db92..7fcf558 100644 (file)
--- a/tests_lit/llvm2ice_tests/fp.pnacl.ll
+++ b/tests_lit/llvm2ice_tests/fp.pnacl.ll
@@ -45,11 +45,11 @@ entry:
    ret i32 %add3
  }
  ; CHECK-LABEL: passFpArgs
-; CHECK: push 123
+; CHECK: mov dword ptr [esp+4], 123
  ; CHECK: call ignoreFpArgsNoInline
-; CHECK: push 123
+; CHECK: mov dword ptr [esp+4], 123
  ; CHECK: call ignoreFpArgsNoInline
-; CHECK: push 123
+; CHECK: mov dword ptr [esp+4], 123
  ; CHECK: call ignoreFpArgsNoInline
  
  declare i32 @ignoreFpArgsNoInline(float, i32, double)
@@ -60,7 +60,7 @@ entry:
    ret i32 %call
  }
  ; CHECK-LABEL: passFpConstArg
-; CHECK: push 123
+; CHECK: mov dword ptr [esp+4], 123
  ; CHECK: call ignoreFpArgsNoInline
  
  define internal i32 @passFp32ConstArg(float %a) {
@@ -69,8 +69,8 @@ entry:
    ret i32 %call
  }
  ; CHECK-LABEL: passFp32ConstArg
-; CHECK: push dword
-; CHECK: push 123
+; CHECK: mov dword ptr [esp+4], 123
+; CHECK: movss dword ptr [esp+8]
  ; CHECK: call ignoreFp32ArgsNoInline
  
  declare i32 @ignoreFp32ArgsNoInline(float, i32, float)
@@ -415,8 +415,8 @@ entry:
    ret double %conv
  }
  ; CHECK-LABEL: unsigned64ToDouble
-; CHECK: push 2874
-; CHECK: push 1942892530
+; CHECK: mov dword ptr [esp+4], 2874
+; CHECK: mov dword ptr [esp], 1942892530
  ; CHECK: call cvtui64tod
  ; CHECK: fstp
  
diff --git a/tests_lit/llvm2ice_tests/undef.ll b/tests_lit/llvm2ice_tests/undef.ll

index b5f6914..8b3fc71 100644 (file)
--- a/tests_lit/llvm2ice_tests/undef.ll
+++ b/tests_lit/llvm2ice_tests/undef.ll
@@ -37,8 +37,7 @@ define float @undef_float() {
  entry:
    ret float undef
  ; CHECK-LABEL: undef_float:
-; CHECK-NOT: sub esp
-; CHECK: fld
+; CHECK: [L$float$
  }
  
  define <4 x i1> @undef_v4i1() {
diff --git a/tests_lit/llvm2ice_tests/vector-arg.ll b/tests_lit/llvm2ice_tests/vector-arg.ll

index 74f72e7..5c43e14 100644 (file)
--- a/tests_lit/llvm2ice_tests/vector-arg.ll
+++ b/tests_lit/llvm2ice_tests/vector-arg.ll
@@ -1,17 +1,6 @@
  ; This file checks that Subzero generates code in accordance with the
  ; calling convention for vectors.
  
-; NOTE: CHECK / OPTM1 lines containing the following strings may be
-; subject to change:
-;
-; * movups: The movups instruction may be changed to movaps when the
-; load / store operation is 16 byte aligned.
-;
-; * stack offsets: These may need to be changed if stack alignment
-; support is implemented.
-;
-; * stack adjustment operations
-
  ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
  ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
  ; RUN: %llvm2ice -O2 --verbose none %s \
@@ -150,7 +139,7 @@ define <4 x float> @test_returning_interspersed_arg4(i32 %i32arg0, double %doubl
  entry:
    ret <4 x float> %arg4
  ; CHECK-LABEL: test_returning_interspersed_arg4:
-; CHECK: movups xmm0, xmmword ptr [esp+44]
+; CHECK: movups xmm0, xmmword ptr [esp+52]
  ; CHECK: ret
  
  ; OPTM1-LABEL: test_returning_interspersed_arg4:
@@ -172,33 +161,69 @@ entry:
    call void @VectorArgs(<4 x float> %arg9, <4 x float> %arg8, <4 x float> %arg7, <4 x float> %arg6, <4 x float> %arg5, <4 x float> %arg4)
    ret void
  ; CHECK-LABEL: test_passing_vectors:
-; CHECK: movups  [[ARG6:.*]], xmmword ptr [esp+4]
-; CHECK: sub esp, 16
-; CHECK-NEXT: movups xmmword ptr [esp], [[ARG6]]
-; CHECK: movups  [[ARG5:.*]], xmmword ptr [esp+36]
-; CHECK: sub esp, 16
-; CHECK-NEXT: movups xmmword ptr [esp], [[ARG5]]
-; CHECK: movups  xmm0, xmmword ptr [esp+116]
-; CHECK: movups  xmm1, xmmword ptr [esp+100]
-; CHECK: movups  xmm2, xmmword ptr [esp+84]
-; CHECK: movups  xmm3, xmmword ptr [esp+68]
+; CHECK: sub esp, 32
+; CHECK: movups  [[ARG5:.*]], xmmword ptr [esp+64]
+; CHECK: movups  xmmword ptr [esp], [[ARG5]]
+; CHECK: movups  [[ARG6:.*]], xmmword ptr [esp+48]
+; CHECK: movups  xmmword ptr [esp+16], [[ARG6]]
+; CHECK: movups  xmm0, xmmword ptr [esp+128]
+; CHECK: movups  xmm1, xmmword ptr [esp+112]
+; CHECK: movups  xmm2, xmmword ptr [esp+96]
+; CHECK: movups  xmm3, xmmword ptr [esp+80]
  ; CHECK: call VectorArgs
  ; CHECK-NEXT: add esp, 32
  ; CHECK: ret
  
  ; OPTM1-LABEL: test_passing_vectors:
-; OPTM1: movups  [[ARG6:.*]], xmmword ptr {{.*}}
-; OPTM1: sub esp, 16
-; OPTM1: movups xmmword ptr [esp], [[ARG6]]
+; OPTM1: sub esp, 32
  ; OPTM1: movups  [[ARG5:.*]], xmmword ptr {{.*}}
-; OPTM1: sub esp, 16
-; OPTM1-NEXT: movups xmmword ptr [esp], [[ARG5]]
+; OPTM1: movups  xmmword ptr [esp], [[ARG5]]
+; OPTM1: movups  [[ARG6:.*]], xmmword ptr {{.*}}
+; OPTM1: movups  xmmword ptr [esp+16], [[ARG6]]
  ; OPTM1: movups  xmm0, xmmword ptr {{.*}}
  ; OPTM1: movups  xmm1, xmmword ptr {{.*}}
  ; OPTM1: movups  xmm2, xmmword ptr {{.*}}
  ; OPTM1: movups  xmm3, xmmword ptr {{.*}}
  ; OPTM1: call VectorArgs
-; OPTM1: add esp, 32
+; OPTM1-NEXT: add esp, 32
+; OPTM1: ret
+}
+
+declare void @InterspersedVectorArgs(<4 x float>, i64, <4 x float>, i64, <4 x float>, float, <4 x float>, double, <4 x float>, i32, <4 x float>)
+
+define void @test_passing_vectors_interspersed(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5, <4 x float> %arg6, <4 x float> %arg7, <4 x float> %arg8, <4 x float> %arg9) {
+entry:
+  ; Kills XMM registers so that no in-arg lowering code interferes
+  ; with the test.
+  call void @killXmmRegisters()
+  call void @InterspersedVectorArgs(<4 x float> %arg9, i64 0, <4 x float> %arg8, i64 1, <4 x float> %arg7, float 2.000000e+00, <4 x float> %arg6, double 3.000000e+00, <4 x float> %arg5, i32 4, <4 x float> %arg4)
+  ret void
+; CHECK-LABEL: test_passing_vectors_interspersed:
+; CHECK: sub esp, 80
+; CHECK: movups  [[ARG9:.*]], xmmword ptr [esp+112]
+; CHECK: movups  xmmword ptr [esp+32], [[ARG9]]
+; CHECK: movups  [[ARG11:.*]], xmmword ptr [esp+96]
+; CHECK: movups  xmmword ptr [esp+64], [[ARG11]]
+; CHECK: movups  xmm0, xmmword ptr [esp+176]
+; CHECK: movups  xmm1, xmmword ptr [esp+160]
+; CHECK: movups  xmm2, xmmword ptr [esp+144]
+; CHECK: movups  xmm3, xmmword ptr [esp+128]
+; CHECK: call InterspersedVectorArgs
+; CHECK-NEXT: add esp, 80
+; CHECK: ret
+
+; OPTM1-LABEL: test_passing_vectors_interspersed:
+; OPTM1: sub esp, 80
+; OPTM1: movups  [[ARG9:.*]], xmmword ptr {{.*}}
+; OPTM1: movups  xmmword ptr [esp+32], [[ARG9]]
+; OPTM1: movups  [[ARG11:.*]], xmmword ptr {{.*}}
+; OPTM1: movups  xmmword ptr [esp+64], [[ARG11]]
+; OPTM1: movups  xmm0, xmmword ptr {{.*}}
+; OPTM1: movups  xmm1, xmmword ptr {{.*}}
+; OPTM1: movups  xmm2, xmmword ptr {{.*}}
+; OPTM1: movups  xmm3, xmmword ptr {{.*}}
+; OPTM1: call InterspersedVectorArgs
+; OPTM1-NEXT: add esp, 80
  ; OPTM1: ret
  }
  
@@ -220,8 +245,8 @@ entry:
  
  ; OPTM1-LABEL: test_receiving_vectors:
  ; OPTM1: call VectorReturn
-; OPTM1: movups [[LOC:.*]], xmm0
-; OPTM1: movups xmm0, [[LOC]]
+; OPTM1: movups {{.*}}, xmm0
+; OPTM1: movups xmm0, {{.*}}
  ; OPTM1: call VectorReturn
  ; OPTM1: ret
  }
author	Matt Wala <wala@chromium.org>
	Tue, 12 Aug 2014 02:56:19 +0000 (19:56 -0700)
committer	Matt Wala <wala@chromium.org>
	Tue, 12 Aug 2014 02:56:19 +0000 (19:56 -0700)
crosstest/crosstest.py		patch \| blob \| history
crosstest/runtests.sh		patch \| blob \| history
crosstest/test_calling_conv.cpp	[new file with mode: 0644]	patch \| blob
crosstest/test_calling_conv.def	[new file with mode: 0644]	patch \| blob
crosstest/test_calling_conv.h	[new file with mode: 0644]	patch \| blob
crosstest/test_calling_conv_main.cpp	[new file with mode: 0644]	patch \| blob
src/IceInstX8632.cpp		patch \| blob \| history
src/IceInstX8632.h		patch \| blob \| history
src/IceTargetLoweringX8632.cpp		patch \| blob \| history
src/IceTargetLoweringX8632.h		patch \| blob \| history
tests_lit/llvm2ice_tests/64bit.pnacl.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/alloc.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/ebp_args.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/fp.pnacl.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/undef.ll		patch \| blob \| history
tests_lit/llvm2ice_tests/vector-arg.ll		patch \| blob \| history