objs.append(bitcode)
linker = 'clang' if os.path.splitext(args.driver)[1] == '.c' else 'clang++'
- # TODO: Remove -mstackrealign after Subzero supports stack alignment.
- shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32',
- '-mstackrealign', args.driver] + objs +
+ shellcmd([os.path.join(llvm_bin_path, linker), '-g', '-m32', args.driver] +
+ objs +
['-lm', '-lpthread', '-o', os.path.join(args.dir, args.output)])
--output=test_bitmanip_O${optlevel}_${attribute}
./crosstest.py -O${optlevel} --mattr ${attribute} \
+ --prefix=Subzero_ --target=x8632 \
+ --dir="${OUTDIR}" \
+ --llvm-bin-path="${LLVM_BIN_PATH}" \
+ --test=test_calling_conv.cpp \
+ --driver=test_calling_conv_main.cpp \
+ --output=test_calling_conv_O${optlevel}_${attribute}
+
+ ./crosstest.py -O${optlevel} --mattr ${attribute} \
--prefix=Subzero_ \
--target=x8632 \
--dir="${OUTDIR}" \
"${OUTDIR}"/mem_intrin_O${optlevel}_${attribute}
"${OUTDIR}"/test_arith_O${optlevel}_${attribute}
"${OUTDIR}"/test_bitmanip_O${optlevel}_${attribute}
+ "${OUTDIR}"/test_calling_conv_O${optlevel}_${attribute}
"${OUTDIR}"/test_cast_O${optlevel}_${attribute}
"${OUTDIR}"/test_fcmp_O${optlevel}_${attribute}
"${OUTDIR}"/test_global_O${optlevel}_${attribute}
--- /dev/null
+//===- subzero/crosstest/test_calling_conv.cpp - Implementation for tests -===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the test functions used to check that Subzero
+// generates code compatible with the calling convention used by
+// llc. "Caller" functions test the handling of out-args, and "callee"
+// functions test the handling of in-args.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstring>
+
+#include "test_calling_conv.h"
+
+#define CALL_AS_TYPE(Ty, Func) (reinterpret_cast<Ty *>(Func))
+
+void caller_i(void) {
+ int arg1 = 0x12345678;
+ CALL_AS_TYPE(callee_i_Ty, Callee)(arg1);
+}
+
+void caller_vvvvv(void) {
+ v4si32 arg1 = {0, 1, 2, 3};
+ v4si32 arg2 = {4, 5, 6, 7};
+ v4si32 arg3 = {8, 9, 10, 11};
+ v4si32 arg4 = {12, 13, 14, 15};
+ v4si32 arg5 = {16, 17, 18, 19};
+
+ CALL_AS_TYPE(callee_vvvvv_Ty, Callee)(arg1, arg2, arg3, arg4, arg5);
+}
+
+void caller_vlvlivfvdviv(void) {
+ v4f32 arg1 = {0, 1, 2, 3};
+ int64_t arg2 = 4;
+ v4f32 arg3 = {6, 7, 8, 9};
+ int64_t arg4 = 10;
+ int arg5 = 11;
+ v4f32 arg6 = {12, 13, 14, 15};
+ float arg7 = 16;
+ v4f32 arg8 = {17, 18, 19, 20};
+ double arg9 = 21;
+ v4f32 arg10 = {22, 23, 24, 25};
+ int arg11 = 26;
+ v4f32 arg12 = {27, 28, 29, 30};
+
+ CALL_AS_TYPE(callee_vlvlivfvdviv_Ty, Callee)(arg1, arg2, arg3, arg4, arg5,
+ arg6, arg7, arg8, arg9, arg10,
+ arg11, arg12);
+}
+
+#define HANDLE_ARG(ARGNUM) \
+ case ARGNUM: \
+ memcpy(&Buf[0], &arg##ARGNUM, sizeof(arg##ARGNUM)); \
+ break;
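+// For example, HANDLE_ARG(1) expands to:
+//   case 1: memcpy(&Buf[0], &arg1, sizeof(arg1)); break;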
+
+void __attribute__((noinline)) callee_i(int arg1) {
+ switch (ArgNum) { HANDLE_ARG(1); }
+}
+
+void __attribute__((noinline))
+callee_vvvvv(v4si32 arg1, v4si32 arg2, v4si32 arg3, v4si32 arg4, v4si32 arg5) {
+ switch (ArgNum) {
+ HANDLE_ARG(1);
+ HANDLE_ARG(2);
+ HANDLE_ARG(3);
+ HANDLE_ARG(4);
+ HANDLE_ARG(5);
+ }
+}
+
+void __attribute__((noinline))
+callee_vlvlivfvdviv(v4f32 arg1, int64_t arg2, v4f32 arg3, int64_t arg4, int arg5,
+ v4f32 arg6, float arg7, v4f32 arg8, double arg9, v4f32 arg10,
+ int arg11, v4f32 arg12) {
+ switch (ArgNum) {
+ HANDLE_ARG(1);
+ HANDLE_ARG(2);
+ HANDLE_ARG(3);
+ HANDLE_ARG(4);
+ HANDLE_ARG(5);
+ HANDLE_ARG(6);
+ HANDLE_ARG(7);
+ HANDLE_ARG(8);
+ HANDLE_ARG(9);
+ HANDLE_ARG(10);
+ HANDLE_ARG(11);
+ HANDLE_ARG(12);
+ }
+}
--- /dev/null
+//===- subzero/crosstest/test_calling_conv.def - testing macros -*- C++ -*-===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines macros for testing the calling convention.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TEST_CALLING_CONV_DEF
+#define TEST_CALLING_CONV_DEF
+
+#define STR(x) (#x)
+
+#define TEST_FUNC_TABLE \
+/* caller, callee, argc */ \
+X(caller_i, callee_i, 1) \
+X(caller_vvvvv, callee_vvvvv, 5) \
+X(caller_vlvlivfvdviv, callee_vlvlivfvdviv, 12) \
+// #define X(caller, callee, argc)
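+//
+// Users define X before expanding the table; for example, a simplified
+// version of the tables built in test_calling_conv_main.cpp would be:
+//   #define X(caller, callee, argc) {STR(caller), STR(callee), argc},
+//   TEST_FUNC_TABLE
+//   #undef X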
+
+#endif // TEST_CALLING_CONV_DEF
--- /dev/null
+//===- subzero/crosstest/test_calling_conv.h - Test prototypes --*- C++ -*-===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the function prototypes for crosstesting the calling
+// convention.
+//
+//===----------------------------------------------------------------------===//
+
+#include "test_calling_conv.def"
+#include "vectors.h"
+
+typedef void (*CalleePtrTy)();
+extern CalleePtrTy Callee;
+extern size_t ArgNum;
+extern char *Buf;
+
+void caller_i();
+void caller_alloca_i();
+typedef void callee_i_Ty(int);
+callee_i_Ty callee_i;
+callee_i_Ty callee_alloca_i;
+
+void caller_vvvvv();
+typedef void (callee_vvvvv_Ty)(v4si32, v4si32, v4si32, v4si32, v4si32);
+callee_vvvvv_Ty callee_vvvvv;
+
+void caller_vlvlivfvdviv();
+typedef void(callee_vlvlivfvdviv_Ty)(v4f32, int64_t, v4f32, int64_t, int, v4f32,
+ float, v4f32, double, v4f32, int, v4f32);
+callee_vlvlivfvdviv_Ty callee_vlvlivfvdviv;
--- /dev/null
+//===- subzero/crosstest/test_calling_conv_main.cpp - Driver for tests ----===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the driver for cross testing the compatibility of
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+/* crosstest.py --test=test_calling_conv.cpp \
+ --driver=test_calling_conv_main.cpp --prefix=Subzero_ \
+ --output=test_calling_conv */
+
+#include <cstring>
+#include <iostream>
+#include <sstream>
+
+#include "test_calling_conv.h"
+
+namespace Subzero_ {
+#include "test_calling_conv.h"
+}
+
+// The crosstest code consists of caller / callee function pairs.
+//
+// The caller function initializes a list of arguments and calls the
+// function located at Callee.
+//
+// The callee function writes the argument numbered ArgNum into the
+// location pointed to by Buf.
+//
+// testCaller() tests that caller functions, as compiled by Subzero and
+// llc, pass arguments to the callee in the same way. The Caller() and
+// Subzero_Caller() functions both call the same callee (which has been
+// compiled by llc). The result in the global buffer is compared to
+// check that it is the same value after the calls by both callers.
+//
+// testCallee() runs the same kind of test, except that the functions
+// Callee() and Subzero_Callee() are being tested to ensure that both
+// functions receive arguments from the caller in the same way. The
+// caller is compiled by llc.
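+//
+// For example, for the caller_i / callee_i pair, testCaller() runs the
+// llc-compiled caller_i and then Subzero_::caller_i, both calling the
+// llc-compiled callee_i through Callee; the bytes that callee_i copies
+// into each caller's buffer are then compared with memcmp().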
+
+size_t ArgNum, Subzero_ArgNum;
+CalleePtrTy Callee, Subzero_Callee;
+char *Buf, *Subzero_Buf;
+
+const static size_t BUF_SIZE = 16;
+
+std::string bufAsString(const char Buf[BUF_SIZE]) {
+ std::ostringstream OS;
+ for (size_t i = 0; i < BUF_SIZE; ++i) {
+ if (i > 0)
+ OS << " ";
+ OS << (unsigned)(unsigned char) Buf[i];
+ }
+ return OS.str();
+}
+
+void testCaller(size_t &TotalTests, size_t &Passes, size_t &Failures) {
+ static struct {
+ const char *CallerName, *CalleeName;
+ size_t Args;
+ void (*Caller)(void);
+ void (*Subzero_Caller)(void);
+ CalleePtrTy Callee;
+ } Funcs[] = {
+#define X(caller, callee, argc) \
+ { \
+ STR(caller), STR(callee), argc, &caller, &Subzero_::caller, \
+ reinterpret_cast<CalleePtrTy>(&callee), \
+ } \
+ ,
+ TEST_FUNC_TABLE
+#undef X
+ };
+
+ const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
+
+ for (size_t f = 0; f < NumFuncs; ++f) {
+ char BufLlc[BUF_SIZE], BufSz[BUF_SIZE];
+ Callee = Subzero_Callee = Funcs[f].Callee;
+
+ for (size_t i = 0; i < Funcs[f].Args; ++i) {
+ memset(BufLlc, 0xff, sizeof(BufLlc));
+ memset(BufSz, 0xff, sizeof(BufSz));
+
+ ArgNum = Subzero_ArgNum = i;
+
+ Buf = BufLlc;
+ Funcs[f].Caller();
+
+ Buf = BufSz;
+ Funcs[f].Subzero_Caller();
+
+ ++TotalTests;
+ if (!memcmp(BufLlc, BufSz, sizeof(BufLlc))) {
+ ++Passes;
+ } else {
+ ++Failures;
+ std::cout << "testCaller(Caller=" << Funcs[f].CallerName
+ << ", Callee=" << Funcs[f].CalleeName << ", ArgNum=" << ArgNum
+ << ")\nsz =" << bufAsString(BufSz)
+ << "\nllc=" << bufAsString(BufLlc) << "\n";
+ }
+ }
+ }
+}
+
+void testCallee(size_t &TotalTests, size_t &Passes, size_t &Failures) {
+ static struct {
+ const char *CallerName, *CalleeName;
+ size_t Args;
+ void (*Caller)(void);
+ CalleePtrTy Callee, Subzero_Callee;
+ } Funcs[] = {
+#define X(caller, callee, argc) \
+ { \
+ STR(caller), STR(callee), argc, &caller, \
+ reinterpret_cast<CalleePtrTy>(&callee), \
+ reinterpret_cast<CalleePtrTy>(&Subzero_::callee) \
+ } \
+ ,
+ TEST_FUNC_TABLE
+#undef X
+ };
+
+ const static size_t NumFuncs = sizeof(Funcs) / sizeof(*Funcs);
+
+ for (size_t f = 0; f < NumFuncs; ++f) {
+ char BufLlc[BUF_SIZE], BufSz[BUF_SIZE];
+ Buf = BufLlc;
+ Subzero_Buf = BufSz;
+
+ for (size_t i = 0; i < Funcs[f].Args; ++i) {
+ memset(BufLlc, 0xff, sizeof(BufLlc));
+ memset(BufSz, 0xff, sizeof(BufSz));
+
+ ArgNum = Subzero_ArgNum = i;
+
+ Callee = Funcs[f].Callee;
+ Funcs[f].Caller();
+
+ Callee = Funcs[f].Subzero_Callee;
+ Funcs[f].Caller();
+
+ ++TotalTests;
+ if (!memcmp(BufLlc, BufSz, sizeof(BufLlc))) {
+ ++Passes;
+ } else {
+ ++Failures;
+ std::cout << "testCallee(Caller=" << Funcs[f].CallerName
+ << ", Callee=" << Funcs[f].CalleeName << ", ArgNum=" << ArgNum
+ << ")\nsz =" << bufAsString(BufSz)
+ << "\nllc=" << bufAsString(BufLlc) << "\n";
+ }
+ }
+ }
+}
+
+int main(int argc, char *argv[]) {
+ size_t TotalTests = 0;
+ size_t Passes = 0;
+ size_t Failures = 0;
+
+ testCaller(TotalTests, Passes, Failures);
+ testCallee(TotalTests, Passes, Failures);
+
+ std::cout << "TotalTests=" << TotalTests << " Passes=" << Passes
+ << " Failures=" << Failures << "\n";
+
+ return Failures;
+}
}
}
+InstX8632AdjustStack::InstX8632AdjustStack(Cfg *Func, SizeT Amount)
+ : InstX8632(Func, InstX8632::Adjuststack, 0, NULL), Amount(Amount) {}
+
InstX8632Mul::InstX8632Mul(Cfg *Func, Variable *Dest, Variable *Source1,
Operand *Source2)
: InstX8632(Func, InstX8632::Mul, 2, Dest) {
addSource(Source);
}
+InstX8632StoreP::InstX8632StoreP(Cfg *Func, Operand *Value, OperandX8632 *Mem)
+ : InstX8632(Func, InstX8632::StoreP, 2, NULL) {
+ addSource(Value);
+ addSource(Mem);
+}
+
InstX8632StoreQ::InstX8632StoreQ(Cfg *Func, Operand *Value, OperandX8632 *Mem)
: InstX8632(Func, InstX8632::StoreQ, 2, NULL) {
addSource(Value);
getSrc(0)->dump(Func);
}
+void InstX8632StoreP::emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ assert(getSrcSize() == 2);
+ Str << "\tmovups\t";
+ getSrc(1)->emit(Func);
+ Str << ", ";
+ getSrc(0)->emit(Func);
+ Str << "\n";
+}
+
+void InstX8632StoreP::dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ Str << "storep." << getSrc(0)->getType() << " ";
+ getSrc(1)->dump(Func);
+ Str << ", ";
+ getSrc(0)->dump(Func);
+}
+
void InstX8632StoreQ::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
Str << " = pop." << getDest()->getType() << " ";
}
+void InstX8632AdjustStack::emit(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrEmit();
+ Str << "\tsub\tesp, " << Amount << "\n";
+ Func->getTarget()->updateStackAdjustment(Amount);
+}
+
+void InstX8632AdjustStack::dump(const Cfg *Func) const {
+ Ostream &Str = Func->getContext()->getStrDump();
+ Str << "esp = sub.i32 esp, " << Amount;
+}
+
void InstX8632Push::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
Add,
Addps,
Addss,
+ Adjuststack,
And,
Blendvps,
Br,
Shufps,
Sqrtss,
Store,
+ StoreP,
StoreQ,
Sub,
Subps,
InstX8632Label *Label; // Intra-block branch target
};
+// AdjustStack instruction - subtracts the given amount from esp and
+// updates the recorded stack adjustment during code emission.
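+// For example, lowerCall() emits AdjustStack(N) to reserve N bytes of
+// argument area; emission produces "sub esp, N" and records the
+// adjustment so that esp-relative stack operands are emitted with the
+// correct offsets.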
+class InstX8632AdjustStack : public InstX8632 {
+public:
+ static InstX8632AdjustStack *create(Cfg *Func, SizeT Amount) {
+ return new (Func->allocate<InstX8632AdjustStack>())
+ InstX8632AdjustStack(Func, Amount);
+ }
+ virtual void emit(const Cfg *Func) const;
+ virtual void dump(const Cfg *Func) const;
+ static bool classof(const Inst *Inst) { return isClassof(Inst, Adjuststack); }
+
+private:
+ InstX8632AdjustStack(Cfg *Func, SizeT Amount);
+ InstX8632AdjustStack(const InstX8632AdjustStack &) LLVM_DELETED_FUNCTION;
+ InstX8632AdjustStack &operator=(const InstX8632AdjustStack &)
+ LLVM_DELETED_FUNCTION;
+ SizeT Amount;
+};
+
// Call instruction. Arguments should have already been pushed.
class InstX8632Call : public InstX8632 {
public:
virtual ~InstX8632Movp() {}
};
+class InstX8632StoreP : public InstX8632 {
+public:
+ static InstX8632StoreP *create(Cfg *Func, Operand *Value, OperandX8632 *Mem) {
+ return new (Func->allocate<InstX8632StoreP>())
+ InstX8632StoreP(Func, Value, Mem);
+ }
+ virtual void emit(const Cfg *Func) const;
+ virtual void dump(const Cfg *Func) const;
+ static bool classof(const Inst *Inst) { return isClassof(Inst, StoreP); }
+
+private:
+ InstX8632StoreP(Cfg *Func, Operand *Value, OperandX8632 *Mem);
+ InstX8632StoreP(const InstX8632StoreP &) LLVM_DELETED_FUNCTION;
+ InstX8632StoreP &operator=(const InstX8632StoreP &) LLVM_DELETED_FUNCTION;
+ virtual ~InstX8632StoreP() {}
+};
+
// This is essentially a "movq" instruction with an OperandX8632Mem
// operand instead of Variable as the destination. It's important
// for liveness that there is no Dest operand.
}
// The maximum number of arguments to pass in XMM registers
-const unsigned X86_MAX_XMM_ARGS = 4;
+const uint32_t X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte
-const unsigned X86_CHAR_BIT = 8;
+const uint32_t X86_CHAR_BIT = 8;
+// Stack alignment
+const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
+// Size of the return address on the stack
+const uint32_t X86_RET_IP_SIZE_BYTES = 4;
+
+// Value is a size in bytes. Return Value adjusted to the next highest
+// multiple of the stack alignment.
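+// For example, with a 16 byte stack alignment, applyStackAlignment(1)
+// through applyStackAlignment(16) return 16, and applyStackAlignment(17)
+// returns 32.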
+uint32_t applyStackAlignment(uint32_t Value) {
+ // The stack alignment must be a power of 2.
+ assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
+ return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES;
+}
// Instruction set options
namespace cl = ::llvm::cl;
TargetX8632::TargetX8632(Cfg *Func)
: TargetLowering(Func), InstructionSet(CLInstructionSet),
- IsEbpBasedFrame(false), FrameSizeLocals(0), LocalsSizeBytes(0),
- NextLabelNumber(0), ComputedLiveRanges(false),
+ IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
+ LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
PhysicalRegisters(VarList(Reg_NUM)) {
// TODO: Don't initialize IntegerRegisters and friends every time.
// Instead, initialize in some sort of static initializer for the
finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
return;
}
+ if (isVectorType(Ty)) {
+ InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
+ }
Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
if (Arg->hasReg()) {
// or B.
const bool SimpleCoalescing = true;
size_t InArgsSizeBytes = 0;
- size_t RetIpSizeBytes = 4;
size_t PreservedRegsSizeBytes = 0;
LocalsSizeBytes = 0;
Context.init(Node);
_mov(ebp, esp);
}
+ if (NeedsStackAlignment) {
+ uint32_t StackSize = applyStackAlignment(
+ X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes);
+ LocalsSizeBytes =
+ StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes;
+ }
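+ // For example, with a 4 byte return address, 8 bytes of preserved
+ // registers, and 25 bytes of locals, StackSize is
+ // applyStackAlignment(37) = 48, and LocalsSizeBytes grows to
+ // 48 - 4 - 8 = 36, keeping the frame 16 byte aligned.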
+
// Generate "sub esp, LocalsSizeBytes"
if (LocalsSizeBytes)
_sub(getPhysicalRegister(Reg_esp),
// for those that were register-allocated. Args are pushed right to
// left, so Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
- size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
+ size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
if (!IsEbpBasedFrame)
BasicFrameOffset += LocalsSizeBytes;
void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
IsEbpBasedFrame = true;
- // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
- // the number of adjustments of esp, etc.
+ // Conservatively require the stack to be aligned. Some stack
+ // adjustment operations implemented below assume that the stack is
+ // aligned before the alloca. All the alloca code ensures that the
+ // stack alignment is preserved after the alloca. The stack alignment
+ // restriction can be relaxed in some cases.
+ NeedsStackAlignment = true;
+
+ // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc.
Variable *esp = getPhysicalRegister(Reg_esp);
Operand *TotalSize = legalize(Inst->getSizeInBytes());
Variable *Dest = Inst->getDest();
- _sub(esp, TotalSize);
+ uint32_t AlignmentParam = Inst->getAlignInBytes();
+
+ // LLVM enforces power of 2 alignment.
+ assert((AlignmentParam & (AlignmentParam - 1)) == 0);
+ assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
+
+ uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
+ if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
+ _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));
+ }
+ if (ConstantInteger *ConstantTotalSize =
+ llvm::dyn_cast<ConstantInteger>(TotalSize)) {
+ uint32_t Value = ConstantTotalSize->getValue();
+ // Round Value up to the next highest multiple of the alignment.
+ Value = (Value + Alignment - 1) & -Alignment;
+ _sub(esp, Ctx->getConstantInt(IceType_i32, Value));
+ } else {
+ // Non-constant sizes need to be adjusted to the next highest
+ // multiple of the required alignment at runtime.
+ Variable *T = makeReg(IceType_i32);
+ _mov(T, TotalSize);
+ _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1));
+ _and(T, Ctx->getConstantInt(IceType_i32, -Alignment));
+ _sub(esp, T);
+ }
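+ // For example, "alloca i8, i32 400, align 32" lowers to an "and" of esp
+ // with -32 followed by "sub esp, 416", as exercised by the alloca tests
+ // in this change.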
_mov(Dest, esp);
}
}
void TargetX8632::lowerCall(const InstCall *Instr) {
+ // x86-32 calling convention:
+ //
+ // * At the point before the call, the stack must be aligned to 16
+ // bytes.
+ //
+ // * The first four arguments of vector type, regardless of their
+ // position relative to the other arguments in the argument list, are
+ // placed in registers xmm0 - xmm3.
+ //
+ // * Other arguments are pushed onto the stack in right-to-left order,
+ // such that the left-most argument ends up on the top of the stack at
+ // the lowest memory address.
+ //
+ // * Stack arguments of vector type are aligned to start at the next
+ // highest multiple of 16 bytes. Other stack arguments are aligned to
+ // 4 bytes.
+ //
+ // This is intended to match the section "IA-32 Function Calling
+ // Convention" of the document "OS X ABI Function Call Guide" by
+ // Apple.
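+ //
+ // For example, for a (hypothetical) call foo(i32 %a, <4 x float> %b,
+ // i32 %c), %b would be passed in xmm0 while %a and %c are stored to
+ // [esp] and [esp+4]; the 8 byte argument area is then padded out to
+ // 16 bytes.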
+ NeedsStackAlignment = true;
+
+ OperandList XmmArgs;
+ OperandList StackArgs, StackArgLocations;
+ uint32_t ParameterAreaSizeBytes = 0;
+
// Classify each argument operand according to the location where the
// argument is passed.
- OperandList XmmArgs;
- OperandList StackArgs;
for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
Operand *Arg = Instr->getArg(i);
- if (isVectorType(Arg->getType()) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
+ Type Ty = Arg->getType();
+ // The PNaCl ABI requires the width of arguments to be at least 32 bits.
+ assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 ||
+ Ty == IceType_f64 || isVectorType(Ty));
+ if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
XmmArgs.push_back(Arg);
} else {
StackArgs.push_back(Arg);
+ if (isVectorType(Arg->getType())) {
+ ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
+ }
+ Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
+ Constant *Loc = Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes);
+ StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
+ ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
}
}
- // For stack arguments, generate a sequence of push instructions,
- // pushing right to left, keeping track of stack offsets in case a
- // push involves a stack operand and we are using an esp-based frame.
- uint32_t StackOffset = 0;
- // TODO: Consolidate the stack adjustment for function calls by
- // reserving enough space for the arguments only once.
+
+ // Adjust the parameter area so that the stack is aligned. It is
+ // assumed that the stack is already aligned at the start of the
+ // calling sequence.
+ ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
+
+ // Subtract the appropriate amount for the argument area. This also
+ // takes care of setting the stack adjustment during emission.
//
// TODO: If for some reason the call instruction gets dead-code
// eliminated after lowering, we would need to ensure that the
- // pre-call push instructions and the post-call esp adjustment get
- // eliminated as well.
- for (OperandList::reverse_iterator I = StackArgs.rbegin(),
- E = StackArgs.rend(); I != E; ++I) {
- Operand *Arg = legalize(*I);
- if (Arg->getType() == IceType_i64) {
- _push(hiOperand(Arg));
- _push(loOperand(Arg));
- } else if (Arg->getType() == IceType_f64 || isVectorType(Arg->getType())) {
- // If the Arg turns out to be a memory operand, more than one push
- // instruction is required. This ends up being somewhat clumsy in
- // the current IR, so we use a workaround. Force the operand into
- // a (xmm) register, and then push the register. An xmm register
- // push is actually not possible in x86, but the Push instruction
- // emitter handles this by decrementing the stack pointer and
- // directly writing the xmm register value.
- _push(legalize(Arg, Legal_Reg));
- } else {
- // Otherwise PNaCl requires parameter types to be at least 32-bits.
- assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32);
- _push(Arg);
- }
- StackOffset += typeWidthInBytesOnStack(Arg->getType());
+ // pre-call and the post-call esp adjustment get eliminated as well.
+ if (ParameterAreaSizeBytes) {
+ _adjust_stack(ParameterAreaSizeBytes);
}
+
+ // Copy arguments that are passed on the stack to the appropriate
+ // stack locations.
+ for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
+ lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+ // TODO: Consider calling postLower() here to reduce the register
+ // pressure associated with using too many infinite weight
+ // temporaries when lowering the call sequence in -Om1 mode.
+ }
+
// Copy arguments to be passed in registers to the appropriate
// registers.
// TODO: Investigate the impact of lowering arguments passed in
if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
- // Add the appropriate offset to esp.
- if (StackOffset) {
+ // Add the appropriate offset to esp. The call instruction takes care
+ // of resetting the stack offset during emission.
+ if (ParameterAreaSizeBytes) {
Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
- _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
+ _add(esp, Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes));
}
// Insert a register-kill pseudo instruction.
} else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
// Use pshufd and movd/movss.
//
- // ALIGNHACK: Force vector operands to registers in instructions that
- // require aligned memory operands until support for stack alignment
- // is implemented.
+ // ALIGNHACK: Force vector operands to registers in instructions
+ // that require aligned memory operands until support for data
+ // alignment is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect))
Operand *SourceVectRM =
legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
- // ALIGNHACK: Without support for stack alignment, both operands to
- // cmpps need to be forced into registers. Once support for stack
+ // ALIGNHACK: Without support for data alignment, both operands to
+ // cmpps need to be forced into registers. Once support for data
// alignment is implemented, remove LEGAL_HACK.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
switch (Condition) {
}
// TODO: ALIGNHACK: Both operands to compare instructions need to be
- // in registers until stack alignment support is implemented. Once
- // there is support for stack alignment, LEGAL_HACK can be removed.
+ // in registers until data alignment support is implemented. Once
+ // there is support for data alignment, LEGAL_HACK can be removed.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
Variable *T = makeReg(Ty);
switch (Condition) {
Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);
- // ALIGNHACK: Force vector operands to registers in instructions that
- // require aligned memory operands until support for stack alignment
- // is implemented.
+ // ALIGNHACK: Force vector operands to registers in instructions
+ // that require aligned memory operands until support for data
+ // alignment is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect))
if (Index == 1) {
SourceVectRM = ALIGN_HACK(SourceVectRM);
}
case Intrinsics::Memset: {
// The value operand needs to be extended to a stack slot size
- // because "push" only works for a specific operand size.
+ // because the PNaCl ABI requires arguments to be at least 32 bits
+ // wide.
Operand *ValOp = Instr->getArg(1);
assert(ValOp->getType() == IceType_i8);
Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode());
Variable *T = makeReg(SrcTy);
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
- // ALIGNHACK: Until stack alignment support is implemented, vector
+ // ALIGNHACK: Until data alignment support is implemented, vector
// instructions need to have vector operands in registers. Once
- // there is support for stack alignment, LEGAL_HACK can be removed.
+ // there is support for data alignment, LEGAL_HACK can be removed.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
if (InstructionSet >= SSE4_1) {
// TODO(wala): If the condition operand is a constant, use blendps
Operand *Value = Inst->getData();
Operand *Addr = Inst->getAddr();
OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
+ Type Ty = NewAddr->getType();
- if (NewAddr->getType() == IceType_i64) {
+ if (Ty == IceType_i64) {
Value = legalize(Value);
Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
_store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
_store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
+ } else if (isVectorType(Ty)) {
+ _storep(legalizeToVar(Value), NewAddr);
} else {
Value = legalize(Value, Legal_Reg | Legal_Imm, true);
_store(Value, NewAddr);
llvm::SmallBitVector AvailableTypedRegisters =
AvailableRegisters & getRegisterSetForType(Var->getType());
if (!AvailableTypedRegisters.any()) {
- // This is a hack in case we run out of physical registers
- // due to an excessive number of "push" instructions from
- // lowering a call.
+ // This is a hack in case we run out of physical registers due
+ // to an excessively long code sequence, as might happen when
+ // lowering arguments in lowerCall().
AvailableRegisters = WhiteList;
AvailableTypedRegisters =
AvailableRegisters & getRegisterSetForType(Var->getType());
void _add(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Add::create(Func, Dest, Src0));
}
+ void _adjust_stack(int32_t Amount) {
+ Context.insert(InstX8632AdjustStack::create(Func, Amount));
+ }
void _addps(Variable *Dest, Operand *Src0) {
Context.insert(InstX8632Addps::create(Func, Dest, Src0));
}
void _store(Operand *Value, OperandX8632 *Mem) {
Context.insert(InstX8632Store::create(Func, Value, Mem));
}
+ void _storep(Operand *Value, OperandX8632 *Mem) {
+ Context.insert(InstX8632StoreP::create(Func, Value, Mem));
+ }
void _storeq(Operand *Value, OperandX8632 *Mem) {
Context.insert(InstX8632StoreQ::create(Func, Value, Mem));
}
const X86InstructionSet InstructionSet;
bool IsEbpBasedFrame;
+ bool NeedsStackAlignment;
size_t FrameSizeLocals;
size_t LocalsSizeBytes;
llvm::SmallBitVector TypeToRegisterSet[IceType_NUM];
ret i32 %add3
}
; CHECK: pass64BitArg:
-; CHECK: push 123
-; CHECK-NEXT: push
-; CHECK-NEXT: push
-; CHECK-NEXT: call ignore64BitArgNoInline
-; CHECK: push
-; CHECK-NEXT: push
-; CHECK-NEXT: push 123
-; CHECK-NEXT: push
-; CHECK-NEXT: push
-; CHECK-NEXT: call ignore64BitArgNoInline
-; CHECK: push
-; CHECK-NEXT: push
-; CHECK-NEXT: push 123
-; CHECK-NEXT: push
-; CHECK-NEXT: push
-; CHECK-NEXT: call ignore64BitArgNoInline
+; CHECK: sub esp
+; CHECK: mov dword ptr [esp+4]
+; CHECK: mov dword ptr [esp]
+; CHECK: mov dword ptr [esp+8], 123
+; CHECK: mov dword ptr [esp+16]
+; CHECK: mov dword ptr [esp+12]
+; CHECK: call ignore64BitArgNoInline
+; CHECK: sub esp
+; CHECK: mov dword ptr [esp+4]
+; CHECK: mov dword ptr [esp]
+; CHECK: mov dword ptr [esp+8], 123
+; CHECK: mov dword ptr [esp+16]
+; CHECK: mov dword ptr [esp+12]
+; CHECK: call ignore64BitArgNoInline
+; CHECK: sub esp
+; CHECK: mov dword ptr [esp+4]
+; CHECK: mov dword ptr [esp]
+; CHECK: mov dword ptr [esp+8], 123
+; CHECK: mov dword ptr [esp+16]
+; CHECK: mov dword ptr [esp+12]
+; CHECK: call ignore64BitArgNoInline
;
; OPTM1: pass64BitArg:
-; OPTM1: push 123
-; OPTM1-NEXT: push
-; OPTM1-NEXT: push
-; OPTM1-NEXT: call ignore64BitArgNoInline
-; OPTM1: push
-; OPTM1-NEXT: push
-; OPTM1-NEXT: push 123
-; OPTM1-NEXT: push
-; OPTM1-NEXT: push
-; OPTM1-NEXT: call ignore64BitArgNoInline
-; OPTM1: push
-; OPTM1-NEXT: push
-; OPTM1-NEXT: push 123
-; OPTM1-NEXT: push
-; OPTM1-NEXT: push
-; OPTM1-NEXT: call ignore64BitArgNoInline
+; OPTM1: sub esp
+; OPTM1: mov dword ptr [esp+4]
+; OPTM1: mov dword ptr [esp]
+; OPTM1: mov dword ptr [esp+8], 123
+; OPTM1: mov dword ptr [esp+16]
+; OPTM1: mov dword ptr [esp+12]
+; OPTM1: call ignore64BitArgNoInline
+; OPTM1: sub esp
+; OPTM1: mov dword ptr [esp+4]
+; OPTM1: mov dword ptr [esp]
+; OPTM1: mov dword ptr [esp+8], 123
+; OPTM1: mov dword ptr [esp+16]
+; OPTM1: mov dword ptr [esp+12]
+; OPTM1: call ignore64BitArgNoInline
+; OPTM1: sub esp
+; OPTM1: mov dword ptr [esp+4]
+; OPTM1: mov dword ptr [esp]
+; OPTM1: mov dword ptr [esp+8], 123
+; OPTM1: mov dword ptr [esp+16]
+; OPTM1: mov dword ptr [esp+12]
+; OPTM1: call ignore64BitArgNoInline
declare i32 @ignore64BitArgNoInline(i64, i32, i64)
ret i32 %call
}
; CHECK: pass64BitConstArg:
-; CHECK: push 3735928559
-; CHECK-NEXT: push 305419896
-; CHECK-NEXT: push 123
-; CHECK-NEXT: push ecx
-; CHECK-NEXT: push eax
+; CHECK: sub esp
+; CHECK: mov dword ptr [esp+4]
+; CHECK-NEXT: mov dword ptr [esp]
+; CHECK-NEXT: mov dword ptr [esp+8], 123
+; CHECK-NEXT: mov dword ptr [esp+16], 3735928559
+; CHECK-NEXT: mov dword ptr [esp+12], 305419896
; CHECK-NEXT: call ignore64BitArgNoInline
;
; OPTM1: pass64BitConstArg:
-; OPTM1: push 3735928559
-; OPTM1-NEXT: push 305419896
-; OPTM1-NEXT: push 123
-; OPTM1-NEXT: push dword ptr [
-; OPTM1-NEXT: push dword ptr [
+; OPTM1: sub esp
+; OPTM1: mov dword ptr [esp+4]
+; OPTM1-NEXT: mov dword ptr [esp]
+; OPTM1-NEXT: mov dword ptr [esp+8], 123
+; OPTM1-NEXT: mov dword ptr [esp+16], 3735928559
+; OPTM1-NEXT: mov dword ptr [esp+12], 305419896
; OPTM1-NEXT: call ignore64BitArgNoInline
define internal i64 @return64BitArg(i64 %a) {
ret i64 %div
}
; CHECK-LABEL: div64BitSignedConst:
-; CHECK: push 2874
-; CHECK: push 1942892530
+; CHECK: mov dword ptr [esp+12], 2874
+; CHECK: mov dword ptr [esp+8], 1942892530
; CHECK: call __divdi3
; CHECK: ret
;
; OPTM1-LABEL: div64BitSignedConst:
-; OPTM1: push 2874
-; OPTM1: push 1942892530
+; OPTM1: mov dword ptr [esp+12], 2874
+; OPTM1: mov dword ptr [esp+8], 1942892530
; OPTM1: call __divdi3
; OPTM1: ret
-; This is a basic test of the alloca instruction - one test for alloca
-; of a fixed size, and one test for variable size.
+; This is a basic test of the alloca instruction.
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
-; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
+; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -O2 --verbose none %s \
; RUN: | llvm-mc -arch=x86 -x86-asm-syntax=intel -filetype=obj
; RUN: %llvm2ice -Om1 --verbose none %s \
; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \
; RUN: | FileCheck --check-prefix=DUMP %s
-define void @fixed_400(i32 %n) {
+define void @fixed_416_align_16(i32 %n) {
entry:
- %array = alloca i8, i32 400, align 16
+ %array = alloca i8, i32 416, align 16
%__2 = ptrtoint i8* %array to i32
call void @f1(i32 %__2)
ret void
}
-; CHECK: fixed_400:
-; CHECK: sub esp, 400
-; CHECK-NEXT: mov eax, esp
-; CHECK-NEXT: push eax
-; CHECK-NEXT: call f1
-;
-; OPTM1: fixed_400:
-; OPTM1: sub esp, 400
-; OPTM1-NEXT: mov {{.*}}, esp
-; OPTM1: push
-; OPTM1-NEXT: call f1
+; CHECK-LABEL: fixed_416_align_16:
+; CHECK: sub esp, 416
+; CHECK: sub esp, 16
+; CHECK: mov dword ptr [esp], eax
+; CHECK: call f1
+
+define void @fixed_416_align_32(i32 %n) {
+entry:
+ %array = alloca i8, i32 400, align 32
+ %__2 = ptrtoint i8* %array to i32
+ call void @f1(i32 %__2)
+ ret void
+}
+; CHECK-LABEL: fixed_416_align_32:
+; CHECK: and esp, 4294967264
+; CHECK: sub esp, 416
+; CHECK: sub esp, 16
+; CHECK: mov dword ptr [esp], eax
+; CHECK: call f1
+
+define void @fixed_351_align_16(i32 %n) {
+entry:
+ %array = alloca i8, i32 351, align 16
+ %__2 = ptrtoint i8* %array to i32
+ call void @f1(i32 %__2)
+ ret void
+}
+; CHECK-LABEL: fixed_351_align_16:
+; CHECK: sub esp, 352
+; CHECK: sub esp, 16
+; CHECK: mov dword ptr [esp], eax
+; CHECK: call f1
+
+define void @fixed_351_align_32(i32 %n) {
+entry:
+ %array = alloca i8, i32 351, align 32
+ %__2 = ptrtoint i8* %array to i32
+ call void @f1(i32 %__2)
+ ret void
+}
+; CHECK-LABEL: fixed_351_align_32:
+; CHECK: and esp, 4294967264
+; CHECK: sub esp, 352
+; CHECK: sub esp, 16
+; CHECK: mov dword ptr [esp], eax
+; CHECK: call f1
declare void @f1(i32)
-define void @variable_n(i32 %n) {
+define void @variable_n_align_16(i32 %n) {
entry:
%array = alloca i8, i32 %n, align 16
%__2 = ptrtoint i8* %array to i32
call void @f2(i32 %__2)
ret void
}
-; CHECK: variable_n:
+; CHECK-LABEL: variable_n_align_16:
; CHECK: mov eax, dword ptr [ebp+8]
-; CHECK-NEXT: sub esp, eax
-; CHECK-NEXT: mov eax, esp
-; CHECK-NEXT: push eax
-; CHECK-NEXT: call f2
-;
-; OPTM1: variable_n:
-; OPTM1: mov {{.*}}, esp
-; OPTM1: push
-; OPTM1-NEXT: call f2
+; CHECK: add eax, 15
+; CHECK: and eax, 4294967280
+; CHECK: sub esp, eax
+; CHECK: sub esp, 16
+; CHECK: mov dword ptr [esp], eax
+; CHECK: call f2
+
+define void @variable_n_align_32(i32 %n) {
+entry:
+ %array = alloca i8, i32 %n, align 32
+ %__2 = ptrtoint i8* %array to i32
+ call void @f2(i32 %__2)
+ ret void
+}
+; In -O2, the instructions matched by the following CHECK-DAG lines
+; appear in the opposite order.
+; CHECK-LABEL: variable_n_align_32:
+; CHECK-DAG: and esp, 4294967264
+; CHECK-DAG: mov eax, dword ptr [ebp+8]
+; CHECK: add eax, 31
+; CHECK: and eax, 4294967264
+; CHECK: sub esp, eax
+; CHECK: sub esp, 16
+; CHECK: mov dword ptr [esp], eax
+; CHECK: call f2
declare void @f2(i32)
; lowering code changes.
; CHECK: memcpy_helper:
-; CHECK: push ebp
-; CHECK: mov ebp, esp
-; CHECK: sub esp, 20
-; CHECK: mov eax, dword ptr [ebp+12]
-; CHECK: mov dword ptr [ebp-4], eax
-; CHECK: sub esp, 128
-; CHECK: mov dword ptr [ebp-8], esp
-; CHECK: mov eax, dword ptr [ebp-8]
-; CHECK: mov dword ptr [ebp-12], eax
-; CHECK: movzx eax, byte ptr [ebp-4]
-; CHECK: mov dword ptr [ebp-16], eax
-; CHECK: push dword ptr [ebp-16]
-; CHECK: push dword ptr [ebp-12]
-; CHECK: push dword ptr [ebp+8]
-; CHECK: call memcpy_helper2
+; CHECK: push ebx
+; CHECK: push ebp
+; CHECK: mov ebp, esp
+; CHECK: sub esp, 20
+; CHECK: mov eax, dword ptr [ebp+16]
+; CHECK: mov dword ptr [ebp-4], eax
+; CHECK: sub esp, 128
+; CHECK: mov dword ptr [ebp-8], esp
+; CHECK: mov eax, dword ptr [ebp-8]
+; CHECK: mov dword ptr [ebp-12], eax
+; CHECK: movzx eax, byte ptr [ebp-4]
+; CHECK: mov dword ptr [ebp-16], eax
+; CHECK: sub esp, 16
+; CHECK: mov ecx, dword ptr [ebp+12]
+; CHECK: mov dword ptr [esp], ecx
+; CHECK: mov edx, dword ptr [ebp-12]
+; CHECK: mov dword ptr [esp+4], edx
+; CHECK: mov ebx, dword ptr [ebp-16]
+; CHECK: mov dword ptr [esp+8], ebx
+; CHECK: call memcpy_helper2
ret i32 %add3
}
; CHECK-LABEL: passFpArgs
-; CHECK: push 123
+; CHECK: mov dword ptr [esp+4], 123
; CHECK: call ignoreFpArgsNoInline
-; CHECK: push 123
+; CHECK: mov dword ptr [esp+4], 123
; CHECK: call ignoreFpArgsNoInline
-; CHECK: push 123
+; CHECK: mov dword ptr [esp+4], 123
; CHECK: call ignoreFpArgsNoInline
declare i32 @ignoreFpArgsNoInline(float, i32, double)
ret i32 %call
}
; CHECK-LABEL: passFpConstArg
-; CHECK: push 123
+; CHECK: mov dword ptr [esp+4], 123
; CHECK: call ignoreFpArgsNoInline
define internal i32 @passFp32ConstArg(float %a) {
ret i32 %call
}
; CHECK-LABEL: passFp32ConstArg
-; CHECK: push dword
-; CHECK: push 123
+; CHECK: mov dword ptr [esp+4], 123
+; CHECK: movss dword ptr [esp+8]
; CHECK: call ignoreFp32ArgsNoInline
declare i32 @ignoreFp32ArgsNoInline(float, i32, float)
ret double %conv
}
; CHECK-LABEL: unsigned64ToDouble
-; CHECK: push 2874
-; CHECK: push 1942892530
+; CHECK: mov dword ptr [esp+4], 2874
+; CHECK: mov dword ptr [esp], 1942892530
; CHECK: call cvtui64tod
; CHECK: fstp
entry:
ret float undef
; CHECK-LABEL: undef_float:
-; CHECK-NOT: sub esp
-; CHECK: fld
+; CHECK: [L$float$
}
define <4 x i1> @undef_v4i1() {
; This file checks that Subzero generates code in accordance with the
; calling convention for vectors.
-; NOTE: CHECK / OPTM1 lines containing the following strings may be
-; subject to change:
-;
-; * movups: The movups instruction may be changed to movaps when the
-; load / store operation is 16 byte aligned.
-;
-; * stack offsets: These may need to be changed if stack alignment
-; support is implemented.
-;
-; * stack adjustment operations
-
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s
; RUN: %llvm2ice -O2 --verbose none %s \
entry:
ret <4 x float> %arg4
; CHECK-LABEL: test_returning_interspersed_arg4:
-; CHECK: movups xmm0, xmmword ptr [esp+44]
+; CHECK: movups xmm0, xmmword ptr [esp+52]
; CHECK: ret
; OPTM1-LABEL: test_returning_interspersed_arg4:
call void @VectorArgs(<4 x float> %arg9, <4 x float> %arg8, <4 x float> %arg7, <4 x float> %arg6, <4 x float> %arg5, <4 x float> %arg4)
ret void
; CHECK-LABEL: test_passing_vectors:
-; CHECK: movups [[ARG6:.*]], xmmword ptr [esp+4]
-; CHECK: sub esp, 16
-; CHECK-NEXT: movups xmmword ptr [esp], [[ARG6]]
-; CHECK: movups [[ARG5:.*]], xmmword ptr [esp+36]
-; CHECK: sub esp, 16
-; CHECK-NEXT: movups xmmword ptr [esp], [[ARG5]]
-; CHECK: movups xmm0, xmmword ptr [esp+116]
-; CHECK: movups xmm1, xmmword ptr [esp+100]
-; CHECK: movups xmm2, xmmword ptr [esp+84]
-; CHECK: movups xmm3, xmmword ptr [esp+68]
+; CHECK: sub esp, 32
+; CHECK: movups [[ARG5:.*]], xmmword ptr [esp+64]
+; CHECK: movups xmmword ptr [esp], [[ARG5]]
+; CHECK: movups [[ARG6:.*]], xmmword ptr [esp+48]
+; CHECK: movups xmmword ptr [esp+16], [[ARG6]]
+; CHECK: movups xmm0, xmmword ptr [esp+128]
+; CHECK: movups xmm1, xmmword ptr [esp+112]
+; CHECK: movups xmm2, xmmword ptr [esp+96]
+; CHECK: movups xmm3, xmmword ptr [esp+80]
; CHECK: call VectorArgs
; CHECK-NEXT: add esp, 32
; CHECK: ret
; OPTM1-LABEL: test_passing_vectors:
-; OPTM1: movups [[ARG6:.*]], xmmword ptr {{.*}}
-; OPTM1: sub esp, 16
-; OPTM1: movups xmmword ptr [esp], [[ARG6]]
+; OPTM1: sub esp, 32
; OPTM1: movups [[ARG5:.*]], xmmword ptr {{.*}}
-; OPTM1: sub esp, 16
-; OPTM1-NEXT: movups xmmword ptr [esp], [[ARG5]]
+; OPTM1: movups xmmword ptr [esp], [[ARG5]]
+; OPTM1: movups [[ARG6:.*]], xmmword ptr {{.*}}
+; OPTM1: movups xmmword ptr [esp+16], [[ARG6]]
; OPTM1: movups xmm0, xmmword ptr {{.*}}
; OPTM1: movups xmm1, xmmword ptr {{.*}}
; OPTM1: movups xmm2, xmmword ptr {{.*}}
; OPTM1: movups xmm3, xmmword ptr {{.*}}
; OPTM1: call VectorArgs
-; OPTM1: add esp, 32
+; OPTM1-NEXT: add esp, 32
+; OPTM1: ret
+}
+
+declare void @InterspersedVectorArgs(<4 x float>, i64, <4 x float>, i64, <4 x float>, float, <4 x float>, double, <4 x float>, i32, <4 x float>)
+
+define void @test_passing_vectors_interspersed(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5, <4 x float> %arg6, <4 x float> %arg7, <4 x float> %arg8, <4 x float> %arg9) {
+entry:
+ ; Kills XMM registers so that no in-arg lowering code interferes
+ ; with the test.
+ call void @killXmmRegisters()
+ call void @InterspersedVectorArgs(<4 x float> %arg9, i64 0, <4 x float> %arg8, i64 1, <4 x float> %arg7, float 2.000000e+00, <4 x float> %arg6, double 3.000000e+00, <4 x float> %arg5, i32 4, <4 x float> %arg4)
+ ret void
+; CHECK-LABEL: test_passing_vectors_interspersed:
+; CHECK: sub esp, 80
+; CHECK: movups [[ARG9:.*]], xmmword ptr [esp+112]
+; CHECK: movups xmmword ptr [esp+32], [[ARG9]]
+; CHECK: movups [[ARG11:.*]], xmmword ptr [esp+96]
+; CHECK: movups xmmword ptr [esp+64], [[ARG11]]
+; CHECK: movups xmm0, xmmword ptr [esp+176]
+; CHECK: movups xmm1, xmmword ptr [esp+160]
+; CHECK: movups xmm2, xmmword ptr [esp+144]
+; CHECK: movups xmm3, xmmword ptr [esp+128]
+; CHECK: call InterspersedVectorArgs
+; CHECK-NEXT: add esp, 80
+; CHECK: ret
+
+; OPTM1-LABEL: test_passing_vectors_interspersed:
+; OPTM1: sub esp, 80
+; OPTM1: movups [[ARG9:.*]], xmmword ptr {{.*}}
+; OPTM1: movups xmmword ptr [esp+32], [[ARG9]]
+; OPTM1: movups [[ARG11:.*]], xmmword ptr {{.*}}
+; OPTM1: movups xmmword ptr [esp+64], [[ARG11]]
+; OPTM1: movups xmm0, xmmword ptr {{.*}}
+; OPTM1: movups xmm1, xmmword ptr {{.*}}
+; OPTM1: movups xmm2, xmmword ptr {{.*}}
+; OPTM1: movups xmm3, xmmword ptr {{.*}}
+; OPTM1: call InterspersedVectorArgs
+; OPTM1-NEXT: add esp, 80
; OPTM1: ret
}
; OPTM1-LABEL: test_receiving_vectors:
; OPTM1: call VectorReturn
-; OPTM1: movups [[LOC:.*]], xmm0
-; OPTM1: movups xmm0, [[LOC]]
+; OPTM1: movups {{.*}}, xmm0
+; OPTM1: movups xmm0, {{.*}}
; OPTM1: call VectorReturn
; OPTM1: ret
}