[PowerPC] Fix small argument stack slot offset for LE

author Ulrich Weigand <ulrich.weigand@de.ibm.com>

Fri, 20 Jun 2014 16:34:05 +0000 (16:34 +0000)

committer Ulrich Weigand <ulrich.weigand@de.ibm.com>

Fri, 20 Jun 2014 16:34:05 +0000 (16:34 +0000)
author Ulrich Weigand <ulrich.weigand@de.ibm.com>
Fri, 20 Jun 2014 16:34:05 +0000 (16:34 +0000)
committer Ulrich Weigand <ulrich.weigand@de.ibm.com>
Fri, 20 Jun 2014 16:34:05 +0000 (16:34 +0000)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index f6884d5..b20516e 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2433,6 +2433,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
                                        SmallVectorImpl<SDValue> &InVals) const {
    // TODO: add description of PPC stack frame format, or at least some docs.
    //
+  bool isLittleEndian = Subtarget.isLittleEndian();
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo *MFI = MF.getFrameInfo();
    PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -2533,7 +2534,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
        }
  
        // All aggregates smaller than 8 bytes must be passed right-justified.
-      if (ObjSize < PtrByteSize)
+      if (ObjSize < PtrByteSize && !isLittleEndian)
          CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
        // The value of the object is its address.
        int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
@@ -2683,9 +2684,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type.
      if (needsLoad) {
-      int FI = MFI->CreateFixedObject(ObjSize,
-                                      CurArgOffset + (ArgSize - ObjSize),
-                                      isImmutable);
+      if (ObjSize < ArgSize && !isLittleEndian)
+        CurArgOffset += ArgSize - ObjSize;
+      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                             false, false, false, 0);
@@ -4034,6 +4035,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  
+  bool isLittleEndian = Subtarget.isLittleEndian();
    unsigned NumOps = Outs.size();
  
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -4177,9 +4179,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        }
  
        if (GPR_idx == NumGPRs && Size < 8) {
-        SDValue Const = DAG.getConstant(PtrByteSize - Size,
-                                        PtrOff.getValueType());
-        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+        SDValue AddPtr = PtrOff;
+        if (!isLittleEndian) {
+          SDValue Const = DAG.getConstant(PtrByteSize - Size,
+                                          PtrOff.getValueType());
+          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+        }
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
@@ -4214,8 +4219,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
          // small aggregates, particularly for packed ones.
          // FIXME: It would be preferable to use the slot in the
          // parameter save area instead of a new local variable.
-        SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
-        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+        SDValue AddPtr = PtrOff;
+        if (!isLittleEndian) {
+          SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+        }
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
@@ -4276,7 +4284,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
            // must be passed right-justified in the stack doubleword, and
            // in the GPR, if one is available.
            SDValue StoreOff;
-          if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
+          if (Arg.getSimpleValueType().SimpleTy == MVT::f32 &&
+              !isLittleEndian) {
              SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
              StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            } else
@@ -4300,7 +4309,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        } else {
          // Single-precision floating-point values are mapped to the
          // second (rightmost) word of the stack doubleword.
-        if (Arg.getValueType() == MVT::f32) {
+        if (Arg.getValueType() == MVT::f32 && !isLittleEndian) {
            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
          }
diff --git a/test/CodeGen/PowerPC/ppc64-smallarg.ll b/test/CodeGen/PowerPC/ppc64-smallarg.ll

new file mode 100644 (file)

index 0000000..0d5b078
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-smallarg.ll
@@ -0,0 +1,59 @@
+; Verify that small structures and float arguments are passed in the
+; least significant part of a stack slot doubleword.
+
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.large_arg = type { [8 x i64] }
+%struct.small_arg = type { i16, i8 }
+
+@gl = common global %struct.large_arg zeroinitializer, align 8
+@gs = common global %struct.small_arg zeroinitializer, align 2
+@gf = common global float 0.000000e+00, align 4
+
+define void @callee1(%struct.small_arg* noalias nocapture sret %agg.result, %struct.large_arg* byval nocapture readnone %pad, %struct.small_arg* byval nocapture readonly %x) {
+entry:
+  %0 = bitcast %struct.small_arg* %x to i32*
+  %1 = bitcast %struct.small_arg* %agg.result to i32*
+  %2 = load i32* %0, align 2
+  store i32 %2, i32* %1, align 2
+  ret void
+}
+; CHECK: @callee1
+; CHECK: lwz {{[0-9]+}}, 124(1)
+; CHECK: blr
+
+define void @caller1() {
+entry:
+  %tmp = alloca %struct.small_arg, align 2
+  call void @test1(%struct.small_arg* sret %tmp, %struct.large_arg* byval @gl, %struct.small_arg* byval @gs)
+  ret void
+}
+; CHECK: @caller1
+; CHECK: stw {{[0-9]+}}, 124(1)
+; CHECK: bl test1
+
+declare void @test1(%struct.small_arg* sret, %struct.large_arg* byval, %struct.small_arg* byval)
+
+define float @callee2(float %pad1, float %pad2, float %pad3, float %pad4, float %pad5, float %pad6, float %pad7, float %pad8, float %pad9, float %pad10, float %pad11, float %pad12, float %pad13, float %x) {
+entry:
+  ret float %x
+}
+; CHECK: @callee2
+; CHECK: lfs {{[0-9]+}}, 156(1)
+; CHECK: blr
+
+define void @caller2() {
+entry:
+  %0 = load float* @gf, align 4
+  %call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0)
+  ret void
+}
+; CHECK: @caller2
+; CHECK: stfs {{[0-9]+}}, 156(1)
+; CHECK: bl test2
+
+declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
+
diff --git a/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/test/CodeGen/PowerPC/ppc64le-smallarg.ll

new file mode 100644 (file)

index 0000000..fcb1e92
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64le-smallarg.ll
@@ -0,0 +1,59 @@
+; Verify that small structures and float arguments are passed in the
+; least significant part of a stack slot doubleword.
+
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%struct.large_arg = type { [8 x i64] }
+%struct.small_arg = type { i16, i8 }
+
+@gl = common global %struct.large_arg zeroinitializer, align 8
+@gs = common global %struct.small_arg zeroinitializer, align 2
+@gf = common global float 0.000000e+00, align 4
+
+define void @callee1(%struct.small_arg* noalias nocapture sret %agg.result, %struct.large_arg* byval nocapture readnone %pad, %struct.small_arg* byval nocapture readonly %x) {
+entry:
+  %0 = bitcast %struct.small_arg* %x to i32*
+  %1 = bitcast %struct.small_arg* %agg.result to i32*
+  %2 = load i32* %0, align 2
+  store i32 %2, i32* %1, align 2
+  ret void
+}
+; CHECK: @callee1
+; CHECK: lwz {{[0-9]+}}, 120(1)
+; CHECK: blr
+
+define void @caller1() {
+entry:
+  %tmp = alloca %struct.small_arg, align 2
+  call void @test1(%struct.small_arg* sret %tmp, %struct.large_arg* byval @gl, %struct.small_arg* byval @gs)
+  ret void
+}
+; CHECK: @caller1
+; CHECK: stw {{[0-9]+}}, 120(1)
+; CHECK: bl test1
+
+declare void @test1(%struct.small_arg* sret, %struct.large_arg* byval, %struct.small_arg* byval)
+
+define float @callee2(float %pad1, float %pad2, float %pad3, float %pad4, float %pad5, float %pad6, float %pad7, float %pad8, float %pad9, float %pad10, float %pad11, float %pad12, float %pad13, float %x) {
+entry:
+  ret float %x
+}
+; CHECK: @callee2
+; CHECK: lfs {{[0-9]+}}, 152(1)
+; CHECK: blr
+
+define void @caller2() {
+entry:
+  %0 = load float* @gf, align 4
+  %call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0)
+  ret void
+}
+; CHECK: @caller2
+; CHECK: stfs {{[0-9]+}}, 152(1)
+; CHECK: bl test2
+
+declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
+
author	Ulrich Weigand <ulrich.weigand@de.ibm.com>
	Fri, 20 Jun 2014 16:34:05 +0000 (16:34 +0000)
committer	Ulrich Weigand <ulrich.weigand@de.ibm.com>
	Fri, 20 Jun 2014 16:34:05 +0000 (16:34 +0000)
lib/Target/PowerPC/PPCISelLowering.cpp		patch | blob | history
test/CodeGen/PowerPC/ppc64-smallarg.ll	[new file with mode: 0644]	patch | blob
test/CodeGen/PowerPC/ppc64le-smallarg.ll	[new file with mode: 0644]	patch | blob