From 722e9e6d0a5b67d136be40bc015abc5b0b32f97b Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Thu, 27 Jun 2013 09:27:40 +0000
Subject: [PATCH] [SystemZ] Add conditional store patterns

Add pseudo conditional store instructions, so that we use:

    branch foo:
    store
foo:

instead of:

    load
    branch foo:
    move
foo:
    store

z196 has real 32-bit and 64-bit conditional stores, but we don't use
any z196 instructions yet.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185065 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/SystemZ/SystemZISelLowering.cpp |  90 +++++++
 lib/Target/SystemZ/SystemZISelLowering.h   |   3 +
 lib/Target/SystemZ/SystemZInstrFP.td       |   7 +-
 lib/Target/SystemZ/SystemZInstrFormats.td  |  13 +
 lib/Target/SystemZ/SystemZInstrInfo.td     |  20 ++
 lib/Target/SystemZ/SystemZOperators.td     |  24 +-
 lib/Target/SystemZ/SystemZPatterns.td      |   8 +-
 test/CodeGen/SystemZ/cond-store-01.ll      | 396 +++++++++++++++++++++++++++++
 test/CodeGen/SystemZ/cond-store-02.ll      | 396 +++++++++++++++++++++++++++++
 test/CodeGen/SystemZ/cond-store-03.ll      | 322 +++++++++++++++++
 test/CodeGen/SystemZ/cond-store-04.ll      | 214 ++++++++++++++++
 test/CodeGen/SystemZ/cond-store-05.ll      | 213 ++++++++++++++++
 test/CodeGen/SystemZ/cond-store-06.ll      | 213 ++++++++++++++++
 13 files changed, 1910 insertions(+), 9 deletions(-)
 create mode 100644 test/CodeGen/SystemZ/cond-store-01.ll
 create mode 100644 test/CodeGen/SystemZ/cond-store-02.ll
 create mode 100644 test/CodeGen/SystemZ/cond-store-03.ll
 create mode 100644 test/CodeGen/SystemZ/cond-store-04.ll
 create mode 100644 test/CodeGen/SystemZ/cond-store-05.ll
 create mode 100644 test/CodeGen/SystemZ/cond-store-06.ll

diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 0b0dbeaef14..955b88e1630 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1696,6 +1696,59 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
   return JoinMBB;
 }
 
+// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
+// StoreOpcode is the store to use and Invert says whether the store should
+// happen when the condition is false rather than true.
+MachineBasicBlock *
+SystemZTargetLowering::emitCondStore(MachineInstr *MI,
+                                     MachineBasicBlock *MBB,
+                                     unsigned StoreOpcode, bool Invert) const {
+  const SystemZInstrInfo *TII = TM.getInstrInfo();
+
+  MachineOperand Base = MI->getOperand(0);
+  int64_t Disp = MI->getOperand(1).getImm();
+  unsigned IndexReg = MI->getOperand(2).getReg();
+  unsigned SrcReg = MI->getOperand(3).getReg();
+  unsigned CCMask = MI->getOperand(4).getImm();
+  DebugLoc DL = MI->getDebugLoc();
+
+  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
+
+  // Get the condition needed to branch around the store.
+  if (!Invert)
+    CCMask = CCMask ^ SystemZ::CCMASK_ANY;
+
+  MachineBasicBlock *StartMBB = MBB;
+  MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
+  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+
+  // StartMBB:
+  //   BRC CCMask, JoinMBB
+  //   # fallthrough to FalseMBB
+  //
+  // The original DAG glues comparisons to their uses, both to ensure
+  // that no CC-clobbering instructions are inserted between them, and
+  // to ensure that comparison results are not reused. This means that
+  // this CondStore is the sole user of any preceding comparison instruction
+  // and that we can try to use a fused compare and branch instead.
+  MBB = StartMBB;
+  if (!convertPrevCompareToBranch(MBB, MI, CCMask, JoinMBB))
+    BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(CCMask).addMBB(JoinMBB);
+  MBB->addSuccessor(JoinMBB);
+  MBB->addSuccessor(FalseMBB);
+
+  // FalseMBB:
+  //   store %SrcReg, %Disp(%Index,%Base)
+  //   # fallthrough to JoinMBB
+  MBB = FalseMBB;
+  BuildMI(MBB, DL, TII->get(StoreOpcode))
+    .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
+  MBB->addSuccessor(JoinMBB);
+
+  MI->eraseFromParent();
+  return JoinMBB;
+}
+
 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
 // or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
 // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
@@ -2100,6 +2153,43 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
   case SystemZ::SelectF128:
     return emitSelect(MI, MBB);
 
+  case SystemZ::CondStore8_32:
+    return emitCondStore(MI, MBB, SystemZ::STC32, false);
+  case SystemZ::CondStore8_32Inv:
+    return emitCondStore(MI, MBB, SystemZ::STC32, true);
+  case SystemZ::CondStore16_32:
+    return emitCondStore(MI, MBB, SystemZ::STH32, false);
+  case SystemZ::CondStore16_32Inv:
+    return emitCondStore(MI, MBB, SystemZ::STH32, true);
+  case SystemZ::CondStore32_32:
+    return emitCondStore(MI, MBB, SystemZ::ST32, false);
+  case SystemZ::CondStore32_32Inv:
+    return emitCondStore(MI, MBB, SystemZ::ST32, true);
+  case SystemZ::CondStore8:
+    return emitCondStore(MI, MBB, SystemZ::STC, false);
+  case SystemZ::CondStore8Inv:
+    return emitCondStore(MI, MBB, SystemZ::STC, true);
+  case SystemZ::CondStore16:
+    return emitCondStore(MI, MBB, SystemZ::STH, false);
+  case SystemZ::CondStore16Inv:
+    return emitCondStore(MI, MBB, SystemZ::STH, true);
+  case SystemZ::CondStore32:
+    return emitCondStore(MI, MBB, SystemZ::ST, false);
+  case SystemZ::CondStore32Inv:
+    return emitCondStore(MI, MBB, SystemZ::ST, true);
+  case SystemZ::CondStore64:
+    return emitCondStore(MI, MBB, SystemZ::STG, false);
+  case SystemZ::CondStore64Inv:
+    return emitCondStore(MI, MBB, SystemZ::STG, true);
+  case SystemZ::CondStoreF32:
+    return emitCondStore(MI, MBB, SystemZ::STE, false);
+  case SystemZ::CondStoreF32Inv:
+    return emitCondStore(MI, MBB, SystemZ::STE, true);
+  case SystemZ::CondStoreF64:
+    return emitCondStore(MI, MBB, SystemZ::STD, false);
+  case SystemZ::CondStoreF64Inv:
+    return emitCondStore(MI, MBB, SystemZ::STD, true);
+
   case SystemZ::AEXT128_64:
     return emitExt128(MI, MBB, false, SystemZ::subreg_low);
   case SystemZ::ZEXT128_32:
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index f48cc4f9654..f6c49f066a9 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -203,6 +203,9 @@ private:
   // Implement EmitInstrWithCustomInserter for individual operation types.
MachineBasicBlock *emitSelect(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitCondStore(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned StoreOpcode, bool Invert) const; MachineBasicBlock *emitExt128(MachineInstr *MI, MachineBasicBlock *MBB, bool ClearEven, unsigned SubReg) const; diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 86ef14c69b5..7499d2fb8d9 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Control-flow instructions +// Select instructions //===----------------------------------------------------------------------===// // C's ?: operator for floating-point operands. @@ -16,6 +16,11 @@ def SelectF32 : SelectWrapper; def SelectF64 : SelectWrapper; def SelectF128 : SelectWrapper; +defm CondStoreF32 : CondStores; +defm CondStoreF64 : CondStores; + //===----------------------------------------------------------------------===// // Move instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index ad050fd10cc..ac0300c95e0 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -956,6 +956,19 @@ class SelectWrapper let Uses = [CC]; } +// Stores $new to $addr if $cc is true ("" case) or false (Inv case). +multiclass CondStores { + let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in { + def "" : Pseudo<(outs), (ins mode:$addr, cls:$new, i8imm:$cc), + [(store (z_select_ccmask cls:$new, (load mode:$addr), + imm:$cc), mode:$addr)]>; + def Inv : Pseudo<(outs), (ins mode:$addr, cls:$new, i8imm:$cc), + [(store (z_select_ccmask (load mode:$addr), cls:$new, + imm:$cc), mode:$addr)]>; + } +} + // OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND // describe the second (non-memory) operand. class AtomicLoadBinary; defm AsmJHE : IntCondExtendedMnemonic<10, "he", "nl">; defm AsmJLE : IntCondExtendedMnemonic<12, "le", "nh">; +//===----------------------------------------------------------------------===// +// Select instructions +//===----------------------------------------------------------------------===// + def Select32 : SelectWrapper; def Select64 : SelectWrapper; +defm CondStore8_32 : CondStores; +defm CondStore16_32 : CondStores; +defm CondStore32_32 : CondStores; + +defm CondStore8 : CondStores; +defm CondStore16 : CondStores; +defm CondStore32 : CondStores; +defm CondStore64 : CondStores; + //===----------------------------------------------------------------------===// // Call instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index ab01b2527a8..021824e23c5 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -120,6 +120,20 @@ def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>; def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>; +// Extending loads in which the extension type doesn't matter. 
+def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ + return cast(N)->getExtensionType() != ISD::NON_EXTLOAD; +}]>; +def anyextloadi8 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; +def anyextloadi16 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; +def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + // Aligned loads. class AlignedLoad : PatFrag<(ops node:$addr), (load node:$addr), [{ @@ -149,7 +163,10 @@ class NonvolatileLoad LoadSDNode *Load = cast(N); return !Load->isVolatile(); }]>; -def nonvolatile_load : NonvolatileLoad; +def nonvolatile_load : NonvolatileLoad; +def nonvolatile_anyextloadi8 : NonvolatileLoad; +def nonvolatile_anyextloadi16 : NonvolatileLoad; +def nonvolatile_anyextloadi32 : NonvolatileLoad; // Non-volatile stores. class NonvolatileStore @@ -157,7 +174,10 @@ class NonvolatileStore StoreSDNode *Store = cast(N); return !Store->isVolatile(); }]>; -def nonvolatile_store : NonvolatileStore; +def nonvolatile_store : NonvolatileStore; +def nonvolatile_truncstorei8 : NonvolatileStore; +def nonvolatile_truncstorei16 : NonvolatileStore; +def nonvolatile_truncstorei32 : NonvolatileStore; // Insertions. def inserti8 : PatFrag<(ops node:$src1, node:$src2), diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index 3689f74bfd4..fb6c2219df9 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -50,12 +50,8 @@ class RMWI { - def : RMWI; - def : RMWI; - def : RMWI; - def : RMWI; - def : RMWI; - def : RMWI; + def : RMWI; + def : RMWI; } // Record that INSN performs insertion TYPE into a register of class CLS. diff --git a/test/CodeGen/SystemZ/cond-store-01.ll b/test/CodeGen/SystemZ/cond-store-01.ll new file mode 100644 index 00000000000..fadcae5e90a --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-01.ll @@ -0,0 +1,396 @@ +; Test 8-bit conditional stores that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @foo(i8 *) + +; Test the simple case, with the loaded value first. +define void @f1(i8 *%ptr, i8 %alt, i32 %limit) { +; CHECK: f1: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i8 *%ptr, i8 %alt, i32 %limit) { +; CHECK: f2: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %alt, i8 %orig + store i8 %res, i8 *%ptr + ret void +} + +; Test cases where the value is explicitly sign-extended to 32 bits, with the +; loaded value first. 
+define void @f3(i8 *%ptr, i32 %alt, i32 %limit) { +; CHECK: f3: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %ext = sext i8 %orig to i32 + %res = select i1 %cond, i32 %ext, i32 %alt + %trunc = trunc i32 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f4(i8 *%ptr, i32 %alt, i32 %limit) { +; CHECK: f4: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %ext = sext i8 %orig to i32 + %res = select i1 %cond, i32 %alt, i32 %ext + %trunc = trunc i32 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Test cases where the value is explicitly zero-extended to 32 bits, with the +; loaded value first. +define void @f5(i8 *%ptr, i32 %alt, i32 %limit) { +; CHECK: f5: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %ext = zext i8 %orig to i32 + %res = select i1 %cond, i32 %ext, i32 %alt + %trunc = trunc i32 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f6(i8 *%ptr, i32 %alt, i32 %limit) { +; CHECK: f6: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %ext = zext i8 %orig to i32 + %res = select i1 %cond, i32 %alt, i32 %ext + %trunc = trunc i32 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Test cases where the value is explicitly sign-extended to 64 bits, with the +; loaded value first. +define void @f7(i8 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f7: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %ext = sext i8 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f8(i8 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f8: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %ext = sext i8 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Test cases where the value is explicitly zero-extended to 64 bits, with the +; loaded value first. 
+define void @f9(i8 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f9: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %ext = zext i8 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f10(i8 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f10: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %ext = zext i8 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i8 + store i8 %trunc, i8 *%ptr + ret void +} + +; Check the high end of the STC range. +define void @f11(i8 *%base, i8 %alt, i32 %limit) { +; CHECK: f11: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stc %r3, 4095(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i8 *%base, i64 4095 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check the next byte up, which should use STCY instead of STC. +define void @f12(i8 *%base, i8 %alt, i32 %limit) { +; CHECK: f12: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stcy %r3, 4096(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i8 *%base, i64 4096 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check the high end of the STCY range. +define void @f13(i8 *%base, i8 %alt, i32 %limit) { +; CHECK: f13: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stcy %r3, 524287(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i8 *%base, i64 524287 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check the next byte up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f14(i8 *%base, i8 %alt, i32 %limit) { +; CHECK: f14: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, 524288 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i8 *%base, i64 524288 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check the low end of the STCY range. +define void @f15(i8 *%base, i8 %alt, i32 %limit) { +; CHECK: f15: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stcy %r3, -524288(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i8 *%base, i64 -524288 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check the next byte down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define void @f16(i8 *%base, i8 %alt, i32 %limit) { +; CHECK: f16: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, -524289 +; CHECK: stc %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i8 *%base, i64 -524289 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check that STCY allows an index. +define void @f17(i64 %base, i64 %index, i8 %alt, i32 %limit) { +; CHECK: f17: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stcy %r4, 4096(%r3,%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i8 * + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; Check that volatile loads are not matched. +define void @f18(i8 *%ptr, i8 %alt, i32 %limit) { +; CHECK: f18: +; CHECK: lb {{%r[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: stc {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load volatile i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; ...likewise stores. In this case we should have a conditional load into %r3. +define void @f19(i8 *%ptr, i8 %alt, i32 %limit) { +; CHECK: f19: +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK: lb %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: stc %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store volatile i8 %res, i8 *%ptr + ret void +} + +; Check that atomic loads are not matched. The transformation is OK for +; the "unordered" case tested here, but since we don't try to handle atomic +; operations at all in this context, it seems better to assert that than +; to restrict the test to a stronger ordering. +define void @f20(i8 *%ptr, i8 %alt, i32 %limit) { +; FIXME: should use a normal load instead of CS. +; CHECK: f20: +; CHECK: cs {{%r[0-9]+}}, +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: stc {{%r[0-9]+}}, +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load atomic i8 *%ptr unordered, align 1 + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + ret void +} + +; ...likewise stores. +define void @f21(i8 *%ptr, i8 %alt, i32 %limit) { +; FIXME: should use a normal store instead of CS. +; CHECK: f21: +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK: lb %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: cs {{%r[0-9]+}}, +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store atomic i8 %res, i8 *%ptr unordered, align 1 + ret void +} + +; Try a frame index base. 
+define void @f22(i8 %alt, i32 %limit) { +; CHECK: f22: +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r15 +; CHECK: stc {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: [[LABEL]]: +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i8 + call void @foo(i8 *%ptr) + %cond = icmp ult i32 %limit, 42 + %orig = load i8 *%ptr + %res = select i1 %cond, i8 %orig, i8 %alt + store i8 %res, i8 *%ptr + call void @foo(i8 *%ptr) + ret void +} diff --git a/test/CodeGen/SystemZ/cond-store-02.ll b/test/CodeGen/SystemZ/cond-store-02.ll new file mode 100644 index 00000000000..51f3ffc0f75 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-02.ll @@ -0,0 +1,396 @@ +; Test 16-bit conditional stores that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @foo(i16 *) + +; Test the simple case, with the loaded value first. +define void @f1(i16 *%ptr, i16 %alt, i32 %limit) { +; CHECK: f1: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i16 *%ptr, i16 %alt, i32 %limit) { +; CHECK: f2: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %alt, i16 %orig + store i16 %res, i16 *%ptr + ret void +} + +; Test cases where the value is explicitly sign-extended to 32 bits, with the +; loaded value first. +define void @f3(i16 *%ptr, i32 %alt, i32 %limit) { +; CHECK: f3: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %ext = sext i16 %orig to i32 + %res = select i1 %cond, i32 %ext, i32 %alt + %trunc = trunc i32 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f4(i16 *%ptr, i32 %alt, i32 %limit) { +; CHECK: f4: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %ext = sext i16 %orig to i32 + %res = select i1 %cond, i32 %alt, i32 %ext + %trunc = trunc i32 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; Test cases where the value is explicitly zero-extended to 32 bits, with the +; loaded value first. 
+define void @f5(i16 *%ptr, i32 %alt, i32 %limit) { +; CHECK: f5: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %ext = zext i16 %orig to i32 + %res = select i1 %cond, i32 %ext, i32 %alt + %trunc = trunc i32 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f6(i16 *%ptr, i32 %alt, i32 %limit) { +; CHECK: f6: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %ext = zext i16 %orig to i32 + %res = select i1 %cond, i32 %alt, i32 %ext + %trunc = trunc i32 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; Test cases where the value is explicitly sign-extended to 64 bits, with the +; loaded value first. +define void @f7(i16 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f7: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %ext = sext i16 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f8(i16 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f8: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %ext = sext i16 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; Test cases where the value is explicitly zero-extended to 64 bits, with the +; loaded value first. +define void @f9(i16 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f9: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %ext = zext i16 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f10(i16 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f10: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %ext = zext i16 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i16 + store i16 %trunc, i16 *%ptr + ret void +} + +; Check the high end of the aligned STH range. +define void @f11(i16 *%base, i16 %alt, i32 %limit) { +; CHECK: f11: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sth %r3, 4094(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 2047 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check the next halfword up, which should use STHY instead of STH. 
+define void @f12(i16 *%base, i16 %alt, i32 %limit) { +; CHECK: f12: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sthy %r3, 4096(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 2048 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check the high end of the aligned STHY range. +define void @f13(i16 *%base, i16 %alt, i32 %limit) { +; CHECK: f13: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sthy %r3, 524286(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 262143 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f14(i16 *%base, i16 %alt, i32 %limit) { +; CHECK: f14: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, 524288 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 262144 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check the low end of the STHY range. +define void @f15(i16 *%base, i16 %alt, i32 %limit) { +; CHECK: f15: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sthy %r3, -524288(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 -262144 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f16(i16 *%base, i16 %alt, i32 %limit) { +; CHECK: f16: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, -524290 +; CHECK: sth %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i16 *%base, i64 -262145 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check that STHY allows an index. +define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) { +; CHECK: f17: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sthy %r4, 4096(%r3,%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i16 * + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; Check that volatile loads are not matched. +define void @f18(i16 *%ptr, i16 %alt, i32 %limit) { +; CHECK: f18: +; CHECK: lh {{%r[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: sth {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load volatile i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; ...likewise stores. In this case we should have a conditional load into %r3. 
+define void @f19(i16 *%ptr, i16 %alt, i32 %limit) { +; CHECK: f19: +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK: lh %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: sth %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store volatile i16 %res, i16 *%ptr + ret void +} + +; Check that atomic loads are not matched. The transformation is OK for +; the "unordered" case tested here, but since we don't try to handle atomic +; operations at all in this context, it seems better to assert that than +; to restrict the test to a stronger ordering. +define void @f20(i16 *%ptr, i16 %alt, i32 %limit) { +; FIXME: should use a normal load instead of CS. +; CHECK: f20: +; CHECK: cs {{%r[0-9]+}}, +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: sth {{%r[0-9]+}}, +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load atomic i16 *%ptr unordered, align 2 + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + ret void +} + +; ...likewise stores. +define void @f21(i16 *%ptr, i16 %alt, i32 %limit) { +; FIXME: should use a normal store instead of CS. +; CHECK: f21: +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK: lh %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: cs {{%r[0-9]+}}, +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store atomic i16 %res, i16 *%ptr unordered, align 2 + ret void +} + +; Try a frame index base. +define void @f22(i16 %alt, i32 %limit) { +; CHECK: f22: +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r15 +; CHECK: sth {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: [[LABEL]]: +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i16 + call void @foo(i16 *%ptr) + %cond = icmp ult i32 %limit, 42 + %orig = load i16 *%ptr + %res = select i1 %cond, i16 %orig, i16 %alt + store i16 %res, i16 *%ptr + call void @foo(i16 *%ptr) + ret void +} diff --git a/test/CodeGen/SystemZ/cond-store-03.ll b/test/CodeGen/SystemZ/cond-store-03.ll new file mode 100644 index 00000000000..6f19fbc3598 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-03.ll @@ -0,0 +1,322 @@ +; Test 32-bit conditional stores that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @foo(i32 *) + +; Test the simple case, with the loaded value first. +define void @f1(i32 *%ptr, i32 %alt, i32 %limit) { +; CHECK: f1: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i32 *%ptr, i32 %alt, i32 %limit) { +; CHECK: f2: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %alt, i32 %orig + store i32 %res, i32 *%ptr + ret void +} + +; Test cases where the value is explicitly sign-extended to 64 bits, with the +; loaded value first. 
+define void @f3(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f3: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %ext = sext i32 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f4(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f4: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %ext = sext i32 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; Test cases where the value is explicitly zero-extended to 32 bits, with the +; loaded value first. +define void @f5(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f5: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %ext = zext i32 %orig to i64 + %res = select i1 %cond, i64 %ext, i64 %alt + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f6(i32 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f6: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %ext = zext i32 %orig to i64 + %res = select i1 %cond, i64 %alt, i64 %ext + %trunc = trunc i64 %res to i32 + store i32 %trunc, i32 *%ptr + ret void +} + +; Check the high end of the aligned ST range. +define void @f7(i32 *%base, i32 %alt, i32 %limit) { +; CHECK: f7: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: st %r3, 4092(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 1023 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the next word up, which should use STY instead of ST. +define void @f8(i32 *%base, i32 %alt, i32 %limit) { +; CHECK: f8: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sty %r3, 4096(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 1024 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the high end of the aligned STY range. +define void @f9(i32 *%base, i32 %alt, i32 %limit) { +; CHECK: f9: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sty %r3, 524284(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131071 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define void @f10(i32 *%base, i32 %alt, i32 %limit) { +; CHECK: f10: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, 524288 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 131072 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the low end of the STY range. +define void @f11(i32 *%base, i32 %alt, i32 %limit) { +; CHECK: f11: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sty %r3, -524288(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131072 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f12(i32 *%base, i32 %alt, i32 %limit) { +; CHECK: f12: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, -524292 +; CHECK: st %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i32 *%base, i64 -131073 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check that STY allows an index. +define void @f13(i64 %base, i64 %index, i32 %alt, i32 %limit) { +; CHECK: f13: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: sty %r4, 4096(%r3,%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; Check that volatile loads are not matched. +define void @f14(i32 *%ptr, i32 %alt, i32 %limit) { +; CHECK: f14: +; CHECK: l {{%r[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: st {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load volatile i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; ...likewise stores. In this case we should have a conditional load into %r3. +define void @f15(i32 *%ptr, i32 %alt, i32 %limit) { +; CHECK: f15: +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK: l %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: st %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store volatile i32 %res, i32 *%ptr + ret void +} + +; Check that atomic loads are not matched. The transformation is OK for +; the "unordered" case tested here, but since we don't try to handle atomic +; operations at all in this context, it seems better to assert that than +; to restrict the test to a stronger ordering. +define void @f16(i32 *%ptr, i32 %alt, i32 %limit) { +; FIXME: should use a normal load instead of CS. +; CHECK: f16: +; CHECK: cs {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: st {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load atomic i32 *%ptr unordered, align 4 + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + ret void +} + +; ...likewise stores. 
+define void @f17(i32 *%ptr, i32 %alt, i32 %limit) { +; FIXME: should use a normal store instead of CS. +; CHECK: f17: +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK: l %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: cs {{%r[0-5]}}, %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store atomic i32 %res, i32 *%ptr unordered, align 4 + ret void +} + +; Try a frame index base. +define void @f18(i32 %alt, i32 %limit) { +; CHECK: f18: +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r15 +; CHECK: st {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: [[LABEL]]: +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i32 + call void @foo(i32 *%ptr) + %cond = icmp ult i32 %limit, 42 + %orig = load i32 *%ptr + %res = select i1 %cond, i32 %orig, i32 %alt + store i32 %res, i32 *%ptr + call void @foo(i32 *%ptr) + ret void +} diff --git a/test/CodeGen/SystemZ/cond-store-04.ll b/test/CodeGen/SystemZ/cond-store-04.ll new file mode 100644 index 00000000000..22f5fd42265 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-04.ll @@ -0,0 +1,214 @@ +; Test 64-bit conditional stores that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @foo(i64 *) + +; Test with the loaded value first. +define void @f1(i64 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f1: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stg %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(i64 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f2: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stg %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %alt, i64 %orig + store i64 %res, i64 *%ptr + ret void +} + +; Check the high end of the aligned STG range. +define void @f3(i64 *%base, i64 %alt, i32 %limit) { +; CHECK: f3: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stg %r3, 524280(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65535 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f4(i64 *%base, i64 %alt, i32 %limit) { +; CHECK: f4: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, 524288 +; CHECK: stg %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 65536 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check the low end of the STG range. 
+define void @f5(i64 *%base, i64 %alt, i32 %limit) { +; CHECK: f5: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stg %r3, -524288(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65536 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f6(i64 *%base, i64 %alt, i32 %limit) { +; CHECK: f6: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, -524296 +; CHECK: stg %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr i64 *%base, i64 -65537 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check that STG allows an index. +define void @f7(i64 %base, i64 %index, i64 %alt, i32 %limit) { +; CHECK: f7: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stg %r4, 524287(%r3,%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to i64 * + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; Check that volatile loads are not matched. +define void @f8(i64 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f8: +; CHECK: lg {{%r[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: stg {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load volatile i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; ...likewise stores. In this case we should have a conditional load into %r3. +define void @f9(i64 *%ptr, i64 %alt, i32 %limit) { +; CHECK: f9: +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK: lg %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: stg %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store volatile i64 %res, i64 *%ptr + ret void +} + +; Check that atomic loads are not matched. The transformation is OK for +; the "unordered" case tested here, but since we don't try to handle atomic +; operations at all in this context, it seems better to assert that than +; to restrict the test to a stronger ordering. +define void @f10(i64 *%ptr, i64 %alt, i32 %limit) { +; FIXME: should use a normal load instead of CSG. +; CHECK: f10: +; CHECK: csg {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: stg {{%r[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load atomic i64 *%ptr unordered, align 8 + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + ret void +} + +; ...likewise stores. +define void @f11(i64 *%ptr, i64 %alt, i32 %limit) { +; FIXME: should use a normal store instead of CSG. +; CHECK: f11: +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK: lg %r3, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: csg {{%r[0-5]}}, %r3, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store atomic i64 %res, i64 *%ptr unordered, align 8 + ret void +} + +; Try a frame index base. 
+define void @f12(i64 %alt, i32 %limit) { +; CHECK: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r15 +; CHECK: stg {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: [[LABEL]]: +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca i64 + call void @foo(i64 *%ptr) + %cond = icmp ult i32 %limit, 42 + %orig = load i64 *%ptr + %res = select i1 %cond, i64 %orig, i64 %alt + store i64 %res, i64 *%ptr + call void @foo(i64 *%ptr) + ret void +} diff --git a/test/CodeGen/SystemZ/cond-store-05.ll b/test/CodeGen/SystemZ/cond-store-05.ll new file mode 100644 index 00000000000..5bcfed0cd4a --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-05.ll @@ -0,0 +1,213 @@ +; Test f32 conditional stores that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @foo(float *) + +; Test with the loaded value first. +define void @f1(float *%ptr, float %alt, i32 %limit) { +; CHECK: f1: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: ste %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(float *%ptr, float %alt, i32 %limit) { +; CHECK: f2: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: ste %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load float *%ptr + %res = select i1 %cond, float %alt, float %orig + store float %res, float *%ptr + ret void +} + +; Check the high end of the aligned STE range. +define void @f3(float *%base, float %alt, i32 %limit) { +; CHECK: f3: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: ste %f0, 4092(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1023 + %cond = icmp ult i32 %limit, 42 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check the next word up, which should use STEY instead of STE. +define void @f4(float *%base, float %alt, i32 %limit) { +; CHECK: f4: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stey %f0, 4096(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 1024 + %cond = icmp ult i32 %limit, 42 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check the high end of the aligned STEY range. +define void @f5(float *%base, float %alt, i32 %limit) { +; CHECK: f5: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stey %f0, 524284(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 131071 + %cond = icmp ult i32 %limit, 42 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define void @f6(float *%base, float %alt, i32 %limit) { +; CHECK: f6: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, 524288 +; CHECK: ste %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 131072 + %cond = icmp ult i32 %limit, 42 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check the low end of the STEY range. +define void @f7(float *%base, float %alt, i32 %limit) { +; CHECK: f7: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stey %f0, -524288(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -131072 + %cond = icmp ult i32 %limit, 42 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f8(float *%base, float %alt, i32 %limit) { +; CHECK: f8: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, -524292 +; CHECK: ste %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr float *%base, i64 -131073 + %cond = icmp ult i32 %limit, 42 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check that STEY allows an index. +define void @f9(i64 %base, i64 %index, float %alt, i32 %limit) { +; CHECK: f9: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stey %f0, 4096(%r3,%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to float * + %cond = icmp ult i32 %limit, 42 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; Check that volatile loads are not matched. +define void @f10(float *%ptr, float %alt, i32 %limit) { +; CHECK: f10: +; CHECK: le {{%f[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: ste {{%f[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load volatile float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + ret void +} + +; ...likewise stores. In this case we should have a conditional load into %f0. +define void @f11(float *%ptr, float %alt, i32 %limit) { +; CHECK: f11: +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK: le %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: ste %f0, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store volatile float %res, float *%ptr + ret void +} + +; Try a frame index base. 
+define void @f12(float %alt, i32 %limit) { +; CHECK: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r15 +; CHECK: ste {{%f[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: [[LABEL]]: +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca float + call void @foo(float *%ptr) + %cond = icmp ult i32 %limit, 42 + %orig = load float *%ptr + %res = select i1 %cond, float %orig, float %alt + store float %res, float *%ptr + call void @foo(float *%ptr) + ret void +} diff --git a/test/CodeGen/SystemZ/cond-store-06.ll b/test/CodeGen/SystemZ/cond-store-06.ll new file mode 100644 index 00000000000..203a0b04fb9 --- /dev/null +++ b/test/CodeGen/SystemZ/cond-store-06.ll @@ -0,0 +1,213 @@ +; Test f64 conditional stores that are presented as selects. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @foo(double *) + +; Test with the loaded value first. +define void @f1(double *%ptr, double %alt, i32 %limit) { +; CHECK: f1: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: std %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; ...and with the loaded value second +define void @f2(double *%ptr, double %alt, i32 %limit) { +; CHECK: f2: +; CHECK-NOT: %r2 +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: std %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load double *%ptr + %res = select i1 %cond, double %alt, double %orig + store double %res, double *%ptr + ret void +} + +; Check the high end of the aligned STD range. +define void @f3(double *%base, double %alt, i32 %limit) { +; CHECK: f3: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: std %f0, 4088(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %cond = icmp ult i32 %limit, 42 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check the next doubleword up, which should use STDY instead of STD. +define void @f4(double *%base, double %alt, i32 %limit) { +; CHECK: f4: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stdy %f0, 4096(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %cond = icmp ult i32 %limit, 42 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check the high end of the aligned STDY range. +define void @f5(double *%base, double %alt, i32 %limit) { +; CHECK: f5: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stdy %f0, 524280(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 65535 + %cond = icmp ult i32 %limit, 42 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define void @f6(double *%base, double %alt, i32 %limit) { +; CHECK: f6: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, 524288 +; CHECK: std %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 65536 + %cond = icmp ult i32 %limit, 42 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check the low end of the STDY range. +define void @f7(double *%base, double %alt, i32 %limit) { +; CHECK: f7: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stdy %f0, -524288(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -65536 + %cond = icmp ult i32 %limit, 42 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define void @f8(double *%base, double %alt, i32 %limit) { +; CHECK: f8: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: agfi %r2, -524296 +; CHECK: std %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -65537 + %cond = icmp ult i32 %limit, 42 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check that STDY allows an index. +define void @f9(i64 %base, i64 %index, double %alt, i32 %limit) { +; CHECK: f9: +; CHECK-NOT: %r2 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r2 +; CHECK: stdy %f0, 524287(%r3,%r2) +; CHECK: [[LABEL]]: +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 524287 + %ptr = inttoptr i64 %add2 to double * + %cond = icmp ult i32 %limit, 42 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; Check that volatile loads are not matched. +define void @f10(double *%ptr, double %alt, i32 %limit) { +; CHECK: f10: +; CHECK: ld {{%f[0-5]}}, 0(%r2) +; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] +; CHECK: [[LABEL]]: +; CHECK: std {{%f[0-5]}}, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load volatile double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + ret void +} + +; ...likewise stores. In this case we should have a conditional load into %f0. +define void @f11(double *%ptr, double %alt, i32 %limit) { +; CHECK: f11: +; CHECK: jnl [[LABEL:[^ ]*]] +; CHECK: ld %f0, 0(%r2) +; CHECK: [[LABEL]]: +; CHECK: std %f0, 0(%r2) +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store volatile double %res, double *%ptr + ret void +} + +; Try a frame index base. +define void @f12(double %alt, i32 %limit) { +; CHECK: f12: +; CHECK: brasl %r14, foo@PLT +; CHECK-NOT: %r15 +; CHECK: jl [[LABEL:[^ ]*]] +; CHECK-NOT: %r15 +; CHECK: std {{%f[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: [[LABEL]]: +; CHECK: brasl %r14, foo@PLT +; CHECK: br %r14 + %ptr = alloca double + call void @foo(double *%ptr) + %cond = icmp ult i32 %limit, 42 + %orig = load double *%ptr + %res = select i1 %cond, double %orig, double %alt + store double %res, double *%ptr + call void @foo(double *%ptr) + ret void +} -- 2.11.0