virtual void PostprocessISelDAG();
private:
+ bool isInlineImmediate(SDNode *N) const;
inline SDValue getSmallIPtrImm(unsigned Imm);
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII);
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
+bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
+ const SITargetLowering *TL
+ = static_cast<const SITargetLowering *>(getTargetLowering());
+ return TL->analyzeImmediate(N) == 0;
+}
+
/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \OpNo or NULL if the register class cannot be
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
SDLoc(N), N->getValueType(0), Ops);
}
+
+ case ISD::Constant:
+ case ISD::ConstantFP: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
+ break;
+
+ uint64_t Imm;
+ if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
+ Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
+ else {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ assert(C);
+ Imm = C->getZExtValue();
+ }
+
+ SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
+ CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
+ SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
+ CurDAG->getConstant(Imm >> 32, MVT::i32));
+ const SDValue Ops[] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+ };
+
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
+ N->getValueType(0), Ops);
+ }
+
case AMDGPUISD::REGISTER_LOAD: {
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
(S_MOV_B64 InlineImm<i64>:$imm)
>;
-// i64 immediates aren't supported in hardware, split it into two 32bit values
-def : Pat <
- (i64 imm:$imm),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (S_MOV_B32 (i32 (LO32 imm:$imm))), sub0),
- (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1)
->;
-
-def : Pat <
- (f64 fpimm:$imm),
- (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (V_MOV_B32_e32 (f32 (LO32f fpimm:$imm))), sub0),
- (V_MOV_B32_e32 (f32 (HI32f fpimm:$imm))), sub1)
->;
-
/********** ===================== **********/
/********** Interpolation Paterns **********/
/********** ===================== **********/
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
; CHECK: @fconst_f64
-; CHECK: V_MOV_B32_e32 {{v[0-9]+}}, 0.000000e+00
-; CHECK-NEXT: V_MOV_B32_e32 {{v[0-9]+}}, 2.312500e+00
+; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 1075052544
+; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0
define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%r1 = load double addrspace(1)* %in
; SI-LABEL: @trunc_shl_i64:
; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}},
-; SI: V_ADD_I32_e32 v[[LO_ADD:[0-9]+]], s[[LO_SREG]],
-; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, v{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
+; SI: S_ADD_I32 s[[LO_ADD:[0-9]+]], s[[LO_SREG]],
+; SI: S_LSHL_B64 s{{\[}}[[LO_SREG2:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
+; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
%aa = add i64 %a, 234 ; Prevent shrinking store.