From 9d41a401dcdfda1e3bfdabdedac239ef1d6b93e4 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 24 May 2012 08:37:49 -0400 Subject: [PATCH] radeon/llvm: Remove AMDIL FTOI and ITOF instructions --- src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl | 2 +- src/gallium/drivers/radeon/AMDGPUUtil.cpp | 2 - src/gallium/drivers/radeon/AMDILConversions.td | 131 ----------------- src/gallium/drivers/radeon/AMDILISelLowering.cpp | 162 ---------------------- src/gallium/drivers/radeon/AMDILInstrPatterns.td | 6 - src/gallium/drivers/radeon/AMDILInstructions.td | 8 -- src/gallium/drivers/radeon/R600Instructions.td | 12 +- 7 files changed, 7 insertions(+), 316 deletions(-) diff --git a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl index f0cb9ae0bfc..ab3415e10d8 100644 --- a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl +++ b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl @@ -48,7 +48,7 @@ my $FILE_TYPE = $ARGV[0]; open AMDIL, '<', 'AMDILInstructions.td'; -my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ', 'SMULHI_i32', 'SMUL_i32', 'LOG_f32', 'RSQ_f32', 'SIN_f32', 'COS_f32'); +my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ', 'SMULHI_i32', 'SMUL_i32', 'LOG_f32', 'RSQ_f32', 'SIN_f32', 'COS_f32'); while () { if ($_ =~ /defm\s+([A-Z_]+)\s+:\s+([A-Za-z0-9]+); - - def uctof_f32:Pat < (f32 (uint_to_fp GPRI8:$src)), (f32 (UTOF @@ -210,16 +200,6 @@ def uctof_f32:Pat < (f32 (uint_to_fp GPRI8:$src)), (LOADCONST_i32 24)))) >; -def sctod_f64:Pat < (f64 (sint_to_fp GPRI8:$src)), -(f64 (FTOD - (ITOF - (SHR_i32 - (SHL_i32 -(IL_ASINT_i8 GPRI8:$src), - (LOADCONST_i32 24)), - (LOADCONST_i32 24))))) >; - - def uctod_f64:Pat < (f64 (uint_to_fp GPRI8:$src)), (f64 (FTOD (UTOF @@ -229,16 +209,6 @@ def uctod_f64:Pat < (f64 (uint_to_fp GPRI8:$src)), (LOADCONST_i32 24)), (LOADCONST_i32 24))))) >; -def sstof_f32:Pat < (f32 (sint_to_fp GPRI16:$src)), -(f32 - (ITOF - (SHR_i32 - (SHL_i32 -(IL_ASINT_i16 GPRI16:$src), - (LOADCONST_i32 16)), - (LOADCONST_i32 16)))) >; - - def ustof_f32:Pat < (f32 (uint_to_fp GPRI16:$src)), (f32 (UTOF @@ -248,16 +218,6 @@ def ustof_f32:Pat < (f32 (uint_to_fp GPRI16:$src)), (LOADCONST_i32 16)), (LOADCONST_i32 16)))) >; -def sstod_f64:Pat < (f64 (sint_to_fp GPRI16:$src)), -(f64 (FTOD - (ITOF - (SHR_i32 - (SHL_i32 -(IL_ASINT_i16 GPRI16:$src), - (LOADCONST_i32 16)), - (LOADCONST_i32 16))))) >; - - def ustod_f64:Pat < (f64 (uint_to_fp GPRI16:$src)), (f64 (FTOD (UTOF @@ -452,16 +412,6 @@ def sitol_v2i64:Pat < (v2i64 (sext GPRV2I32:$src)), -def sctof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I8:$src)), -(v2f32 - (ITOF_v2f32 - (SHRVEC_v2i32 - (SHLVEC_v2i32 -(IL_ASV2INT_v2i8 GPRV2I8:$src), - (VCREATE_v2i32 (LOADCONST_i32 24))), - (VCREATE_v2i32 (LOADCONST_i32 24))))) >; - - def uctof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I8:$src)), (v2f32 (UTOF_v2f32 @@ -472,32 +422,6 @@ def uctof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I8:$src)), (VCREATE_v2i32 (LOADCONST_i32 24))))) >; -def sctod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I8:$src)), -(v2f64 - (VINSERT_v2f64 - (VCREATE_v2f64 - (FTOD - (VEXTRACT_v2f32 - (ITOF_v2f32 - (SHRVEC_v2i32 - (SHLVEC_v2i32 -(IL_ASV2INT_v2i8 GPRV2I8:$src), - (VCREATE_v2i32 (LOADCONST_i32 24))), - (VCREATE_v2i32 (LOADCONST_i32 24)))), - 1) - )), - (FTOD - (VEXTRACT_v2f32 - (ITOF_v2f32 - (SHRVEC_v2i32 - (SHLVEC_v2i32 -(IL_ASV2INT_v2i8 GPRV2I8:$src), - (VCREATE_v2i32 (LOADCONST_i32 24))), - (VCREATE_v2i32 (LOADCONST_i32 24)))), - 2) - ), 1, 256) - ) >; - def uctod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I8:$src)), (v2f64 (VINSERT_v2f64 @@ -524,15 +448,6 @@ def uctod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I8:$src)), ), 1, 256) ) >; -def sstof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I16:$src)), -(v2f32 - (ITOF_v2f32 - (SHRVEC_v2i32 - (SHLVEC_v2i32 -(IL_ASV2INT_v2i16 GPRV2I16:$src), - (VCREATE_v2i32 (LOADCONST_i32 16))), - (VCREATE_v2i32 (LOADCONST_i32 16))))) >; - def ustof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I16:$src)), (v2f32 @@ -544,32 +459,6 @@ def ustof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I16:$src)), (VCREATE_v2i32 (LOADCONST_i32 16))))) >; -def sstod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I16:$src)), -(v2f64 - (VINSERT_v2f64 - (VCREATE_v2f64 - (FTOD - (VEXTRACT_v2f32 - (ITOF_v2f32 - (SHRVEC_v2i32 - (SHLVEC_v2i32 -(IL_ASV2INT_v2i16 GPRV2I16:$src), - (VCREATE_v2i32 (LOADCONST_i32 16))), - (VCREATE_v2i32 (LOADCONST_i32 16)))), - 1) - )), - (FTOD - (VEXTRACT_v2f32 - (ITOF_v2f32 - (SHRVEC_v2i32 - (SHLVEC_v2i32 -(IL_ASV2INT_v2i16 GPRV2I16:$src), - (VCREATE_v2i32 (LOADCONST_i32 16))), - (VCREATE_v2i32 (LOADCONST_i32 16)))), - 2) - ), 1, 256) - ) >; - def ustod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I16:$src)), (v2f64 (VINSERT_v2f64 @@ -679,16 +568,6 @@ def sstoi_v4i32:Pat < (v4i32 (sext GPRV4I16:$src)), -def sctof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I8:$src)), -(v4f32 - (ITOF_v4f32 - (SHRVEC_v4i32 - (SHLVEC_v4i32 -(IL_ASV4INT_v4i8 GPRV4I8:$src), - (VCREATE_v4i32 (LOADCONST_i32 24))), - (VCREATE_v4i32 (LOADCONST_i32 24))))) >; - - def uctof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I8:$src)), (v4f32 (UTOF_v4f32 @@ -699,16 +578,6 @@ def uctof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I8:$src)), (VCREATE_v4i32 (LOADCONST_i32 24))))) >; -def sstof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I16:$src)), -(v4f32 - (ITOF_v4f32 - (SHRVEC_v4i32 - (SHLVEC_v4i32 -(IL_ASV4INT_v4i16 GPRV4I16:$src), - (VCREATE_v4i32 (LOADCONST_i32 16))), - (VCREATE_v4i32 (LOADCONST_i32 16))))) >; - - def ustof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I16:$src)), (v4f32 (UTOF_v4f32 diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp index 7a75c332189..ca213d1845e 100644 --- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp @@ -713,9 +713,7 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const // TODO: Implement custom UREM/SREM routines setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::SINT_TO_FP, VT, Custom); setOperationAction(ISD::UINT_TO_FP, VT, Custom); - setOperationAction(ISD::FP_TO_SINT, VT, Custom); setOperationAction(ISD::FP_TO_UINT, VT, Custom); setOperationAction(ISDBITCAST, VT, Custom); setOperationAction(ISD::GlobalAddress, VT, Custom); @@ -809,9 +807,7 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const setOperationAction(ISD::Constant , MVT::i64 , Legal); setOperationAction(ISD::UDIV, MVT::v2i64, Expand); setOperationAction(ISD::SDIV, MVT::v2i64, Expand); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand); setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); @@ -830,9 +826,7 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const setOperationAction(ISD::FDIV, MVT::v2f64, Expand); // We want to expand vector conversions into their scalar // counterparts. - setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand); setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); @@ -1579,9 +1573,7 @@ AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const LOWER(JumpTable); LOWER(ConstantPool); LOWER(ExternalSymbol); - LOWER(FP_TO_SINT); LOWER(FP_TO_UINT); - LOWER(SINT_TO_FP); LOWER(UINT_TO_FP); LOWER(MUL); LOWER(SUB); @@ -2505,62 +2497,6 @@ AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG, } return res; } -SDValue -AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const -{ - SDValue RHS = Op.getOperand(0); - EVT RHSVT = RHS.getValueType(); - MVT RST = RHSVT.getScalarType().getSimpleVT(); - EVT LHSVT = Op.getValueType(); - MVT LST = LHSVT.getScalarType().getSimpleVT(); - DebugLoc DL = Op.getDebugLoc(); - SDValue DST; - const AMDILTargetMachine* - amdtm = reinterpret_cast - (&this->getTargetMachine()); - const AMDILSubtarget* - stm = static_cast( - amdtm->getSubtargetImpl()); - if (RST == MVT::f64 && RHSVT.isVector() - && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - // We dont support vector 64bit floating point convertions. - for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) { - SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); - op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op); - if (!x) { - DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); - } else { - DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, - DST, op, DAG.getTargetConstant(x, MVT::i32)); - } - } - } else { - if (RST == MVT::f64 - && LST == MVT::i32) { - if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - DST = SDValue(Op.getNode(), 0); - } else { - DST = genf64toi32(RHS, DAG, true); - } - } else if (RST == MVT::f64 - && LST == MVT::i64) { - DST = genf64toi64(RHS, DAG, true); - } else if (RST == MVT::f64 - && (LST == MVT::i8 || LST == MVT::i16)) { - if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0)); - } else { - SDValue ToInt = genf64toi32(RHS, DAG, true); - DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt); - } - - } else { - DST = SDValue(Op.getNode(), 0); - } - } - return DST; -} SDValue AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const @@ -2855,104 +2791,6 @@ AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const } SDValue -AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const -{ - SDValue RHS = Op.getOperand(0); - EVT RHSVT = RHS.getValueType(); - MVT RST = RHSVT.getScalarType().getSimpleVT(); - EVT INTVT; - EVT LONGVT; - SDValue DST; - bool isVec = RHSVT.isVector(); - DebugLoc DL = Op.getDebugLoc(); - EVT LHSVT = Op.getValueType(); - MVT LST = LHSVT.getScalarType().getSimpleVT(); - const AMDILTargetMachine* - amdtm = reinterpret_cast - (&this->getTargetMachine()); - const AMDILSubtarget* - stm = static_cast( - amdtm->getSubtargetImpl()); - if (LST == MVT::f64 && LHSVT.isVector() - && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - // We dont support vector 64bit floating point convertions. - for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) { - SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); - op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op); - if (!x) { - DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); - } else { - DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST, - op, DAG.getTargetConstant(x, MVT::i32)); - } - - } - } else { - - if (isVec) { - LONGVT = EVT(MVT::getVectorVT(MVT::i64, - RHSVT.getVectorNumElements())); - INTVT = EVT(MVT::getVectorVT(MVT::i32, - RHSVT.getVectorNumElements())); - } else { - LONGVT = EVT(MVT::i64); - INTVT = EVT(MVT::i32); - } - MVT RST = RHSVT.getScalarType().getSimpleVT(); - if ((RST == MVT::i32 || RST == MVT::i64) - && LST == MVT::f64) { - if (RST == MVT::i32) { - if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { - DST = SDValue(Op.getNode(), 0); - return DST; - } - } - SDValue c31 = DAG.getConstant( 31, INTVT ); - SDValue cSbit = DAG.getConstant( 0x80000000, INTVT ); - - SDValue S; // Sign, as 0 or -1 - SDValue Sbit; // Sign bit, as one bit, MSB only. - if (RST == MVT::i32) { - Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit ); - S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 ); - } else { // 64-bit case... SRA of 64-bit values is slow - SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS ); - Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit ); - SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 ); - S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp ); - } - - // get abs() of input value, given sign as S (0 or -1) - // SpI = RHS + S - SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S); - // SpIxS = SpI ^ S - SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S); - - // Convert unsigned value to double precision - SDValue R; - if (RST == MVT::i32) { - // r = cast_u32_to_f64(SpIxS) - R = genu32tof64(SpIxS, LHSVT, DAG); - } else { - // r = cast_u64_to_f64(SpIxS) - R = genu64tof64(SpIxS, LHSVT, DAG); - } - - // drop in the sign bit - SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R ); - SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t ); - SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t ); - thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit ); - t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi ); - DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t ); - } else { - DST = SDValue(Op.getNode(), 0); - } - } - return DST; -} -SDValue AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); diff --git a/src/gallium/drivers/radeon/AMDILInstrPatterns.td b/src/gallium/drivers/radeon/AMDILInstrPatterns.td index 4a78aa1278c..cd01ea4166b 100644 --- a/src/gallium/drivers/radeon/AMDILInstrPatterns.td +++ b/src/gallium/drivers/radeon/AMDILInstrPatterns.td @@ -17,15 +17,9 @@ // unsigned: f32 -> i64 def FTOUL : Pat<(i64 (fp_to_uint GPRF32:$src)), (LCREATE (FTOU GPRF32:$src), (LOADCONST_i32 0))>; -// signed: f32 -> i64 -def FTOL : Pat<(i64 (fp_to_sint GPRF32:$src)), - (LCREATE (FTOI GPRF32:$src), (LOADCONST_i32 0))>; // unsigned: i64 -> f32 def ULTOF : Pat<(f32 (uint_to_fp GPRI64:$src)), (UTOF (LLO GPRI64:$src))>; -// signed: i64 -> f32 -def LTOF : Pat<(f32 (sint_to_fp GPRI64:$src)), - (ITOF (LLO GPRI64:$src))>; // LLVM isn't lowering this correctly, so writing a pattern that // matches it isntead. diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td index 304d91e4da3..cdc280803f1 100644 --- a/src/gallium/drivers/radeon/AMDILInstructions.td +++ b/src/gallium/drivers/radeon/AMDILInstructions.td @@ -664,14 +664,6 @@ def ULLT : TwoInOneOut; // f64 ==> f32 def DTOF : UnaryOp; -// f32 ==> i32 signed -def FTOI : UnaryOp; -def FTOI_v2i32 : UnaryOp; -def FTOI_v4i32 : UnaryOp; -// i32 ==> f32 signed -def ITOF : UnaryOp; -def ITOF_v2f32 : UnaryOp; -def ITOF_v4f32 : UnaryOp; // f32 ==> i32 unsigned def FTOU : UnaryOp; def FTOU_v2i32 : UnaryOp; diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index c1458fba482..6873fc6bdad 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -626,14 +626,14 @@ class EXP_IEEE_Common inst> : R600_1OP < >; class FLT_TO_INT_Common inst> : R600_1OP < - inst, "FLT_TO_INT", []> { - let AMDILOp = AMDILInst.FTOI; -} + inst, "FLT_TO_INT", + [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))] +>; class INT_TO_FLT_Common inst> : R600_1OP < - inst, "INT_TO_FLT", []> { - let AMDILOp = AMDILInst.ITOF; -} + inst, "INT_TO_FLT", + [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))] +>; class LOG_CLAMPED_Common inst> : R600_1OP < inst, "LOG_CLAMPED", -- 2.11.0