SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
- unsigned Offset, bool Signed) const {
+ unsigned Offset, bool Signed,
+ const ISD::InputArg *Arg) const {
const DataLayout &DL = DAG.getDataLayout();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
- SDValue Val;
+ SDValue Val = Load;
+ if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
+ VT.bitsLT(MemVT)) {
+ unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
+ Val = DAG.getNode(Opc, SL, MemVT, Val, DAG.getValueType(VT));
+ }
+
if (MemVT.isFloatingPoint())
- Val = getFPExtOrFPTrunc(DAG, Load, SL, VT);
+ Val = getFPExtOrFPTrunc(DAG, Val, SL, VT);
else if (Signed)
- Val = DAG.getSExtOrTrunc(Load, SL, VT);
+ Val = DAG.getSExtOrTrunc(Val, SL, VT);
else
- Val = DAG.getZExtOrTrunc(Load, SL, VT);
-
- SDValue Ops[] = {
- Val,
- Load.getValue(1)
- };
+ Val = DAG.getZExtOrTrunc(Val, SL, VT);
- return DAG.getMergeValues(Ops, SL);
+ return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
}
SDValue SITargetLowering::LowerFormalArguments(
// The first 36 bytes of the input buffer contain information about
// thread group and global sizes.
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain,
- Offset, Ins[i].Flags.isSExt());
+ Offset, Ins[i].Flags.isSExt(),
+ &Ins[i]);
Chains.push_back(Arg.getValue(1));
auto *ParamTy =
-; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
-; SI: v_min_i32_e32
+; GCN: v_min_i32_e32
; EG: MIN_INT
define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
}
; FUNC-LABEL: {{^}}s_test_imin_sle_i32:
-; SI: s_min_i32
+; GCN: s_min_i32
; EG: MIN_INT
define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
}
; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32:
-; SI: s_min_i32
+; GCN: s_min_i32
; EG: MIN_INT
define void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
}
; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32:
-; SI: s_min_i32
-; SI: s_min_i32
-; SI: s_min_i32
-; SI: s_min_i32
+; GCN: s_min_i32
+; GCN: s_min_i32
+; GCN: s_min_i32
+; GCN: s_min_i32
; EG: MIN_INT
; EG: MIN_INT
}
; FUNC-LABEL: {{^}}s_test_imin_sle_i8:
-; SI: s_load_dword
-; SI: s_load_dword
-; SI: s_sext_i32_i8
-; SI: s_sext_i32_i8
-; SI: s_min_i32
+; GCN: s_load_dword
+; GCN: s_load_dword
+; GCN: s_sext_i32_i8
+; GCN: s_sext_i32_i8
+; GCN: s_min_i32
define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind {
%cmp = icmp sle i8 %a, %b
%val = select i1 %cmp, i8 %a, i8 %b
; extloads with mubuf instructions.
; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
-; SI: buffer_load_sbyte
-; SI: buffer_load_sbyte
-; SI: buffer_load_sbyte
-; SI: buffer_load_sbyte
-; SI: buffer_load_sbyte
-; SI: buffer_load_sbyte
-; SI: buffer_load_sbyte
-; SI: buffer_load_sbyte
+; GCN: buffer_load_sbyte
+; GCN: buffer_load_sbyte
+; GCN: buffer_load_sbyte
+; GCN: buffer_load_sbyte
+; GCN: buffer_load_sbyte
+; GCN: buffer_load_sbyte
+; GCN: buffer_load_sbyte
+; GCN: buffer_load_sbyte
; SI: v_min_i32
; SI: v_min_i32
; SI: v_min_i32
; SI: v_min_i32
-; SI: s_endpgm
+; VI: v_min_i32
+; VI: v_min_i32
+; VI: v_min_i32
+; VI: v_min_i32
+
+; GCN: s_endpgm
; EG: MIN_INT
; EG: MIN_INT
}
; FUNC-LABEL: @v_test_imin_slt_i32
-; SI: v_min_i32_e32
+; GCN: v_min_i32_e32
; EG: MIN_INT
define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
}
; FUNC-LABEL: @s_test_imin_slt_i32
-; SI: s_min_i32
+; GCN: s_min_i32
; EG: MIN_INT
define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
}
; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32:
-; SI: s_min_i32
-; SI: s_min_i32
+; GCN: s_min_i32
+; GCN: s_min_i32
; EG: MIN_INT
; EG: MIN_INT
}
; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
-; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
+; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
}
; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32:
-; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
+; GCN: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
; EG: MIN_INT {{.*}}literal.{{[xyzw]}}
define void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
}
; FUNC-LABEL: @v_test_umin_ule_i32
-; SI: v_min_u32_e32
+; GCN: v_min_u32_e32
; EG: MIN_UINT
define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
}
; FUNC-LABEL: @v_test_umin_ule_v3i32
-; SI: v_min_u32_e32
-; SI: v_min_u32_e32
-; SI: v_min_u32_e32
+; GCN: v_min_u32_e32
+; GCN: v_min_u32_e32
+; GCN: v_min_u32_e32
; SI-NOT: v_min_u32_e32
-; SI: s_endpgm
+; GCN: s_endpgm
; EG: MIN_UINT
; EG: MIN_UINT
ret void
}
; FUNC-LABEL: @s_test_umin_ule_i32
-; SI: s_min_u32
+; GCN: s_min_u32
; EG: MIN_UINT
define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
}
; FUNC-LABEL: @v_test_umin_ult_i32
-; SI: v_min_u32_e32
+; GCN: v_min_u32_e32
; EG: MIN_UINT
define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
}
; FUNC-LABEL: {{^}}v_test_umin_ult_i8:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: v_min_u32_e32
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: v_min_u32_e32
; EG: MIN_UINT
define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
}
; FUNC-LABEL: @s_test_umin_ult_i32
-; SI: s_min_u32
+; GCN: s_min_u32
; EG: MIN_UINT
define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
; FUNC-LABEL: @v_test_umin_ult_i32_multi_use
; SI-NOT: v_min
-; SI: v_cmp_lt_u32
+; GCN: v_cmp_lt_u32
; SI-NEXT: v_cndmask_b32
; SI-NOT: v_min
-; SI: s_endpgm
+; GCN: s_endpgm
; EG-NOT: MIN_UINT
define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
ret void
}
+; FUNC-LABEL: @v_test_umin_ult_i16_multi_use
+; GCN-NOT: v_min
+; GCN: v_cmp_lt_u32
+; GCN-NEXT: v_cndmask_b32
+; GCN-NOT: v_min
+; GCN: s_endpgm
+
+; EG-NOT: MIN_UINT
+define void @v_test_umin_ult_i16_multi_use(i16 addrspace(1)* %out0, i1 addrspace(1)* %out1, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind {
+ %a = load i16, i16 addrspace(1)* %aptr, align 2
+ %b = load i16, i16 addrspace(1)* %bptr, align 2
+ %cmp = icmp ult i16 %a, %b
+ %val = select i1 %cmp, i16 %a, i16 %b
+ store i16 %val, i16 addrspace(1)* %out0, align 2
+ store i1 %cmp, i1 addrspace(1)* %out1
+ ret void
+}
+
; FUNC-LABEL: @s_test_umin_ult_v1i32
-; SI: s_min_u32
+; GCN: s_min_u32
; EG: MIN_UINT
define void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
}
; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32:
-; SI: s_min_u32
-; SI: s_min_u32
-; SI: s_min_u32
-; SI: s_min_u32
-; SI: s_min_u32
-; SI: s_min_u32
-; SI: s_min_u32
-; SI: s_min_u32
+; GCN: s_min_u32
+; GCN: s_min_u32
+; GCN: s_min_u32
+; GCN: s_min_u32
+; GCN: s_min_u32
+; GCN: s_min_u32
+; GCN: s_min_u32
+; GCN: s_min_u32
; EG: MIN_UINT
; EG: MIN_UINT
}
; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
-; SI: v_min_u32
-; SI: v_min_u32
-; SI: v_min_u32
-; SI: v_min_u32
-; SI: v_min_u32
-; SI: v_min_u32
-; SI: v_min_u32
-; SI: v_min_u32
+; GCN: v_min_u32
+; GCN: v_min_u32
+; GCN: v_min_u32
+; GCN: v_min_u32
+; GCN: v_min_u32
+; GCN: v_min_u32
+; GCN: v_min_u32
+; GCN: v_min_u32
; EG: MIN_UINT
; EG: MIN_UINT
; Make sure the redundant 'and' is removed.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
-; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
-; SI: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
-; SI: buffer_store_dword [[VMIN]]
+; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
+; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
+; GCN: buffer_store_dword [[VMIN]]
; EG: MIN_UINT
define void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
; Make sure the redundant sign_extend_inreg is removed.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
-; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: s_min_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
-; SI: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
-; SI: buffer_store_dword [[VMIN]]
+; GCN-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN: s_min_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
+; GCN: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
+; GCN: buffer_store_dword [[VMIN]]
; EG: MIN_INT
define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
}
; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
-; SI: s_min_i32
+; GCN: s_min_i32
; EG: MIN_INT
define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
; 64 bit
; FUNC-LABEL: {{^}}test_umin_ult_i64
-; SI: s_endpgm
+; GCN: s_endpgm
; EG: MIN_UINT
; EG: MIN_UINT
}
; FUNC-LABEL: {{^}}test_umin_ule_i64
-; SI: s_endpgm
+; GCN: s_endpgm
; EG: MIN_UINT
; EG: MIN_UINT
}
; FUNC-LABEL: {{^}}test_imin_slt_i64
-; SI: s_endpgm
+; GCN: s_endpgm
; EG-DAG: MIN_UINT
; EG-DAG: MIN_INT
}
; FUNC-LABEL: {{^}}test_imin_sle_i64
-; SI: s_endpgm
+; GCN: s_endpgm
; EG-DAG: MIN_UINT
; EG-DAG: MIN_INT