From 30d84d8dfa0433088d541c66b92af0da3855bc9c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 16 Aug 2013 01:12:16 +0000 Subject: [PATCH] R600: Add support for global vector loads with element types less than 32-bits Tested-by: Aaron Watry git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188521 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 13 +++ test/CodeGen/R600/load.ll | 176 +++++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 78495ca8daa..746c4794d12 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -91,6 +91,19 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand); + setOperationAction(ISD::FNEG, MVT::v2f32, Expand); setOperationAction(ISD::FNEG, MVT::v4f32, Expand); diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index 22aed6ab4d0..abb10617dd0 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -35,6 +35,94 @@ entry: ret void } +; R600-CHECK: @load_v2i8 +; R600-CHECK: VTX_READ_8 +; R600-CHECK: VTX_READ_8 +; SI-CHECK: @load_v2i8 +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { +entry: + %0 = load <2 x i8> addrspace(1)* %in + %1 = zext <2 x i8> %0 to <2 x i32> + store <2 x i32> %1, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK: @load_v2i8_sext +; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] +; R600-CHECK-DAG: 24 +; SI-CHECK: @load_v2i8_sext +; SI-CHECK: BUFFER_LOAD_SBYTE +; SI-CHECK: BUFFER_LOAD_SBYTE +define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { +entry: + %0 = load <2 x i8> addrspace(1)* %in + %1 = sext <2 x i8> %0 to <2 x i32> + store <2 x i32> %1, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK: @load_v4i8 +; R600-CHECK: VTX_READ_8 +; R600-CHECK: VTX_READ_8 +; R600-CHECK: VTX_READ_8 +; R600-CHECK: VTX_READ_8 +; SI-CHECK: @load_v4i8 +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +; SI-CHECK: BUFFER_LOAD_UBYTE +define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) { +entry: + %0 = load <4 x i8> addrspace(1)* %in + %1 = zext <4 x i8> %0 to <4 x i32> + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK: @load_v4i8_sext +; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] +; R600-CHECK-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]] +; R600-CHECK-DAG: 24 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]] +; R600-CHECK-DAG: 24 +; SI-CHECK: @load_v4i8_sext +; SI-CHECK: BUFFER_LOAD_SBYTE +; SI-CHECK: BUFFER_LOAD_SBYTE +; SI-CHECK: BUFFER_LOAD_SBYTE +; SI-CHECK: BUFFER_LOAD_SBYTE +define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) { +entry: + %0 = load <4 x i8> addrspace(1)* %in + %1 = sext <4 x i8> %0 to <4 x i32> + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + ; Load an i16 value from the global address space. ; R600-CHECK: @load_i16 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} @@ -64,6 +152,94 @@ entry: ret void } +; R600-CHECK: @load_v2i16 +; R600-CHECK: VTX_READ_16 +; R600-CHECK: VTX_READ_16 +; SI-CHECK: @load_v2i16 +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { +entry: + %0 = load <2 x i16> addrspace(1)* %in + %1 = zext <2 x i16> %0 to <2 x i32> + store <2 x i32> %1, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK: @load_v2i16_sext +; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] +; R600-CHECK-DAG: 16 +; SI-CHECK: @load_v2i16_sext +; SI-CHECK: BUFFER_LOAD_SSHORT +; SI-CHECK: BUFFER_LOAD_SSHORT +define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { +entry: + %0 = load <2 x i16> addrspace(1)* %in + %1 = sext <2 x i16> %0 to <2 x i32> + store <2 x i32> %1, <2 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK: @load_v4i16 +; R600-CHECK: VTX_READ_16 +; R600-CHECK: VTX_READ_16 +; R600-CHECK: VTX_READ_16 +; R600-CHECK: VTX_READ_16 +; SI-CHECK: @load_v4i16 +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +; SI-CHECK: BUFFER_LOAD_USHORT +define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { +entry: + %0 = load <4 x i16> addrspace(1)* %in + %1 = zext <4 x i16> %0 to <4 x i32> + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + +; R600-CHECK: @load_v4i16_sext +; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] +; R600-CHECK-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]] +; R600-CHECK-DAG: 16 +; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]] +; R600-CHECK-DAG: 16 +; SI-CHECK: @load_v4i16_sext +; SI-CHECK: BUFFER_LOAD_SSHORT +; SI-CHECK: BUFFER_LOAD_SSHORT +; SI-CHECK: BUFFER_LOAD_SSHORT +; SI-CHECK: BUFFER_LOAD_SSHORT +define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { +entry: + %0 = load <4 x i16> addrspace(1)* %in + %1 = sext <4 x i16> %0 to <4 x i32> + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} + ; load an i32 value from the global address space. ; R600-CHECK: @load_i32 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -- 2.11.0