[AMDGPU] Optimize image_[load|store]_mip

author Piotr Sobczak <piotr.sobczak@amd.com>

Mon, 10 Jun 2019 15:58:51 +0000 (15:58 +0000)

committer Piotr Sobczak <piotr.sobczak@amd.com>

Mon, 10 Jun 2019 15:58:51 +0000 (15:58 +0000)
author Piotr Sobczak <piotr.sobczak@amd.com>
Mon, 10 Jun 2019 15:58:51 +0000 (15:58 +0000)
committer Piotr Sobczak <piotr.sobczak@amd.com>
Mon, 10 Jun 2019 15:58:51 +0000 (15:58 +0000)
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td

index 3f525ef..6f7dbc7 100644 (file)
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -101,6 +101,22 @@ def MIMGLZMappingTable : GenericTable {
    let PrimaryKeyName = "getMIMGLZMappingInfo";
  }
  
+class MIMGMIPMapping<MIMGBaseOpcode mip, MIMGBaseOpcode nonmip> { // One pair.
+  MIMGBaseOpcode MIP = mip;       // Base opcode of the _mip variant.
+  MIMGBaseOpcode NONMIP = nonmip; // Base opcode of the variant without _mip.
+}
+
+def MIMGMIPMappingTable : GenericTable {
+  let FilterClass = "MIMGMIPMapping";     // One row per MIMGMIPMapping record.
+  let CppTypeName = "MIMGMIPMappingInfo"; // Name of the C++ row type.
+  let Fields = ["MIP", "NONMIP"];
+  GenericEnum TypeOf_MIP = MIMGBaseOpcode;
+  GenericEnum TypeOf_NONMIP = MIMGBaseOpcode;
+
+  let PrimaryKey = ["MIP"];                     // Rows are looked up by MIP.
+  let PrimaryKeyName = "getMIMGMIPMappingInfo"; // Name of the lookup function.
+}
+
  class MIMG <dag outs, string dns = "">
    : InstSI <outs, (ins), "", []> {
  
@@ -808,3 +824,7 @@ def : MIMGLZMapping<IMAGE_GATHER4_L, IMAGE_GATHER4_LZ>;
  def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;
  def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;
  def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
+
+// _mip -> non-_mip mapping; lowering uses it when 'lod' is a constant zero.
+def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
+def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp

index 5a8e8b1..1a764ea 100644 (file)
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4863,6 +4863,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
    const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
    const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
        AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
+  const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
+      AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
    unsigned IntrOpcode = Intr->BaseOpcode;
    bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;
  
@@ -4966,6 +4968,17 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
      }
    }
  
+  // When 'lod' is a constant zero, use the opcode without _mip and drop 'lod'.
+  if (MIPMappingInfo) {
+    // 'lod' is read from the last of the NumVAddrs address operands.
+    const SDValue LodOp = Op.getOperand(AddrIdx + NumVAddrs - 1);
+    const auto *LodConst = dyn_cast<ConstantSDNode>(LodOp);
+    if (LodConst && LodConst->isNullValue()) {
+      IntrOpcode = MIPMappingInfo->NONMIP; // Variant without _mip.
+      --NumMIVAddrs;                       // Remove 'lod'.
+    }
+  }
+
    // Check for 16 bit addresses and pack if true.
    unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
    MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

index 24b2ab5..a78e302 100644 (file)
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -104,6 +104,7 @@ namespace AMDGPU {
  #define GET_MIMGDimInfoTable_IMPL
  #define GET_MIMGInfoTable_IMPL
  #define GET_MIMGLZMappingTable_IMPL
+#define GET_MIMGMIPMappingTable_IMPL
  #include "AMDGPUGenSearchableTables.inc"
  
  int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

index b8cd8d2..58d7d8f 100644 (file)
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -45,6 +45,7 @@ namespace AMDGPU {
  #define GET_MIMGDim_DECL
  #define GET_MIMGEncoding_DECL
  #define GET_MIMGLZMapping_DECL
+#define GET_MIMGMIPMapping_DECL
  #include "AMDGPUGenSearchableTables.inc"
  
  namespace IsaInfo {
@@ -218,10 +219,18 @@ struct MIMGLZMappingInfo {
    MIMGBaseOpcode LZ;
  };
  
+struct MIMGMIPMappingInfo { // Row type of MIMGMIPMappingTable.
+  MIMGBaseOpcode MIP;    // Base opcode of the _mip variant; the lookup key.
+  MIMGBaseOpcode NONMIP; // Base opcode of the variant without _mip.
+};
+
  LLVM_READONLY
  const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
  
  LLVM_READONLY
+const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
+
+LLVM_READONLY
  int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                    unsigned VDataDwords, unsigned VAddrDwords);
  
diff --git a/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll b/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll

new file mode 100644 (file)

index 0000000..7e3270f
--- /dev/null
+++ b/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll
@@ -0,0 +1,132 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+
+; GCN-LABEL: {{^}}load_mip_1d:
+; GCN-NOT: image_load_mip
+; GCN: image_load v
+define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_2d:
+; GCN-NOT: image_load_mip
+; GCN: image_load v
+define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_3d:
+; GCN-NOT: image_load_mip
+; GCN: image_load v
+define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_1darray:
+; GCN-NOT: image_load_mip
+; GCN: image_load v
+define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_2darray:
+; GCN-NOT: image_load_mip
+; GCN: image_load v
+define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}load_mip_cube:
+; GCN-NOT: image_load_mip
+; GCN: image_load v
+define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+
+
+; GCN-LABEL: {{^}}store_mip_1d:
+; GCN-NOT: image_store_mip
+; GCN: image_store v
+define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_2d:
+; GCN-NOT: image_store_mip
+; GCN: image_store v
+define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_3d:
+; GCN-NOT: image_store_mip
+; GCN: image_store v
+define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_1darray:
+; GCN-NOT: image_store_mip
+; GCN: image_store v
+define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_2darray:
+; GCN-NOT: image_store_mip
+; GCN: image_store v
+define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_mip_cube:
+; GCN-NOT: image_store_mip
+; GCN: image_store v
+define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
+main_body:
+  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+
+declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+
author	Piotr Sobczak <piotr.sobczak@amd.com>
	Mon, 10 Jun 2019 15:58:51 +0000 (15:58 +0000)
committer	Piotr Sobczak <piotr.sobczak@amd.com>
	Mon, 10 Jun 2019 15:58:51 +0000 (15:58 +0000)
lib/Target/AMDGPU/MIMGInstructions.td		patch \| blob \| history
lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp		patch \| blob \| history
lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h		patch \| blob \| history
test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll	[new file with mode: 0644]	patch \| blob