AMDGPU: Don't error on ds.ordered intrinsic in function

author Matt Arsenault <Matthew.Arsenault@amd.com>

Sun, 5 Jan 2020 19:16:22 +0000 (14:16 -0500)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Fri, 24 Jan 2020 21:06:44 +0000 (13:06 -0800)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Sun, 5 Jan 2020 19:16:22 +0000 (14:16 -0500)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 24 Jan 2020 21:06:44 +0000 (13:06 -0800)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp

index 84f26cc..cb03b90 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6081,6 +6081,28 @@ static unsigned getBufferOffsetForMMO(SDValue VOffset,
           cast<ConstantSDNode>(Offset)->getSExtValue();
  }
  
+static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
+  switch (MF.getFunction().getCallingConv()) {
+  case CallingConv::AMDGPU_PS:
+    return 1;
+  case CallingConv::AMDGPU_VS:
+    return 2;
+  case CallingConv::AMDGPU_GS:
+    return 3;
+  case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_LS:
+  case CallingConv::AMDGPU_ES:
+    report_fatal_error("ds_ordered_count unsupported for this calling conv");
+  case CallingConv::AMDGPU_CS:
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::C:
+  case CallingConv::Fast:
+  default:
+    // Assume other calling conventions are various compute callable functions
+    return 0;
+  }
+}
+
  SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
    unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -6096,8 +6118,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
      unsigned IndexOperand = M->getConstantOperandVal(7);
      unsigned WaveRelease = M->getConstantOperandVal(8);
      unsigned WaveDone = M->getConstantOperandVal(9);
-    unsigned ShaderType;
-    unsigned Instruction;
  
      unsigned OrderedCountIndex = IndexOperand & 0x3f;
      IndexOperand &= ~0x3f;
@@ -6116,36 +6136,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
      if (IndexOperand)
        report_fatal_error("ds_ordered_count: bad index operand");
  
-    switch (IntrID) {
-    case Intrinsic::amdgcn_ds_ordered_add:
-      Instruction = 0;
-      break;
-    case Intrinsic::amdgcn_ds_ordered_swap:
-      Instruction = 1;
-      break;
-    }
-
      if (WaveDone && !WaveRelease)
        report_fatal_error("ds_ordered_count: wave_done requires wave_release");
  
-    switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
-    case CallingConv::AMDGPU_CS:
-    case CallingConv::AMDGPU_KERNEL:
-      ShaderType = 0;
-      break;
-    case CallingConv::AMDGPU_PS:
-      ShaderType = 1;
-      break;
-    case CallingConv::AMDGPU_VS:
-      ShaderType = 2;
-      break;
-    case CallingConv::AMDGPU_GS:
-      ShaderType = 3;
-      break;
-    default:
-      report_fatal_error("ds_ordered_count unsupported for this calling conv");
-    }
-
+    unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
+    unsigned ShaderType = getDSShaderTypeValue(DAG.getMachineFunction());
      unsigned Offset0 = OrderedCountIndex << 2;
      unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
                         (Instruction << 4);
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll

index ad489de..4009c5a 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
@@ -57,6 +57,42 @@ define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
    ret float %r
  }
  
+; FUNC-LABEL: {{^}}ds_ordered_add_default_cc:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, 0{{$}}
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define float @ds_ordered_add_default_cc() {
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_fastcc:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, 0{{$}}
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define fastcc float @ds_ordered_add_fastcc() {
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_func:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, 0{{$}}
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define float @ds_ordered_add_func() {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
  ; FUNC-LABEL: {{^}}ds_ordered_add_ps:
  ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
  ; GCN: s_mov_b32 m0, s0
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Sun, 5 Jan 2020 19:16:22 +0000 (14:16 -0500)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Fri, 24 Jan 2020 21:06:44 +0000 (13:06 -0800)
llvm/lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll		patch \| blob \| history