AMDGPU: Remove modifiers from v_div_scale_*

author Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 19 Jan 2017 06:04:12 +0000 (06:04 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 19 Jan 2017 06:04:12 +0000 (06:04 +0000)
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

index 2b4fc53..5bf347e 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -727,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
    unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
  
-  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
-  // omod
-  SDValue Ops[8];
-
-  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
-  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
-  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
-  CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
  }
  
  bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td

index 5efa64d..c2a4d4b 100644 (file)
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -70,8 +70,10 @@ class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> {
  }
  
  class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
+  // v_div_scale_{f32|f64} do not support input modifiers.
+  let HasModifiers = 0;
    let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
-  let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
+  let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
  }
  
  def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
@@ -168,12 +170,14 @@ def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPU
  def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
    let SchedRW = [WriteFloatFMA, WriteSALU];
    let hasExtraSrcRegAllocReq = 1;
+  let AsmMatchConverter = "";
  }
  
  // Double precision division pre-scale.
  def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
    let SchedRW = [WriteDouble, WriteSALU];
    let hasExtraSrcRegAllocReq = 1;
+  let AsmMatchConverter = "";
  }
  
  def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
diff --git a/test/CodeGen/AMDGPU/inserted-wait-states.mir b/test/CodeGen/AMDGPU/inserted-wait-states.mir

index 7cc9c7c..85cd903 100644 (file)
--- a/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ b/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -63,7 +63,7 @@ body: |
      S_BRANCH %bb.3
  
    bb.3:
-    %vgpr4, %vcc = V_DIV_SCALE_F32 0, %vgpr1, 0, %vgpr1, 0, %vgpr3, 0, 0, implicit %exec
+    %vgpr4, %vcc = V_DIV_SCALE_F32 %vgpr1, %vgpr1, %vgpr3, implicit %exec
      %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
      S_ENDPGM
  
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll

index 38e4b84..8e5c62c 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
@@ -322,7 +322,8 @@ define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float a
  ; SI-LABEL: {{^}}test_div_scale_f32_fabs_num:
  ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
  ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], |[[A]]|
+; SI: v_and_b32_e32 [[ABS_A:v[0-9]+]], 0x7fffffff, [[A]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[ABS_A]]
  ; SI: buffer_store_dword [[RESULT0]]
  ; SI: s_endpgm
  define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
@@ -344,7 +345,8 @@ define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspa
  ; SI-LABEL: {{^}}test_div_scale_f32_fabs_den:
  ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
  ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], |[[B]]|, |[[B]]|, [[A]]
+; SI: v_and_b32_e32 [[ABS_B:v[0-9]+]], 0x7fffffff, [[B]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[ABS_B]], [[ABS_B]], [[A]]
  ; SI: buffer_store_dword [[RESULT0]]
  ; SI: s_endpgm
  define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
diff --git a/test/MC/AMDGPU/vop3.s b/test/MC/AMDGPU/vop3.s

index 908db41..e1dceef 100644 (file)
--- a/test/MC/AMDGPU/vop3.s
+++ b/test/MC/AMDGPU/vop3.s
@@ -365,10 +365,6 @@ v_div_scale_f32  v24, vcc, v22, v22, v20
  // SICI: v_div_scale_f32 v24, vcc, v22, v22, v20 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0x52,0x04]
  // VI:   v_div_scale_f32 v24, vcc, v22, v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0x52,0x04]
  
-v_div_scale_f32  v24, vcc, s[10:11], v22, v20
-// SICI: v_div_scale_f32 v24, vcc, s[10:11], v22, v20 ; encoding: [0x18,0x6a,0xda,0xd2,0x0a,0x2c,0x52,0x04]
-// VI:   v_div_scale_f32 v24, vcc, s[10:11], v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x0a,0x2c,0x52,0x04]
-
  v_div_scale_f32  v24, s[10:11], v22, v22, v20
  // SICI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xda,0xd2,0x16,0x2d,0x52,0x04]
  // VI:   v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xe0,0xd1,0x16,0x2d,0x52,0x04]
@@ -378,8 +374,8 @@ v_div_scale_f32  v24, vcc, v22, 1.0, v22
  // VI:   v_div_scale_f32 v24, vcc, v22, 1.0, v22 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0xe5,0x59,0x04]
  
  v_div_scale_f32  v24, vcc, v22, v22, -2.0
-// SICI: v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0xd2,0x83]
-// VI:   v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0xd2,0x83]
+// SICI: v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0xd6,0x03]
+// VI:   v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0xd6,0x03]
  
  v_div_scale_f32 v24, vcc, v22, v22, 0xc0000000
  // SICI: v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0xd6,0x03]
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 19 Jan 2017 06:04:12 +0000 (06:04 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 19 Jan 2017 06:04:12 +0000 (06:04 +0000)
lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp		patch | blob | history
lib/Target/AMDGPU/VOP3Instructions.td		patch | blob | history
test/CodeGen/AMDGPU/inserted-wait-states.mir		patch | blob | history
test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll		patch | blob | history
test/MC/AMDGPU/vop3.s		patch | blob | history