From 8352b7457721b917463532983f568da118a10873 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 26 May 2015 15:55:52 +0000 Subject: [PATCH] R600/SI: Add assembler support for all CI and VI VOP2 instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238211 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.td | 14 +- lib/Target/R600/SIInstructions.td | 8 + lib/Target/R600/VIInstructions.td | 51 ++++++ test/MC/R600/vop2.s | 325 +++++++++++++++++++++++++++++--------- 4 files changed, 322 insertions(+), 76 deletions(-) diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 587ddb5cd1a..4fc24989b3b 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -1034,12 +1034,16 @@ class VOPProfile _ArgVT> { field string Asm64 = getAsm64.ret; } -// FIXME: I think these F16 profiles will need to use f16 types in order +// FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order // for the instruction patterns to work. def VOP_F16_F16 : VOPProfile <[f32, f32, untyped, untyped]>; def VOP_F16_I16 : VOPProfile <[f32, i32, untyped, untyped]>; def VOP_I16_F16 : VOPProfile <[i32, f32, untyped, untyped]>; +def VOP_F16_F16_F16 : VOPProfile <[f32, f32, f32, untyped]>; +def VOP_F16_F16_I16 : VOPProfile <[f32, f32, i32, untyped]>; +def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>; + def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>; def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>; def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>; @@ -1533,12 +1537,16 @@ let isCodeGenOnly = 0 in { def _si : VOP2Common , SIMCInstr , - VOP2_MADKe ; + VOP2_MADKe { + let AssemblerPredicates = [isSICI]; + } def _vi : VOP2Common , SIMCInstr , - VOP2_MADKe ; + VOP2_MADKe { + let AssemblerPredicates = [isVI]; + } } // End isCodeGenOnly = 0 } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index d92c4b62398..839c2e9ecdd 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -3424,4 +3424,12 @@ def : Pat < def : SHA256MaPattern ; +//============================================================================// +// Assembler aliases +//============================================================================// + +def : MnemonicAlias<"v_add_u32", "v_add_i32">; +def : MnemonicAlias<"v_sub_u32", "v_sub_i32">; +def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">; + } // End isGCN predicate diff --git a/lib/Target/R600/VIInstructions.td b/lib/Target/R600/VIInstructions.td index 9b85cbced68..5bf86e649ce 100644 --- a/lib/Target/R600/VIInstructions.td +++ b/lib/Target/R600/VIInstructions.td @@ -38,6 +38,57 @@ defm V_FRACT_F16 : VOP1Inst , "v_fract_f16", VOP_F16_F16>; defm V_SIN_F16 : VOP1Inst , "v_sin_f16", VOP_F16_F16>; defm V_COS_F16 : VOP1Inst , "v_cos_f16", VOP_F16_F16>; +//===----------------------------------------------------------------------===// +// VOP2 Instructions +//===----------------------------------------------------------------------===// + +let isCommutable = 1 in { + +defm V_ADD_F16 : VOP2Inst , "v_add_f16", VOP_F16_F16_F16>; +defm V_SUB_F16 : VOP2Inst , "v_sub_f16", VOP_F16_F16_F16>; +defm V_SUBREV_F16 : VOP2Inst , "v_subrev_f16", VOP_F16_F16_F16, + null_frag, "v_sub_f16" +>; +defm V_MUL_F16 : VOP2Inst , "v_mul_f16", VOP_F16_F16_F16>; +defm V_MAC_F16 : VOP2Inst , "v_mac_f16", VOP_F16_F16_F16>; +} // End isCommutable = 1 +defm V_MADMK_F16 : VOP2MADK , "v_madmk_f16">; +let isCommutable = 1 in { +defm V_MADAK_F16 : VOP2MADK , "v_madak_f16">; +defm V_ADD_U16 : VOP2Inst , "v_add_u16", VOP_I16_I16_I16>; +defm V_SUB_U16 : VOP2Inst , "v_sub_u16" , VOP_I16_I16_I16>; +defm V_SUBREV_U16 : VOP2Inst , "v_subrev_u16", VOP_I16_I16_I16>; +defm V_MUL_LO_U16 : VOP2Inst , "v_mul_lo_u16", VOP_I16_I16_I16>; +} // End isCommutable = 1 +defm V_LSHLREV_B16 : VOP2Inst , "v_lshlrev_b16", VOP_I16_I16_I16>; +defm V_LSHRREV_B16 : VOP2Inst , "v_lshrrev_b16", VOP_I16_I16_I16>; +defm V_ASHRREV_B16 : VOP2Inst , "v_ashrrev_b16", VOP_I16_I16_I16>; +let isCommutable = 1 in { +defm V_MAX_F16 : VOP2Inst , "v_max_f16", VOP_F16_F16_F16>; +defm V_MIN_F16 : VOP2Inst , "v_min_f16", VOP_F16_F16_F16>; +defm V_MAX_U16 : VOP2Inst , "v_max_u16", VOP_I16_I16_I16>; +defm V_MAX_I16 : VOP2Inst , "v_max_i16", VOP_I16_I16_I16>; +defm V_MIN_U16 : VOP2Inst , "v_min_u16", VOP_I16_I16_I16>; +defm V_MIN_I16 : VOP2Inst , "v_min_i16", VOP_I16_I16_I16>; +} // End isCommutable = 1 +defm V_LDEXP_F16 : VOP2Inst , "v_ldexp_f16", VOP_F16_F16_I16>; + +// Aliases to simplify matching of floating-pint instructions that are VOP2 on +// SI and VOP3 on VI. + +class SI2_VI3Alias : InstAlias < + name#" $dst, $src0, $src1", + (inst VGPR_32:$dst, 0, VCSrc_32:$src0, 0, VCSrc_32:$src1, 0, 0) +>, PredicateControl { + let UseInstAsmMatchConverter = 0; +} + +def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>; +def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>; +def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; +def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; +def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; + } // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI //===----------------------------------------------------------------------===// diff --git a/test/MC/R600/vop2.s b/test/MC/R600/vop2.s index 678008852f1..a1f3b8d8936 100644 --- a/test/MC/R600/vop2.s +++ b/test/MC/R600/vop2.s @@ -1,5 +1,12 @@ -// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s -// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=VI + +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI //===----------------------------------------------------------------------===// // Generic Checks for floating-point instructions (These have modifiers). @@ -8,23 +15,23 @@ // TODO: 64-bit encoding of instructions with modifiers // _e32 suffix -// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06] +// SICI: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06] v_add_f32_e32 v1, v2, v3 // src0 inline immediate -// CHECK: v_add_f32_e32 v1, 1.0, v3 ; encoding: [0xf2,0x06,0x02,0x06] +// SICI: v_add_f32_e32 v1, 1.0, v3 ; encoding: [0xf2,0x06,0x02,0x06] v_add_f32 v1, 1.0, v3 // src0 negative inline immediate -// CHECK: v_add_f32_e32 v1, -1.0, v3 ; encoding: [0xf3,0x06,0x02,0x06] +// SICI: v_add_f32_e32 v1, -1.0, v3 ; encoding: [0xf3,0x06,0x02,0x06] v_add_f32 v1, -1.0, v3 // src0 literal -// CHECK: v_add_f32_e32 v1, 0x42c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0x42] +// SICI: v_add_f32_e32 v1, 0x42c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0x42] v_add_f32 v1, 100.0, v3 // src0 negative literal -// CHECK: v_add_f32_e32 v1, 0xc2c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0xc2] +// SICI: v_add_f32_e32 v1, 0xc2c80000, v3 ; encoding: [0xff,0x06,0x02,0x06,0x00,0x00,0xc8,0xc2] v_add_f32 v1, -100.0, v3 //===----------------------------------------------------------------------===// @@ -32,35 +39,35 @@ v_add_f32 v1, -100.0, v3 //===----------------------------------------------------------------------===// // _e32 suffix -// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] +// SICI: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] v_mul_i32_i24_e32 v1, v2, v3 // _e64 suffix -// CHECK: v_mul_i32_i24_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x02,0x00] +// SICI: v_mul_i32_i24_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x02,0x00] v_mul_i32_i24_e64 v1, v2, v3 // src0 inline -// CHECK: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x12] +// SICI: v_mul_i32_i24_e32 v1, 3, v3 ; encoding: [0x83,0x06,0x02,0x12] v_mul_i32_i24 v1, 3, v3 // src0 negative inline -// CHECK: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x12] +// SICI: v_mul_i32_i24_e32 v1, -3, v3 ; encoding: [0xc3,0x06,0x02,0x12] v_mul_i32_i24 v1, -3, v3 // src1 inline -// CHECK: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x01,0x00] +// SICI: v_mul_i32_i24_e64 v1, v2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x01,0x00] v_mul_i32_i24 v1, v2, 3 // src1 negative inline -// CHECK: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x87,0x01,0x00] +// SICI: v_mul_i32_i24_e64 v1, v2, -3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x87,0x01,0x00] v_mul_i32_i24 v1, v2, -3 // src0 literal -// CHECK: v_mul_i32_i24_e32 v1, 0x64, v3 ; encoding: [0xff,0x06,0x02,0x12,0x64,0x00,0x00,0x00] +// SICI: v_mul_i32_i24_e32 v1, 0x64, v3 ; encoding: [0xff,0x06,0x02,0x12,0x64,0x00,0x00,0x00] v_mul_i32_i24 v1, 100, v3 // src1 negative literal -// CHECK: v_mul_i32_i24_e32 v1, 0xffffff9c, v3 ; encoding: [0xff,0x06,0x02,0x12,0x9c,0xff,0xff,0xff] +// SICI: v_mul_i32_i24_e32 v1, 0xffffff9c, v3 ; encoding: [0xff,0x06,0x02,0x12,0x9c,0xff,0xff,0xff] v_mul_i32_i24 v1, -100, v3 //===----------------------------------------------------------------------===// @@ -68,175 +75,347 @@ v_mul_i32_i24 v1, -100, v3 //===----------------------------------------------------------------------===// // src0 sgpr -// CHECK: v_mul_i32_i24_e32 v1, s2, v3 ; encoding: [0x02,0x06,0x02,0x12] +// SICI: v_mul_i32_i24_e32 v1, s2, v3 ; encoding: [0x02,0x06,0x02,0x12] v_mul_i32_i24 v1, s2, v3 // src1 sgpr -// CHECK: v_mul_i32_i24_e64 v1, v2, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x00,0x00] +// SICI: v_mul_i32_i24_e64 v1, v2, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x07,0x00,0x00] v_mul_i32_i24 v1, v2, s3 // src0, src1 same sgpr -// CHECK: v_mul_i32_i24_e64 v1, s2, s2 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x04,0x00,0x00] +// SICI: v_mul_i32_i24_e64 v1, s2, s2 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x04,0x00,0x00] v_mul_i32_i24 v1, s2, s2 // src0 sgpr, src1 inline -// CHECK: v_mul_i32_i24_e64 v1, s2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x06,0x01,0x00] +// SICI: v_mul_i32_i24_e64 v1, s2, 3 ; encoding: [0x01,0x00,0x12,0xd2,0x02,0x06,0x01,0x00] v_mul_i32_i24 v1, s2, 3 // src0 inline src1 sgpr -// CHECK: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x83,0x06,0x00,0x00] +// SICI: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x83,0x06,0x00,0x00] v_mul_i32_i24 v1, 3, s3 //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// -// CHECK: v_cndmask_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x00] +// GCN: v_cndmask_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x00] v_cndmask_b32 v1, v2, v3 -// CHECK: v_readlane_b32 s1, v2, s3 ; encoding: [0x02,0x07,0x02,0x02] +// SICI: v_readlane_b32 s1, v2, s3 ; encoding: [0x02,0x07,0x02,0x02] +// VI: v_readlane_b32 s1, v2, s3 ; encoding: [0x01,0x00,0x89,0xd2,0x02,0x07,0x00,0x00] v_readlane_b32 s1, v2, s3 -// CHECK: v_writelane_b32 v1, s2, s3 ; encoding: [0x02,0x06,0x02,0x04] +// SICI: v_writelane_b32 v1, s2, s3 ; encoding: [0x02,0x06,0x02,0x04] +// VI: v_writelane_b32 v1, s2, s3 ; encoding: [0x01,0x00,0x8a,0xd2,0x02,0x06,0x00,0x00] v_writelane_b32 v1, s2, s3 -// CHECK: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06] +// SICI: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06] +// VI: v_add_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x02] v_add_f32 v1, v2, v3 -// CHECK: v_sub_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x08] +// SICI: v_sub_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x08] +// VI: v_sub_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x04] v_sub_f32 v1, v2, v3 -// CHECK: v_subrev_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0a] +// SICI: v_subrev_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0a] +// VI: v_subrev_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x06] v_subrev_f32 v1, v2, v3 -// CHECK: v_mac_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c] +// SICI: v_mac_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c] +// NOVI: error: instruction not supported on this GPU +// NOVI: v_mac_legacy_f32 v1, v2, v3 v_mac_legacy_f32 v1, v2, v3 -// CHECK: v_mul_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e] +// SICI: v_mul_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e] +// VI: v_mul_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x08] v_mul_legacy_f32_e32 v1, v2, v3 -// CHECK: v_mul_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10] +// SICI: v_mul_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10] +// VI: v_mul_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0a] v_mul_f32 v1, v2, v3 -// CHECK: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] +// SICI: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] +// VI: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c] v_mul_i32_i24 v1, v2, v3 -// CHECK: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x14] +// SICI: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x14] +// VI: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e] v_mul_hi_i32_i24 v1, v2, v3 -// CHECK: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x16] +// SICI: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x16] +// VI: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10] v_mul_u32_u24 v1, v2, v3 -// CHECK: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18] +// SICI: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18] +// VI: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] v_mul_hi_u32_u24 v1, v2, v3 -// CHECK: v_min_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a] +// SICI: v_min_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a] +// NOVI: error: instruction not supported on this GPU +// NOVI: v_min_legacy_f32_e32 v1, v2, v3 v_min_legacy_f32_e32 v1, v2, v3 -// CHECK: v_max_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c] +// SICI: v_max_legacy_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c] +// NOVI: error: instruction not supported on this GPU +// NOVI: v_max_legacy_f32 v1, v2, v3 v_max_legacy_f32 v1, v2, v3 -// CHECK: v_min_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e] +// SICI: v_min_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e] +// VI: v_min_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x14] v_min_f32_e32 v1, v2, v3 -// CHECK: v_max_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x20] +// SICI: v_max_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x20] +// VI: v_max_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x16] v_max_f32 v1, v2 v3 -// CHECK: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22] +// SICI: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22] +// VI: v_min_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x18] v_min_i32 v1, v2, v3 -// CHECK: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24] +// SICI: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24] +// VI: v_max_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1a] v_max_i32 v1, v2, v3 -// CHECK: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26] +// SICI: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26] +// VI: v_min_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1c] v_min_u32 v1, v2, v3 -// CHECK: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28] +// SICI: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28] +// VI: v_max_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x1e] v_max_u32 v1, v2, v3 -// CHECK: v_lshr_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a] +// SICI: v_lshr_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a] +// NOVI: error: instruction not supported on this GPU +// NOVI: v_lshr_b32 v1, v2, v3 v_lshr_b32 v1, v2, v3 -// CHECK: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c] +// SICI: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c] +// VI: v_lshrrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x20] v_lshrrev_b32 v1, v2, v3 -// CHECK: v_ashr_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2e] +// SICI: v_ashr_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2e] +// NOVI: error: instruction not supported on this GPU +// NOVI: v_ashr_i32 v1, v2, v3 v_ashr_i32 v1, v2, v3 -// CHECK: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x30] +// SICI: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x30] +// VI: v_ashrrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x22] v_ashrrev_i32 v1, v2, v3 -// CHECK: v_lshl_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32] +// SICI: v_lshl_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32] +// NOVI: error: instruction not supported on this GPU +// NOVI: v_lshl_b32_e32 v1, v2, v3 v_lshl_b32_e32 v1, v2, v3 -// CHECK: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34] +// SICI: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34] +// VI: v_lshlrev_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x24] v_lshlrev_b32 v1, v2, v3 -// CHECK: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36] +// SICI: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36] +// VI: v_and_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x26] v_and_b32 v1, v2, v3 -// CHECK: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38] +// SICI: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38] +// VI: v_or_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x28] v_or_b32 v1, v2, v3 -// CHECK: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a] +// SICI: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a] +// VI: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a] v_xor_b32 v1, v2, v3 -// CHECK: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c] +// SICI: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c] +// VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00] v_bfm_b32 v1, v2, v3 -// CHECK: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e] +// SICI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e] +// VI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c] v_mac_f32 v1, v2, v3 -// CHECK: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42] +// SICI: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42] +// VI: v_madmk_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x2e,0x00,0x00,0x80,0x42] v_madmk_f32 v1, v2, v3, 64.0 -// CHECK: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42] +// SICI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42] +// VI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42] v_madak_f32 v1, v2, v3, 64.0 -// CHECK: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44] +// SICI: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44] +// VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00] v_bcnt_u32_b32 v1, v2, v3 -// CHECK: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46] +// SICI: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46] +// VI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00] v_mbcnt_lo_u32_b32 v1, v2, v3 -// CHECK: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48] -v_mbcnt_hi_u32_b32_e32 v1, v2, v3 +// SICI: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48] +// VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00] +v_mbcnt_hi_u32_b32 v1, v2, v3 -// CHECK: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a] +// SICI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a] +// VI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32] v_add_i32 v1, v2, v3 -// CHECK: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c] -v_sub_i32_e32 v1, v2, v3 +// SICI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a] +// VI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32] +v_add_u32 v1, v2, v3 + +// SICI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c] +// VI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34] +v_sub_i32 v1, v2, v3 + +// SICI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c] +// VI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34] +v_sub_u32 v1, v2, v3 -// CHECK: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e] +// SICI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e] +// VI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36] v_subrev_i32 v1, v2, v3 -// CHECK : v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] +// SICI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e] +// VI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36] +v_subrev_u32 v1, v2, v3 + +// SICI: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] +// VI: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38] v_addc_u32 v1, v2, v3 -// CHECK: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] +// SICI: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] +// VI: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a] v_subb_u32 v1, v2, v3 -// CHECK: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54] +// SICI: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54] +// VI: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c] v_subbrev_u32 v1, v2, v3 -// CHECK: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56] +// SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56] +// VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00] v_ldexp_f32 v1, v2, v3 -// CHECK: v_cvt_pkaccum_u8_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58] +// SICI: v_cvt_pkaccum_u8_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58] +// VI: v_cvt_pkaccum_u8_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00] v_cvt_pkaccum_u8_f32 v1, v2, v3 -// CHECK: v_cvt_pknorm_i16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a] +// SICI: v_cvt_pknorm_i16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a] +// VI: v_cvt_pknorm_i16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00] v_cvt_pknorm_i16_f32 v1, v2, v3 -// CHECK: v_cvt_pknorm_u16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c] +// SICI: v_cvt_pknorm_u16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c] +// VI: v_cvt_pknorm_u16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00] v_cvt_pknorm_u16_f32 v1, v2, v3 -// CHECK: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e] +// SICI: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e] +// VI: v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00] v_cvt_pkrtz_f16_f32 v1, v2, v3 -// CHECK: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60] -v_cvt_pk_u16_u32_e32 v1, v2, v3 +// SICI: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60] +// VI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00] +v_cvt_pk_u16_u32 v1, v2, v3 -// CHECK: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62] +// SICI: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62] +// VI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00] v_cvt_pk_i16_i32 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_add_f16 v1, v2, v3 +// VI: v_add_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e] +v_add_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_sub_f16 v1, v2, v3 +// VI: v_sub_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x40] +v_sub_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_subrev_f16 v1, v2, v3 +// VI: v_subrev_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x42] +v_subrev_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_mul_f16 v1, v2, v3 +// VI: v_mul_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44] +v_mul_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_mac_f16 v1, v2, v3 +// VI: v_mac_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46] +v_mac_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_madmk_f16 v1, v2, v3, 64.0 +// VI: v_madmk_f16_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x00,0x80,0x42] +v_madmk_f16 v1, v2, v3, 64.0 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_madak_f16 v1, v2, v3, 64.0 +// VI: v_madak_f16_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x4a,0x00,0x00,0x80,0x42] +v_madak_f16 v1, v2, v3, 64.0 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_add_u16 v1, v2, v3 +// VI: v_add_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c] +v_add_u16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_sub_u16 v1, v2, v3 +// VI: v_sub_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e] +v_sub_u16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_subrev_u16 v1, v2, v3 +// VI: v_subrev_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] +v_subrev_u16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_mul_lo_u16 v1, v2, v3 +// VI: v_mul_lo_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] +v_mul_lo_u16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_lshlrev_b16 v1, v2, v3 +// VI: v_lshlrev_b16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54] +v_lshlrev_b16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_lshrrev_b16 v1, v2, v3 +// VI: v_lshrrev_b16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56] +v_lshrrev_b16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_ashrrev_b16 v1, v2, v3 +// VI: v_ashrrev_b16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58] +v_ashrrev_b16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_max_f16 v1, v2, v3 +// VI: v_max_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a] +v_max_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_min_f16 v1, v2, v3 +// VI: v_min_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c] +v_min_f16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_max_u16 v1, v2, v3 +// VI: v_max_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e] +v_max_u16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_max_i16 v1, v2, v3 +// VI: v_max_i16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60] +v_max_i16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_min_u16 v1, v2, v3 +// VI: v_min_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62] +v_min_u16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_min_i16 v1, v2, v3 +// VI: v_min_i16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x64] +v_min_i16 v1, v2, v3 + +// NOSICI: error: instruction not supported on this GPU +// NOSICI: v_ldexp_f16 v1, v2, v3 +// VI: v_ldexp_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x66] +v_ldexp_f16 v1, v2, v3 -- 2.11.0