From: Amy Kwan Date: Thu, 18 Jun 2020 18:11:09 +0000 (-0500) Subject: [PowerPC][Power10] Implement Parallel Bits Deposit/Extract Builtins in LLVM/Clang X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=c45c1611303b4609016fa69c1c987ede3bf92006;p=android-x86%2Fexternal-llvm-project.git [PowerPC][Power10] Implement Parallel Bits Deposit/Extract Builtins in LLVM/Clang This patch implements builtins for the following prototypes: vector unsigned long long vec_pdep(vector unsigned long long, vector unsigned long long); vector unsigned long long vec_pext(vector unsigned long long, vector unsigned long long __b); unsigned long long __builtin_pdepd (unsigned long long, unsigned long long); unsigned long long __builtin_pextd (unsigned long long, unsigned long long); Revision Depends on D80758 Differential Revision: https://reviews.llvm.org/D80935 --- diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 314e1cc0590..30077e2e8d0 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -298,6 +298,10 @@ BUILTIN(__builtin_altivec_vrldmi, "V2ULLiV2ULLiV2ULLiV2ULLi", "") BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "") BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "") +// P10 Vector Parallel Bits built-ins. +BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "") +BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "") + // VSX built-ins. 
BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "") @@ -470,6 +474,8 @@ BUILTIN(__builtin_divweu, "UiUiUi", "") BUILTIN(__builtin_divde, "SLLiSLLiSLLi", "") BUILTIN(__builtin_divdeu, "ULLiULLiULLi", "") BUILTIN(__builtin_bpermd, "SLLiSLLiSLLi", "") +BUILTIN(__builtin_pdepd, "ULLiULLiULLi", "") +BUILTIN(__builtin_pextd, "ULLiULLiULLi", "") // Vector int128 (un)pack BUILTIN(__builtin_unpack_vector_int128, "ULLiV1LLLii", "") diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 7e231a2a428..1e1e57cd1ff 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -16761,6 +16761,23 @@ static vector signed short __ATTRS_o_ai vec_nabs(vector signed short __a) { static vector signed char __ATTRS_o_ai vec_nabs(vector signed char __a) { return __builtin_altivec_vminsb(__a, -__a); } + +#ifdef __POWER10_VECTOR__ +/* vec_pdep */ + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_pdep(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_altivec_vpdepd(__a, __b); +} + +/* vec_pext */ + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_pext(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_altivec_vpextd(__a, __b); +} +#endif /* __POWER10_VECTOR__ */ + #undef __ATTRS_o_ai #endif /* __ALTIVEC_H */ diff --git a/clang/test/CodeGen/builtins-ppc-p10.c b/clang/test/CodeGen/builtins-ppc-p10.c new file mode 100644 index 00000000000..c21e8026d0c --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-p10.c @@ -0,0 +1,15 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu pwr10 \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +unsigned long long ulla, ullb; + +unsigned long long test_pdepd(void) { + // CHECK: @llvm.ppc.pdepd + return __builtin_pdepd(ulla, ullb); +} + +unsigned long long test_pextd(void) { + // CHECK: @llvm.ppc.pextd + return __builtin_pextd(ulla, ullb); +} diff --git 
a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c new file mode 100644 index 00000000000..31c24f382f1 --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -0,0 +1,20 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec \ +// RUN: -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \ +// RUN: -o - | FileCheck %s + +#include <altivec.h> + +vector unsigned long long vulla, vullb; + +vector unsigned long long test_vpdepd(void) { + // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_pdep(vulla, vullb); +} + +vector unsigned long long test_vpextd(void) { + // CHECK: @llvm.ppc.altivec.vpextd(<2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_pext(vulla, vullb); +} diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 8e4bfed83e0..79a3221f014 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -60,6 +60,14 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + // Parallel Bits Deposit/Extract Doubleword Builtins. + def int_ppc_pdepd + : GCCBuiltin<"__builtin_pdepd">, + Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + def int_ppc_pextd + : GCCBuiltin<"__builtin_pextd">, + Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + def int_ppc_truncf128_round_to_odd : GCCBuiltin<"__builtin_truncf128_round_to_odd">, Intrinsic <[llvm_double_ty], [llvm_f128_ty], [IntrNoMem]>; @@ -402,6 +410,13 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">, Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>; + // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins.
+ def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vpextd : GCCBuiltin<"__builtin_altivec_vpextd">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; } // Vector average. diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index a4a9688e3de..a90cba09c61 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -191,6 +191,7 @@ multiclass 8LS_DForm_R_SI34_XT6_RA5_p opcode, dag OOL, dag IOL, } def PrefixInstrs : Predicate<"PPCSubTarget->hasPrefixInstrs()">; +def IsISA3_1 : Predicate<"PPCSubTarget->isISA3_1()">; let Predicates = [PrefixInstrs] in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in { @@ -500,3 +501,19 @@ let Predicates = [PCRelativeMemops], AddedComplexity = 500 in { def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>; } +let Predicates = [IsISA3_1] in { + def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vpdepd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vpdepd v2i64:$vA, v2i64:$vB))]>; + def VPEXTD : VXForm_1<1421, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vpextd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, + (int_ppc_altivec_vpextd v2i64:$vA, v2i64:$vB))]>; + def PDEPD : XForm_6<31, 156, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "pdepd $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (int_ppc_pdepd i64:$rS, i64:$rB))]>; + def PEXTD : XForm_6<31, 188, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "pextd $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (int_ppc_pextd i64:$rS, i64:$rB))]>; +} diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td index f4309dbde48..0a1ae7e55b3 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -41,8 +41,10 @@ def 
P9Model : SchedMachineModel { let CompleteModel = 1; // Do not support QPX (Quad Processing eXtension), SPE (Signal Processing - // Engine), prefixed instructions on Power 9 or PC relative mem ops. - let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops]; + // Engine), prefixed instructions on Power 9, PC relative mem ops, or + // instructions introduced in ISA 3.1. + let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops, + IsISA3_1]; } diff --git a/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll b/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll new file mode 100644 index 00000000000..fc2ebf89079 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s + +; These test cases aim to test the bit manipulation operations on Power10. 
+ +declare <2 x i64> @llvm.ppc.altivec.vpdepd(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.ppc.altivec.vpextd(<2 x i64>, <2 x i64>) +declare i64 @llvm.ppc.pdepd(i64, i64) +declare i64 @llvm.ppc.pextd(i64, i64) + +define <2 x i64> @test_vpdepd(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vpdepd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpdepd v2, v2, v3 +; CHECK-NEXT: blr +entry: + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vpdepd(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %tmp +} + +define <2 x i64> @test_vpextd(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vpextd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpextd v2, v2, v3 +; CHECK-NEXT: blr +entry: + %tmp = tail call <2 x i64> @llvm.ppc.altivec.vpextd(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %tmp +} + +define i64 @test_pdepd(i64 %a, i64 %b) { +; CHECK-LABEL: test_pdepd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pdepd r3, r3, r4 +; CHECK-NEXT: blr +entry: + %tmp = tail call i64 @llvm.ppc.pdepd(i64 %a, i64 %b) + ret i64 %tmp +} + +define i64 @test_pextd(i64 %a, i64 %b) { +; CHECK-LABEL: test_pextd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pextd r3, r3, r4 +; CHECK-NEXT: blr +entry: + %tmp = tail call i64 @llvm.ppc.pextd(i64 %a, i64 %b) + ret i64 %tmp +} diff --git a/llvm/test/MC/Disassembler/PowerPC/p10insts.txt b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt new file mode 100644 index 00000000000..ac95e30fbde --- /dev/null +++ b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt @@ -0,0 +1,14 @@ +# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-linux-gnu \ +# RUN: -mcpu=pwr10 | FileCheck %s + +# CHECK: vpdepd 1, 2, 0 +0x10 0x22 0x05 0xcd + +# CHECK: vpextd 1, 2, 0 +0x10 0x22 0x05 0x8d + +# CHECK: pdepd 1, 2, 4 +0x7c 0x41 0x21 0x38 + +# CHECK: pextd 1, 2, 4 +0x7c 0x41 0x21 0x78 diff --git a/llvm/test/MC/PowerPC/p10.s b/llvm/test/MC/PowerPC/p10.s new file mode 100644 index 00000000000..d2b399c531b --- /dev/null +++ b/llvm/test/MC/PowerPC/p10.s @@ -0,0 +1,17 @@ +# RUN: llvm-mc -triple 
powerpc64-unknown-linux-gnu --show-encoding %s | \ +# RUN: FileCheck -check-prefix=CHECK-BE %s +# RUN: llvm-mc -triple powerpc64le-unknown-linux-gnu --show-encoding %s | \ +# RUN: FileCheck -check-prefix=CHECK-LE %s + +# CHECK-BE: vpdepd 1, 2, 0 # encoding: [0x10,0x22,0x05,0xcd] +# CHECK-LE: vpdepd 1, 2, 0 # encoding: [0xcd,0x05,0x22,0x10] + vpdepd 1, 2, 0 +# CHECK-BE: vpextd 1, 2, 0 # encoding: [0x10,0x22,0x05,0x8d] +# CHECK-LE: vpextd 1, 2, 0 # encoding: [0x8d,0x05,0x22,0x10] + vpextd 1, 2, 0 +# CHECK-BE: pdepd 1, 2, 4 # encoding: [0x7c,0x41,0x21,0x38] +# CHECK-LE: pdepd 1, 2, 4 # encoding: [0x38,0x21,0x41,0x7c] + pdepd 1, 2, 4 +# CHECK-BE: pextd 1, 2, 4 # encoding: [0x7c,0x41,0x21,0x78] +# CHECK-LE: pextd 1, 2, 4 # encoding: [0x78,0x21,0x41,0x7c] + pextd 1, 2, 4