From 02524a88e42adad84fcdbcf4c3ab2df314a5712a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 11 Feb 2017 07:01:40 +0000 Subject: [PATCH] [AVX-512] Add VPINSRB/W/D/Q instructions to load folding tables. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294830 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 4 ++++ test/CodeGen/X86/stack-folding-int-avx512.ll | 32 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 639838c20b7..eb606ba3214 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1923,6 +1923,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 }, { X86::VPERMQZrr, X86::VPERMQZrm, 0 }, { X86::VPERMWZrr, X86::VPERMWZrm, 0 }, + { X86::VPINSRBZrr, X86::VPINSRBZrm, 0 }, + { X86::VPINSRDZrr, X86::VPINSRDZrm, 0 }, + { X86::VPINSRQZrr, X86::VPINSRQZrm, 0 }, + { X86::VPINSRWZrr, X86::VPINSRWZrm, 0 }, { X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 }, { X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 }, { X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 }, diff --git a/test/CodeGen/X86/stack-folding-int-avx512.ll b/test/CodeGen/X86/stack-folding-int-avx512.ll index ae56a9a8750..ab4dfd09c98 100644 --- a/test/CodeGen/X86/stack-folding-int-avx512.ll +++ b/test/CodeGen/X86/stack-folding-int-avx512.ll @@ -583,6 +583,38 @@ define <32 x i16> @stack_fold_permwvar_maskz(<32 x i16> %a0, <32 x i16> %a1, i32 ret <32 x i16> %4 } +define <16 x i8> @stack_fold_pinsrb(<16 x i8> %a0, i8 %a1) { + ;CHECK-LABEL: stack_fold_pinsrb + ;CHECK: vpinsrb $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + %2 = insertelement <16 x i8> %a0, i8 %a1, i32 1 + ret <16 x i8> %2 +} + +define <4 x i32> @stack_fold_pinsrd(<4 x i32> %a0, i32 %a1) { + ;CHECK-LABEL: stack_fold_pinsrd + ;CHECK: vpinsrd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + %2 = insertelement <4 x i32> %a0, i32 %a1, i32 1 + ret <4 x i32> %2 +} + +define <2 x i64> @stack_fold_pinsrq(<2 x i64> %a0, i64 %a1) { + ;CHECK-LABEL: stack_fold_pinsrq + ;CHECK: vpinsrq $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + %2 = insertelement <2 x i64> %a0, i64 %a1, i32 1 + ret <2 x i64> %2 +} + +define <8 x i16> @stack_fold_pinsrw(<8 x i16> %a0, i16 %a1) { + ;CHECK-LABEL: stack_fold_pinsrw + ;CHECK: vpinsrw $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + %2 = insertelement <8 x i16> %a0, i16 %a1, i32 1 + ret <8 x i16> %2 +} + define <32 x i16> @stack_fold_pmaddubsw_zmm(<64 x i8> %a0, <64 x i8> %a1) { ;CHECK-LABEL: stack_fold_pmaddubsw_zmm ;CHECK: vpmaddubsw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload -- 2.11.0