; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+avx512vl,+avx512dq,+avx512bw < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX
; RUN: llc -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
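
; Constant-mask expandload of 16 floats (bit 11 clear) with an undef
; pass-through: selects the zeroing {z} form of vexpandps.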
define <16 x float> @test1(float* %base) {
; ALL-LABEL: test1:
; ALL:       # BB#0:
; ALL-NEXT:    movw $-2049, %ax # imm = 0xF7FF
; ALL-NEXT:    kmovw %eax, %k1
; ALL-NEXT:    vexpandps (%rdi), %zmm0 {%k1} {z}
; ALL-NEXT:    retq
  %res = call <16 x float> @llvm.masked.expandload.v16f32(float* %base, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float> %res
}
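
; Same pattern, but merging into %src0 (bits 11 and 15 clear), so the
; non-zeroing vexpandps form is used.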
define <16 x float> @test2(float* %base, <16 x float> %src0) {
; ALL-LABEL: test2:
; ALL:       # BB#0:
; ALL-NEXT:    movw $30719, %ax # imm = 0x77FF
; ALL-NEXT:    kmovw %eax, %k1
; ALL-NEXT:    vexpandps (%rdi), %zmm0 {%k1}
; ALL-NEXT:    retq
  %res = call <16 x float> @llvm.masked.expandload.v16f32(float* %base, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x float> %src0)
  ret <16 x float> %res
}
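
; Variable <8 x i1> mask: SKX builds the mask register with vpmovw2m
; (AVX512BW); KNL must sign-extend the lanes to i64 and use vptestmq.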
define <8 x double> @test3(double* %base, <8 x double> %src0, <8 x i1> %mask) {
; SKX-LABEL: test3:
; SKX:       # BB#0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vexpandpd (%rdi), %zmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test3:
; KNL:       # BB#0:
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vexpandpd (%rdi), %zmm0 {%k1}
; KNL-NEXT:    retq
  %res = call <8 x double> @llvm.masked.expandload.v8f64(double* %base, <8 x i1> %mask, <8 x double> %src0)
  ret <8 x double> %res
}
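
; 128-bit expandload: KNL has no AVX512VL, so the operation is widened to a
; 512-bit vexpandps on zmm0.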
define <4 x float> @test4(float* %base, <4 x float> %src0) {
; SKX-LABEL: test4:
; SKX:       # BB#0:
; SKX-NEXT:    movb $7, %al
; SKX-NEXT:    kmovb %eax, %k1
; SKX-NEXT:    vexpandps (%rdi), %xmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test4:
; KNL:       # BB#0:
; KNL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT:    movw $7, %ax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vexpandps (%rdi), %zmm0 {%k1}
; KNL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
  %res = call <4 x float> @llvm.masked.expandload.v4f32(float* %base, <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> %src0)
  ret <4 x float> %res
}
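
; Integer 128-bit expandload (vpexpandq): KNL again widens to 512 bits and
; builds the mask with kmovw, since kmovb requires AVX512DQ.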
define <2 x i64> @test5(i64* %base, <2 x i64> %src0) {
; SKX-LABEL: test5:
; SKX:       # BB#0:
; SKX-NEXT:    movb $2, %al
; SKX-NEXT:    kmovb %eax, %k1
; SKX-NEXT:    vpexpandq (%rdi), %xmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test5:
; KNL:       # BB#0:
; KNL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT:    movb $2, %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vpexpandq (%rdi), %zmm0 {%k1}
; KNL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT:    retq
  %res = call <2 x i64> @llvm.masked.expandload.v2i64(i64* %base, <2 x i1> <i1 false, i1 true>, <2 x i64> %src0)
  ret <2 x i64> %res
}

declare <16 x float> @llvm.masked.expandload.v16f32(float*, <16 x i1>, <16 x float>)
declare <8 x double> @llvm.masked.expandload.v8f64(double*, <8 x i1>, <8 x double>)
declare <4 x float> @llvm.masked.expandload.v4f32(float*, <4 x i1>, <4 x float>)
declare <2 x i64> @llvm.masked.expandload.v2i64(i64*, <2 x i1>, <2 x i64>)
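
; compressstore counterpart of test1: constant mask, vcompressps to memory.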
define void @test6(float* %base, <16 x float> %V) {
; ALL-LABEL: test6:
; ALL:       # BB#0:
; ALL-NEXT:    movw $-2049, %ax # imm = 0xF7FF
; ALL-NEXT:    kmovw %eax, %k1
; ALL-NEXT:    vcompressps %zmm0, (%rdi) {%k1}
; ALL-NEXT:    retq
  call void @llvm.masked.compressstore.v16f32(<16 x float> %V, float* %base, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true>)
  ret void
}
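
; v8f32 compressstore with a variable mask: KNL widens to 512 bits and
; clears the upper eight mask bits with a kshiftlw/kshiftrw pair.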
define void @test7(float* %base, <8 x float> %V, <8 x i1> %mask) {
; SKX-LABEL: test7:
; SKX:       # BB#0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vcompressps %ymm0, (%rdi) {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test7:
; KNL:       # BB#0:
; KNL-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL-NEXT:    kshiftlw $8, %k0, %k0
; KNL-NEXT:    kshiftrw $8, %k0, %k1
; KNL-NEXT:    vcompressps %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v8f32(<8 x float> %V, float* %base, <8 x i1> %mask)
  ret void
}
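
; v8f64 compressstore with a variable mask; 512 bits wide on both targets.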
define void @test8(double* %base, <8 x double> %V, <8 x i1> %mask) {
; SKX-LABEL: test8:
; SKX:       # BB#0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test8:
; KNL:       # BB#0:
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v8f64(<8 x double> %V, double* %base, <8 x i1> %mask)
  ret void
}
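
; Integer analogue of test8: vpcompressq on a full zmm register.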
define void @test9(i64* %base, <8 x i64> %V, <8 x i1> %mask) {
; SKX-LABEL: test9:
; SKX:       # BB#0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vpcompressq %zmm0, (%rdi) {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test9:
; KNL:       # BB#0:
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vpcompressq %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v8i64(<8 x i64> %V, i64* %base, <8 x i1> %mask)
  ret void
}
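
; v4i64 compressstore: without AVX512VL, KNL sign-extends the mask to i64
; lanes and inserts it into a zeroed zmm before testing it into %k1.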
define void @test10(i64* %base, <4 x i64> %V, <4 x i1> %mask) {
; SKX-LABEL: test10:
; SKX:       # BB#0:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT:    vpcompressq %ymm0, (%rdi) {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test10:
; KNL:       # BB#0:
; KNL-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL-NEXT:    vpsrad $31, %xmm1, %xmm1
; KNL-NEXT:    vpmovsxdq %xmm1, %ymm1
; KNL-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT:    vinserti64x4 $0, %ymm1, %zmm2, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vpcompressq %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v4i64(<4 x i64> %V, i64* %base, <4 x i1> %mask)
  ret void
}
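
; v2i64 compressstore: the same widening on KNL, with the 64-bit mask lanes
; rebuilt from sign bits via vpsrad/vpshufd.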
define void @test11(i64* %base, <2 x i64> %V, <2 x i1> %mask) {
; SKX-LABEL: test11:
; SKX:       # BB#0:
; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX-NEXT:    vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT:    vpcompressq %xmm0, (%rdi) {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test11:
; KNL:       # BB#0:
; KNL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL-NEXT:    vpsrad $31, %xmm1, %xmm1
; KNL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; KNL-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm2, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vpcompressq %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v2i64(<2 x i64> %V, i64* %base, <2 x i1> %mask)
  ret void
}
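
; v4f32 compressstore, the 32-bit-element analogue of test10
; (vpslld/vptestmd rather than vpsllq/vptestmq).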
define void @test12(float* %base, <4 x float> %V, <4 x i1> %mask) {
; SKX-LABEL: test12:
; SKX:       # BB#0:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX-NEXT:    vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT:    vcompressps %xmm0, (%rdi) {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test12:
; KNL:       # BB#0:
; KNL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL-NEXT:    vpsrad $31, %xmm1, %xmm1
; KNL-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm2, %zmm1
; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT:    vcompressps %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v4f32(<4 x float> %V, float* %base, <4 x i1> %mask)
  ret void
}

declare void @llvm.masked.compressstore.v16f32(<16 x float>, float*, <16 x i1>)
declare void @llvm.masked.compressstore.v8f32(<8 x float>, float*, <8 x i1>)
declare void @llvm.masked.compressstore.v8f64(<8 x double>, double*, <8 x i1>)
declare void @llvm.masked.compressstore.v16i32(<16 x i32>, i32*, <16 x i1>)
declare void @llvm.masked.compressstore.v8i32(<8 x i32>, i32*, <8 x i1>)
declare void @llvm.masked.compressstore.v8i64(<8 x i64>, i64*, <8 x i1>)
declare void @llvm.masked.compressstore.v4i32(<4 x i32>, i32*, <4 x i1>)
declare void @llvm.masked.compressstore.v4f32(<4 x float>, float*, <4 x i1>)
declare void @llvm.masked.compressstore.v4i64(<4 x i64>, i64*, <4 x i1>)
declare void @llvm.masked.compressstore.v2i64(<2 x i64>, i64*, <2 x i1>)