1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX
5 ; This test is a collection of AVX-512 instruction patterns used to check the scheduling information printed for each of them.
; addpd512: fadd of two <8 x double> register arguments.
; Expected codegen under both check prefixes is a single zmm vaddpd followed by
; retq, each carrying the scheduling annotation printed by -print-schedule.
7 define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
8 ; GENERIC-LABEL: addpd512:
9 ; GENERIC: # %bb.0: # %entry
10 ; GENERIC-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
11 ; GENERIC-NEXT: retq # sched: [1:1.00]
13 ; SKX-LABEL: addpd512:
14 ; SKX: # %bb.0: # %entry
15 ; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
16 ; SKX-NEXT: retq # sched: [7:1.00]
18 %add.i = fadd <8 x double> %x, %y
19 ret <8 x double> %add.i
; addpd512fold: fadd of an <8 x double> argument with a constant vector.
; The constant is not a splat (lane 5 is 3.8 where lane 1 is 3.4); the expected
; vaddpd takes it as a plain RIP-relative memory operand with no embedded
; broadcast.
22 define <8 x double> @addpd512fold(<8 x double> %y) {
23 ; GENERIC-LABEL: addpd512fold:
24 ; GENERIC: # %bb.0: # %entry
25 ; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [7:1.00]
26 ; GENERIC-NEXT: retq # sched: [1:1.00]
28 ; SKX-LABEL: addpd512fold:
29 ; SKX: # %bb.0: # %entry
30 ; SKX-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
31 ; SKX-NEXT: retq # sched: [7:1.00]
33 %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
34 ret <8 x double> %add.i
; addps512: fadd of two <16 x float> register arguments.
; Expected codegen is a single zmm vaddps followed by retq, with scheduling
; annotations for both CPU configurations.
37 define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
38 ; GENERIC-LABEL: addps512:
39 ; GENERIC: # %bb.0: # %entry
40 ; GENERIC-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
41 ; GENERIC-NEXT: retq # sched: [1:1.00]
43 ; SKX-LABEL: addps512:
44 ; SKX: # %bb.0: # %entry
45 ; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
46 ; SKX-NEXT: retq # sched: [7:1.00]
48 %add.i = fadd <16 x float> %x, %y
49 ret <16 x float> %add.i
; addps512fold: fadd of a <16 x float> argument with a constant vector.
; The constant is not a splat; the expected vaddps takes it as a RIP-relative
; memory operand with no embedded broadcast.
52 define <16 x float> @addps512fold(<16 x float> %y) {
53 ; GENERIC-LABEL: addps512fold:
54 ; GENERIC: # %bb.0: # %entry
55 ; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [7:1.00]
56 ; GENERIC-NEXT: retq # sched: [1:1.00]
58 ; SKX-LABEL: addps512fold:
59 ; SKX: # %bb.0: # %entry
60 ; SKX-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
61 ; SKX-NEXT: retq # sched: [7:1.00]
63 %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
64 ret <16 x float> %add.i
; subpd512: fsub of two <8 x double> register arguments (%x - %y).
; Expected codegen is a single zmm vsubpd followed by retq.
67 define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
68 ; GENERIC-LABEL: subpd512:
69 ; GENERIC: # %bb.0: # %entry
70 ; GENERIC-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
71 ; GENERIC-NEXT: retq # sched: [1:1.00]
73 ; SKX-LABEL: subpd512:
74 ; SKX: # %bb.0: # %entry
75 ; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
76 ; SKX-NEXT: retq # sched: [7:1.00]
78 %sub.i = fsub <8 x double> %x, %y
79 ret <8 x double> %sub.i
; subpd512fold: fsub of an <8 x double> argument and a vector loaded from %x.
; The load (align 8) is expected to fold into the memory operand of vsubpd,
; addressed through %rdi.
82 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
83 ; GENERIC-LABEL: subpd512fold:
84 ; GENERIC: # %bb.0: # %entry
85 ; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
86 ; GENERIC-NEXT: retq # sched: [1:1.00]
88 ; SKX-LABEL: subpd512fold:
89 ; SKX: # %bb.0: # %entry
90 ; SKX-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
91 ; SKX-NEXT: retq # sched: [7:1.00]
93 %tmp2 = load <8 x double>, <8 x double>* %x, align 8
94 %sub.i = fsub <8 x double> %y, %tmp2
95 ret <8 x double> %sub.i
; subps512: fsub of two <16 x float> register arguments (%x - %y).
; Expected codegen is a single zmm vsubps followed by retq.
98 define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
99 ; GENERIC-LABEL: subps512:
100 ; GENERIC: # %bb.0: # %entry
101 ; GENERIC-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
102 ; GENERIC-NEXT: retq # sched: [1:1.00]
104 ; SKX-LABEL: subps512:
105 ; SKX: # %bb.0: # %entry
106 ; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
107 ; SKX-NEXT: retq # sched: [7:1.00]
109 %sub.i = fsub <16 x float> %x, %y
110 ret <16 x float> %sub.i
; subps512fold: fsub of a <16 x float> argument and a vector loaded from %x.
; The load (align 4) is expected to fold into the memory operand of vsubps,
; addressed through %rdi.
113 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
114 ; GENERIC-LABEL: subps512fold:
115 ; GENERIC: # %bb.0: # %entry
116 ; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
117 ; GENERIC-NEXT: retq # sched: [1:1.00]
119 ; SKX-LABEL: subps512fold:
120 ; SKX: # %bb.0: # %entry
121 ; SKX-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
122 ; SKX-NEXT: retq # sched: [7:1.00]
124 %tmp2 = load <16 x float>, <16 x float>* %x, align 4
125 %sub.i = fsub <16 x float> %y, %tmp2
126 ret <16 x float> %sub.i
; imulq512: integer mul of two <8 x i64> register arguments.
; Expected codegen is a single zmm vpmullq for both CPU configurations.
129 define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
130 ; GENERIC-LABEL: imulq512:
132 ; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
133 ; GENERIC-NEXT: retq # sched: [1:1.00]
135 ; SKX-LABEL: imulq512:
137 ; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.00]
138 ; SKX-NEXT: retq # sched: [7:1.00]
139 %z = mul <8 x i64>%x, %y
; imulq256: integer mul of two <4 x i64> register arguments.
; Expected codegen is a single ymm vpmullq for both CPU configurations.
143 define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
144 ; GENERIC-LABEL: imulq256:
146 ; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
147 ; GENERIC-NEXT: retq # sched: [1:1.00]
149 ; SKX-LABEL: imulq256:
151 ; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.00]
152 ; SKX-NEXT: retq # sched: [7:1.00]
153 %z = mul <4 x i64>%x, %y
; imulq128: integer mul of two <2 x i64> register arguments.
; Expected codegen is a single xmm vpmullq for both CPU configurations.
157 define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
158 ; GENERIC-LABEL: imulq128:
160 ; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
161 ; GENERIC-NEXT: retq # sched: [1:1.00]
163 ; SKX-LABEL: imulq128:
165 ; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
166 ; SKX-NEXT: retq # sched: [7:1.00]
167 %z = mul <2 x i64>%x, %y
; mulpd512: fmul of two <8 x double> register arguments.
; Expected codegen is a single zmm vmulpd followed by retq.
171 define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
172 ; GENERIC-LABEL: mulpd512:
173 ; GENERIC: # %bb.0: # %entry
174 ; GENERIC-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
175 ; GENERIC-NEXT: retq # sched: [1:1.00]
177 ; SKX-LABEL: mulpd512:
178 ; SKX: # %bb.0: # %entry
179 ; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
180 ; SKX-NEXT: retq # sched: [7:1.00]
182 %mul.i = fmul <8 x double> %x, %y
183 ret <8 x double> %mul.i
; mulpd512fold: fmul of an <8 x double> argument with a constant vector.
; The constant is expected to be consumed as a RIP-relative memory operand of
; vmulpd.
186 define <8 x double> @mulpd512fold(<8 x double> %y) {
187 ; GENERIC-LABEL: mulpd512fold:
188 ; GENERIC: # %bb.0: # %entry
189 ; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [9:1.00]
190 ; GENERIC-NEXT: retq # sched: [1:1.00]
192 ; SKX-LABEL: mulpd512fold:
193 ; SKX: # %bb.0: # %entry
194 ; SKX-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
195 ; SKX-NEXT: retq # sched: [7:1.00]
197 %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
198 ret <8 x double> %mul.i
; mulps512: fmul of two <16 x float> register arguments.
; Expected codegen is a single zmm vmulps followed by retq.
201 define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
202 ; GENERIC-LABEL: mulps512:
203 ; GENERIC: # %bb.0: # %entry
204 ; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
205 ; GENERIC-NEXT: retq # sched: [1:1.00]
207 ; SKX-LABEL: mulps512:
208 ; SKX: # %bb.0: # %entry
209 ; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
210 ; SKX-NEXT: retq # sched: [7:1.00]
212 %mul.i = fmul <16 x float> %x, %y
213 ret <16 x float> %mul.i
; mulps512fold: fmul of a <16 x float> argument with a constant vector.
; The constant is expected to be consumed as a RIP-relative memory operand of
; vmulps.
216 define <16 x float> @mulps512fold(<16 x float> %y) {
217 ; GENERIC-LABEL: mulps512fold:
218 ; GENERIC: # %bb.0: # %entry
219 ; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [9:1.00]
220 ; GENERIC-NEXT: retq # sched: [1:1.00]
222 ; SKX-LABEL: mulps512fold:
223 ; SKX: # %bb.0: # %entry
224 ; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
225 ; SKX-NEXT: retq # sched: [7:1.00]
227 %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
228 ret <16 x float> %mul.i
; divpd512: fdiv of two <8 x double> register arguments (%x / %y).
; Expected codegen is a single zmm vdivpd followed by retq.
231 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
232 ; GENERIC-LABEL: divpd512:
233 ; GENERIC: # %bb.0: # %entry
234 ; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [24:1.00]
235 ; GENERIC-NEXT: retq # sched: [1:1.00]
237 ; SKX-LABEL: divpd512:
238 ; SKX: # %bb.0: # %entry
239 ; SKX-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [23:2.00]
240 ; SKX-NEXT: retq # sched: [7:1.00]
242 %div.i = fdiv <8 x double> %x, %y
243 ret <8 x double> %div.i
; divpd512fold: fdiv of an <8 x double> argument by a constant vector.
; The constant divisor is expected to be consumed as a RIP-relative memory
; operand of vdivpd.
246 define <8 x double> @divpd512fold(<8 x double> %y) {
247 ; GENERIC-LABEL: divpd512fold:
248 ; GENERIC: # %bb.0: # %entry
249 ; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [28:1.00]
250 ; GENERIC-NEXT: retq # sched: [1:1.00]
252 ; SKX-LABEL: divpd512fold:
253 ; SKX: # %bb.0: # %entry
254 ; SKX-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [30:2.00]
255 ; SKX-NEXT: retq # sched: [7:1.00]
257 %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
258 ret <8 x double> %div.i
; divps512: fdiv of two <16 x float> register arguments (%x / %y).
; Expected codegen is a single zmm vdivps followed by retq.
261 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
262 ; GENERIC-LABEL: divps512:
263 ; GENERIC: # %bb.0: # %entry
264 ; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [24:1.00]
265 ; GENERIC-NEXT: retq # sched: [1:1.00]
267 ; SKX-LABEL: divps512:
268 ; SKX: # %bb.0: # %entry
269 ; SKX-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [23:2.00]
270 ; SKX-NEXT: retq # sched: [7:1.00]
272 %div.i = fdiv <16 x float> %x, %y
273 ret <16 x float> %div.i
; divps512fold: fdiv of a <16 x float> argument by a constant vector.
; The constant divisor is not a splat; it is expected to be consumed as a
; RIP-relative memory operand of vdivps with no embedded broadcast.
276 define <16 x float> @divps512fold(<16 x float> %y) {
277 ; GENERIC-LABEL: divps512fold:
278 ; GENERIC: # %bb.0: # %entry
279 ; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [28:1.00]
280 ; GENERIC-NEXT: retq # sched: [1:1.00]
282 ; SKX-LABEL: divps512fold:
283 ; SKX: # %bb.0: # %entry
284 ; SKX-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [24:2.00]
285 ; SKX-NEXT: retq # sched: [7:1.00]
287 %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
288 ret <16 x float> %div.i
; vpaddq_test: integer add of two <8 x i64> register arguments.
; Expected codegen is a single zmm vpaddq.
291 define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
292 ; GENERIC-LABEL: vpaddq_test:
294 ; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
295 ; GENERIC-NEXT: retq # sched: [1:1.00]
297 ; SKX-LABEL: vpaddq_test:
299 ; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
300 ; SKX-NEXT: retq # sched: [7:1.00]
301 %x = add <8 x i64> %i, %j
; vpaddq_fold_test: integer add of an <8 x i64> argument and a vector loaded
; from %j (align 4). The load is expected to fold into the memory operand of
; vpaddq, addressed through %rdi.
305 define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
306 ; GENERIC-LABEL: vpaddq_fold_test:
308 ; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
309 ; GENERIC-NEXT: retq # sched: [1:1.00]
311 ; SKX-LABEL: vpaddq_fold_test:
313 ; SKX-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
314 ; SKX-NEXT: retq # sched: [7:1.00]
315 %tmp = load <8 x i64>, <8 x i64>* %j, align 4
316 %x = add <8 x i64> %i, %tmp
; vpaddq_broadcast_test: integer add of an <8 x i64> argument with a splat
; constant (all lanes 2). The expected vpaddq uses a RIP-relative operand with
; the {1to8} embedded broadcast.
320 define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
321 ; GENERIC-LABEL: vpaddq_broadcast_test:
323 ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
324 ; GENERIC-NEXT: retq # sched: [1:1.00]
326 ; SKX-LABEL: vpaddq_broadcast_test:
328 ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
329 ; SKX-NEXT: retq # sched: [7:1.00]
330 %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; vpaddq_broadcast2_test: a scalar i64 loaded from %j is inserted into every
; lane of an <8 x i64> via an insertelement chain, then added to %i.
; The whole sequence is expected to collapse into one vpaddq whose memory
; operand (%rdi) uses the {1to8} embedded broadcast.
334 define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
335 ; GENERIC-LABEL: vpaddq_broadcast2_test:
337 ; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
338 ; GENERIC-NEXT: retq # sched: [1:1.00]
340 ; SKX-LABEL: vpaddq_broadcast2_test:
342 ; SKX-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
343 ; SKX-NEXT: retq # sched: [7:1.00]
344 %tmp = load i64, i64* %j
345 %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
346 %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
347 %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
348 %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
349 %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
350 %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
351 %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
352 %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
353 %x = add <8 x i64> %i, %j.7
; vpaddd_test: integer add of two <16 x i32> register arguments.
; Expected codegen is a single zmm vpaddd.
357 define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
358 ; GENERIC-LABEL: vpaddd_test:
360 ; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
361 ; GENERIC-NEXT: retq # sched: [1:1.00]
363 ; SKX-LABEL: vpaddd_test:
365 ; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
366 ; SKX-NEXT: retq # sched: [7:1.00]
367 %x = add <16 x i32> %i, %j
; vpaddd_fold_test: integer add of a <16 x i32> argument and a vector loaded
; from %j (align 4). The load is expected to fold into the memory operand of
; vpaddd, addressed through %rdi.
371 define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
372 ; GENERIC-LABEL: vpaddd_fold_test:
374 ; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
375 ; GENERIC-NEXT: retq # sched: [1:1.00]
377 ; SKX-LABEL: vpaddd_fold_test:
379 ; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
380 ; SKX-NEXT: retq # sched: [7:1.00]
381 %tmp = load <16 x i32>, <16 x i32>* %j, align 4
382 %x = add <16 x i32> %i, %tmp
; vpaddd_broadcast_test: integer add of a <16 x i32> argument with a splat
; constant (all lanes 3). The expected vpaddd uses a RIP-relative operand with
; the {1to16} embedded broadcast.
386 define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
387 ; GENERIC-LABEL: vpaddd_broadcast_test:
389 ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
390 ; GENERIC-NEXT: retq # sched: [1:1.00]
392 ; SKX-LABEL: vpaddd_broadcast_test:
394 ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
395 ; SKX-NEXT: retq # sched: [7:1.00]
396 %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; vpaddd_mask_test: merge-masked <16 x i32> add.
; The mask is `%mask1 != 0` per lane (expected as vptestmd into %k1); lanes
; where the mask is false keep %i, so the expected vpaddd writes %zmm0 under
; {%k1} without zeroing.
400 define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
401 ; GENERIC-LABEL: vpaddd_mask_test:
403 ; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
404 ; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [3:1.00]
405 ; GENERIC-NEXT: retq # sched: [1:1.00]
407 ; SKX-LABEL: vpaddd_mask_test:
409 ; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
410 ; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.33]
411 ; SKX-NEXT: retq # sched: [7:1.00]
412 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
413 %x = add <16 x i32> %i, %j
414 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; vpaddd_maskz_test: zero-masked <16 x i32> add.
; The mask is `%mask1 != 0` per lane (expected as vptestmd into %k1); lanes
; where the mask is false select zeroinitializer, so the expected vpaddd uses
; {%k1} {z}.
418 define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
419 ; GENERIC-LABEL: vpaddd_maskz_test:
421 ; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
422 ; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
423 ; GENERIC-NEXT: retq # sched: [1:1.00]
425 ; SKX-LABEL: vpaddd_maskz_test:
427 ; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
428 ; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
429 ; SKX-NEXT: retq # sched: [7:1.00]
430 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
431 %x = add <16 x i32> %i, %j
432 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; vpaddd_mask_fold_test: merge-masked <16 x i32> add whose second operand is
; loaded from %j.ptr. The mask is `%mask1 != 0` per lane (expected as vptestmd
; into %k1); the load is expected to fold into the memory operand of vpaddd
; (%rdi), and masked-off lanes keep %i.
; The function dereferences %j.ptr, so it is declared `nounwind` only; a
; `readnone` attribute would contradict the load in the body.
436 define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind {
437 ; GENERIC-LABEL: vpaddd_mask_fold_test:
439 ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
440 ; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [7:1.00]
441 ; GENERIC-NEXT: retq # sched: [1:1.00]
443 ; SKX-LABEL: vpaddd_mask_fold_test:
445 ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
446 ; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50]
447 ; SKX-NEXT: retq # sched: [7:1.00]
448 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
449 %j = load <16 x i32>, <16 x i32>* %j.ptr
450 %x = add <16 x i32> %i, %j
451 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; vpaddd_mask_broadcast_test: merge-masked <16 x i32> add with a splat constant
; (all lanes 4). The mask is `%mask1 != 0` per lane (expected as vptestmd into
; %k1); the expected vpaddd uses a RIP-relative {1to16} broadcast operand under
; {%k1}, and masked-off lanes keep %i.
455 define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
456 ; GENERIC-LABEL: vpaddd_mask_broadcast_test:
458 ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
459 ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [7:1.00]
460 ; GENERIC-NEXT: retq # sched: [1:1.00]
462 ; SKX-LABEL: vpaddd_mask_broadcast_test:
464 ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
465 ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50]
466 ; SKX-NEXT: retq # sched: [7:1.00]
467 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
468 %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
469 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
; vpaddd_maskz_fold_test: zero-masked <16 x i32> add whose second operand is
; loaded from %j.ptr. The mask is `%mask1 != 0` per lane (expected as vptestmd
; into %k1); the load is expected to fold into the memory operand of vpaddd
; (%rdi), and masked-off lanes are zeroed ({%k1} {z}).
; The function dereferences %j.ptr, so it is declared `nounwind` only; a
; `readnone` attribute would contradict the load in the body.
473 define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind {
474 ; GENERIC-LABEL: vpaddd_maskz_fold_test:
476 ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
477 ; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
478 ; GENERIC-NEXT: retq # sched: [1:1.00]
480 ; SKX-LABEL: vpaddd_maskz_fold_test:
482 ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
483 ; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
484 ; SKX-NEXT: retq # sched: [7:1.00]
485 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
486 %j = load <16 x i32>, <16 x i32>* %j.ptr
487 %x = add <16 x i32> %i, %j
488 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; vpaddd_maskz_broadcast_test: zero-masked <16 x i32> add with a splat constant
; (all lanes 5). The mask is `%mask1 != 0` per lane (expected as vptestmd into
; %k1); the expected vpaddd uses a RIP-relative {1to16} broadcast operand under
; {%k1} {z}.
492 define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
493 ; GENERIC-LABEL: vpaddd_maskz_broadcast_test:
495 ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
496 ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
497 ; GENERIC-NEXT: retq # sched: [1:1.00]
499 ; SKX-LABEL: vpaddd_maskz_broadcast_test:
501 ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
502 ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
503 ; SKX-NEXT: retq # sched: [7:1.00]
504 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
505 %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
506 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; vpsubq_test: integer sub of two <8 x i64> register arguments (%i - %j).
; Expected codegen is a single zmm vpsubq.
510 define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
511 ; GENERIC-LABEL: vpsubq_test:
513 ; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
514 ; GENERIC-NEXT: retq # sched: [1:1.00]
516 ; SKX-LABEL: vpsubq_test:
518 ; SKX-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
519 ; SKX-NEXT: retq # sched: [7:1.00]
520 %x = sub <8 x i64> %i, %j
; vpsubd_test: integer sub of two <16 x i32> register arguments (%i - %j).
; Expected codegen is a single zmm vpsubd.
524 define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
525 ; GENERIC-LABEL: vpsubd_test:
527 ; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
528 ; GENERIC-NEXT: retq # sched: [1:1.00]
530 ; SKX-LABEL: vpsubd_test:
532 ; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
533 ; SKX-NEXT: retq # sched: [7:1.00]
534 %x = sub <16 x i32> %i, %j
; vpmulld_test: integer mul of two <16 x i32> register arguments.
; Expected codegen is a single zmm vpmulld.
538 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
539 ; GENERIC-LABEL: vpmulld_test:
541 ; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
542 ; GENERIC-NEXT: retq # sched: [1:1.00]
544 ; SKX-LABEL: vpmulld_test:
546 ; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [8:0.67]
547 ; SKX-NEXT: retq # sched: [7:1.00]
548 %x = mul <16 x i32> %i, %j
; sqrtA: tail call to the libm-style @sqrtf declared `readnone`.
; The call is expected to be lowered to an inline vsqrtss on %xmm0 rather than
; an actual call instruction.
552 declare float @sqrtf(float) readnone
553 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
554 ; GENERIC-LABEL: sqrtA:
555 ; GENERIC: # %bb.0: # %entry
556 ; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
557 ; GENERIC-NEXT: retq # sched: [1:1.00]
560 ; SKX: # %bb.0: # %entry
561 ; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
562 ; SKX-NEXT: retq # sched: [7:1.00]
564 %conv1 = tail call float @sqrtf(float %a) nounwind readnone
; sqrtB: tail call to the libm-style @sqrt declared `readnone`.
; The call is expected to be lowered to an inline vsqrtsd on %xmm0 rather than
; an actual call instruction.
568 declare double @sqrt(double) readnone
569 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
570 ; GENERIC-LABEL: sqrtB:
571 ; GENERIC: # %bb.0: # %entry
572 ; GENERIC-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
573 ; GENERIC-NEXT: retq # sched: [1:1.00]
576 ; SKX: # %bb.0: # %entry
577 ; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00]
578 ; SKX-NEXT: retq # sched: [7:1.00]
580 %call = tail call double @sqrt(double %a) nounwind readnone
; sqrtC: scalar float square root through the @llvm.sqrt.f32 intrinsic.
; Expected codegen is a single vsqrtss on %xmm0.
584 declare float @llvm.sqrt.f32(float)
585 define float @sqrtC(float %a) nounwind {
586 ; GENERIC-LABEL: sqrtC:
588 ; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
589 ; GENERIC-NEXT: retq # sched: [1:1.00]
593 ; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
594 ; SKX-NEXT: retq # sched: [7:1.00]
595 %b = call float @llvm.sqrt.f32(float %a)
; sqrtD: <16 x float> square root through the @llvm.sqrt.v16f32 intrinsic.
; Expected codegen is a single zmm vsqrtps.
599 declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
600 define <16 x float> @sqrtD(<16 x float> %a) nounwind {
601 ; GENERIC-LABEL: sqrtD:
603 ; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 # sched: [14:1.00]
604 ; GENERIC-NEXT: retq # sched: [1:1.00]
608 ; SKX-NEXT: vsqrtps %zmm0, %zmm0 # sched: [19:2.00]
609 ; SKX-NEXT: retq # sched: [7:1.00]
610 %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
; sqrtE: <8 x double> square root through the @llvm.sqrt.v8f64 intrinsic.
; Expected codegen is a single zmm vsqrtpd.
614 declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
615 define <8 x double> @sqrtE(<8 x double> %a) nounwind {
616 ; GENERIC-LABEL: sqrtE:
618 ; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [14:1.00]
619 ; GENERIC-NEXT: retq # sched: [1:1.00]
623 ; SKX-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [31:2.00]
624 ; SKX-NEXT: retq # sched: [7:1.00]
625 %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
; fadd_broadcast: fadd of a <16 x float> argument with a splat constant (every
; lane 0x3FB99999A0000000). The expected vaddps uses a RIP-relative operand
; with the {1to16} embedded broadcast.
629 define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
630 ; GENERIC-LABEL: fadd_broadcast:
632 ; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
633 ; GENERIC-NEXT: retq # sched: [1:1.00]
635 ; SKX-LABEL: fadd_broadcast:
637 ; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
638 ; SKX-NEXT: retq # sched: [7:1.00]
639 %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
; addq_broadcast: integer add of an <8 x i64> argument with a splat constant
; (all lanes 2). The expected vpaddq uses a RIP-relative operand with the
; {1to8} embedded broadcast.
643 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
644 ; GENERIC-LABEL: addq_broadcast:
646 ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
647 ; GENERIC-NEXT: retq # sched: [1:1.00]
649 ; SKX-LABEL: addq_broadcast:
651 ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
652 ; SKX-NEXT: retq # sched: [7:1.00]
653 %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; orq_broadcast: bitwise or of an <8 x i64> argument with a splat constant
; (all lanes 2). The expected instruction is vorpd (not an integer-named or)
; with a RIP-relative {1to8} embedded broadcast operand.
657 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
658 ; GENERIC-LABEL: orq_broadcast:
660 ; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
661 ; GENERIC-NEXT: retq # sched: [1:1.00]
663 ; SKX-LABEL: orq_broadcast:
665 ; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
666 ; SKX-NEXT: retq # sched: [7:1.00]
667 %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; andd512fold: bitwise and of a <16 x i32> argument with a vector loaded from
; %x (align 4). The load is expected to fold into the memory operand (%rdi) of
; vandps.
671 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
672 ; GENERIC-LABEL: andd512fold:
673 ; GENERIC: # %bb.0: # %entry
674 ; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
675 ; GENERIC-NEXT: retq # sched: [1:1.00]
677 ; SKX-LABEL: andd512fold:
678 ; SKX: # %bb.0: # %entry
679 ; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
680 ; SKX-NEXT: retq # sched: [7:1.00]
682 %a = load <16 x i32>, <16 x i32>* %x, align 4
683 %b = and <16 x i32> %y, %a
; andqbrst: a scalar i64 loaded from %ap is splatted to <8 x i64>
; (insertelement into lane 0 + shufflevector with an all-zero mask) and and-ed
; with %p1. The sequence is expected to collapse into one vandpd whose memory
; operand (%rdi) uses the {1to8} embedded broadcast.
687 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
688 ; GENERIC-LABEL: andqbrst:
689 ; GENERIC: # %bb.0: # %entry
690 ; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
691 ; GENERIC-NEXT: retq # sched: [1:1.00]
693 ; SKX-LABEL: andqbrst:
694 ; SKX: # %bb.0: # %entry
695 ; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
696 ; SKX-NEXT: retq # sched: [7:1.00]
698 %a = load i64, i64* %ap, align 8
699 %b = insertelement <8 x i64> undef, i64 %a, i32 0
700 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
701 %d = and <8 x i64> %p1, %c
; test_mask_vaddps: merge-masked <16 x float> fadd of %i and %j.
; The mask is `%mask1 != 0` per lane (expected as vptestmd into %k1);
; masked-off lanes keep %dst, so the expected vaddps writes %zmm0 under {%k1}.
; The parameter list continues on a later line, after the check lines placed
; behind the first line of the signature.
705 define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
706 ; GENERIC-LABEL: test_mask_vaddps:
708 ; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
709 ; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
710 ; GENERIC-NEXT: retq # sched: [1:1.00]
712 ; SKX-LABEL: test_mask_vaddps:
714 ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
715 ; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
716 ; SKX-NEXT: retq # sched: [7:1.00]
717 <16 x float> %j, <16 x i32> %mask1)
719 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
720 %x = fadd <16 x float> %i, %j
721 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; test_mask_vmulps: merge-masked <16 x float> fmul of %i and %j.
; The mask is `%mask1 != 0` per lane (expected as vptestmd into %k1);
; masked-off lanes keep %dst, so the expected vmulps writes %zmm0 under {%k1}.
725 define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
726 ; GENERIC-LABEL: test_mask_vmulps:
728 ; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
729 ; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00]
730 ; GENERIC-NEXT: retq # sched: [1:1.00]
732 ; SKX-LABEL: test_mask_vmulps:
734 ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
735 ; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
736 ; SKX-NEXT: retq # sched: [7:1.00]
737 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
738 %x = fmul <16 x float> %i, %j
739 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; test_mask_vminps: merge-masked <16 x float> minimum.
; The minimum is written as `fcmp olt` + select; the mask is `%mask1 != 0` per
; lane (expected as vptestmd into %k1). The pair is expected to become one
; vminps under {%k1}, with masked-off lanes keeping %dst.
743 define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
744 ; GENERIC-LABEL: test_mask_vminps:
746 ; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
747 ; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
748 ; GENERIC-NEXT: retq # sched: [1:1.00]
750 ; SKX-LABEL: test_mask_vminps:
752 ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
753 ; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
754 ; SKX-NEXT: retq # sched: [7:1.00]
755 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
756 %cmp_res = fcmp olt <16 x float> %i, %j
757 %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
758 %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
; test_mask_vminpd: merge-masked <8 x double> minimum.
; The minimum is written as `fcmp olt` + select. The mask source is
; <8 x i32>, so the expected vptestmd operates on %ymm3 while the vminpd it
; predicates is a zmm instruction under {%k1}; masked-off lanes keep %dst.
762 define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
763 ; GENERIC-LABEL: test_mask_vminpd:
765 ; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
766 ; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
767 ; GENERIC-NEXT: retq # sched: [1:1.00]
769 ; SKX-LABEL: test_mask_vminpd:
771 ; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
772 ; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
773 ; SKX-NEXT: retq # sched: [7:1.00]
774 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
775 %cmp_res = fcmp olt <8 x double> %i, %j
776 %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
777 %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
; test_mask_vmaxps: merge-masked <16 x float> maximum.
; The maximum is written as `fcmp ogt` + select; the mask is `%mask1 != 0` per
; lane (expected as vptestmd into %k1). The pair is expected to become one
; vmaxps under {%k1}, with masked-off lanes keeping %dst.
781 define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
782 ; GENERIC-LABEL: test_mask_vmaxps:
784 ; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
785 ; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
786 ; GENERIC-NEXT: retq # sched: [1:1.00]
788 ; SKX-LABEL: test_mask_vmaxps:
790 ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
791 ; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
792 ; SKX-NEXT: retq # sched: [7:1.00]
793 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
794 %cmp_res = fcmp ogt <16 x float> %i, %j
795 %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
796 %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
; test_mask_vmaxpd: merge-masked <8 x double> maximum.
; The maximum is written as `fcmp ogt` + select. The mask source is
; <8 x i32>, so the expected vptestmd operates on %ymm3 while the vmaxpd it
; predicates is a zmm instruction under {%k1}; masked-off lanes keep %dst.
800 define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
801 ; GENERIC-LABEL: test_mask_vmaxpd:
803 ; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
804 ; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
805 ; GENERIC-NEXT: retq # sched: [1:1.00]
807 ; SKX-LABEL: test_mask_vmaxpd:
809 ; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
810 ; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
811 ; SKX-NEXT: retq # sched: [7:1.00]
812 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
813 %cmp_res = fcmp ogt <8 x double> %i, %j
814 %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
815 %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
; test_mask_vsubps: merge-masked <16 x float> fsub (%i - %j).
; The mask is `%mask1 != 0` per lane (expected as vptestmd into %k1);
; masked-off lanes keep %dst, so the expected vsubps writes %zmm0 under {%k1}.
819 define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
820 ; GENERIC-LABEL: test_mask_vsubps:
822 ; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
823 ; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
824 ; GENERIC-NEXT: retq # sched: [1:1.00]
826 ; SKX-LABEL: test_mask_vsubps:
828 ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
829 ; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
830 ; SKX-NEXT: retq # sched: [7:1.00]
831 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
832 %x = fsub <16 x float> %i, %j
833 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; test_mask_vdivps: merge-masked <16 x float> fdiv (%i / %j).
; The mask is `%mask1 != 0` per lane (expected as vptestmd into %k1);
; masked-off lanes keep %dst, so the expected vdivps writes %zmm0 under {%k1}.
837 define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
838 ; GENERIC-LABEL: test_mask_vdivps:
840 ; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
841 ; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [24:1.00]
842 ; GENERIC-NEXT: retq # sched: [1:1.00]
844 ; SKX-LABEL: test_mask_vdivps:
846 ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
847 ; SKX-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [23:2.00]
848 ; SKX-NEXT: retq # sched: [7:1.00]
849 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
850 %x = fdiv <16 x float> %i, %j
851 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
; test_mask_vaddpd: merge-masked <8 x double> fadd of %i and %j.
; The mask is `%mask1 != 0` per <8 x i64> lane (expected as vptestmq into
; %k1); masked-off lanes keep %dst, so the expected vaddpd writes %zmm0 under
; {%k1}.
855 define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone {
856 ; GENERIC-LABEL: test_mask_vaddpd:
858 ; GENERIC-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
859 ; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
860 ; GENERIC-NEXT: retq # sched: [1:1.00]
862 ; SKX-LABEL: test_mask_vaddpd:
864 ; SKX-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
865 ; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
866 ; SKX-NEXT: retq # sched: [7:1.00]
867 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
868 %x = fadd <8 x double> %i, %j
869 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
; test_maskz_vaddpd: zero-masked <8 x double> fadd of %i and %j.
; The mask is `%mask1 != 0` per <8 x i64> lane (expected as vptestmq into
; %k1); masked-off lanes select zeroinitializer, so the expected vaddpd uses
; {%k1} {z}.
873 define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone {
874 ; GENERIC-LABEL: test_maskz_vaddpd:
876 ; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
877 ; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
878 ; GENERIC-NEXT: retq # sched: [1:1.00]
880 ; SKX-LABEL: test_maskz_vaddpd:
882 ; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
883 ; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.33]
884 ; SKX-NEXT: retq # sched: [7:1.00]
885 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
886 %x = fadd <8 x double> %i, %j
887 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; Merge-masked <8 x double> add whose second addend is loaded from %j.
; Both runs expect the load to be folded into vaddpd as the (%rdi) memory
; operand, predicated on {%k1}; unselected lanes select %dst.
891 define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind {
892 ; GENERIC-LABEL: test_mask_fold_vaddpd:
894 ; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
895 ; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [7:1.00]
896 ; GENERIC-NEXT: retq # sched: [1:1.00]
898 ; SKX-LABEL: test_mask_fold_vaddpd:
900 ; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
901 ; SKX-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [11:0.50]
902 ; SKX-NEXT: retq # sched: [7:1.00]
903 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
904 %tmp = load <8 x double>, <8 x double>* %j, align 8
905 %x = fadd <8 x double> %i, %tmp
906 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
; Zero-masked <8 x double> add whose second addend is loaded from %j.
; Both runs expect the load folded into vaddpd as the (%rdi) operand with the
; {%k1} {z} modifiers; unselected lanes select zero.
910 define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind {
911 ; GENERIC-LABEL: test_maskz_fold_vaddpd:
913 ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
914 ; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
915 ; GENERIC-NEXT: retq # sched: [1:1.00]
917 ; SKX-LABEL: test_maskz_fold_vaddpd:
919 ; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
920 ; SKX-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50]
921 ; SKX-NEXT: retq # sched: [7:1.00]
922 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
923 %tmp = load <8 x double>, <8 x double>* %j, align 8
924 %x = fadd <8 x double> %i, %tmp
925 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; Adds a splat of the double loaded from %j to %i. The splat is written as
; insertelement into lane 0 plus a shufflevector with an all-zero mask.
; Both runs expect a single vaddpd using the (%rdi){1to8} broadcast operand.
929 define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
930 ; GENERIC-LABEL: test_broadcast_vaddpd:
932 ; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
933 ; GENERIC-NEXT: retq # sched: [1:1.00]
935 ; SKX-LABEL: test_broadcast_vaddpd:
937 ; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [11:0.50]
938 ; SKX-NEXT: retq # sched: [7:1.00]
939 %tmp = load double, double* %j
940 %b = insertelement <8 x double> undef, double %tmp, i32 0
941 %c = shufflevector <8 x double> %b, <8 x double> undef,
942 <8 x i32> zeroinitializer
943 %x = fadd <8 x double> %c, %i
; Merge-masked add of %i and a splat of the double loaded from %j.
; NOTE(review): the select's fallback operand is %i, and %dst does not appear in
; the visible body lines. This matches the checks, which merge into %zmm1 and
; then copy %zmm1 to %zmm0 with vmovapd.
947 define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind {
948 ; GENERIC-LABEL: test_mask_broadcast_vaddpd:
950 ; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
951 ; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [7:1.00]
952 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
953 ; GENERIC-NEXT: retq # sched: [1:1.00]
955 ; SKX-LABEL: test_mask_broadcast_vaddpd:
957 ; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
958 ; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50]
959 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
960 ; SKX-NEXT: retq # sched: [7:1.00]
961 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
962 %tmp = load double, double* %j
963 %b = insertelement <8 x double> undef, double %tmp, i32 0
964 %c = shufflevector <8 x double> %b, <8 x double> undef,
965 <8 x i32> zeroinitializer
966 %x = fadd <8 x double> %c, %i
967 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
; Zero-masked add of %i and a splat of the double loaded from %j; unselected
; lanes select zero. The parameter list is split across two lines, so the check
; block sits between the first and second half of the signature.
; Both runs expect vaddpd with the (%rdi){1to8} operand and {%k1} {z}.
971 define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
972 ; GENERIC-LABEL: test_maskz_broadcast_vaddpd:
974 ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
975 ; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
976 ; GENERIC-NEXT: retq # sched: [1:1.00]
978 ; SKX-LABEL: test_maskz_broadcast_vaddpd:
980 ; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
981 ; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50]
982 ; SKX-NEXT: retq # sched: [7:1.00]
983 <8 x i64> %mask1) nounwind {
984 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
985 %tmp = load double, double* %j
986 %b = insertelement <8 x double> undef, double %tmp, i32 0
987 %c = shufflevector <8 x double> %b, <8 x double> undef,
988 <8 x i32> zeroinitializer
989 %x = fadd <8 x double> %c, %i
990 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
; <16 x float> negation written as fsub of %a from a -0.0 splat.
; Both runs expect one vxorps against a RIP-relative {1to16} broadcast operand.
994 define <16 x float> @test_fxor(<16 x float> %a) {
995 ; GENERIC-LABEL: test_fxor:
997 ; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
998 ; GENERIC-NEXT: retq # sched: [1:1.00]
1000 ; SKX-LABEL: test_fxor:
1002 ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
1003 ; SKX-NEXT: retq # sched: [7:1.00]
1005 %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
1006 ret <16 x float>%res
; <8 x float> negation written as fsub of %a from a -0.0 splat.
; Both runs expect one 256-bit vxorps against a RIP-relative {1to8} broadcast
; operand.
1009 define <8 x float> @test_fxor_8f32(<8 x float> %a) {
1010 ; GENERIC-LABEL: test_fxor_8f32:
1012 ; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:1.00]
1013 ; GENERIC-NEXT: retq # sched: [1:1.00]
1015 ; SKX-LABEL: test_fxor_8f32:
1017 ; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
1018 ; SKX-NEXT: retq # sched: [7:1.00]
1019 %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
; Calls the llvm.fabs.v8f64 intrinsic on %p (the intrinsic is declared right
; after the function). Both runs expect one vandpd against a RIP-relative
; {1to8} broadcast operand.
1023 define <8 x double> @fabs_v8f64(<8 x double> %p)
1024 ; GENERIC-LABEL: fabs_v8f64:
1026 ; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
1027 ; GENERIC-NEXT: retq # sched: [1:1.00]
1029 ; SKX-LABEL: fabs_v8f64:
1031 ; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
1032 ; SKX-NEXT: retq # sched: [7:1.00]
1034 %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
1037 declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
; Calls the llvm.fabs.v16f32 intrinsic on %p (the intrinsic is declared right
; after the function). Both runs expect one vandps against a RIP-relative
; {1to16} broadcast operand.
1039 define <16 x float> @fabs_v16f32(<16 x float> %p)
1040 ; GENERIC-LABEL: fabs_v16f32:
1042 ; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
1043 ; GENERIC-NEXT: retq # sched: [1:1.00]
1045 ; SKX-LABEL: fabs_v16f32:
1047 ; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
1048 ; SKX-NEXT: retq # sched: [7:1.00]
1050 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
1053 declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
; Scalar double compare-and-branch: fcmp une of %a and %b picks between the
; %l1 path (fsub) and the %l2 path (fadd). Both runs expect vucomisd followed by
; a jne/jnp pair, with vsubsd under %l1 and vaddsd under %l2.
1055 define double @test1(double %a, double %b) nounwind {
1056 ; GENERIC-LABEL: test1:
1058 ; GENERIC-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
1059 ; GENERIC-NEXT: jne .LBB64_1 # sched: [1:1.00]
1060 ; GENERIC-NEXT: jnp .LBB64_2 # sched: [1:1.00]
1061 ; GENERIC-NEXT: .LBB64_1: # %l1
1062 ; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1063 ; GENERIC-NEXT: retq # sched: [1:1.00]
1064 ; GENERIC-NEXT: .LBB64_2: # %l2
1065 ; GENERIC-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1066 ; GENERIC-NEXT: retq # sched: [1:1.00]
1070 ; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
1071 ; SKX-NEXT: jne .LBB64_1 # sched: [1:0.50]
1072 ; SKX-NEXT: jnp .LBB64_2 # sched: [1:0.50]
1073 ; SKX-NEXT: .LBB64_1: # %l1
1074 ; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1075 ; SKX-NEXT: retq # sched: [7:1.00]
1076 ; SKX-NEXT: .LBB64_2: # %l2
1077 ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1078 ; SKX-NEXT: retq # sched: [7:1.00]
1079 %tobool = fcmp une double %a, %b
1080 br i1 %tobool, label %l1, label %l2
1083 %c = fsub double %a, %b
1086 %c1 = fadd double %a, %b
; Scalar float compare-and-branch: fcmp olt of %a and %b picks between the
; %l1 path (fsub) and the %l2 path (fadd). Both runs expect vucomiss with the
; operands swapped (%xmm0, %xmm1) and a single jbe to the %l2 block.
1090 define float @test2(float %a, float %b) nounwind {
1091 ; GENERIC-LABEL: test2:
1093 ; GENERIC-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00]
1094 ; GENERIC-NEXT: jbe .LBB65_2 # sched: [1:1.00]
1095 ; GENERIC-NEXT: # %bb.1: # %l1
1096 ; GENERIC-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1097 ; GENERIC-NEXT: retq # sched: [1:1.00]
1098 ; GENERIC-NEXT: .LBB65_2: # %l2
1099 ; GENERIC-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1100 ; GENERIC-NEXT: retq # sched: [1:1.00]
1104 ; SKX-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00]
1105 ; SKX-NEXT: jbe .LBB65_2 # sched: [1:0.50]
1106 ; SKX-NEXT: # %bb.1: # %l1
1107 ; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1108 ; SKX-NEXT: retq # sched: [7:1.00]
1109 ; SKX-NEXT: .LBB65_2: # %l2
1110 ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1111 ; SKX-NEXT: retq # sched: [7:1.00]
1112 %tobool = fcmp olt float %a, %b
1113 br i1 %tobool, label %l1, label %l2
1116 %c = fsub float %a, %b
1119 %c1 = fadd float %a, %b
; Ordered-equal float compare zero-extended to i32. Both runs expect the
; compare to go through a mask register: vcmpeqss into %k0, kmovd to %eax, then
; movzbl of %al.
1123 define i32 @test3(float %a, float %b) {
1124 ; GENERIC-LABEL: test3:
1126 ; GENERIC-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00]
1127 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
1128 ; GENERIC-NEXT: movzbl %al, %eax # sched: [1:0.33]
1129 ; GENERIC-NEXT: retq # sched: [1:1.00]
1133 ; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00]
1134 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
1135 ; SKX-NEXT: movzbl %al, %eax # sched: [1:0.25]
1136 ; SKX-NEXT: retq # sched: [7:1.00]
1138 %cmp10.i = fcmp oeq float %a, %b
1139 %conv11.i = zext i1 %cmp10.i to i32
; If %p compares ordered-equal to 0.0 control goes straight to %return with %p;
; otherwise %if.end selects 1.0 or -1.0 depending on %p > 0.0, and the phi in
; %return merges the two values. Both runs expect vxorps to make the zero,
; vucomiss plus a jne/jp pair for the equality test, and seta/movzbl feeding a
; vmovss load on the %if.end path.
1143 define float @test5(float %p) #0 {
1144 ; GENERIC-LABEL: test5:
1145 ; GENERIC: # %bb.0: # %entry
1146 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
1147 ; GENERIC-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
1148 ; GENERIC-NEXT: jne .LBB67_1 # sched: [1:1.00]
1149 ; GENERIC-NEXT: jp .LBB67_1 # sched: [1:1.00]
1150 ; GENERIC-NEXT: # %bb.2: # %return
1151 ; GENERIC-NEXT: retq # sched: [1:1.00]
1152 ; GENERIC-NEXT: .LBB67_1: # %if.end
1153 ; GENERIC-NEXT: seta %al # sched: [2:1.00]
1154 ; GENERIC-NEXT: movzbl %al, %eax # sched: [1:0.33]
1155 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
1156 ; GENERIC-NEXT: retq # sched: [1:1.00]
1159 ; SKX: # %bb.0: # %entry
1160 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
1161 ; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
1162 ; SKX-NEXT: jne .LBB67_1 # sched: [1:0.50]
1163 ; SKX-NEXT: jp .LBB67_1 # sched: [1:0.50]
1164 ; SKX-NEXT: # %bb.2: # %return
1165 ; SKX-NEXT: retq # sched: [7:1.00]
1166 ; SKX-NEXT: .LBB67_1: # %if.end
1167 ; SKX-NEXT: seta %al # sched: [2:1.00]
1168 ; SKX-NEXT: movzbl %al, %eax # sched: [1:0.25]
1169 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
1170 ; SKX-NEXT: retq # sched: [7:1.00]
1172 %cmp = fcmp oeq float %p, 0.000000e+00
1173 br i1 %cmp, label %return, label %if.end
1175 if.end: ; preds = %entry
1176 %cmp1 = fcmp ogt float %p, 0.000000e+00
1177 %cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00
1180 return: ; preds = %if.end, %entry
1181 %retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ]
; Integer equality of %a and %b zero-extended to i32. Both runs expect a plain
; scalar sequence: xorl to clear %eax, cmpl, then sete %al.
1185 define i32 @test6(i32 %a, i32 %b) {
1186 ; GENERIC-LABEL: test6:
1188 ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33]
1189 ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33]
1190 ; GENERIC-NEXT: sete %al # sched: [1:0.50]
1191 ; GENERIC-NEXT: retq # sched: [1:1.00]
1195 ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
1196 ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25]
1197 ; SKX-NEXT: sete %al # sched: [1:0.50]
1198 ; SKX-NEXT: retq # sched: [7:1.00]
1199 %cmp = icmp eq i32 %a, %b
1200 %res = zext i1 %cmp to i32
; fcmp one of two doubles zero-extended to i32. Both runs expect xorl to clear
; %eax, vucomisd, then setne %al.
1204 define i32 @test7(double %x, double %y) #2 {
1205 ; GENERIC-LABEL: test7:
1206 ; GENERIC: # %bb.0: # %entry
1207 ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33]
1208 ; GENERIC-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
1209 ; GENERIC-NEXT: setne %al # sched: [1:0.50]
1210 ; GENERIC-NEXT: retq # sched: [1:1.00]
1213 ; SKX: # %bb.0: # %entry
1214 ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
1215 ; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
1216 ; SKX-NEXT: setne %al # sched: [1:0.50]
1217 ; SKX-NEXT: retq # sched: [7:1.00]
1219 %0 = fcmp one double %x, %y
1220 %or = zext i1 %0 to i32
; Selects 1 when (%a1 == -1 and %a2 == INT_MIN) or %a3 == 0, otherwise %a3.
; Both runs expect a branch-free sequence built from xorl/notl/orl/testl and two
; cmov instructions. The instruction order differs between the two runs (the
; SKX run places notl first).
1224 define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
1225 ; GENERIC-LABEL: test8:
1227 ; GENERIC-NEXT: xorl $-2147483648, %esi # imm = 0x80000000
1228 ; GENERIC-NEXT: # sched: [1:0.33]
1229 ; GENERIC-NEXT: testl %edx, %edx # sched: [1:0.33]
1230 ; GENERIC-NEXT: movl $1, %eax # sched: [1:0.33]
1231 ; GENERIC-NEXT: cmovel %eax, %edx # sched: [2:0.67]
1232 ; GENERIC-NEXT: notl %edi # sched: [1:0.33]
1233 ; GENERIC-NEXT: orl %edi, %esi # sched: [1:0.33]
1234 ; GENERIC-NEXT: cmovnel %edx, %eax # sched: [2:0.67]
1235 ; GENERIC-NEXT: retq # sched: [1:1.00]
1239 ; SKX-NEXT: notl %edi # sched: [1:0.25]
1240 ; SKX-NEXT: xorl $-2147483648, %esi # imm = 0x80000000
1241 ; SKX-NEXT: # sched: [1:0.25]
1242 ; SKX-NEXT: testl %edx, %edx # sched: [1:0.25]
1243 ; SKX-NEXT: movl $1, %eax # sched: [1:0.25]
1244 ; SKX-NEXT: cmovel %eax, %edx # sched: [1:0.50]
1245 ; SKX-NEXT: orl %edi, %esi # sched: [1:0.25]
1246 ; SKX-NEXT: cmovnel %edx, %eax # sched: [1:0.50]
1247 ; SKX-NEXT: retq # sched: [7:1.00]
1248 %tmp1 = icmp eq i32 %a1, -1
1249 %tmp2 = icmp eq i32 %a2, -2147483648
1250 %tmp3 = and i1 %tmp1, %tmp2
1251 %tmp4 = icmp eq i32 %a3, 0
1252 %tmp5 = or i1 %tmp3, %tmp4
1253 %res = select i1 %tmp5, i32 1, i32 %a3
; Branches to %A or %B on whether %b is zero. Both runs expect testb $1 on %dil
; and a jne, with %A producing 6 and %B producing 7 in %eax.
; NOTE(review): %b is not defined in the lines visible here; given the testb $1
; check it is presumably the low bit of %a - confirm against the full body.
1257 define i32 @test9(i64 %a) {
1258 ; GENERIC-LABEL: test9:
1260 ; GENERIC-NEXT: testb $1, %dil # sched: [1:0.33]
1261 ; GENERIC-NEXT: jne .LBB71_2 # sched: [1:1.00]
1262 ; GENERIC-NEXT: # %bb.1: # %A
1263 ; GENERIC-NEXT: movl $6, %eax # sched: [1:0.33]
1264 ; GENERIC-NEXT: retq # sched: [1:1.00]
1265 ; GENERIC-NEXT: .LBB71_2: # %B
1266 ; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33]
1267 ; GENERIC-NEXT: retq # sched: [1:1.00]
1271 ; SKX-NEXT: testb $1, %dil # sched: [1:0.25]
1272 ; SKX-NEXT: jne .LBB71_2 # sched: [1:0.50]
1273 ; SKX-NEXT: # %bb.1: # %A
1274 ; SKX-NEXT: movl $6, %eax # sched: [1:0.25]
1275 ; SKX-NEXT: retq # sched: [7:1.00]
1276 ; SKX-NEXT: .LBB71_2: # %B
1277 ; SKX-NEXT: movl $7, %eax # sched: [1:0.25]
1278 ; SKX-NEXT: retq # sched: [7:1.00]
1280 %cmp10.i = icmp eq i64 %b, 0
1281 br i1 %cmp10.i, label %A, label %B
; Branches on %d xor (%d or (%b == %c)): true goes to %if.end.i, false to
; %if.then.i. Both runs expect scalar byte logic (andb/sete/orb/cmpb) and a je,
; with %if.end.i producing 6 and %if.then.i producing 5 in %eax.
1288 define i32 @test10(i64 %b, i64 %c, i1 %d) {
1289 ; GENERIC-LABEL: test10:
1291 ; GENERIC-NEXT: movl %edx, %eax # sched: [1:0.33]
1292 ; GENERIC-NEXT: andb $1, %al # sched: [1:0.33]
1293 ; GENERIC-NEXT: cmpq %rsi, %rdi # sched: [1:0.33]
1294 ; GENERIC-NEXT: sete %cl # sched: [1:0.50]
1295 ; GENERIC-NEXT: orb %dl, %cl # sched: [1:0.33]
1296 ; GENERIC-NEXT: andb $1, %cl # sched: [1:0.33]
1297 ; GENERIC-NEXT: cmpb %cl, %al # sched: [1:0.33]
1298 ; GENERIC-NEXT: je .LBB72_1 # sched: [1:1.00]
1299 ; GENERIC-NEXT: # %bb.2: # %if.end.i
1300 ; GENERIC-NEXT: movl $6, %eax # sched: [1:0.33]
1301 ; GENERIC-NEXT: retq # sched: [1:1.00]
1302 ; GENERIC-NEXT: .LBB72_1: # %if.then.i
1303 ; GENERIC-NEXT: movl $5, %eax # sched: [1:0.33]
1304 ; GENERIC-NEXT: retq # sched: [1:1.00]
1306 ; SKX-LABEL: test10:
1308 ; SKX-NEXT: movl %edx, %eax # sched: [1:0.25]
1309 ; SKX-NEXT: andb $1, %al # sched: [1:0.25]
1310 ; SKX-NEXT: cmpq %rsi, %rdi # sched: [1:0.25]
1311 ; SKX-NEXT: sete %cl # sched: [1:0.50]
1312 ; SKX-NEXT: orb %dl, %cl # sched: [1:0.25]
1313 ; SKX-NEXT: andb $1, %cl # sched: [1:0.25]
1314 ; SKX-NEXT: cmpb %cl, %al # sched: [1:0.25]
1315 ; SKX-NEXT: je .LBB72_1 # sched: [1:0.50]
1316 ; SKX-NEXT: # %bb.2: # %if.end.i
1317 ; SKX-NEXT: movl $6, %eax # sched: [1:0.25]
1318 ; SKX-NEXT: retq # sched: [7:1.00]
1319 ; SKX-NEXT: .LBB72_1: # %if.then.i
1320 ; SKX-NEXT: movl $5, %eax # sched: [1:0.25]
1321 ; SKX-NEXT: retq # sched: [7:1.00]
1323 %cmp8.i = icmp eq i64 %b, %c
1324 %or1 = or i1 %d, %cmp8.i
1325 %xor1 = xor i1 %d, %or1
1326 br i1 %xor1, label %if.end.i, label %if.then.i
; Signed <16 x i32> to <16 x float> conversion. Both runs expect one 512-bit
; vcvtdq2ps.
1335 define <16 x float> @sitof32(<16 x i32> %a) nounwind {
1336 ; GENERIC-LABEL: sitof32:
1338 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
1339 ; GENERIC-NEXT: retq # sched: [1:1.00]
1341 ; SKX-LABEL: sitof32:
1343 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
1344 ; SKX-NEXT: retq # sched: [7:1.00]
1345 %b = sitofp <16 x i32> %a to <16 x float>
; Signed <8 x i64> to <8 x double> conversion. Both runs expect one 512-bit
; vcvtqq2pd.
1349 define <8 x double> @sltof864(<8 x i64> %a) {
1350 ; GENERIC-LABEL: sltof864:
1352 ; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00]
1353 ; GENERIC-NEXT: retq # sched: [1:1.00]
1355 ; SKX-LABEL: sltof864:
1357 ; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33]
1358 ; SKX-NEXT: retq # sched: [7:1.00]
1359 %b = sitofp <8 x i64> %a to <8 x double>
; Signed <4 x i64> to <4 x double> conversion. Both runs expect one 256-bit
; vcvtqq2pd.
1363 define <4 x double> @slto4f64(<4 x i64> %a) {
1364 ; GENERIC-LABEL: slto4f64:
1366 ; GENERIC-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:1.00]
1367 ; GENERIC-NEXT: retq # sched: [1:1.00]
1369 ; SKX-LABEL: slto4f64:
1371 ; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.33]
1372 ; SKX-NEXT: retq # sched: [7:1.00]
1373 %b = sitofp <4 x i64> %a to <4 x double>
; Signed <2 x i64> to <2 x double> conversion. Both runs expect one 128-bit
; vcvtqq2pd.
1377 define <2 x double> @slto2f64(<2 x i64> %a) {
1378 ; GENERIC-LABEL: slto2f64:
1380 ; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
1381 ; GENERIC-NEXT: retq # sched: [1:1.00]
1383 ; SKX-LABEL: slto2f64:
1385 ; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
1386 ; SKX-NEXT: retq # sched: [7:1.00]
1387 %b = sitofp <2 x i64> %a to <2 x double>
; Signed <2 x i64> to <2 x float> conversion. Both runs expect one 128-bit
; vcvtqq2ps.
1391 define <2 x float> @sltof2f32(<2 x i64> %a) {
1392 ; GENERIC-LABEL: sltof2f32:
1394 ; GENERIC-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [4:1.00]
1395 ; GENERIC-NEXT: retq # sched: [1:1.00]
1397 ; SKX-LABEL: sltof2f32:
1399 ; SKX-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [5:1.00]
1400 ; SKX-NEXT: retq # sched: [7:1.00]
1401 %b = sitofp <2 x i64> %a to <2 x float>
; Loads a <4 x i64> from %a and converts it (signed) to <4 x float>.
; Both runs expect the load folded into vcvtqq2psy as the (%rdi) operand.
1405 define <4 x float> @slto4f32_mem(<4 x i64>* %a) {
1406 ; GENERIC-LABEL: slto4f32_mem:
1408 ; GENERIC-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [8:1.00]
1409 ; GENERIC-NEXT: retq # sched: [1:1.00]
1411 ; SKX-LABEL: slto4f32_mem:
1413 ; SKX-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [11:0.50]
1414 ; SKX-NEXT: retq # sched: [7:1.00]
1415 %a1 = load <4 x i64>, <4 x i64>* %a, align 8
1416 %b = sitofp <4 x i64> %a1 to <4 x float>
; <4 x double> to signed <4 x i64> conversion (fptosi). Both runs expect one
; 256-bit vcvttpd2qq.
1420 define <4 x i64> @f64to4sl(<4 x double> %a) {
1421 ; GENERIC-LABEL: f64to4sl:
1423 ; GENERIC-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [3:1.00]
1424 ; GENERIC-NEXT: retq # sched: [1:1.00]
1426 ; SKX-LABEL: f64to4sl:
1428 ; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.33]
1429 ; SKX-NEXT: retq # sched: [7:1.00]
1430 %b = fptosi <4 x double> %a to <4 x i64>
; <4 x float> to signed <4 x i64> conversion (fptosi). Both runs expect one
; widening vcvttps2qq from %xmm0 to %ymm0.
1434 define <4 x i64> @f32to4sl(<4 x float> %a) {
1435 ; GENERIC-LABEL: f32to4sl:
1437 ; GENERIC-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [3:1.00]
1438 ; GENERIC-NEXT: retq # sched: [1:1.00]
1440 ; SKX-LABEL: f32to4sl:
1442 ; SKX-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [7:1.00]
1443 ; SKX-NEXT: retq # sched: [7:1.00]
1444 %b = fptosi <4 x float> %a to <4 x i64>
; Signed <4 x i64> to <4 x float> conversion from a register operand.
; Both runs expect a narrowing vcvtqq2ps (%ymm0 to %xmm0) followed by
; vzeroupper.
1448 define <4 x float> @slto4f32(<4 x i64> %a) {
1449 ; GENERIC-LABEL: slto4f32:
1451 ; GENERIC-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [4:1.00]
1452 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
1453 ; GENERIC-NEXT: retq # sched: [1:1.00]
1455 ; SKX-LABEL: slto4f32:
1457 ; SKX-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [7:1.00]
1458 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
1459 ; SKX-NEXT: retq # sched: [7:1.00]
1460 %b = sitofp <4 x i64> %a to <4 x float>
; Unsigned <4 x i64> to <4 x float> conversion. Both runs expect a narrowing
; vcvtuqq2ps (%ymm0 to %xmm0) followed by vzeroupper.
1464 define <4 x float> @ulto4f32(<4 x i64> %a) {
1465 ; GENERIC-LABEL: ulto4f32:
1467 ; GENERIC-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [4:1.00]
1468 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
1469 ; GENERIC-NEXT: retq # sched: [1:1.00]
1471 ; SKX-LABEL: ulto4f32:
1473 ; SKX-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [7:1.00]
1474 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
1475 ; SKX-NEXT: retq # sched: [7:1.00]
1476 %b = uitofp <4 x i64> %a to <4 x float>
; Unsigned <8 x i64> to <8 x double> conversion. Both runs expect one 512-bit
; vcvtuqq2pd.
1480 define <8 x double> @ulto8f64(<8 x i64> %a) {
1481 ; GENERIC-LABEL: ulto8f64:
1483 ; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00]
1484 ; GENERIC-NEXT: retq # sched: [1:1.00]
1486 ; SKX-LABEL: ulto8f64:
1488 ; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.33]
1489 ; SKX-NEXT: retq # sched: [7:1.00]
1490 %b = uitofp <8 x i64> %a to <8 x double>
; Unsigned <16 x i64> to <16 x double> conversion. The value spans two zmm
; registers, so both runs expect two vcvtuqq2pd instructions (%zmm0 and %zmm1).
1494 define <16 x double> @ulto16f64(<16 x i64> %a) {
1495 ; GENERIC-LABEL: ulto16f64:
1497 ; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00]
1498 ; GENERIC-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:1.00]
1499 ; GENERIC-NEXT: retq # sched: [1:1.00]
1501 ; SKX-LABEL: ulto16f64:
1503 ; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.33]
1504 ; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.33]
1505 ; SKX-NEXT: retq # sched: [7:1.00]
1506 %b = uitofp <16 x i64> %a to <16 x double>
1507 ret <16 x double> %b
; <16 x float> to signed <16 x i32> conversion (fptosi). Both runs expect one
; 512-bit vcvttps2dq.
; NOTE(review): despite the "f64" in the name, the operand type is <16 x float>.
1510 define <16 x i32> @f64to16si(<16 x float> %a) nounwind {
1511 ; GENERIC-LABEL: f64to16si:
1513 ; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
1514 ; GENERIC-NEXT: retq # sched: [1:1.00]
1516 ; SKX-LABEL: f64to16si:
1518 ; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
1519 ; SKX-NEXT: retq # sched: [7:1.00]
1520 %b = fptosi <16 x float> %a to <16 x i32>
; <16 x float> to unsigned <16 x i32> conversion (fptoui). Both runs expect one
; 512-bit vcvttps2udq.
1524 define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
1525 ; GENERIC-LABEL: f32to16ui:
1527 ; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [3:1.00]
1528 ; GENERIC-NEXT: retq # sched: [1:1.00]
1530 ; SKX-LABEL: f32to16ui:
1532 ; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
1533 ; SKX-NEXT: retq # sched: [7:1.00]
1534 %b = fptoui <16 x float> %a to <16 x i32>
; <16 x float> to unsigned <16 x i8> conversion (fptoui). Both runs expect the
; signed truncating convert vcvttps2dq, then vpmovdb to narrow dwords to bytes,
; then vzeroupper.
1538 define <16 x i8> @f32to16uc(<16 x float> %f) {
1539 ; GENERIC-LABEL: f32to16uc:
1541 ; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
1542 ; GENERIC-NEXT: vpmovdb %zmm0, %xmm0 # sched: [1:1.00]
1543 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
1544 ; GENERIC-NEXT: retq # sched: [1:1.00]
1546 ; SKX-LABEL: f32to16uc:
1548 ; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
1549 ; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00]
1550 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
1551 ; SKX-NEXT: retq # sched: [7:1.00]
1552 %res = fptoui <16 x float> %f to <16 x i8>
; <16 x float> to unsigned <16 x i16> conversion (fptoui). Both runs expect
; vcvttps2dq followed by vpmovdw to narrow dwords to words.
1556 define <16 x i16> @f32to16us(<16 x float> %f) {
1557 ; GENERIC-LABEL: f32to16us:
1559 ; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
1560 ; GENERIC-NEXT: vpmovdw %zmm0, %ymm0 # sched: [1:1.00]
1561 ; GENERIC-NEXT: retq # sched: [1:1.00]
1563 ; SKX-LABEL: f32to16us:
1565 ; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
1566 ; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00]
1567 ; SKX-NEXT: retq # sched: [7:1.00]
1568 %res = fptoui <16 x float> %f to <16 x i16>
; <8 x float> to unsigned <8 x i32> conversion (fptoui). Both runs expect one
; 256-bit vcvttps2udq.
1572 define <8 x i32> @f32to8ui(<8 x float> %a) nounwind {
1573 ; GENERIC-LABEL: f32to8ui:
1575 ; GENERIC-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [3:1.00]
1576 ; GENERIC-NEXT: retq # sched: [1:1.00]
1578 ; SKX-LABEL: f32to8ui:
1580 ; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.33]
1581 ; SKX-NEXT: retq # sched: [7:1.00]
1582 %b = fptoui <8 x float> %a to <8 x i32>
; <4 x float> to unsigned <4 x i32> conversion (fptoui). Both runs expect one
; 128-bit vcvttps2udq.
1586 define <4 x i32> @f32to4ui(<4 x float> %a) nounwind {
1587 ; GENERIC-LABEL: f32to4ui:
1589 ; GENERIC-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [3:1.00]
1590 ; GENERIC-NEXT: retq # sched: [1:1.00]
1592 ; SKX-LABEL: f32to4ui:
1594 ; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.33]
1595 ; SKX-NEXT: retq # sched: [7:1.00]
1596 %b = fptoui <4 x float> %a to <4 x i32>
; <8 x double> to unsigned <8 x i32> conversion (fptoui). Both runs expect one
; narrowing vcvttpd2udq from %zmm0 to %ymm0.
1600 define <8 x i32> @f64to8ui(<8 x double> %a) nounwind {
1601 ; GENERIC-LABEL: f64to8ui:
1603 ; GENERIC-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [3:1.00]
1604 ; GENERIC-NEXT: retq # sched: [1:1.00]
1606 ; SKX-LABEL: f64to8ui:
1608 ; SKX-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [7:1.00]
1609 ; SKX-NEXT: retq # sched: [7:1.00]
1610 %b = fptoui <8 x double> %a to <8 x i32>
; <8 x double> to unsigned <8 x i16> conversion (fptoui). Both runs expect the
; signed truncating convert vcvttpd2dq, then vpmovdw to narrow dwords to words,
; then vzeroupper.
1614 define <8 x i16> @f64to8us(<8 x double> %f) {
1615 ; GENERIC-LABEL: f64to8us:
1617 ; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [3:1.00]
1618 ; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00]
1619 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
1620 ; GENERIC-NEXT: retq # sched: [1:1.00]
1622 ; SKX-LABEL: f64to8us:
1624 ; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
1625 ; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00]
1626 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
1627 ; SKX-NEXT: retq # sched: [7:1.00]
1628 %res = fptoui <8 x double> %f to <8 x i16>
; <8 x double> to unsigned <8 x i8> conversion (fptoui). Both runs expect
; vcvttpd2dq followed by vpmovdw (a dword-to-word narrow, not a byte narrow),
; then vzeroupper.
1632 define <8 x i8> @f64to8uc(<8 x double> %f) {
1633 ; GENERIC-LABEL: f64to8uc:
1635 ; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [3:1.00]
1636 ; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00]
1637 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
1638 ; GENERIC-NEXT: retq # sched: [1:1.00]
1640 ; SKX-LABEL: f64to8uc:
1642 ; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
1643 ; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00]
1644 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
1645 ; SKX-NEXT: retq # sched: [7:1.00]
1646 %res = fptoui <8 x double> %f to <8 x i8>
; <4 x double> to unsigned <4 x i32> conversion (fptoui). Both runs expect a
; narrowing vcvttpd2udq (%ymm0 to %xmm0) followed by vzeroupper.
1650 define <4 x i32> @f64to4ui(<4 x double> %a) nounwind {
1651 ; GENERIC-LABEL: f64to4ui:
1653 ; GENERIC-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [3:1.00]
1654 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
1655 ; GENERIC-NEXT: retq # sched: [1:1.00]
1657 ; SKX-LABEL: f64to4ui:
1659 ; SKX-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [7:1.00]
1660 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
1661 ; SKX-NEXT: retq # sched: [7:1.00]
1662 %b = fptoui <4 x double> %a to <4 x i32>
; Signed <8 x i32> to <8 x double> conversion. Both runs expect one widening
; vcvtdq2pd from %ymm0 to %zmm0.
1666 define <8 x double> @sito8f64(<8 x i32> %a) {
1667 ; GENERIC-LABEL: sito8f64:
1669 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
1670 ; GENERIC-NEXT: retq # sched: [1:1.00]
1672 ; SKX-LABEL: sito8f64:
1674 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
1675 ; SKX-NEXT: retq # sched: [7:1.00]
1676 %b = sitofp <8 x i32> %a to <8 x double>
; Merge-masked signed <8 x i32> to <8 x double> conversion. The i8 argument %c
; is bitcast to the <8 x i1> lane mask; unselected lanes select %a. Both runs
; expect kmovd from %edi into %k1 and a vcvtdq2pd predicated on {%k1}.
; NOTE(review): the VLNOBW check lines below match neither check prefix used by
; the RUN lines visible at the top of the file - confirm whether they are stale.
1679 define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
1680 ; GENERIC-LABEL: i32to8f64_mask:
1682 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
1683 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
1684 ; GENERIC-NEXT: retq # sched: [1:1.00]
1686 ; SKX-LABEL: i32to8f64_mask:
1688 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
1689 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [7:1.00]
1690 ; SKX-NEXT: retq # sched: [7:1.00]
1691 ; VLNOBW-LABEL: i32to8f64_mask:
1693 ; VLNOBW-NEXT: kmovw %edi, %k1
1694 ; VLNOBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
1695 ; VLNOBW-NEXT: ret{{[l|q]}}
1696 %1 = bitcast i8 %c to <8 x i1>
1697 %2 = sitofp <8 x i32> %b to <8 x double>
1698 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
; Zero-masked signed <8 x i32> to <8 x double> conversion. The i8 argument %b is
; bitcast to the <8 x i1> lane mask; unselected lanes select zero. Both runs
; expect kmovd from %edi into %k1 and a vcvtdq2pd with {%k1} {z}.
; NOTE(review): the VLNOBW check lines below match neither check prefix used by
; the RUN lines visible at the top of the file - confirm whether they are stale.
1701 define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
1702 ; GENERIC-LABEL: sito8f64_maskz:
1704 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
1705 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
1706 ; GENERIC-NEXT: retq # sched: [1:1.00]
1708 ; SKX-LABEL: sito8f64_maskz:
1710 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
1711 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [7:1.00]
1712 ; SKX-NEXT: retq # sched: [7:1.00]
1713 ; VLNOBW-LABEL: sito8f64_maskz:
1715 ; VLNOBW-NEXT: kmovw %edi, %k1
1716 ; VLNOBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
1717 ; VLNOBW-NEXT: ret{{[l|q]}}
1718 %1 = bitcast i8 %b to <8 x i1>
1719 %2 = sitofp <8 x i32> %a to <8 x double>
1720 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
; <8 x double> to signed <8 x i32> conversion (fptosi). Both runs expect one
; narrowing vcvttpd2dq from %zmm0 to %ymm0.
1724 define <8 x i32> @f64to8si(<8 x double> %a) {
1725 ; GENERIC-LABEL: f64to8si:
1727 ; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [3:1.00]
1728 ; GENERIC-NEXT: retq # sched: [1:1.00]
1730 ; SKX-LABEL: f64to8si:
1732 ; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
1733 ; SKX-NEXT: retq # sched: [7:1.00]
1734 %b = fptosi <8 x double> %a to <8 x i32>
; <4 x double> to signed <4 x i32> conversion (fptosi). Both runs expect a
; narrowing vcvttpd2dq (%ymm0 to %xmm0) followed by vzeroupper.
1738 define <4 x i32> @f64to4si(<4 x double> %a) {
1739 ; GENERIC-LABEL: f64to4si:
1741 ; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
1742 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
1743 ; GENERIC-NEXT: retq # sched: [1:1.00]
1745 ; SKX-LABEL: f64to4si:
1747 ; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
1748 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
1749 ; SKX-NEXT: retq # sched: [7:1.00]
1750 %b = fptosi <4 x double> %a to <4 x i32>
; Truncates <16 x double> to <16 x float>. Both runs expect each zmm half to be
; converted with vcvtpd2ps and the two ymm results joined with vinsertf64x4.
1754 define <16 x float> @f64to16f32(<16 x double> %b) nounwind {
1755 ; GENERIC-LABEL: f64to16f32:
1757 ; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [3:1.00]
1758 ; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [3:1.00]
1759 ; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
1760 ; GENERIC-NEXT: retq # sched: [1:1.00]
1762 ; SKX-LABEL: f64to16f32:
1764 ; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00]
1765 ; SKX-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [7:1.00]
1766 ; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
1767 ; SKX-NEXT: retq # sched: [7:1.00]
1768 %a = fptrunc <16 x double> %b to <16 x float>
; Truncates <4 x double> to <4 x float>. Both runs expect a narrowing vcvtpd2ps
; (%ymm0 to %xmm0) followed by vzeroupper.
1772 define <4 x float> @f64to4f32(<4 x double> %b) {
1773 ; GENERIC-LABEL: f64to4f32:
1775 ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
1776 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
1777 ; GENERIC-NEXT: retq # sched: [1:1.00]
1779 ; SKX-LABEL: f64to4f32:
1781 ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
1782 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
1783 ; SKX-NEXT: retq # sched: [7:1.00]
1784 %a = fptrunc <4 x double> %b to <4 x float>
; Zero-masked truncation of <4 x double> to <4 x float>: lanes not selected by
; the <4 x i1> %mask select zero. Both runs expect the mask vector to be turned
; into %k1 with vpslld $31 plus vptestmd, then a vcvtpd2ps with {%k1} {z}.
1788 define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) {
1789 ; GENERIC-LABEL: f64to4f32_mask:
1791 ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
1792 ; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
1793 ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [3:1.00]
1794 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
1795 ; GENERIC-NEXT: retq # sched: [1:1.00]
1797 ; SKX-LABEL: f64to4f32_mask:
1799 ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50]
1800 ; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
1801 ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00]
1802 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
1803 ; SKX-NEXT: retq # sched: [7:1.00]
1804 %a = fptrunc <4 x double> %b to <4 x float>
1805 %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
; Truncates element 0 of %a0 to float and inserts it into element 0 of %a1.
; Both runs expect the extract/convert/insert chain to become one three-operand
; vcvtsd2ss.
1809 define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
1810 ; GENERIC-LABEL: f64tof32_inreg:
1812 ; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [4:1.00]
1813 ; GENERIC-NEXT: retq # sched: [1:1.00]
1815 ; SKX-LABEL: f64tof32_inreg:
1817 ; SKX-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
1818 ; SKX-NEXT: retq # sched: [7:1.00]
1819 %ext = extractelement <2 x double> %a0, i32 0
1820 %cvt = fptrunc double %ext to float
1821 %res = insertelement <4 x float> %a1, float %cvt, i32 0
1822 ret <4 x float> %res
; Extends <8 x float> to <8 x double>. Both runs expect one widening vcvtps2pd
; from %ymm0 to %zmm0.
1825 define <8 x double> @f32to8f64(<8 x float> %b) nounwind {
1826 ; GENERIC-LABEL: f32to8f64:
1828 ; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [3:1.00]
1829 ; GENERIC-NEXT: retq # sched: [1:1.00]
1831 ; SKX-LABEL: f32to8f64:
1833 ; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00]
1834 ; SKX-NEXT: retq # sched: [7:1.00]
1835 %a = fpext <8 x float> %b to <8 x double>
; Zero-masked extension of <4 x float> to <4 x double>; the lane mask is
; fcmp ogt %a1, %b1. Both runs expect the compare as vcmpltpd with the operands
; in swapped order (%ymm2, %ymm1) producing %k1, then vcvtps2pd with {%k1} {z}.
1839 define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) {
1840 ; GENERIC-LABEL: f32to4f64_mask:
1842 ; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
1843 ; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00]
1844 ; GENERIC-NEXT: retq # sched: [1:1.00]
1846 ; SKX-LABEL: f32to4f64_mask:
1848 ; SKX-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
1849 ; SKX-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [7:1.00]
1850 ; SKX-NEXT: retq # sched: [7:1.00]
1851 %a = fpext <4 x float> %b to <4 x double>
1852 %mask = fcmp ogt <4 x double> %a1, %b1
1853 %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
; Extends element 0 of %a1 to double and inserts it into element 0 of %a0.
; Both runs expect the extract/convert/insert chain to become one three-operand
; vcvtss2sd.
1857 define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
1858 ; GENERIC-LABEL: f32tof64_inreg:
1860 ; GENERIC-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
1861 ; GENERIC-NEXT: retq # sched: [1:1.00]
1863 ; SKX-LABEL: f32tof64_inreg:
1865 ; SKX-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
1866 ; SKX-NEXT: retq # sched: [7:1.00]
1867 %ext = extractelement <4 x float> %a1, i32 0
1868 %cvt = fpext float %ext to double
1869 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1870 ret <2 x double> %res
; sitofp of an i64 loaded from %e to double. The assertions expect the load to
; be folded into vcvtsi2sdq as a memory operand.
1873 define double @sltof64_load(i64* nocapture %e) {
1874 ; GENERIC-LABEL: sltof64_load:
1875 ; GENERIC: # %bb.0: # %entry
1876 ; GENERIC-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1877 ; GENERIC-NEXT: retq # sched: [1:1.00]
1879 ; SKX-LABEL: sltof64_load:
1880 ; SKX: # %bb.0: # %entry
1881 ; SKX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1882 ; SKX-NEXT: retq # sched: [7:1.00]
1884 %tmp1 = load i64, i64* %e, align 8
1885 %conv = sitofp i64 %tmp1 to double
; sitofp of an i32 loaded from %e to double. The assertions expect the load to
; be folded into vcvtsi2sdl as a memory operand.
1889 define double @sitof64_load(i32* %e) {
1890 ; GENERIC-LABEL: sitof64_load:
1891 ; GENERIC: # %bb.0: # %entry
1892 ; GENERIC-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1893 ; GENERIC-NEXT: retq # sched: [1:1.00]
1895 ; SKX-LABEL: sitof64_load:
1896 ; SKX: # %bb.0: # %entry
1897 ; SKX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1898 ; SKX-NEXT: retq # sched: [7:1.00]
1900 %tmp1 = load i32, i32* %e, align 4
1901 %conv = sitofp i32 %tmp1 to double
; sitofp of an i32 loaded from %e to float. The assertions expect the load to
; be folded into vcvtsi2ssl as a memory operand.
1905 define float @sitof32_load(i32* %e) {
1906 ; GENERIC-LABEL: sitof32_load:
1907 ; GENERIC: # %bb.0: # %entry
1908 ; GENERIC-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
1909 ; GENERIC-NEXT: retq # sched: [1:1.00]
1911 ; SKX-LABEL: sitof32_load:
1912 ; SKX: # %bb.0: # %entry
1913 ; SKX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1914 ; SKX-NEXT: retq # sched: [7:1.00]
1916 %tmp1 = load i32, i32* %e, align 4
1917 %conv = sitofp i32 %tmp1 to float
; sitofp of an i64 loaded from %e to float. The assertions expect the load to
; be folded into vcvtsi2ssq as a memory operand.
1921 define float @sltof32_load(i64* %e) {
1922 ; GENERIC-LABEL: sltof32_load:
1923 ; GENERIC: # %bb.0: # %entry
1924 ; GENERIC-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
1925 ; GENERIC-NEXT: retq # sched: [1:1.00]
1927 ; SKX-LABEL: sltof32_load:
1928 ; SKX: # %bb.0: # %entry
1929 ; SKX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1930 ; SKX-NEXT: retq # sched: [7:1.00]
1932 %tmp1 = load i64, i64* %e, align 8
1933 %conv = sitofp i64 %tmp1 to float
; Loads a float from one stack slot, extends it to double and stores the
; result to a second stack slot. The assertions expect three separate
; instructions: a vmovss load, a register-to-register vcvtss2sd, and a vmovsd
; store (the load is not folded into the convert).
1937 define void @f32tof64_loadstore() {
1938 ; GENERIC-LABEL: f32tof64_loadstore:
1939 ; GENERIC: # %bb.0: # %entry
1940 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
1941 ; GENERIC-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
1942 ; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1943 ; GENERIC-NEXT: retq # sched: [1:1.00]
1945 ; SKX-LABEL: f32tof64_loadstore:
1946 ; SKX: # %bb.0: # %entry
1947 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
1948 ; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
1949 ; SKX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1950 ; SKX-NEXT: retq # sched: [7:1.00]
1952 %f = alloca float, align 4
1953 %d = alloca double, align 8
1954 %tmp = load float, float* %f, align 4
1955 %conv = fpext float %tmp to double
1956 store double %conv, double* %d, align 8
; Loads a double from one stack slot, truncates it to float and stores the
; result to a second stack slot. The assertions expect three separate
; instructions: a vmovsd load, a register-to-register vcvtsd2ss, and a vmovss
; store (the load is not folded into the convert).
1960 define void @f64tof32_loadstore() nounwind uwtable {
1961 ; GENERIC-LABEL: f64tof32_loadstore:
1962 ; GENERIC: # %bb.0: # %entry
1963 ; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
1964 ; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
1965 ; GENERIC-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1966 ; GENERIC-NEXT: retq # sched: [1:1.00]
1968 ; SKX-LABEL: f64tof32_loadstore:
1969 ; SKX: # %bb.0: # %entry
1970 ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
1971 ; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
1972 ; SKX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1973 ; SKX-NEXT: retq # sched: [7:1.00]
1975 %f = alloca float, align 4
1976 %d = alloca double, align 8
1977 %tmp = load double, double* %d, align 8
1978 %conv = fptrunc double %tmp to float
1979 store float %conv, float* %f, align 4
; Bitcast of an i64 argument to double (no value conversion). The assertions
; expect a plain vmovq from %rdi into %xmm0.
1983 define double @long_to_double(i64 %x) {
1984 ; GENERIC-LABEL: long_to_double:
1986 ; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:0.33]
1987 ; GENERIC-NEXT: retq # sched: [1:1.00]
1989 ; SKX-LABEL: long_to_double:
1991 ; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:0.25]
1992 ; SKX-NEXT: retq # sched: [7:1.00]
1993 %res = bitcast i64 %x to double
; Bitcast of a double argument to i64 (no value conversion). The assertions
; expect a plain vmovq from %xmm0 into %rax.
1997 define i64 @double_to_long(double %x) {
1998 ; GENERIC-LABEL: double_to_long:
2000 ; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [1:0.33]
2001 ; GENERIC-NEXT: retq # sched: [1:1.00]
2003 ; SKX-LABEL: double_to_long:
2005 ; SKX-NEXT: vmovq %xmm0, %rax # sched: [1:0.25]
2006 ; SKX-NEXT: retq # sched: [7:1.00]
2007 %res = bitcast double %x to i64
; Bitcast of an i32 argument to float (no value conversion). The assertions
; expect a plain vmovd from %edi into %xmm0.
2011 define float @int_to_float(i32 %x) {
2012 ; GENERIC-LABEL: int_to_float:
2014 ; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:0.33]
2015 ; GENERIC-NEXT: retq # sched: [1:1.00]
2017 ; SKX-LABEL: int_to_float:
2019 ; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:0.25]
2020 ; SKX-NEXT: retq # sched: [7:1.00]
2021 %res = bitcast i32 %x to float
; Bitcast of a float argument to i32 (no value conversion). The assertions
; expect a plain vmovd from %xmm0 into %eax.
2025 define i32 @float_to_int(float %x) {
2026 ; GENERIC-LABEL: float_to_int:
2028 ; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [1:0.33]
2029 ; GENERIC-NEXT: retq # sched: [1:1.00]
2031 ; SKX-LABEL: float_to_int:
2033 ; SKX-NEXT: vmovd %xmm0, %eax # sched: [1:0.25]
2034 ; SKX-NEXT: retq # sched: [7:1.00]
2035 %res = bitcast float %x to i32
; uitofp of <16 x i32> to <16 x double>, a result that spans two zmm
; registers. The assertions expect the low 256 bits to be converted with
; vcvtudq2pd, the high 256 bits to be extracted with vextractf64x4 and
; converted the same way, and the low result to be copied into %zmm0 with
; vmovaps.
2039 define <16 x double> @uito16f64(<16 x i32> %a) nounwind {
2040 ; GENERIC-LABEL: uito16f64:
2042 ; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [4:1.00]
2043 ; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00]
2044 ; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [4:1.00]
2045 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
2046 ; GENERIC-NEXT: retq # sched: [1:1.00]
2048 ; SKX-LABEL: uito16f64:
2050 ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [7:1.00]
2051 ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00]
2052 ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [7:1.00]
2053 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
2054 ; SKX-NEXT: retq # sched: [7:1.00]
2055 %b = uitofp <16 x i32> %a to <16 x double>
2056 ret <16 x double> %b
; sitofp of <8 x i64> to <8 x float>. The assertions expect a single
; vcvtqq2ps from a zmm source into a ymm destination.
2059 define <8 x float> @slto8f32(<8 x i64> %a) {
2060 ; GENERIC-LABEL: slto8f32:
2062 ; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [4:1.00]
2063 ; GENERIC-NEXT: retq # sched: [1:1.00]
2065 ; SKX-LABEL: slto8f32:
2067 ; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00]
2068 ; SKX-NEXT: retq # sched: [7:1.00]
2069 %b = sitofp <8 x i64> %a to <8 x float>
; sitofp of <16 x i64> (two zmm inputs) to <16 x float>. The assertions expect
; one vcvtqq2ps per input half, with the two ymm results joined into %zmm0 by
; vinsertf64x4.
2073 define <16 x float> @slto16f32(<16 x i64> %a) {
2074 ; GENERIC-LABEL: slto16f32:
2076 ; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [4:1.00]
2077 ; GENERIC-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [4:1.00]
2078 ; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
2079 ; GENERIC-NEXT: retq # sched: [1:1.00]
2081 ; SKX-LABEL: slto16f32:
2083 ; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00]
2084 ; SKX-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [7:1.00]
2085 ; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
2086 ; SKX-NEXT: retq # sched: [7:1.00]
2087 %b = sitofp <16 x i64> %a to <16 x float>
; sitofp of <8 x i64> to <8 x double>. The assertions expect a single
; zmm-to-zmm vcvtqq2pd.
2091 define <8 x double> @slto8f64(<8 x i64> %a) {
2092 ; GENERIC-LABEL: slto8f64:
2094 ; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00]
2095 ; GENERIC-NEXT: retq # sched: [1:1.00]
2097 ; SKX-LABEL: slto8f64:
2099 ; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33]
2100 ; SKX-NEXT: retq # sched: [7:1.00]
2101 %b = sitofp <8 x i64> %a to <8 x double>
; sitofp of <16 x i64> to <16 x double>; input and result each span two zmm
; registers. The assertions expect one in-place vcvtqq2pd per register.
2105 define <16 x double> @slto16f64(<16 x i64> %a) {
2106 ; GENERIC-LABEL: slto16f64:
2108 ; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00]
2109 ; GENERIC-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:1.00]
2110 ; GENERIC-NEXT: retq # sched: [1:1.00]
2112 ; SKX-LABEL: slto16f64:
2114 ; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33]
2115 ; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.33]
2116 ; SKX-NEXT: retq # sched: [7:1.00]
2117 %b = sitofp <16 x i64> %a to <16 x double>
2118 ret <16 x double> %b
; uitofp of <8 x i64> to <8 x float>. The assertions expect a single
; vcvtuqq2ps from a zmm source into a ymm destination.
2121 define <8 x float> @ulto8f32(<8 x i64> %a) {
2122 ; GENERIC-LABEL: ulto8f32:
2124 ; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [4:1.00]
2125 ; GENERIC-NEXT: retq # sched: [1:1.00]
2127 ; SKX-LABEL: ulto8f32:
2129 ; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00]
2130 ; SKX-NEXT: retq # sched: [7:1.00]
2131 %b = uitofp <8 x i64> %a to <8 x float>
; uitofp of <16 x i64> (two zmm inputs) to <16 x float>. The assertions expect
; one vcvtuqq2ps per input half, with the two ymm results joined into %zmm0 by
; vinsertf64x4.
2135 define <16 x float> @ulto16f32(<16 x i64> %a) {
2136 ; GENERIC-LABEL: ulto16f32:
2138 ; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [4:1.00]
2139 ; GENERIC-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [4:1.00]
2140 ; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
2141 ; GENERIC-NEXT: retq # sched: [1:1.00]
2143 ; SKX-LABEL: ulto16f32:
2145 ; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00]
2146 ; SKX-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [7:1.00]
2147 ; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
2148 ; SKX-NEXT: retq # sched: [7:1.00]
2149 %b = uitofp <16 x i64> %a to <16 x float>
; Merge-masked uitofp of <8 x i32> to <8 x double>: the i8 argument %c is
; reinterpreted as an <8 x i1> mask; selected lanes take the converted value
; and the remaining lanes keep the corresponding lane of %a. The assertions
; expect the mask to be moved into %k1 with kmovd and a {%k1}-masked
; vcvtudq2pd that writes over %zmm0 (which holds %a).
; Only the two check prefixes enabled by the RUN lines are asserted here;
; assertions under any other prefix would never be evaluated by FileCheck.
2153 define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
2154 ; GENERIC-LABEL: uito8f64_mask:
2156 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
2157 ; GENERIC-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
2158 ; GENERIC-NEXT: retq # sched: [1:1.00]
2160 ; SKX-LABEL: uito8f64_mask:
2162 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
2163 ; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [7:1.00]
2164 ; SKX-NEXT: retq # sched: [7:1.00]
2170 %1 = bitcast i8 %c to <8 x i1>
2171 %2 = uitofp <8 x i32> %b to <8 x double>
2172 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
; Zero-masked uitofp of <8 x i32> to <8 x double>: the i8 argument %b is
; reinterpreted as an <8 x i1> mask; selected lanes take the converted value
; and the remaining lanes are zero. The assertions expect kmovd into %k1 and a
; {%k1} {z}-masked vcvtudq2pd.
2175 define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
2176 ; GENERIC-LABEL: uito8f64_maskz:
2178 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
2179 ; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
2180 ; GENERIC-NEXT: retq # sched: [1:1.00]
2182 ; SKX-LABEL: uito8f64_maskz:
2184 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
2185 ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [7:1.00]
2186 ; SKX-NEXT: retq # sched: [7:1.00]
2187 %1 = bitcast i8 %b to <8 x i1>
2188 %2 = uitofp <8 x i32> %a to <8 x double>
2189 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
; uitofp of <4 x i32> to <4 x double>. The assertions expect a single
; vcvtudq2pd from an xmm source into a ymm destination.
2193 define <4 x double> @uito4f64(<4 x i32> %a) nounwind {
2194 ; GENERIC-LABEL: uito4f64:
2196 ; GENERIC-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [4:1.00]
2197 ; GENERIC-NEXT: retq # sched: [1:1.00]
2199 ; SKX-LABEL: uito4f64:
2201 ; SKX-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [7:1.00]
2202 ; SKX-NEXT: retq # sched: [7:1.00]
2203 %b = uitofp <4 x i32> %a to <4 x double>
; uitofp of <16 x i32> to <16 x float>. The assertions expect a single
; zmm-to-zmm vcvtudq2ps.
2207 define <16 x float> @uito16f32(<16 x i32> %a) nounwind {
2208 ; GENERIC-LABEL: uito16f32:
2210 ; GENERIC-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:1.00]
2211 ; GENERIC-NEXT: retq # sched: [1:1.00]
2213 ; SKX-LABEL: uito16f32:
2215 ; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.33]
2216 ; SKX-NEXT: retq # sched: [7:1.00]
2217 %b = uitofp <16 x i32> %a to <16 x float>
; uitofp of <8 x i32> to <8 x double>. The assertions expect a single
; vcvtudq2pd from a ymm source into a zmm destination.
2221 define <8 x double> @uito8f64(<8 x i32> %a) {
2222 ; GENERIC-LABEL: uito8f64:
2224 ; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [4:1.00]
2225 ; GENERIC-NEXT: retq # sched: [1:1.00]
2227 ; SKX-LABEL: uito8f64:
2229 ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00]
2230 ; SKX-NEXT: retq # sched: [7:1.00]
2231 %b = uitofp <8 x i32> %a to <8 x double>
; uitofp of <8 x i32> to <8 x float>. The assertions expect a single
; ymm-to-ymm vcvtudq2ps.
2235 define <8 x float> @uito8f32(<8 x i32> %a) nounwind {
2236 ; GENERIC-LABEL: uito8f32:
2238 ; GENERIC-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:1.00]
2239 ; GENERIC-NEXT: retq # sched: [1:1.00]
2241 ; SKX-LABEL: uito8f32:
2243 ; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.33]
2244 ; SKX-NEXT: retq # sched: [7:1.00]
2245 %b = uitofp <8 x i32> %a to <8 x float>
; uitofp of <4 x i32> to <4 x float>. The assertions expect a single
; xmm-to-xmm vcvtudq2ps.
2249 define <4 x float> @uito4f32(<4 x i32> %a) nounwind {
2250 ; GENERIC-LABEL: uito4f32:
2252 ; GENERIC-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:1.00]
2253 ; GENERIC-NEXT: retq # sched: [1:1.00]
2255 ; SKX-LABEL: uito4f32:
2257 ; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.33]
2258 ; SKX-NEXT: retq # sched: [7:1.00]
2259 %b = uitofp <4 x i32> %a to <4 x float>
; Scalar fptosi of a float to i32. The assertions expect the truncating
; convert vcvttss2si from %xmm0 into %eax.
2263 define i32 @fptosi(float %a) nounwind {
2264 ; GENERIC-LABEL: fptosi:
2266 ; GENERIC-NEXT: vcvttss2si %xmm0, %eax # sched: [5:1.00]
2267 ; GENERIC-NEXT: retq # sched: [1:1.00]
2269 ; SKX-LABEL: fptosi:
2271 ; SKX-NEXT: vcvttss2si %xmm0, %eax # sched: [7:1.00]
2272 ; SKX-NEXT: retq # sched: [7:1.00]
2273 %b = fptosi float %a to i32
; Scalar fptoui of a float to i32. The assertions expect the unsigned
; truncating convert vcvttss2usi from %xmm0 into %eax.
2277 define i32 @fptoui(float %a) nounwind {
2278 ; GENERIC-LABEL: fptoui:
2280 ; GENERIC-NEXT: vcvttss2usi %xmm0, %eax # sched: [3:1.00]
2281 ; GENERIC-NEXT: retq # sched: [1:1.00]
2283 ; SKX-LABEL: fptoui:
2285 ; SKX-NEXT: vcvttss2usi %xmm0, %eax # sched: [6:1.00]
2286 ; SKX-NEXT: retq # sched: [7:1.00]
2287 %b = fptoui float %a to i32
; Scalar uitofp of an i32 to float. The assertions expect vcvtusi2ssl taking
; %edi as the integer source.
2291 define float @uitof32(i32 %a) nounwind {
2292 ; GENERIC-LABEL: uitof32:
2294 ; GENERIC-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
2295 ; GENERIC-NEXT: retq # sched: [1:1.00]
2297 ; SKX-LABEL: uitof32:
2299 ; SKX-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
2300 ; SKX-NEXT: retq # sched: [7:1.00]
2301 %b = uitofp i32 %a to float
; Scalar uitofp of an i32 to double. The assertions expect vcvtusi2sdl taking
; %edi as the integer source.
2305 define double @uitof64(i32 %a) nounwind {
2306 ; GENERIC-LABEL: uitof64:
2308 ; GENERIC-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
2309 ; GENERIC-NEXT: retq # sched: [1:1.00]
2311 ; SKX-LABEL: uitof64:
2313 ; SKX-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
2314 ; SKX-NEXT: retq # sched: [7:1.00]
2315 %b = uitofp i32 %a to double
; sitofp of a <16 x i1> mask, produced by the signed compare %a < 0, to
; <16 x float>. The assertions expect the sign bits to be collected into a mask
; register with vpmovd2m, expanded back into a dword vector with vpmovm2d, and
; then converted with vcvtdq2ps.
2319 define <16 x float> @sbto16f32(<16 x i32> %a) {
2320 ; GENERIC-LABEL: sbto16f32:
2322 ; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
2323 ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
2324 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
2325 ; GENERIC-NEXT: retq # sched: [1:1.00]
2327 ; SKX-LABEL: sbto16f32:
2329 ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
2330 ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
2331 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
2332 ; SKX-NEXT: retq # sched: [7:1.00]
2333 %mask = icmp slt <16 x i32> %a, zeroinitializer
2334 %1 = sitofp <16 x i1> %mask to <16 x float>
; sitofp of <16 x i8> to <16 x float>. The assertions expect a sign extension
; to dwords with vpmovsxbd followed by vcvtdq2ps.
2338 define <16 x float> @scto16f32(<16 x i8> %a) {
2339 ; GENERIC-LABEL: scto16f32:
2341 ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00]
2342 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
2343 ; GENERIC-NEXT: retq # sched: [1:1.00]
2345 ; SKX-LABEL: scto16f32:
2347 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00]
2348 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
2349 ; SKX-NEXT: retq # sched: [7:1.00]
2350 %1 = sitofp <16 x i8> %a to <16 x float>
; sitofp of <16 x i16> to <16 x float>. The assertions expect a sign extension
; to dwords with vpmovsxwd followed by vcvtdq2ps.
2354 define <16 x float> @ssto16f32(<16 x i16> %a) {
2355 ; GENERIC-LABEL: ssto16f32:
2357 ; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00]
2358 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
2359 ; GENERIC-NEXT: retq # sched: [1:1.00]
2361 ; SKX-LABEL: ssto16f32:
2363 ; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
2364 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
2365 ; SKX-NEXT: retq # sched: [7:1.00]
2366 %1 = sitofp <16 x i16> %a to <16 x float>
; sitofp of <8 x i16> to <8 x double>. The assertions expect a sign extension
; to dwords with vpmovsxwd (xmm to ymm) followed by vcvtdq2pd (ymm to zmm).
; NOTE(review): despite the "16" in the function name, the IR operates on
; 8-element vectors; the name is kept as-is because the check labels depend on
; it.
2370 define <8 x double> @ssto16f64(<8 x i16> %a) {
2371 ; GENERIC-LABEL: ssto16f64:
2373 ; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
2374 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2375 ; GENERIC-NEXT: retq # sched: [1:1.00]
2377 ; SKX-LABEL: ssto16f64:
2379 ; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
2380 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2381 ; SKX-NEXT: retq # sched: [7:1.00]
2382 %1 = sitofp <8 x i16> %a to <8 x double>
; sitofp of <8 x i8> to <8 x double>. The assertions expect the input to be
; widened to dwords with vpmovzxwd, then sign-extended from the low byte with a
; shift-left/arithmetic-shift-right pair by 24 (vpslld + vpsrad), and finally
; converted with vcvtdq2pd.
2386 define <8 x double> @scto8f64(<8 x i8> %a) {
2387 ; GENERIC-LABEL: scto8f64:
2389 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
2390 ; GENERIC-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:1.00]
2391 ; GENERIC-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:1.00]
2392 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2393 ; GENERIC-NEXT: retq # sched: [1:1.00]
2395 ; SKX-LABEL: scto8f64:
2397 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
2398 ; SKX-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:0.50]
2399 ; SKX-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:0.50]
2400 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2401 ; SKX-NEXT: retq # sched: [7:1.00]
2402 %1 = sitofp <8 x i8> %a to <8 x double>
; sitofp of <16 x i8> to <16 x double>; the result spans two zmm registers.
; The assertions expect one vpmovsxbd sign extension to 16 dwords, a
; vcvtdq2pd of the low 256 bits, a vextracti64x4 of the high 256 bits, and a
; second vcvtdq2pd.
2406 define <16 x double> @scto16f64(<16 x i8> %a) {
2407 ; GENERIC-LABEL: scto16f64:
2409 ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [1:1.00]
2410 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
2411 ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
2412 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
2413 ; GENERIC-NEXT: retq # sched: [1:1.00]
2415 ; SKX-LABEL: scto16f64:
2417 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [3:1.00]
2418 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
2419 ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
2420 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
2421 ; SKX-NEXT: retq # sched: [7:1.00]
2422 %b = sitofp <16 x i8> %a to <16 x double>
2423 ret <16 x double> %b
; sitofp of the <16 x i1> result of "fcmp ogt %a, 0.0" to <16 x double>; input
; and result each span two zmm registers. The assertions expect a zeroed
; register (vxorpd), one vcmpltpd per input register with the zero as the
; left-hand operand (0 < a), each mask expanded to dwords with vpmovm2d, and
; each expanded mask converted with vcvtdq2pd.
2426 define <16 x double> @sbto16f64(<16 x double> %a) {
2427 ; GENERIC-LABEL: sbto16f64:
2429 ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
2430 ; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k0 # sched: [3:1.00]
2431 ; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k1 # sched: [3:1.00]
2432 ; GENERIC-NEXT: vpmovm2d %k1, %ymm0 # sched: [1:0.33]
2433 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2434 ; GENERIC-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.33]
2435 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
2436 ; GENERIC-NEXT: retq # sched: [1:1.00]
2438 ; SKX-LABEL: sbto16f64:
2440 ; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
2441 ; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k0 # sched: [3:1.00]
2442 ; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k1 # sched: [3:1.00]
2443 ; SKX-NEXT: vpmovm2d %k1, %ymm0 # sched: [1:0.25]
2444 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2445 ; SKX-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.25]
2446 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
2447 ; SKX-NEXT: retq # sched: [7:1.00]
2448 %cmpres = fcmp ogt <16 x double> %a, zeroinitializer
2449 %1 = sitofp <16 x i1> %cmpres to <16 x double>
2450 ret <16 x double> %1
; sitofp of the <8 x i1> result of "fcmp ogt %a, 0.0" to <8 x double>. The
; assertions expect a zeroed register (vxorpd), vcmpltpd with the zero as the
; left-hand operand (0 < a), vpmovm2d to expand the mask to dwords, and
; vcvtdq2pd.
2453 define <8 x double> @sbto8f64(<8 x double> %a) {
2454 ; GENERIC-LABEL: sbto8f64:
2456 ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2457 ; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
2458 ; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
2459 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2460 ; GENERIC-NEXT: retq # sched: [1:1.00]
2462 ; SKX-LABEL: sbto8f64:
2464 ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2465 ; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
2466 ; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
2467 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2468 ; SKX-NEXT: retq # sched: [7:1.00]
2469 %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
2470 %1 = sitofp <8 x i1> %cmpres to <8 x double>
; sitofp of the <8 x i1> result of "fcmp ogt %a, 0.0" to <8 x float>. The
; assertions expect a zeroed register (vxorps), vcmpltps with the zero as the
; left-hand operand (0 < a), vpmovm2d to expand the mask to dwords, and
; vcvtdq2ps.
2474 define <8 x float> @sbto8f32(<8 x float> %a) {
2475 ; GENERIC-LABEL: sbto8f32:
2477 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2478 ; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %k0 # sched: [3:1.00]
2479 ; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
2480 ; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
2481 ; GENERIC-NEXT: retq # sched: [1:1.00]
2483 ; SKX-LABEL: sbto8f32:
2485 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2486 ; SKX-NEXT: vcmpltps %ymm0, %ymm1, %k0 # sched: [3:1.00]
2487 ; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
2488 ; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
2489 ; SKX-NEXT: retq # sched: [7:1.00]
2490 %cmpres = fcmp ogt <8 x float> %a, zeroinitializer
2491 %1 = sitofp <8 x i1> %cmpres to <8 x float>
; sitofp of the <4 x i1> result of "fcmp ogt %a, 0.0" to <4 x float>. The
; assertions expect a zeroed register (vxorps), vcmpltps with the zero as the
; left-hand operand (0 < a), vpmovm2d to expand the mask to dwords, and
; vcvtdq2ps, all on xmm registers.
2495 define <4 x float> @sbto4f32(<4 x float> %a) {
2496 ; GENERIC-LABEL: sbto4f32:
2498 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2499 ; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
2500 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
2501 ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
2502 ; GENERIC-NEXT: retq # sched: [1:1.00]
2504 ; SKX-LABEL: sbto4f32:
2506 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2507 ; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
2508 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
2509 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
2510 ; SKX-NEXT: retq # sched: [7:1.00]
2511 %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
2512 %1 = sitofp <4 x i1> %cmpres to <4 x float>
; sitofp of the <4 x i1> result of "fcmp ogt %a, 0.0" to <4 x double>. The
; assertions expect a zeroed register (vxorpd), vcmpltpd on ymm with the zero
; as the left-hand operand (0 < a), vpmovm2d to expand the mask to dwords in
; an xmm register, and vcvtdq2pd from xmm to ymm.
2516 define <4 x double> @sbto4f64(<4 x double> %a) {
2517 ; GENERIC-LABEL: sbto4f64:
2519 ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2520 ; GENERIC-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
2521 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
2522 ; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
2523 ; GENERIC-NEXT: retq # sched: [1:1.00]
2525 ; SKX-LABEL: sbto4f64:
2527 ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2528 ; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
2529 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
2530 ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
2531 ; SKX-NEXT: retq # sched: [7:1.00]
2532 %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
2533 %1 = sitofp <4 x i1> %cmpres to <4 x double>
; sitofp of the <2 x i1> result of "fcmp ogt %a, 0.0" to <2 x float>. The
; assertions expect the same xmm sequence as the 4-element variant: vxorps,
; vcmpltps with the zero as the left-hand operand (0 < a), vpmovm2d, and
; vcvtdq2ps.
2537 define <2 x float> @sbto2f32(<2 x float> %a) {
2538 ; GENERIC-LABEL: sbto2f32:
2540 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2541 ; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
2542 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
2543 ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
2544 ; GENERIC-NEXT: retq # sched: [1:1.00]
2546 ; SKX-LABEL: sbto2f32:
2548 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2549 ; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
2550 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
2551 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
2552 ; SKX-NEXT: retq # sched: [7:1.00]
2553 %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
2554 %1 = sitofp <2 x i1> %cmpres to <2 x float>
; sitofp of the <2 x i1> result of "fcmp ogt %a, 0.0" to <2 x double>. The
; assertions expect a zeroed register (vxorpd), vcmpltpd with the zero as the
; left-hand operand (0 < a), vpmovm2d to expand the mask to dwords, and an
; xmm-to-xmm vcvtdq2pd.
2558 define <2 x double> @sbto2f64(<2 x double> %a) {
2559 ; GENERIC-LABEL: sbto2f64:
2561 ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
2562 ; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
2563 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
2564 ; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
2565 ; GENERIC-NEXT: retq # sched: [1:1.00]
2567 ; SKX-LABEL: sbto2f64:
2569 ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2570 ; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
2571 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
2572 ; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
2573 ; SKX-NEXT: retq # sched: [7:1.00]
2574 %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
2575 %1 = sitofp <2 x i1> %cmpres to <2 x double>
; uitofp of <16 x i8> to <16 x float>. The assertions expect a zero extension
; to dwords with vpmovzxbd followed by the signed convert vcvtdq2ps.
2579 define <16 x float> @ucto16f32(<16 x i8> %a) {
2580 ; GENERIC-LABEL: ucto16f32:
2582 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
2583 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
2584 ; GENERIC-NEXT: retq # sched: [1:1.00]
2586 ; SKX-LABEL: ucto16f32:
2588 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
2589 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
2590 ; SKX-NEXT: retq # sched: [7:1.00]
2591 %b = uitofp <16 x i8> %a to <16 x float>
; uitofp of <8 x i8> to <8 x double>. The assertions expect the input to be
; masked with a RIP-relative constant (vpand), zero-extended to dwords with
; vpmovzxwd, and converted with the signed vcvtdq2pd.
2595 define <8 x double> @ucto8f64(<8 x i8> %a) {
2596 ; GENERIC-LABEL: ucto8f64:
2598 ; GENERIC-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
2599 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
2600 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2601 ; GENERIC-NEXT: retq # sched: [1:1.00]
2603 ; SKX-LABEL: ucto8f64:
2605 ; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
2606 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
2607 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2608 ; SKX-NEXT: retq # sched: [7:1.00]
2609 %b = uitofp <8 x i8> %a to <8 x double>
; sitofp of <16 x i16> to <16 x float>. The assertions expect a sign extension
; to dwords with vpmovsxwd (ymm to zmm) followed by vcvtdq2ps.
2613 define <16 x float> @swto16f32(<16 x i16> %a) {
2614 ; GENERIC-LABEL: swto16f32:
2616 ; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00]
2617 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
2618 ; GENERIC-NEXT: retq # sched: [1:1.00]
2620 ; SKX-LABEL: swto16f32:
2622 ; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
2623 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
2624 ; SKX-NEXT: retq # sched: [7:1.00]
2625 %b = sitofp <16 x i16> %a to <16 x float>
; sitofp of <8 x i16> to <8 x double>. The assertions expect a sign extension
; to dwords with vpmovsxwd (xmm to ymm) followed by vcvtdq2pd (ymm to zmm).
2629 define <8 x double> @swto8f64(<8 x i16> %a) {
2630 ; GENERIC-LABEL: swto8f64:
2632 ; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
2633 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2634 ; GENERIC-NEXT: retq # sched: [1:1.00]
2636 ; SKX-LABEL: swto8f64:
2638 ; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
2639 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2640 ; SKX-NEXT: retq # sched: [7:1.00]
2641 %b = sitofp <8 x i16> %a to <8 x double>
; sitofp of <16 x i16> to <16 x double>; the result spans two zmm registers.
; The assertions expect one vpmovsxwd sign extension to 16 dwords, a
; vcvtdq2pd of the low 256 bits, a vextracti64x4 of the high 256 bits, and a
; second vcvtdq2pd.
2645 define <16 x double> @swto16f64(<16 x i16> %a) {
2646 ; GENERIC-LABEL: swto16f64:
2648 ; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [1:1.00]
2649 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
2650 ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
2651 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
2652 ; GENERIC-NEXT: retq # sched: [1:1.00]
2654 ; SKX-LABEL: swto16f64:
2656 ; SKX-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [3:1.00]
2657 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
2658 ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
2659 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
2660 ; SKX-NEXT: retq # sched: [7:1.00]
2661 %b = sitofp <16 x i16> %a to <16 x double>
2662 ret <16 x double> %b
; uitofp of <16 x i8> to <16 x double>; the result spans two zmm registers.
; The assertions expect one vpmovzxbd zero extension to 16 dwords, a signed
; vcvtdq2pd of the low 256 bits, a vextracti64x4 of the high 256 bits, and a
; second vcvtdq2pd.
2665 define <16 x double> @ucto16f64(<16 x i8> %a) {
2666 ; GENERIC-LABEL: ucto16f64:
2668 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
2669 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
2670 ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
2671 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
2672 ; GENERIC-NEXT: retq # sched: [1:1.00]
2674 ; SKX-LABEL: ucto16f64:
2676 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
2677 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
2678 ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
2679 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
2680 ; SKX-NEXT: retq # sched: [7:1.00]
2681 %b = uitofp <16 x i8> %a to <16 x double>
2682 ret <16 x double> %b
; uitofp of <16 x i16> to <16 x float>. The assertions expect a zero extension
; to dwords with vpmovzxwd (ymm to zmm) followed by the signed convert
; vcvtdq2ps.
2685 define <16 x float> @uwto16f32(<16 x i16> %a) {
2686 ; GENERIC-LABEL: uwto16f32:
2688 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
2689 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
2690 ; GENERIC-NEXT: retq # sched: [1:1.00]
2692 ; SKX-LABEL: uwto16f32:
2694 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
2695 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
2696 ; SKX-NEXT: retq # sched: [7:1.00]
2697 %b = uitofp <16 x i16> %a to <16 x float>
; uitofp of <8 x i16> to <8 x double>. The assertions expect a zero extension
; to dwords with vpmovzxwd (xmm to ymm) followed by the signed convert
; vcvtdq2pd (ymm to zmm).
2701 define <8 x double> @uwto8f64(<8 x i16> %a) {
2702 ; GENERIC-LABEL: uwto8f64:
2704 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
2705 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2706 ; GENERIC-NEXT: retq # sched: [1:1.00]
2708 ; SKX-LABEL: uwto8f64:
2710 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
2711 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2712 ; SKX-NEXT: retq # sched: [7:1.00]
2713 %b = uitofp <8 x i16> %a to <8 x double>
; uitofp of <16 x i16> to <16 x double>; the result spans two zmm registers.
; The assertions expect one vpmovzxwd zero extension to 16 dwords, a signed
; vcvtdq2pd of the low 256 bits, a vextracti64x4 of the high 256 bits, and a
; second vcvtdq2pd.
2717 define <16 x double> @uwto16f64(<16 x i16> %a) {
2718 ; GENERIC-LABEL: uwto16f64:
2720 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
2721 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
2722 ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
2723 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
2724 ; GENERIC-NEXT: retq # sched: [1:1.00]
2726 ; SKX-LABEL: uwto16f64:
2728 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
2729 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
2730 ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
2731 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
2732 ; SKX-NEXT: retq # sched: [7:1.00]
2733 %b = uitofp <16 x i16> %a to <16 x double>
2734 ret <16 x double> %b
; sitofp of <16 x i32> to <16 x float>. The assertions expect a single
; zmm-to-zmm vcvtdq2ps.
2737 define <16 x float> @sito16f32(<16 x i32> %a) {
2738 ; GENERIC-LABEL: sito16f32:
2740 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
2741 ; GENERIC-NEXT: retq # sched: [1:1.00]
2743 ; SKX-LABEL: sito16f32:
2745 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
2746 ; SKX-NEXT: retq # sched: [7:1.00]
2747 %b = sitofp <16 x i32> %a to <16 x float>
; sitofp of <16 x i32> to <16 x double>, a result that spans two zmm
; registers. The assertions expect the low 256 bits to be converted with
; vcvtdq2pd, the high 256 bits to be extracted with vextractf64x4 and
; converted the same way, and the low result to be copied into %zmm0 with
; vmovaps.
2751 define <16 x double> @sito16f64(<16 x i32> %a) {
2752 ; GENERIC-LABEL: sito16f64:
2754 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [4:1.00]
2755 ; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00]
2756 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [4:1.00]
2757 ; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
2758 ; GENERIC-NEXT: retq # sched: [1:1.00]
2760 ; SKX-LABEL: sito16f64:
2762 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [7:1.00]
2763 ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00]
2764 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [7:1.00]
2765 ; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
2766 ; SKX-NEXT: retq # sched: [7:1.00]
2767 %b = sitofp <16 x i32> %a to <16 x double>
2768 ret <16 x double> %b
; Scheduling check for uitofp <16 x i16> -> <16 x float>.
; Both check prefixes expect vpmovzxwd to zero-extend the words to dwords,
; followed by a 512-bit vcvtdq2ps on the widened value.
2771 define <16 x float> @usto16f32(<16 x i16> %a) {
2772 ; GENERIC-LABEL: usto16f32:
2774 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
2775 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
2776 ; GENERIC-NEXT: retq # sched: [1:1.00]
2778 ; SKX-LABEL: usto16f32:
2780 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
2781 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
2782 ; SKX-NEXT: retq # sched: [7:1.00]
2783 %b = uitofp <16 x i16> %a to <16 x float>
; Scheduling check for uitofp of a <16 x i1> compare result to <16 x float>.
; The mask is (%a < 0, signed). The checks expect vpmovd2m to move the dword
; sign bits into k1, a zero-masked vpbroadcastd of a RIP-relative constant
; to materialise the integer vector, and a 512-bit vcvtdq2ps.
2787 define <16 x float> @ubto16f32(<16 x i32> %a) {
2788 ; GENERIC-LABEL: ubto16f32:
2790 ; GENERIC-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:0.33]
2791 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
2792 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
2793 ; GENERIC-NEXT: retq # sched: [1:1.00]
2795 ; SKX-LABEL: ubto16f32:
2797 ; SKX-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:1.00]
2798 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
2799 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
2800 ; SKX-NEXT: retq # sched: [7:1.00]
2801 %mask = icmp slt <16 x i32> %a, zeroinitializer
2802 %1 = uitofp <16 x i1> %mask to <16 x float>
; Scheduling check for uitofp of a <16 x i1> compare result to <16 x double>.
; The mask is (%a < 0, signed). The checks expect vpmovd2m to build k1, a
; scalar movl of a RIP-relative constant into eax, and then two rounds of
; zero-masked vpbroadcastd + vcvtdq2pd: the first uses k1 as is, the second
; runs after kshiftrw $8 has shifted the mask right by eight bits.
2806 define <16 x double> @ubto16f64(<16 x i32> %a) {
2807 ; GENERIC-LABEL: ubto16f64:
2809 ; GENERIC-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:0.33]
2810 ; GENERIC-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
2811 ; GENERIC-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [1:1.00]
2812 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2813 ; GENERIC-NEXT: kshiftrw $8, %k1, %k1 # sched: [1:1.00]
2814 ; GENERIC-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [1:1.00]
2815 ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
2816 ; GENERIC-NEXT: retq # sched: [1:1.00]
2818 ; SKX-LABEL: ubto16f64:
2820 ; SKX-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:1.00]
2821 ; SKX-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
2822 ; SKX-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [3:1.00]
2823 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2824 ; SKX-NEXT: kshiftrw $8, %k1, %k1 # sched: [3:1.00]
2825 ; SKX-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [3:1.00]
2826 ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
2827 ; SKX-NEXT: retq # sched: [7:1.00]
2828 %mask = icmp slt <16 x i32> %a, zeroinitializer
2829 %1 = uitofp <16 x i1> %mask to <16 x double>
2830 ret <16 x double> %1
; Scheduling check for uitofp of an <8 x i1> compare result to <8 x float>.
; The mask is (%a < 0, signed). The checks expect vpmovd2m on ymm0 to build
; k1, a zero-masked vpbroadcastd of a RIP-relative constant into ymm0, and a
; 256-bit vcvtdq2ps.
2833 define <8 x float> @ubto8f32(<8 x i32> %a) {
2834 ; GENERIC-LABEL: ubto8f32:
2836 ; GENERIC-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:0.33]
2837 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:1.00]
2838 ; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
2839 ; GENERIC-NEXT: retq # sched: [1:1.00]
2841 ; SKX-LABEL: ubto8f32:
2843 ; SKX-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:1.00]
2844 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
2845 ; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
2846 ; SKX-NEXT: retq # sched: [7:1.00]
2847 %mask = icmp slt <8 x i32> %a, zeroinitializer
2848 %1 = uitofp <8 x i1> %mask to <8 x float>
; Scheduling check for uitofp of an <8 x i1> compare result to <8 x double>.
; The mask is (%a < 0, signed). The checks expect vpmovd2m on ymm0 to build
; k1, a zero-masked vpbroadcastd of a RIP-relative constant into ymm0, and a
; widening vcvtdq2pd from ymm0 to zmm0.
2852 define <8 x double> @ubto8f64(<8 x i32> %a) {
2853 ; GENERIC-LABEL: ubto8f64:
2855 ; GENERIC-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:0.33]
2856 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:1.00]
2857 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
2858 ; GENERIC-NEXT: retq # sched: [1:1.00]
2860 ; SKX-LABEL: ubto8f64:
2862 ; SKX-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:1.00]
2863 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
2864 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2865 ; SKX-NEXT: retq # sched: [7:1.00]
2866 %mask = icmp slt <8 x i32> %a, zeroinitializer
2867 %1 = uitofp <8 x i1> %mask to <8 x double>
; Scheduling check for uitofp of a <4 x i1> compare result to <4 x float>.
; The mask is (%a < 0, signed). The checks expect vpmovd2m on xmm0 to build
; k1, a zero-masked vpbroadcastd of a RIP-relative constant into xmm0, and a
; 128-bit vcvtdq2ps.
2871 define <4 x float> @ubto4f32(<4 x i32> %a) {
2872 ; GENERIC-LABEL: ubto4f32:
2874 ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
2875 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
2876 ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
2877 ; GENERIC-NEXT: retq # sched: [1:1.00]
2879 ; SKX-LABEL: ubto4f32:
2881 ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
2882 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
2883 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
2884 ; SKX-NEXT: retq # sched: [7:1.00]
2885 %mask = icmp slt <4 x i32> %a, zeroinitializer
2886 %1 = uitofp <4 x i1> %mask to <4 x float>
; Scheduling check for uitofp of a <4 x i1> compare result to <4 x double>.
; The mask is (%a < 0, signed). The checks expect vpmovd2m on xmm0 to build
; k1, a zero-masked vpbroadcastd of a RIP-relative constant into xmm0, and a
; widening vcvtdq2pd from xmm0 to ymm0.
2890 define <4 x double> @ubto4f64(<4 x i32> %a) {
2891 ; GENERIC-LABEL: ubto4f64:
2893 ; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
2894 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
2895 ; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
2896 ; GENERIC-NEXT: retq # sched: [1:1.00]
2898 ; SKX-LABEL: ubto4f64:
2900 ; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
2901 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
2902 ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
2903 ; SKX-NEXT: retq # sched: [7:1.00]
2904 %mask = icmp slt <4 x i32> %a, zeroinitializer
2905 %1 = uitofp <4 x i1> %mask to <4 x double>
; Scheduling check for uitofp of a <2 x i1> compare result to <2 x float>.
; The mask here is (%a != 0), unlike the wider ubto* tests which use a signed
; less-than. The checks expect vpxor + vpblendd to zero dwords 1 and 3 of
; xmm0, vptestmq to produce k1 from the two qwords, a zero-masked
; vpbroadcastd of a RIP-relative constant, and a 128-bit vcvtdq2ps.
; NOTE(review): the blend presumably clears the upper halves of 64-bit lanes
; holding the widened <2 x i32> argument - confirm against the legalizer.
2909 define <2 x float> @ubto2f32(<2 x i32> %a) {
2910 ; GENERIC-LABEL: ubto2f32:
2912 ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2913 ; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
2914 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
2915 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
2916 ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
2917 ; GENERIC-NEXT: retq # sched: [1:1.00]
2919 ; SKX-LABEL: ubto2f32:
2921 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2922 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
2923 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
2924 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
2925 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
2926 ; SKX-NEXT: retq # sched: [7:1.00]
2927 %mask = icmp ne <2 x i32> %a, zeroinitializer
2928 %1 = uitofp <2 x i1> %mask to <2 x float>
; Scheduling check for uitofp of a <2 x i1> compare result to <2 x double>.
; The mask is (%a != 0). The checks expect vpxor + vpblendd to zero dwords 1
; and 3 of xmm0, vptestmq to produce k1 from the two qwords, a zero-masked
; vpbroadcastd of a RIP-relative constant, and finally vcvtudq2pd (the
; unsigned convert, unlike the vcvtdq2pd expected in the wider ubto*f64
; tests).
2932 define <2 x double> @ubto2f64(<2 x i32> %a) {
2933 ; GENERIC-LABEL: ubto2f64:
2935 ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2936 ; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
2937 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
2938 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
2939 ; GENERIC-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [4:1.00]
2940 ; GENERIC-NEXT: retq # sched: [1:1.00]
2942 ; SKX-LABEL: ubto2f64:
2944 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
2945 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
2946 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
2947 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
2948 ; SKX-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [5:1.00]
2949 ; SKX-NEXT: retq # sched: [7:1.00]
2950 %mask = icmp ne <2 x i32> %a, zeroinitializer
2951 %1 = uitofp <2 x i1> %mask to <2 x double>
; Scheduling check for a masked zero-extending load: <8 x i8> is loaded from
; %i (align 1), zero-extended to <8 x i16>, and lanes whose %mask bit is
; clear are replaced by zero. The checks expect vpsllw $15 + vpmovw2m to turn
; the <8 x i1> argument into k1, then a zero-masked vpmovzxbw that reads
; directly from memory.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
2955 define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
2956 ; GENERIC-LABEL: zext_8x8mem_to_8x16:
2958 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
2959 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
2960 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
2961 ; GENERIC-NEXT: retq # sched: [1:1.00]
2963 ; SKX-LABEL: zext_8x8mem_to_8x16:
2965 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
2966 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
2967 ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
2968 ; SKX-NEXT: retq # sched: [7:1.00]
2969 %a = load <8 x i8>,<8 x i8> *%i,align 1
2970 %x = zext <8 x i8> %a to <8 x i16>
2971 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Scheduling check for a masked sign-extending load: <8 x i8> is loaded from
; %i (align 1), sign-extended to <8 x i16>, and lanes whose %mask bit is
; clear are replaced by zero. The checks expect vpsllw $15 + vpmovw2m to turn
; the <8 x i1> argument into k1, then a zero-masked vpmovsxbw from (%rdi).
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
2975 define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
2976 ; GENERIC-LABEL: sext_8x8mem_to_8x16:
2978 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
2979 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
2980 ; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
2981 ; GENERIC-NEXT: retq # sched: [1:1.00]
2983 ; SKX-LABEL: sext_8x8mem_to_8x16:
2985 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
2986 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
2987 ; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
2988 ; SKX-NEXT: retq # sched: [7:1.00]
2989 %a = load <8 x i8>,<8 x i8> *%i,align 1
2990 %x = sext <8 x i8> %a to <8 x i16>
2991 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Scheduling check for a masked zero-extending load: <16 x i8> is loaded
; from %i (align 1), zero-extended to <16 x i16>, and lanes whose %mask bit
; is clear are replaced by zero. The checks expect vpsllw $7 + vpmovb2m to
; turn the <16 x i1> argument into k1, then a zero-masked vpmovzxbw from
; memory into ymm0.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
2996 define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
2997 ; GENERIC-LABEL: zext_16x8mem_to_16x16:
2999 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
3000 ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
3001 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
3002 ; GENERIC-NEXT: retq # sched: [1:1.00]
3004 ; SKX-LABEL: zext_16x8mem_to_16x16:
3006 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
3007 ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
3008 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
3009 ; SKX-NEXT: retq # sched: [7:1.00]
3010 %a = load <16 x i8>,<16 x i8> *%i,align 1
3011 %x = zext <16 x i8> %a to <16 x i16>
3012 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Scheduling check for a masked sign-extending load: <16 x i8> is loaded
; from %i (align 1), sign-extended to <16 x i16>, and lanes whose %mask bit
; is clear are replaced by zero. The checks expect vpsllw $7 + vpmovb2m to
; turn the <16 x i1> argument into k1, then a zero-masked vpmovsxbw from
; (%rdi) into ymm0.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
3016 define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
3017 ; GENERIC-LABEL: sext_16x8mem_to_16x16:
3019 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
3020 ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
3021 ; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
3022 ; GENERIC-NEXT: retq # sched: [1:1.00]
3024 ; SKX-LABEL: sext_16x8mem_to_16x16:
3026 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
3027 ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
3028 ; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
3029 ; SKX-NEXT: retq # sched: [7:1.00]
3030 %a = load <16 x i8>,<16 x i8> *%i,align 1
3031 %x = sext <16 x i8> %a to <16 x i16>
3032 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Scheduling check for an unmasked register zext <16 x i8> -> <16 x i16>.
; Both check prefixes expect a single vpmovzxbw from xmm0 into ymm0.
3036 define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
3037 ; GENERIC-LABEL: zext_16x8_to_16x16:
3039 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
3040 ; GENERIC-NEXT: retq # sched: [1:1.00]
3042 ; SKX-LABEL: zext_16x8_to_16x16:
3044 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
3045 ; SKX-NEXT: retq # sched: [7:1.00]
3046 %x = zext <16 x i8> %a to <16 x i16>
; Scheduling check for a masked register zext <16 x i8> -> <16 x i16>, where
; lanes whose %mask bit is clear are replaced by zero. The checks expect
; vpsllw $7 + vpmovb2m on xmm1 to turn the <16 x i1> argument into k1, then
; a zero-masked vpmovzxbw from xmm0 into ymm0.
3050 define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
3051 ; GENERIC-LABEL: zext_16x8_to_16x16_mask:
3053 ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
3054 ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
3055 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
3056 ; GENERIC-NEXT: retq # sched: [1:1.00]
3058 ; SKX-LABEL: zext_16x8_to_16x16_mask:
3060 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
3061 ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
3062 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
3063 ; SKX-NEXT: retq # sched: [7:1.00]
3064 %x = zext <16 x i8> %a to <16 x i16>
3065 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Scheduling check for an unmasked register sext <16 x i8> -> <16 x i16>.
; Both check prefixes expect a single vpmovsxbw from xmm0 into ymm0.
3069 define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
3070 ; GENERIC-LABEL: sext_16x8_to_16x16:
3072 ; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
3073 ; GENERIC-NEXT: retq # sched: [1:1.00]
3075 ; SKX-LABEL: sext_16x8_to_16x16:
3077 ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
3078 ; SKX-NEXT: retq # sched: [7:1.00]
3079 %x = sext <16 x i8> %a to <16 x i16>
; Scheduling check for a masked register sext <16 x i8> -> <16 x i16>, where
; lanes whose %mask bit is clear are replaced by zero. The checks expect
; vpsllw $7 + vpmovb2m on xmm1 to turn the <16 x i1> argument into k1, then
; a zero-masked vpmovsxbw from xmm0 into ymm0.
3083 define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
3084 ; GENERIC-LABEL: sext_16x8_to_16x16_mask:
3086 ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
3087 ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
3088 ; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [1:1.00]
3089 ; GENERIC-NEXT: retq # sched: [1:1.00]
3091 ; SKX-LABEL: sext_16x8_to_16x16_mask:
3093 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
3094 ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
3095 ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00]
3096 ; SKX-NEXT: retq # sched: [7:1.00]
3097 %x = sext <16 x i8> %a to <16 x i16>
3098 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Scheduling check for a masked zero-extending load: <32 x i8> is loaded
; from %i (align 1), zero-extended to <32 x i16>, and lanes whose %mask bit
; is clear are replaced by zero. The checks expect vpsllw $7 + vpmovb2m on
; ymm0 to turn the <32 x i1> argument into k1, then a zero-masked vpmovzxbw
; from memory into zmm0.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
3102 define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
3103 ; GENERIC-LABEL: zext_32x8mem_to_32x16:
3105 ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
3106 ; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
3107 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [5:1.00]
3108 ; GENERIC-NEXT: retq # sched: [1:1.00]
3110 ; SKX-LABEL: zext_32x8mem_to_32x16:
3112 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
3113 ; SKX-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00]
3114 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [10:1.00]
3115 ; SKX-NEXT: retq # sched: [7:1.00]
3116 %a = load <32 x i8>,<32 x i8> *%i,align 1
3117 %x = zext <32 x i8> %a to <32 x i16>
3118 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Scheduling check for a masked sign-extending load: <32 x i8> is loaded
; from %i (align 1), sign-extended to <32 x i16>, and lanes whose %mask bit
; is clear are replaced by zero. The checks expect vpsllw $7 + vpmovb2m on
; ymm0 to turn the <32 x i1> argument into k1, then a zero-masked vpmovsxbw
; from (%rdi) into zmm0.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
3122 define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
3123 ; GENERIC-LABEL: sext_32x8mem_to_32x16:
3125 ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
3126 ; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
3127 ; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
3128 ; GENERIC-NEXT: retq # sched: [1:1.00]
3130 ; SKX-LABEL: sext_32x8mem_to_32x16:
3132 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
3133 ; SKX-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00]
3134 ; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
3135 ; SKX-NEXT: retq # sched: [7:1.00]
3136 %a = load <32 x i8>,<32 x i8> *%i,align 1
3137 %x = sext <32 x i8> %a to <32 x i16>
3138 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Scheduling check for an unmasked register zext <32 x i8> -> <32 x i16>.
; Both check prefixes expect a single vpmovzxbw from ymm0 into zmm0.
3142 define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
3143 ; GENERIC-LABEL: zext_32x8_to_32x16:
3145 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00]
3146 ; GENERIC-NEXT: retq # sched: [1:1.00]
3148 ; SKX-LABEL: zext_32x8_to_32x16:
3150 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00]
3151 ; SKX-NEXT: retq # sched: [7:1.00]
3152 %x = zext <32 x i8> %a to <32 x i16>
; Scheduling check for a masked register zext <32 x i8> -> <32 x i16>, where
; lanes whose %mask bit is clear are replaced by zero. The checks expect
; vpsllw $7 + vpmovb2m on ymm1 to turn the <32 x i1> argument into k1, then
; a zero-masked vpmovzxbw from ymm0 into zmm0.
3156 define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
3157 ; GENERIC-LABEL: zext_32x8_to_32x16_mask:
3159 ; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
3160 ; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33]
3161 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00]
3162 ; GENERIC-NEXT: retq # sched: [1:1.00]
3164 ; SKX-LABEL: zext_32x8_to_32x16_mask:
3166 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
3167 ; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00]
3168 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00]
3169 ; SKX-NEXT: retq # sched: [7:1.00]
3170 %x = zext <32 x i8> %a to <32 x i16>
3171 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Scheduling check for an unmasked register sext <32 x i8> -> <32 x i16>.
; Both check prefixes expect a single vpmovsxbw from ymm0 into zmm0.
3175 define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
3176 ; GENERIC-LABEL: sext_32x8_to_32x16:
3178 ; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [1:1.00]
3179 ; GENERIC-NEXT: retq # sched: [1:1.00]
3181 ; SKX-LABEL: sext_32x8_to_32x16:
3183 ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [3:1.00]
3184 ; SKX-NEXT: retq # sched: [7:1.00]
3185 %x = sext <32 x i8> %a to <32 x i16>
; Scheduling check for a masked register sext <32 x i8> -> <32 x i16>, where
; lanes whose %mask bit is clear are replaced by zero. The checks expect
; vpsllw $7 + vpmovb2m on ymm1 to turn the <32 x i1> argument into k1, then
; a zero-masked vpmovsxbw from ymm0 into zmm0.
3189 define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
3190 ; GENERIC-LABEL: sext_32x8_to_32x16_mask:
3192 ; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
3193 ; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33]
3194 ; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [1:1.00]
3195 ; GENERIC-NEXT: retq # sched: [1:1.00]
3197 ; SKX-LABEL: sext_32x8_to_32x16_mask:
3199 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
3200 ; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00]
3201 ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [3:1.00]
3202 ; SKX-NEXT: retq # sched: [7:1.00]
3203 %x = sext <32 x i8> %a to <32 x i16>
3204 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; Scheduling check for a masked zero-extending load: <4 x i8> is loaded from
; %i (align 1), zero-extended to <4 x i32>, and lanes whose %mask bit is
; clear are replaced by zero. The checks expect vpslld $31 + vptestmd to
; turn the <4 x i1> argument into k1, then a zero-masked vpmovzxbd from
; memory into xmm0.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
3208 define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
3209 ; GENERIC-LABEL: zext_4x8mem_to_4x32:
3211 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3212 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
3213 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
3214 ; GENERIC-NEXT: retq # sched: [1:1.00]
3216 ; SKX-LABEL: zext_4x8mem_to_4x32:
3218 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3219 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
3220 ; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
3221 ; SKX-NEXT: retq # sched: [7:1.00]
3222 %a = load <4 x i8>,<4 x i8> *%i,align 1
3223 %x = zext <4 x i8> %a to <4 x i32>
3224 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Scheduling check for a masked sign-extending load: <4 x i8> is loaded from
; %i (align 1), sign-extended to <4 x i32>, and lanes whose %mask bit is
; clear are replaced by zero. The checks expect vpslld $31 + vptestmd to
; turn the <4 x i1> argument into k1, then a zero-masked vpmovsxbd from
; (%rdi) into xmm0.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
3228 define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
3229 ; GENERIC-LABEL: sext_4x8mem_to_4x32:
3231 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3232 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
3233 ; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
3234 ; GENERIC-NEXT: retq # sched: [1:1.00]
3236 ; SKX-LABEL: sext_4x8mem_to_4x32:
3238 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3239 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
3240 ; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
3241 ; SKX-NEXT: retq # sched: [7:1.00]
3242 %a = load <4 x i8>,<4 x i8> *%i,align 1
3243 %x = sext <4 x i8> %a to <4 x i32>
3244 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Scheduling check for a masked zero-extending load: <8 x i8> is loaded from
; %i (align 1), zero-extended to <8 x i32>, and lanes whose %mask bit is
; clear are replaced by zero. The checks expect vpsllw $15 + vpmovw2m to
; turn the <8 x i1> argument into k1, then a zero-masked vpmovzxbd from
; memory into ymm0.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
3248 define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
3249 ; GENERIC-LABEL: zext_8x8mem_to_8x32:
3251 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3252 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3253 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
3254 ; GENERIC-NEXT: retq # sched: [1:1.00]
3256 ; SKX-LABEL: zext_8x8mem_to_8x32:
3258 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3259 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3260 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
3261 ; SKX-NEXT: retq # sched: [7:1.00]
3262 %a = load <8 x i8>,<8 x i8> *%i,align 1
3263 %x = zext <8 x i8> %a to <8 x i32>
3264 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Scheduling check for a masked sign-extending load: <8 x i8> is loaded from
; %i (align 1), sign-extended to <8 x i32>, and lanes whose %mask bit is
; clear are replaced by zero. The checks expect vpsllw $15 + vpmovw2m to
; turn the <8 x i1> argument into k1, then a zero-masked vpmovsxbd from
; (%rdi) into ymm0.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
3268 define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
3269 ; GENERIC-LABEL: sext_8x8mem_to_8x32:
3271 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3272 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3273 ; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
3274 ; GENERIC-NEXT: retq # sched: [1:1.00]
3276 ; SKX-LABEL: sext_8x8mem_to_8x32:
3278 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3279 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3280 ; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
3281 ; SKX-NEXT: retq # sched: [7:1.00]
3282 %a = load <8 x i8>,<8 x i8> *%i,align 1
3283 %x = sext <8 x i8> %a to <8 x i32>
3284 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Scheduling check for a masked zero-extending load: <16 x i8> is loaded
; from %i (align 1), zero-extended to <16 x i32>, and lanes whose %mask bit
; is clear are replaced by zero. The checks expect vpsllw $7 + vpmovb2m to
; turn the <16 x i1> argument into k1, then a zero-masked vpmovzxbd from
; memory into zmm0.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
3288 define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
3289 ; GENERIC-LABEL: zext_16x8mem_to_16x32:
3291 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
3292 ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
3293 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [5:1.00]
3294 ; GENERIC-NEXT: retq # sched: [1:1.00]
3296 ; SKX-LABEL: zext_16x8mem_to_16x32:
3298 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
3299 ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
3300 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [10:1.00]
3301 ; SKX-NEXT: retq # sched: [7:1.00]
3302 %a = load <16 x i8>,<16 x i8> *%i,align 1
3303 %x = zext <16 x i8> %a to <16 x i32>
3304 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Scheduling check for a masked sign-extending load: <16 x i8> is loaded
; from %i (align 1), sign-extended to <16 x i32>, and lanes whose %mask bit
; is clear are replaced by zero. The checks expect vpsllw $7 + vpmovb2m to
; turn the <16 x i1> argument into k1, then a zero-masked vpmovsxbd from
; (%rdi) into zmm0.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
3308 define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
3309 ; GENERIC-LABEL: sext_16x8mem_to_16x32:
3311 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
3312 ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
3313 ; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
3314 ; GENERIC-NEXT: retq # sched: [1:1.00]
3316 ; SKX-LABEL: sext_16x8mem_to_16x32:
3318 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
3319 ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
3320 ; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
3321 ; SKX-NEXT: retq # sched: [7:1.00]
3322 %a = load <16 x i8>,<16 x i8> *%i,align 1
3323 %x = sext <16 x i8> %a to <16 x i32>
3324 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Scheduling check for a masked register zext <16 x i8> -> <16 x i32>, where
; lanes whose %mask bit is clear are replaced by zero. The checks expect
; vpsllw $7 + vpmovb2m on xmm1 to turn the <16 x i1> argument into k1, then
; a zero-masked vpmovzxbd from xmm0 into zmm0.
3328 define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
3329 ; GENERIC-LABEL: zext_16x8_to_16x32_mask:
3331 ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
3332 ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
3333 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
3334 ; GENERIC-NEXT: retq # sched: [1:1.00]
3336 ; SKX-LABEL: zext_16x8_to_16x32_mask:
3338 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
3339 ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
3340 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
3341 ; SKX-NEXT: retq # sched: [7:1.00]
3342 %x = zext <16 x i8> %a to <16 x i32>
3343 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Scheduling check for a masked register sext <16 x i8> -> <16 x i32>, where
; lanes whose %mask bit is clear are replaced by zero. The checks expect
; vpsllw $7 + vpmovb2m on xmm1 to turn the <16 x i1> argument into k1, then
; a zero-masked vpmovsxbd from xmm0 into zmm0.
3347 define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
3348 ; GENERIC-LABEL: sext_16x8_to_16x32_mask:
3350 ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
3351 ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
3352 ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
3353 ; GENERIC-NEXT: retq # sched: [1:1.00]
3355 ; SKX-LABEL: sext_16x8_to_16x32_mask:
3357 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
3358 ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
3359 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
3360 ; SKX-NEXT: retq # sched: [7:1.00]
3361 %x = sext <16 x i8> %a to <16 x i32>
3362 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Scheduling check for an unmasked register zext <16 x i8> -> <16 x i32>.
; Both check prefixes expect a single vpmovzxbd from xmm0 into zmm0.
3366 define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
3367 ; GENERIC-LABEL: zext_16x8_to_16x32:
3369 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
3370 ; GENERIC-NEXT: retq # sched: [1:1.00]
3372 ; SKX-LABEL: zext_16x8_to_16x32:
3374 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
3375 ; SKX-NEXT: retq # sched: [7:1.00]
3376 %x = zext <16 x i8> %i to <16 x i32>
; Scheduling check for an unmasked register sext <16 x i8> -> <16 x i32>.
; Both check prefixes expect a single vpmovsxbd from xmm0 into zmm0.
3380 define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
3381 ; GENERIC-LABEL: sext_16x8_to_16x32:
3383 ; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00]
3384 ; GENERIC-NEXT: retq # sched: [1:1.00]
3386 ; SKX-LABEL: sext_16x8_to_16x32:
3388 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00]
3389 ; SKX-NEXT: retq # sched: [7:1.00]
3390 %x = sext <16 x i8> %i to <16 x i32>
; Scheduling check for a masked zero-extending load: <2 x i8> is loaded from
; %i (align 1), zero-extended to <2 x i64>, and lanes whose %mask bit is
; clear are replaced by zero. The checks expect vpsllq $63 + vptestmq to
; turn the <2 x i1> argument into k1, then a zero-masked vpmovzxbq from
; memory into xmm0.
; NOTE(review): the function is declared readnone yet loads through %i -
; confirm that attribute is intended.
3394 define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
3395 ; GENERIC-LABEL: zext_2x8mem_to_2x64:
3397 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
3398 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
3399 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
3400 ; GENERIC-NEXT: retq # sched: [1:1.00]
3402 ; SKX-LABEL: zext_2x8mem_to_2x64:
3404 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
3405 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
3406 ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00]
3407 ; SKX-NEXT: retq # sched: [7:1.00]
3408 %a = load <2 x i8>,<2 x i8> *%i,align 1
3409 %x = zext <2 x i8> %a to <2 x i64>
3410 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Loads <2 x i8> through %i (align 1), sign-extends it to <2 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllq + vptestmq) followed by a zero-masked vpmovsxbq from (%rdi).
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3413 define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
3414 ; GENERIC-LABEL: sext_2x8mem_to_2x64mask:
3416 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
3417 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
3418 ; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
3419 ; GENERIC-NEXT: retq # sched: [1:1.00]
3421 ; SKX-LABEL: sext_2x8mem_to_2x64mask:
3423 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
3424 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
3425 ; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
3426 ; SKX-NEXT: retq # sched: [7:1.00]
3427 %a = load <2 x i8>,<2 x i8> *%i,align 1
3428 %x = sext <2 x i8> %a to <2 x i64>
3429 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Loads <2 x i8> through %i (align 1) and sign-extends it to <2 x i64> with no mask.
; The check lines expect a single vpmovsxbq with a (%rdi) memory operand into xmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3432 define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
3433 ; GENERIC-LABEL: sext_2x8mem_to_2x64:
3435 ; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
3436 ; GENERIC-NEXT: retq # sched: [1:1.00]
3438 ; SKX-LABEL: sext_2x8mem_to_2x64:
3440 ; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
3441 ; SKX-NEXT: retq # sched: [7:1.00]
3442 %a = load <2 x i8>,<2 x i8> *%i,align 1
3443 %x = sext <2 x i8> %a to <2 x i64>
; Loads <4 x i8> through %i (align 1), zero-extends it to <4 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpslld + vptestmd) followed by a zero-masked vpmovzxbq from memory into ymm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3447 define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
3448 ; GENERIC-LABEL: zext_4x8mem_to_4x64:
3450 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3451 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
3452 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
3453 ; GENERIC-NEXT: retq # sched: [1:1.00]
3455 ; SKX-LABEL: zext_4x8mem_to_4x64:
3457 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3458 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
3459 ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
3460 ; SKX-NEXT: retq # sched: [7:1.00]
3461 %a = load <4 x i8>,<4 x i8> *%i,align 1
3462 %x = zext <4 x i8> %a to <4 x i64>
3463 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Loads <4 x i8> through %i (align 1), sign-extends it to <4 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpslld + vptestmd) followed by a zero-masked vpmovsxbq from (%rdi) into ymm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3467 define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
3468 ; GENERIC-LABEL: sext_4x8mem_to_4x64mask:
3470 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3471 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
3472 ; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
3473 ; GENERIC-NEXT: retq # sched: [1:1.00]
3475 ; SKX-LABEL: sext_4x8mem_to_4x64mask:
3477 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3478 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
3479 ; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
3480 ; SKX-NEXT: retq # sched: [7:1.00]
3481 %a = load <4 x i8>,<4 x i8> *%i,align 1
3482 %x = sext <4 x i8> %a to <4 x i64>
3483 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Loads <4 x i8> through %i (align 1) and sign-extends it to <4 x i64> with no mask.
; The check lines expect a single vpmovsxbq with a (%rdi) memory operand into ymm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3487 define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
3488 ; GENERIC-LABEL: sext_4x8mem_to_4x64:
3490 ; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [5:1.00]
3491 ; GENERIC-NEXT: retq # sched: [1:1.00]
3493 ; SKX-LABEL: sext_4x8mem_to_4x64:
3495 ; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00]
3496 ; SKX-NEXT: retq # sched: [7:1.00]
3497 %a = load <4 x i8>,<4 x i8> *%i,align 1
3498 %x = sext <4 x i8> %a to <4 x i64>
; Loads <8 x i8> through %i (align 1), zero-extends it to <8 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllw + vpmovw2m) followed by a zero-masked vpmovzxbq from memory into zmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3502 define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
3503 ; GENERIC-LABEL: zext_8x8mem_to_8x64:
3505 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3506 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3507 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
3508 ; GENERIC-NEXT: retq # sched: [1:1.00]
3510 ; SKX-LABEL: zext_8x8mem_to_8x64:
3512 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3513 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3514 ; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
3515 ; SKX-NEXT: retq # sched: [7:1.00]
3516 %a = load <8 x i8>,<8 x i8> *%i,align 1
3517 %x = zext <8 x i8> %a to <8 x i64>
3518 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Loads <8 x i8> through %i (align 1), sign-extends it to <8 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllw + vpmovw2m) followed by a zero-masked vpmovsxbq from (%rdi) into zmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3522 define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
3523 ; GENERIC-LABEL: sext_8x8mem_to_8x64mask:
3525 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3526 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3527 ; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
3528 ; GENERIC-NEXT: retq # sched: [1:1.00]
3530 ; SKX-LABEL: sext_8x8mem_to_8x64mask:
3532 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3533 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3534 ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
3535 ; SKX-NEXT: retq # sched: [7:1.00]
3536 %a = load <8 x i8>,<8 x i8> *%i,align 1
3537 %x = sext <8 x i8> %a to <8 x i64>
3538 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Loads <8 x i8> through %i (align 1) and sign-extends it to <8 x i64> with no mask.
; The check lines expect a single vpmovsxbq with a (%rdi) memory operand into zmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3542 define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
3543 ; GENERIC-LABEL: sext_8x8mem_to_8x64:
3545 ; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [5:1.00]
3546 ; GENERIC-NEXT: retq # sched: [1:1.00]
3548 ; SKX-LABEL: sext_8x8mem_to_8x64:
3550 ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00]
3551 ; SKX-NEXT: retq # sched: [7:1.00]
3552 %a = load <8 x i8>,<8 x i8> *%i,align 1
3553 %x = sext <8 x i8> %a to <8 x i64>
; Loads <4 x i16> through %i (align 1), zero-extends it to <4 x i32>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpslld + vptestmd) followed by a zero-masked vpmovzxwd from memory into xmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3557 define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
3558 ; GENERIC-LABEL: zext_4x16mem_to_4x32:
3560 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3561 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
3562 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
3563 ; GENERIC-NEXT: retq # sched: [1:1.00]
3565 ; SKX-LABEL: zext_4x16mem_to_4x32:
3567 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3568 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
3569 ; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00]
3570 ; SKX-NEXT: retq # sched: [7:1.00]
3571 %a = load <4 x i16>,<4 x i16> *%i,align 1
3572 %x = zext <4 x i16> %a to <4 x i32>
3573 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Loads <4 x i16> through %i (align 1), sign-extends it to <4 x i32>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpslld + vptestmd) followed by a zero-masked vpmovsxwd from (%rdi) into xmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3577 define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
3578 ; GENERIC-LABEL: sext_4x16mem_to_4x32mask:
3580 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3581 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
3582 ; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
3583 ; GENERIC-NEXT: retq # sched: [1:1.00]
3585 ; SKX-LABEL: sext_4x16mem_to_4x32mask:
3587 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3588 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
3589 ; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
3590 ; SKX-NEXT: retq # sched: [7:1.00]
3591 %a = load <4 x i16>,<4 x i16> *%i,align 1
3592 %x = sext <4 x i16> %a to <4 x i32>
3593 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
; Loads <4 x i16> through %i (align 1) and sign-extends it to <4 x i32> with no mask.
; The check lines expect a single vpmovsxwd with a (%rdi) memory operand into xmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3597 define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
3598 ; GENERIC-LABEL: sext_4x16mem_to_4x32:
3600 ; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
3601 ; GENERIC-NEXT: retq # sched: [1:1.00]
3603 ; SKX-LABEL: sext_4x16mem_to_4x32:
3605 ; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
3606 ; SKX-NEXT: retq # sched: [7:1.00]
3607 %a = load <4 x i16>,<4 x i16> *%i,align 1
3608 %x = sext <4 x i16> %a to <4 x i32>
; Loads <8 x i16> through %i (align 1), zero-extends it to <8 x i32>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllw + vpmovw2m) followed by a zero-masked vpmovzxwd from memory into ymm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3613 define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
3614 ; GENERIC-LABEL: zext_8x16mem_to_8x32:
3616 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3617 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3618 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
3619 ; GENERIC-NEXT: retq # sched: [1:1.00]
3621 ; SKX-LABEL: zext_8x16mem_to_8x32:
3623 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3624 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3625 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00]
3626 ; SKX-NEXT: retq # sched: [7:1.00]
3627 %a = load <8 x i16>,<8 x i16> *%i,align 1
3628 %x = zext <8 x i16> %a to <8 x i32>
3629 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Loads <8 x i16> through %i (align 1), sign-extends it to <8 x i32>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllw + vpmovw2m) followed by a zero-masked vpmovsxwd from (%rdi) into ymm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3633 define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
3634 ; GENERIC-LABEL: sext_8x16mem_to_8x32mask:
3636 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3637 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3638 ; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
3639 ; GENERIC-NEXT: retq # sched: [1:1.00]
3641 ; SKX-LABEL: sext_8x16mem_to_8x32mask:
3643 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3644 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3645 ; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
3646 ; SKX-NEXT: retq # sched: [7:1.00]
3647 %a = load <8 x i16>,<8 x i16> *%i,align 1
3648 %x = sext <8 x i16> %a to <8 x i32>
3649 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Loads <8 x i16> through %i (align 1) and sign-extends it to <8 x i32> with no mask.
; The check lines expect a single vpmovsxwd with a (%rdi) memory operand into ymm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3653 define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
3654 ; GENERIC-LABEL: sext_8x16mem_to_8x32:
3656 ; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [5:1.00]
3657 ; GENERIC-NEXT: retq # sched: [1:1.00]
3659 ; SKX-LABEL: sext_8x16mem_to_8x32:
3661 ; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [9:1.00]
3662 ; SKX-NEXT: retq # sched: [7:1.00]
3663 %a = load <8 x i16>,<8 x i16> *%i,align 1
3664 %x = sext <8 x i16> %a to <8 x i32>
; Zero-extends the <8 x i16> register argument %a to <8 x i32> and selects the result
; against zeroinitializer under %mask. The check lines expect the mask (in xmm1) to be
; moved into %k1 (vpsllw + vpmovw2m) followed by a zero-masked register vpmovzxwd into ymm0.
3668 define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
3669 ; GENERIC-LABEL: zext_8x16_to_8x32mask:
3671 ; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
3672 ; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33]
3673 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
3674 ; GENERIC-NEXT: retq # sched: [1:1.00]
3676 ; SKX-LABEL: zext_8x16_to_8x32mask:
3678 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
3679 ; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00]
3680 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
3681 ; SKX-NEXT: retq # sched: [7:1.00]
3682 %x = zext <8 x i16> %a to <8 x i32>
3683 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
; Zero-extends the <8 x i16> register argument %a to <8 x i32> with no mask. The check
; lines expect a single register-to-register vpmovzxwd into ymm0 for each check prefix.
3687 define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
3688 ; GENERIC-LABEL: zext_8x16_to_8x32:
3690 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
3691 ; GENERIC-NEXT: retq # sched: [1:1.00]
3693 ; SKX-LABEL: zext_8x16_to_8x32:
3695 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
3696 ; SKX-NEXT: retq # sched: [7:1.00]
3697 %x = zext <8 x i16> %a to <8 x i32>
; Loads <16 x i16> through %i (align 1), zero-extends it to <16 x i32>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllw + vpmovb2m) followed by a zero-masked vpmovzxwd from memory into zmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3701 define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
3702 ; GENERIC-LABEL: zext_16x16mem_to_16x32:
3704 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
3705 ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
3706 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
3707 ; GENERIC-NEXT: retq # sched: [1:1.00]
3709 ; SKX-LABEL: zext_16x16mem_to_16x32:
3711 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
3712 ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
3713 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
3714 ; SKX-NEXT: retq # sched: [7:1.00]
3715 %a = load <16 x i16>,<16 x i16> *%i,align 1
3716 %x = zext <16 x i16> %a to <16 x i32>
3717 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Loads <16 x i16> through %i (align 1), sign-extends it to <16 x i32>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllw + vpmovb2m) followed by a zero-masked vpmovsxwd from (%rdi) into zmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3721 define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
3722 ; GENERIC-LABEL: sext_16x16mem_to_16x32mask:
3724 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
3725 ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
3726 ; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
3727 ; GENERIC-NEXT: retq # sched: [1:1.00]
3729 ; SKX-LABEL: sext_16x16mem_to_16x32mask:
3731 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
3732 ; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
3733 ; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
3734 ; SKX-NEXT: retq # sched: [7:1.00]
3735 %a = load <16 x i16>,<16 x i16> *%i,align 1
3736 %x = sext <16 x i16> %a to <16 x i32>
3737 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Loads <16 x i16> through %i (align 1) and sign-extends it to <16 x i32> with no mask.
; The check lines expect a single vpmovsxwd with a (%rdi) memory operand into zmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3741 define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
3742 ; GENERIC-LABEL: sext_16x16mem_to_16x32:
3744 ; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [5:1.00]
3745 ; GENERIC-NEXT: retq # sched: [1:1.00]
3747 ; SKX-LABEL: sext_16x16mem_to_16x32:
3749 ; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [10:1.00]
3750 ; SKX-NEXT: retq # sched: [7:1.00]
3751 %a = load <16 x i16>,<16 x i16> *%i,align 1
3752 %x = sext <16 x i16> %a to <16 x i32>
; Zero-extends the <16 x i16> register argument %a to <16 x i32> and selects the result
; against zeroinitializer under %mask. The check lines expect the mask (in xmm1) to be
; moved into %k1 (vpsllw + vpmovb2m) followed by a zero-masked register vpmovzxwd into zmm0.
3755 define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
3756 ; GENERIC-LABEL: zext_16x16_to_16x32mask:
3758 ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
3759 ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
3760 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
3761 ; GENERIC-NEXT: retq # sched: [1:1.00]
3763 ; SKX-LABEL: zext_16x16_to_16x32mask:
3765 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
3766 ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
3767 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
3768 ; SKX-NEXT: retq # sched: [7:1.00]
3769 %x = zext <16 x i16> %a to <16 x i32>
3770 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
; Zero-extends the <16 x i16> register argument %a to <16 x i32> with no mask. The check
; lines expect a single register-to-register vpmovzxwd from ymm0 into zmm0 for each check prefix.
3774 define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
3775 ; GENERIC-LABEL: zext_16x16_to_16x32:
3777 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
3778 ; GENERIC-NEXT: retq # sched: [1:1.00]
3780 ; SKX-LABEL: zext_16x16_to_16x32:
3782 ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
3783 ; SKX-NEXT: retq # sched: [7:1.00]
3784 %x = zext <16 x i16> %a to <16 x i32>
; Loads <2 x i16> through %i (align 1), zero-extends it to <2 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllq + vptestmq) followed by a zero-masked vpmovzxwq from memory into xmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3788 define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
3789 ; GENERIC-LABEL: zext_2x16mem_to_2x64:
3791 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
3792 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
3793 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:1.00]
3794 ; GENERIC-NEXT: retq # sched: [1:1.00]
3796 ; SKX-LABEL: zext_2x16mem_to_2x64:
3798 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
3799 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
3800 ; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [9:1.00]
3801 ; SKX-NEXT: retq # sched: [7:1.00]
3802 %a = load <2 x i16>,<2 x i16> *%i,align 1
3803 %x = zext <2 x i16> %a to <2 x i64>
3804 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Loads <2 x i16> through %i (align 1), sign-extends it to <2 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllq + vptestmq) followed by a zero-masked vpmovsxwq from (%rdi) into xmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3808 define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
3809 ; GENERIC-LABEL: sext_2x16mem_to_2x64mask:
3811 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
3812 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
3813 ; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
3814 ; GENERIC-NEXT: retq # sched: [1:1.00]
3816 ; SKX-LABEL: sext_2x16mem_to_2x64mask:
3818 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
3819 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
3820 ; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
3821 ; SKX-NEXT: retq # sched: [7:1.00]
3822 %a = load <2 x i16>,<2 x i16> *%i,align 1
3823 %x = sext <2 x i16> %a to <2 x i64>
3824 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Loads <2 x i16> through %i (align 1) and sign-extends it to <2 x i64> with no mask.
; The check lines expect a single vpmovsxwq with a (%rdi) memory operand into xmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3828 define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
3829 ; GENERIC-LABEL: sext_2x16mem_to_2x64:
3831 ; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
3832 ; GENERIC-NEXT: retq # sched: [1:1.00]
3834 ; SKX-LABEL: sext_2x16mem_to_2x64:
3836 ; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
3837 ; SKX-NEXT: retq # sched: [7:1.00]
3838 %a = load <2 x i16>,<2 x i16> *%i,align 1
3839 %x = sext <2 x i16> %a to <2 x i64>
; Loads <4 x i16> through %i (align 1), zero-extends it to <4 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpslld + vptestmd) followed by a zero-masked vpmovzxwq from memory into ymm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3843 define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
3844 ; GENERIC-LABEL: zext_4x16mem_to_4x64:
3846 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3847 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
3848 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
3849 ; GENERIC-NEXT: retq # sched: [1:1.00]
3851 ; SKX-LABEL: zext_4x16mem_to_4x64:
3853 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3854 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
3855 ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
3856 ; SKX-NEXT: retq # sched: [7:1.00]
3857 %a = load <4 x i16>,<4 x i16> *%i,align 1
3858 %x = zext <4 x i16> %a to <4 x i64>
3859 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Loads <4 x i16> through %i (align 1), sign-extends it to <4 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpslld + vptestmd) followed by a zero-masked vpmovsxwq from (%rdi) into ymm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3863 define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
3864 ; GENERIC-LABEL: sext_4x16mem_to_4x64mask:
3866 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
3867 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
3868 ; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
3869 ; GENERIC-NEXT: retq # sched: [1:1.00]
3871 ; SKX-LABEL: sext_4x16mem_to_4x64mask:
3873 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
3874 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
3875 ; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
3876 ; SKX-NEXT: retq # sched: [7:1.00]
3877 %a = load <4 x i16>,<4 x i16> *%i,align 1
3878 %x = sext <4 x i16> %a to <4 x i64>
3879 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Loads <4 x i16> through %i (align 1) and sign-extends it to <4 x i64> with no mask.
; The check lines expect a single vpmovsxwq with a (%rdi) memory operand into ymm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3883 define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
3884 ; GENERIC-LABEL: sext_4x16mem_to_4x64:
3886 ; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [5:1.00]
3887 ; GENERIC-NEXT: retq # sched: [1:1.00]
3889 ; SKX-LABEL: sext_4x16mem_to_4x64:
3891 ; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00]
3892 ; SKX-NEXT: retq # sched: [7:1.00]
3893 %a = load <4 x i16>,<4 x i16> *%i,align 1
3894 %x = sext <4 x i16> %a to <4 x i64>
; Loads <8 x i16> through %i (align 1), zero-extends it to <8 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllw + vpmovw2m) followed by a zero-masked vpmovzxwq from memory into zmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3898 define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
3899 ; GENERIC-LABEL: zext_8x16mem_to_8x64:
3901 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3902 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3903 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
3904 ; GENERIC-NEXT: retq # sched: [1:1.00]
3906 ; SKX-LABEL: zext_8x16mem_to_8x64:
3908 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3909 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3910 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
3911 ; SKX-NEXT: retq # sched: [7:1.00]
3912 %a = load <8 x i16>,<8 x i16> *%i,align 1
3913 %x = zext <8 x i16> %a to <8 x i64>
3914 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Loads <8 x i16> through %i (align 1), sign-extends it to <8 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllw + vpmovw2m) followed by a zero-masked vpmovsxwq from (%rdi) into zmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3918 define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
3919 ; GENERIC-LABEL: sext_8x16mem_to_8x64mask:
3921 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
3922 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
3923 ; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
3924 ; GENERIC-NEXT: retq # sched: [1:1.00]
3926 ; SKX-LABEL: sext_8x16mem_to_8x64mask:
3928 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
3929 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
3930 ; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
3931 ; SKX-NEXT: retq # sched: [7:1.00]
3932 %a = load <8 x i16>,<8 x i16> *%i,align 1
3933 %x = sext <8 x i16> %a to <8 x i64>
3934 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Loads <8 x i16> through %i (align 1) and sign-extends it to <8 x i64> with no mask.
; The check lines expect a single vpmovsxwq with a (%rdi) memory operand into zmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3938 define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
3939 ; GENERIC-LABEL: sext_8x16mem_to_8x64:
3941 ; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [5:1.00]
3942 ; GENERIC-NEXT: retq # sched: [1:1.00]
3944 ; SKX-LABEL: sext_8x16mem_to_8x64:
3946 ; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [10:1.00]
3947 ; SKX-NEXT: retq # sched: [7:1.00]
3948 %a = load <8 x i16>,<8 x i16> *%i,align 1
3949 %x = sext <8 x i16> %a to <8 x i64>
; Zero-extends the <8 x i16> register argument %a to <8 x i64> and selects the result
; against zeroinitializer under %mask. The check lines expect the mask (in xmm1) to be
; moved into %k1 (vpsllw + vpmovw2m) followed by a zero-masked register vpmovzxwq into zmm0.
3953 define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
3954 ; GENERIC-LABEL: zext_8x16_to_8x64mask:
3956 ; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
3957 ; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33]
3958 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
3959 ; GENERIC-NEXT: retq # sched: [1:1.00]
3961 ; SKX-LABEL: zext_8x16_to_8x64mask:
3963 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
3964 ; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00]
3965 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
3966 ; SKX-NEXT: retq # sched: [7:1.00]
3967 %x = zext <8 x i16> %a to <8 x i64>
3968 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Zero-extends the <8 x i16> register argument %a to <8 x i64> with no mask. The check
; lines expect a single register-to-register vpmovzxwq from xmm0 into zmm0 for each check prefix.
3972 define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
3973 ; GENERIC-LABEL: zext_8x16_to_8x64:
3975 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
3976 ; GENERIC-NEXT: retq # sched: [1:1.00]
3978 ; SKX-LABEL: zext_8x16_to_8x64:
3980 ; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
3981 ; SKX-NEXT: retq # sched: [7:1.00]
3982 %ret = zext <8 x i16> %a to <8 x i64>
; Loads <2 x i32> through %i (align 1), zero-extends it to <2 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllq + vptestmq) followed by a zero-masked vpmovzxdq from memory into xmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
3986 define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
3987 ; GENERIC-LABEL: zext_2x32mem_to_2x64:
3989 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
3990 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
3991 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [5:1.00]
3992 ; GENERIC-NEXT: retq # sched: [1:1.00]
3994 ; SKX-LABEL: zext_2x32mem_to_2x64:
3996 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
3997 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
3998 ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [9:1.00]
3999 ; SKX-NEXT: retq # sched: [7:1.00]
4000 %a = load <2 x i32>,<2 x i32> *%i,align 1
4001 %x = zext <2 x i32> %a to <2 x i64>
4002 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Loads <2 x i32> through %i (align 1), sign-extends it to <2 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpsllq + vptestmq) followed by a zero-masked vpmovsxdq from (%rdi) into xmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
4006 define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
4007 ; GENERIC-LABEL: sext_2x32mem_to_2x64mask:
4009 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
4010 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
4011 ; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
4012 ; GENERIC-NEXT: retq # sched: [1:1.00]
4014 ; SKX-LABEL: sext_2x32mem_to_2x64mask:
4016 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
4017 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
4018 ; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
4019 ; SKX-NEXT: retq # sched: [7:1.00]
4020 %a = load <2 x i32>,<2 x i32> *%i,align 1
4021 %x = sext <2 x i32> %a to <2 x i64>
4022 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
; Loads <2 x i32> through %i (align 1) and sign-extends it to <2 x i64> with no mask.
; The check lines expect a single vpmovsxdq with a (%rdi) memory operand into xmm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
4026 define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
4027 ; GENERIC-LABEL: sext_2x32mem_to_2x64:
4029 ; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
4030 ; GENERIC-NEXT: retq # sched: [1:1.00]
4032 ; SKX-LABEL: sext_2x32mem_to_2x64:
4034 ; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
4035 ; SKX-NEXT: retq # sched: [7:1.00]
4036 %a = load <2 x i32>,<2 x i32> *%i,align 1
4037 %x = sext <2 x i32> %a to <2 x i64>
; Loads <4 x i32> through %i (align 1), zero-extends it to <4 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpslld + vptestmd) followed by a zero-masked vpmovzxdq from memory into ymm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
4041 define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
4042 ; GENERIC-LABEL: zext_4x32mem_to_4x64:
4044 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
4045 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
4046 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
4047 ; GENERIC-NEXT: retq # sched: [1:1.00]
4049 ; SKX-LABEL: zext_4x32mem_to_4x64:
4051 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
4052 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
4053 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
4054 ; SKX-NEXT: retq # sched: [7:1.00]
4055 %a = load <4 x i32>,<4 x i32> *%i,align 1
4056 %x = zext <4 x i32> %a to <4 x i64>
4057 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Loads <4 x i32> through %i (align 1), sign-extends it to <4 x i64>, and selects the
; result against zeroinitializer under %mask. The check lines expect the mask to be
; moved into %k1 (vpslld + vptestmd) followed by a zero-masked vpmovsxdq from (%rdi) into ymm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
4061 define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
4062 ; GENERIC-LABEL: sext_4x32mem_to_4x64mask:
4064 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
4065 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
4066 ; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
4067 ; GENERIC-NEXT: retq # sched: [1:1.00]
4069 ; SKX-LABEL: sext_4x32mem_to_4x64mask:
4071 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
4072 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
4073 ; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
4074 ; SKX-NEXT: retq # sched: [7:1.00]
4075 %a = load <4 x i32>,<4 x i32> *%i,align 1
4076 %x = sext <4 x i32> %a to <4 x i64>
4077 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Loads <4 x i32> through %i (align 1) and sign-extends it to <4 x i64> with no mask.
; The check lines expect a single vpmovsxdq with a (%rdi) memory operand into ymm0.
; NOTE(review): declared readnone although the body performs a load - confirm the attribute is intended.
4081 define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
4082 ; GENERIC-LABEL: sext_4x32mem_to_4x64:
4084 ; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [5:1.00]
4085 ; GENERIC-NEXT: retq # sched: [1:1.00]
4087 ; SKX-LABEL: sext_4x32mem_to_4x64:
4089 ; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [9:1.00]
4090 ; SKX-NEXT: retq # sched: [7:1.00]
4091 %a = load <4 x i32>,<4 x i32> *%i,align 1
4092 %x = sext <4 x i32> %a to <4 x i64>
; Sign-extends the <4 x i32> register argument %a to <4 x i64> with no mask. The check
; lines expect a single vpmovsxdq from xmm0 into ymm0 for each check prefix.
4096 define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
4097 ; GENERIC-LABEL: sext_4x32_to_4x64:
4099 ; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
4100 ; GENERIC-NEXT: retq # sched: [1:1.00]
4102 ; SKX-LABEL: sext_4x32_to_4x64:
4104 ; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
4105 ; SKX-NEXT: retq # sched: [7:1.00]
4106 %x = sext <4 x i32> %a to <4 x i64>
; Zero-extends the <4 x i32> register argument %a to <4 x i64> and selects the result
; against zeroinitializer under %mask. The check lines expect the mask (in xmm1) to be
; moved into %k1 (vpslld + vptestmd) followed by a zero-masked register vpmovzxdq into ymm0.
4110 define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
4111 ; GENERIC-LABEL: zext_4x32_to_4x64mask:
4113 ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
4114 ; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
4115 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
4116 ; GENERIC-NEXT: retq # sched: [1:1.00]
4118 ; SKX-LABEL: zext_4x32_to_4x64mask:
4120 ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50]
4121 ; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
4122 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
4123 ; SKX-NEXT: retq # sched: [7:1.00]
4124 %x = zext <4 x i32> %a to <4 x i64>
4125 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
; Masked zero-extension of a <8 x i32> loaded from %i (align 1) to <8 x i64>;
; lanes not selected by %mask come from zeroinitializer.
; Expected codegen: mask moved to %k1 via vpsllw $15 + vpmovw2m, then a zero-masked
; vpmovzxdq reading directly from memory.
; NOTE(review): this variant takes a mask although its name has no "mask" suffix,
; unlike the sext sibling sext_8x32mem_to_8x64mask.
4129 define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
4130 ; GENERIC-LABEL: zext_8x32mem_to_8x64:
4132 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
4133 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
4134 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
4135 ; GENERIC-NEXT: retq # sched: [1:1.00]
4137 ; SKX-LABEL: zext_8x32mem_to_8x64:
4139 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
4140 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
4141 ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00]
4142 ; SKX-NEXT: retq # sched: [7:1.00]
4143 %a = load <8 x i32>,<8 x i32> *%i,align 1
4144 %x = zext <8 x i32> %a to <8 x i64>
4145 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Masked sign-extension of a <8 x i32> loaded from %i (align 1) to <8 x i64>;
; lanes not selected by %mask come from zeroinitializer.
; Expected codegen: mask moved to %k1 via vpsllw $15 + vpmovw2m, then a zero-masked
; vpmovsxdq with the load folded as a memory operand.
4149 define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
4150 ; GENERIC-LABEL: sext_8x32mem_to_8x64mask:
4152 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
4153 ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
4154 ; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
4155 ; GENERIC-NEXT: retq # sched: [1:1.00]
4157 ; SKX-LABEL: sext_8x32mem_to_8x64mask:
4159 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
4160 ; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
4161 ; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
4162 ; SKX-NEXT: retq # sched: [7:1.00]
4163 %a = load <8 x i32>,<8 x i32> *%i,align 1
4164 %x = sext <8 x i32> %a to <8 x i64>
4165 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Sign-extends a <8 x i32> loaded from %i (align 1) to <8 x i64>.
; Both prefixes expect a single vpmovsxdq into %zmm0 with the load folded.
4169 define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
4170 ; GENERIC-LABEL: sext_8x32mem_to_8x64:
4172 ; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [5:1.00]
4173 ; GENERIC-NEXT: retq # sched: [1:1.00]
4175 ; SKX-LABEL: sext_8x32mem_to_8x64:
4177 ; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [10:1.00]
4178 ; SKX-NEXT: retq # sched: [7:1.00]
4179 %a = load <8 x i32>,<8 x i32> *%i,align 1
4180 %x = sext <8 x i32> %a to <8 x i64>
; Sign-extends the <8 x i32> argument to <8 x i64>.
; Both prefixes expect the register form vpmovsxdq %ymm0, %zmm0.
4184 define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
4185 ; GENERIC-LABEL: sext_8x32_to_8x64:
4187 ; GENERIC-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [1:1.00]
4188 ; GENERIC-NEXT: retq # sched: [1:1.00]
4190 ; SKX-LABEL: sext_8x32_to_8x64:
4192 ; SKX-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [3:1.00]
4193 ; SKX-NEXT: retq # sched: [7:1.00]
4194 %x = sext <8 x i32> %a to <8 x i64>
; Zero-extends %a to <8 x i64> and selects it against zeroinitializer under %mask.
; Expected codegen: mask moved to %k1 via vpsllw $15 + vpmovw2m, then a zero-masked
; register-form vpmovzxdq.
4198 define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
4199 ; GENERIC-LABEL: zext_8x32_to_8x64mask:
4201 ; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
4202 ; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33]
4203 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [1:1.00]
4204 ; GENERIC-NEXT: retq # sched: [1:1.00]
4206 ; SKX-LABEL: zext_8x32_to_8x64mask:
4208 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
4209 ; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00]
4210 ; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [3:1.00]
4211 ; SKX-NEXT: retq # sched: [7:1.00]
4212 %x = zext <8 x i32> %a to <8 x i64>
4213 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
; Truncates <8 x double> to <8 x float>.
; Both prefixes expect a single vcvtpd2ps from %zmm0 to %ymm0.
4216 define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
4217 ; GENERIC-LABEL: fptrunc_test:
4219 ; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [3:1.00]
4220 ; GENERIC-NEXT: retq # sched: [1:1.00]
4222 ; SKX-LABEL: fptrunc_test:
4224 ; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00]
4225 ; SKX-NEXT: retq # sched: [7:1.00]
4226 %b = fptrunc <8 x double> %a to <8 x float>
; Extends <8 x float> to <8 x double>.
; Both prefixes expect a single vcvtps2pd from %ymm0 to %zmm0.
4230 define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
4231 ; GENERIC-LABEL: fpext_test:
4233 ; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [3:1.00]
4234 ; GENERIC-NEXT: retq # sched: [1:1.00]
4236 ; SKX-LABEL: fpext_test:
4238 ; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00]
4239 ; SKX-NEXT: retq # sched: [7:1.00]
4240 %b = fpext <8 x float> %a to <8 x double>
; Reinterprets the i16 argument as <16 x i1> and zero-extends each bit to i32.
; Expected codegen: kmovd moves the scalar into %k1, then a zero-masked
; vpbroadcastd from a RIP-relative constant produces the result.
4244 define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
4245 ; GENERIC-LABEL: zext_16i1_to_16xi32:
4247 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
4248 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
4249 ; GENERIC-NEXT: retq # sched: [1:1.00]
4251 ; SKX-LABEL: zext_16i1_to_16xi32:
4253 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
4254 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
4255 ; SKX-NEXT: retq # sched: [7:1.00]
4256 %a = bitcast i16 %b to <16 x i1>
4257 %c = zext <16 x i1> %a to <16 x i32>
; Reinterprets the i8 argument as <8 x i1> and zero-extends each bit to i64.
; Expected codegen: kmovd moves the scalar into %k1, then a zero-masked
; vpbroadcastq from a RIP-relative constant produces the result.
4261 define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
4262 ; GENERIC-LABEL: zext_8i1_to_8xi64:
4264 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
4265 ; GENERIC-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
4266 ; GENERIC-NEXT: retq # sched: [1:1.00]
4268 ; SKX-LABEL: zext_8i1_to_8xi64:
4270 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
4271 ; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
4272 ; SKX-NEXT: retq # sched: [7:1.00]
4273 %a = bitcast i8 %b to <8 x i1>
4274 %c = zext <8 x i1> %a to <8 x i64>
; Truncates each i8 lane of %a to i1 and bitcasts the <16 x i1> result to i16.
; Expected codegen: vpsllw $7 followed by vpmovb2m into %k0, then kmovd to %eax
; (the "kill" line records that only %ax of %eax is live).
4278 define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
4279 ; GENERIC-LABEL: trunc_16i8_to_16i1:
4281 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
4282 ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
4283 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
4284 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
4285 ; GENERIC-NEXT: retq # sched: [1:1.00]
4287 ; SKX-LABEL: trunc_16i8_to_16i1:
4289 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
4290 ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00]
4291 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
4292 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
4293 ; SKX-NEXT: retq # sched: [7:1.00]
4294 %mask_b = trunc <16 x i8>%a to <16 x i1>
4295 %mask = bitcast <16 x i1> %mask_b to i16
; Truncates each i32 lane of %a to i1 and bitcasts the <16 x i1> result to i16.
; Expected codegen: vpslld $31 + vptestmd into %k0, kmovd to %eax, and a
; vzeroupper before returning because a zmm register was used.
4299 define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
4300 ; GENERIC-LABEL: trunc_16i32_to_16i1:
4302 ; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [3:1.00]
4303 ; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k0 # sched: [1:1.00]
4304 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
4305 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
4306 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
4307 ; GENERIC-NEXT: retq # sched: [1:1.00]
4309 ; SKX-LABEL: trunc_16i32_to_16i1:
4311 ; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:0.50]
4312 ; SKX-NEXT: vptestmd %zmm0, %zmm0, %k0 # sched: [3:1.00]
4313 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
4314 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
4315 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
4316 ; SKX-NEXT: retq # sched: [7:1.00]
4317 %mask_b = trunc <16 x i32>%a to <16 x i1>
4318 %mask = bitcast <16 x i1> %mask_b to i16
; Truncates %a and %b to <4 x i1>, ANDs the two masks, and sign-extends the
; result back to <4 x i32>.
; Expected codegen stays in vector registers (no k-register): vpand, then
; vpslld $31 / vpsrad $31.
4322 define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
4323 ; GENERIC-LABEL: trunc_4i32_to_4i1:
4325 ; GENERIC-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
4326 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
4327 ; GENERIC-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:1.00]
4328 ; GENERIC-NEXT: retq # sched: [1:1.00]
4330 ; SKX-LABEL: trunc_4i32_to_4i1:
4332 ; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
4333 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
4334 ; SKX-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:0.50]
4335 ; SKX-NEXT: retq # sched: [7:1.00]
4336 %mask_a = trunc <4 x i32>%a to <4 x i1>
4337 %mask_b = trunc <4 x i32>%b to <4 x i1>
4338 %a_and_b = and <4 x i1>%mask_a, %mask_b
4339 %res = sext <4 x i1>%a_and_b to <4 x i32>
; Truncates each i16 lane of %a to i1 and bitcasts the <8 x i1> result to i8.
; Expected codegen: vpsllw $15 + vpmovw2m into %k0, then kmovd to %eax
; (the "kill" line records that only %al of %eax is live).
4344 define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
4345 ; GENERIC-LABEL: trunc_8i16_to_8i1:
4347 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
4348 ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
4349 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
4350 ; GENERIC-NEXT: # kill: def $al killed $al killed $eax
4351 ; GENERIC-NEXT: retq # sched: [1:1.00]
4353 ; SKX-LABEL: trunc_8i16_to_8i1:
4355 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
4356 ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00]
4357 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
4358 ; SKX-NEXT: # kill: def $al killed $al killed $eax
4359 ; SKX-NEXT: retq # sched: [7:1.00]
4360 %mask_b = trunc <8 x i16>%a to <8 x i1>
4361 %mask = bitcast <8 x i1> %mask_b to i8
; Computes (%a1 slt %a2), inverts the <8 x i1> result by xor with all-true, and
; sign-extends it to <8 x i32>.
; Expected codegen stays in ymm registers: vpcmpgtd followed by vpternlogq $15.
4365 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
4366 ; GENERIC-LABEL: sext_8i1_8i32:
4368 ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
4369 ; GENERIC-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
4370 ; GENERIC-NEXT: retq # sched: [1:1.00]
4372 ; SKX-LABEL: sext_8i1_8i32:
4374 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
4375 ; SKX-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.33]
4376 ; SKX-NEXT: retq # sched: [7:1.00]
4377 %x = icmp slt <8 x i32> %a1, %a2
4378 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
4379 %y = sext <8 x i1> %x1 to <8 x i32>
; Truncates %a to i1, inserts it at element 0 of a constant <16 x i1> vector, and
; bitcasts the vector to i16.
; Expected codegen: the constant is loaded with movw $-4 / kmovd, shifted right
; then left by one in %k0, the low bit of %edi is isolated (andl $1) and moved to
; %k1, the two are combined with korw, and the result is copied back to %eax.
4384 define i16 @trunc_i32_to_i1(i32 %a) {
4385 ; GENERIC-LABEL: trunc_i32_to_i1:
4387 ; GENERIC-NEXT: movw $-4, %ax # sched: [1:0.33]
4388 ; GENERIC-NEXT: kmovd %eax, %k0 # sched: [1:0.33]
4389 ; GENERIC-NEXT: kshiftrw $1, %k0, %k0 # sched: [1:1.00]
4390 ; GENERIC-NEXT: kshiftlw $1, %k0, %k0 # sched: [1:1.00]
4391 ; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33]
4392 ; GENERIC-NEXT: kmovw %edi, %k1 # sched: [1:0.33]
4393 ; GENERIC-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
4394 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
4395 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
4396 ; GENERIC-NEXT: retq # sched: [1:1.00]
4398 ; SKX-LABEL: trunc_i32_to_i1:
4400 ; SKX-NEXT: movw $-4, %ax # sched: [1:0.25]
4401 ; SKX-NEXT: kmovd %eax, %k0 # sched: [1:1.00]
4402 ; SKX-NEXT: kshiftrw $1, %k0, %k0 # sched: [3:1.00]
4403 ; SKX-NEXT: kshiftlw $1, %k0, %k0 # sched: [3:1.00]
4404 ; SKX-NEXT: andl $1, %edi # sched: [1:0.25]
4405 ; SKX-NEXT: kmovw %edi, %k1 # sched: [1:1.00]
4406 ; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
4407 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
4408 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
4409 ; SKX-NEXT: retq # sched: [7:1.00]
4410 %a_i = trunc i32 %a to i1
4411 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
4412 %res = bitcast <16 x i1> %maskv to i16
; Computes (%a1 slt %a2) on <8 x i32> and sign-extends the <8 x i1> result to
; <8 x i16>.
; Expected codegen: vpcmpgtd writes %k0, vpmovm2w expands it into %xmm0, and a
; vzeroupper precedes the return.
4416 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
4417 ; GENERIC-LABEL: sext_8i1_8i16:
4419 ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
4420 ; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
4421 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
4422 ; GENERIC-NEXT: retq # sched: [1:1.00]
4424 ; SKX-LABEL: sext_8i1_8i16:
4426 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
4427 ; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
4428 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
4429 ; SKX-NEXT: retq # sched: [7:1.00]
4430 %x = icmp slt <8 x i32> %a1, %a2
4431 %y = sext <8 x i1> %x to <8 x i16>
; Computes (%a1 slt %a2) on <16 x i32> and sign-extends the <16 x i1> result to
; <16 x i32>.
; Expected codegen: vpcmpgtd writes %k0 and vpmovm2d expands it into %zmm0.
4435 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
4436 ; GENERIC-LABEL: sext_16i1_16i32:
4438 ; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
4439 ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
4440 ; GENERIC-NEXT: retq # sched: [1:1.00]
4442 ; SKX-LABEL: sext_16i1_16i32:
4444 ; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
4445 ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
4446 ; SKX-NEXT: retq # sched: [7:1.00]
4447 %x = icmp slt <16 x i32> %a1, %a2
4448 %y = sext <16 x i1> %x to <16 x i32>
; Computes (%a1 slt %a2) on <8 x i32> and sign-extends the <8 x i1> result to
; <8 x i64>.
; Expected codegen: vpcmpgtd writes %k0 and vpmovm2q expands it into %zmm0.
4452 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
4453 ; GENERIC-LABEL: sext_8i1_8i64:
4455 ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
4456 ; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33]
4457 ; GENERIC-NEXT: retq # sched: [1:1.00]
4459 ; SKX-LABEL: sext_8i1_8i64:
4461 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
4462 ; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25]
4463 ; SKX-NEXT: retq # sched: [7:1.00]
4464 %x = icmp slt <8 x i32> %a1, %a2
4465 %y = sext <8 x i1> %x to <8 x i64>
; Loads <8 x i8> from %a, sign-extends it to <8 x i64>, and stores the result to %res.
; Expected codegen: the load is folded into vpmovsxbq, the store uses vmovdqa64,
; and a vzeroupper precedes the return.
4469 define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
4470 ; GENERIC-LABEL: extload_v8i64:
4472 ; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [5:1.00]
4473 ; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
4474 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
4475 ; GENERIC-NEXT: retq # sched: [1:1.00]
4477 ; SKX-LABEL: extload_v8i64:
4479 ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00]
4480 ; SKX-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
4481 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
4482 ; SKX-NEXT: retq # sched: [7:1.00]
4483 %sign_load = load <8 x i8>, <8 x i8>* %a
4484 %c = sext <8 x i8> %sign_load to <8 x i64>
4485 store <8 x i64> %c, <8 x i64>* %res
; Selects a <64 x i16> value against zeroinitializer under a <64 x i1> mask.
; The value occupies two zmm registers, so the expected codegen builds one 64-bit
; mask in %k1 (vpsllw $7 + vpmovb2m), applies it to %zmm0 with a zero-masked
; vmovdqu16, shifts the mask right by 32 (kshiftrq) and applies it to %zmm1.
4489 define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
4490 ; GENERIC-LABEL: test21:
4492 ; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [3:1.00]
4493 ; GENERIC-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:0.33]
4494 ; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
4495 ; GENERIC-NEXT: kshiftrq $32, %k1, %k1 # sched: [1:1.00]
4496 ; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.33]
4497 ; GENERIC-NEXT: retq # sched: [1:1.00]
4499 ; SKX-LABEL: test21:
4501 ; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:0.50]
4502 ; SKX-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:1.00]
4503 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
4504 ; SKX-NEXT: kshiftrq $32, %k1, %k1 # sched: [3:1.00]
4505 ; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.33]
4506 ; SKX-NEXT: retq # sched: [7:1.00]
4507 %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
; Expresses a byte-to-word zero-extension as a shufflevector that interleaves each
; byte of %a with a zero byte (index 16 = element 0 of the zero vector), then
; bitcasts <32 x i8> to <16 x i16>.
; Both prefixes expect the shuffle to be matched as a single vpmovzxbw.
4511 define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
4512 ; GENERIC-LABEL: shuffle_zext_16x8_to_16x16:
4514 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
4515 ; GENERIC-NEXT: retq # sched: [1:1.00]
4517 ; SKX-LABEL: shuffle_zext_16x8_to_16x16:
4519 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4520 ; SKX-NEXT: retq # sched: [7:1.00]
4521 %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
4522 %2 = bitcast <32 x i8> %1 to <16 x i16>
; Masked variant of the shuffle-based byte-to-word zero-extension: the interleave
; with zero bytes is bitcast to <16 x i16> and then selected against
; zeroinitializer under %mask.
; Expected codegen: mask moved to %k1 via vpsllw $7 + vpmovb2m, then a zero-masked
; vpmovzxbw.
4526 define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
4527 ; GENERIC-LABEL: shuffle_zext_16x8_to_16x16_mask:
4529 ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
4530 ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
4531 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
4532 ; GENERIC-NEXT: retq # sched: [1:1.00]
4534 ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask:
4536 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
4537 ; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
4538 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4539 ; SKX-NEXT: retq # sched: [7:1.00]
4540 %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
4541 %bc = bitcast <32 x i8> %x to <16 x i16>
4542 %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer
; Interleaves the low 16 bytes of the <32 x i8> argument with zero bytes
; (index 32 = element 0 of the zero vector) and bitcasts to <16 x i16>.
; Both prefixes expect a single vpmovzxbw reading only %xmm0.
4546 define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
4547 ; GENERIC-LABEL: zext_32x8_to_16x16:
4549 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
4550 ; GENERIC-NEXT: retq # sched: [1:1.00]
4552 ; SKX-LABEL: zext_32x8_to_16x16:
4554 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4555 ; SKX-NEXT: retq # sched: [7:1.00]
4556 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
4557 %2 = bitcast <32 x i8> %1 to <16 x i16>
; Places each of the low 8 bytes of the <32 x i8> argument in front of three zero
; bytes (index 32 = element 0 of the zero vector) and bitcasts to <8 x i32>.
; Both prefixes expect a single vpmovzxbd reading only %xmm0.
4561 define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
4562 ; GENERIC-LABEL: zext_32x8_to_8x32:
4564 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
4565 ; GENERIC-NEXT: retq # sched: [1:1.00]
4567 ; SKX-LABEL: zext_32x8_to_8x32:
4569 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
4570 ; SKX-NEXT: retq # sched: [7:1.00]
4571 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
4572 %2 = bitcast <32 x i8> %1 to <8 x i32>
; Places each of the low 4 bytes of the <32 x i8> argument in front of seven zero
; bytes (index 32 = element 0 of the zero vector) and bitcasts to <4 x i64>.
; Both prefixes expect a single vpmovzxbq reading only %xmm0.
4576 define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
4577 ; GENERIC-LABEL: zext_32x8_to_4x64:
4579 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
4580 ; GENERIC-NEXT: retq # sched: [1:1.00]
4582 ; SKX-LABEL: zext_32x8_to_4x64:
4584 ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4585 ; SKX-NEXT: retq # sched: [7:1.00]
4586 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
4587 %2 = bitcast <32 x i8> %1 to <4 x i64>
; Interleaves the low 8 words of the <16 x i16> argument with zero words
; (index 16 = element 0 of the zero vector) and bitcasts to <8 x i32>.
; Both prefixes expect a single vpmovzxwd reading only %xmm0.
4591 define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
4592 ; GENERIC-LABEL: zext_16x16_to_8x32:
4594 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
4595 ; GENERIC-NEXT: retq # sched: [1:1.00]
4597 ; SKX-LABEL: zext_16x16_to_8x32:
4599 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
4600 ; SKX-NEXT: retq # sched: [7:1.00]
4601 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
4602 %2 = bitcast <16 x i16> %1 to <8 x i32>
; Places each of the low 4 words of the <16 x i16> argument in front of three zero
; words (index 16 = element 0 of the zero vector) and bitcasts to <4 x i64>.
; Both prefixes expect a single vpmovzxwq reading only %xmm0.
4606 define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
4607 ; GENERIC-LABEL: zext_16x16_to_4x64:
4609 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
4610 ; GENERIC-NEXT: retq # sched: [1:1.00]
4612 ; SKX-LABEL: zext_16x16_to_4x64:
4614 ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
4615 ; SKX-NEXT: retq # sched: [7:1.00]
4616 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
4617 %2 = bitcast <16 x i16> %1 to <4 x i64>
; Interleaves the low 4 dwords of the <8 x i32> argument with zero dwords
; (index 8 = element 0 of the zero vector) and bitcasts to <4 x i64>.
; Both prefixes expect a single vpmovzxdq reading only %xmm0.
4621 define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
4622 ; GENERIC-LABEL: zext_8x32_to_4x64:
4624 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
4625 ; GENERIC-NEXT: retq # sched: [1:1.00]
4627 ; SKX-LABEL: zext_8x32_to_4x64:
4629 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
4630 ; SKX-NEXT: retq # sched: [7:1.00]
4631 %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
4632 %2 = bitcast <8 x i32> %1 to <4 x i64>
; Compares %x and %y for equality per byte and zero-extends the <64 x i1> result
; to <64 x i8>.
; Expected codegen: vpcmpeqb writes %k1, then a zero-masked vmovdqu8 loads a
; RIP-relative constant into %zmm0.
4636 define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
4637 ; GENERIC-LABEL: zext_64xi1_to_64xi8:
4639 ; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00]
4640 ; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [4:0.50]
4641 ; GENERIC-NEXT: retq # sched: [1:1.00]
4643 ; SKX-LABEL: zext_64xi1_to_64xi8:
4645 ; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00]
4646 ; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
4647 ; SKX-NEXT: retq # sched: [7:1.00]
4648 %mask = icmp eq <64 x i8> %x, %y
4649 %1 = zext <64 x i1> %mask to <64 x i8>
; Compares %x and %y for equality per word and zero-extends the <32 x i1> result
; to <32 x i16>.
; Expected codegen: vpcmpeqw writes %k1, then a zero-masked vmovdqu16 loads a
; RIP-relative constant into %zmm0.
4653 define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
4654 ; GENERIC-LABEL: zext_32xi1_to_32xi16:
4656 ; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
4657 ; GENERIC-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [4:0.50]
4658 ; GENERIC-NEXT: retq # sched: [1:1.00]
4660 ; SKX-LABEL: zext_32xi1_to_32xi16:
4662 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
4663 ; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
4664 ; SKX-NEXT: retq # sched: [7:1.00]
4665 %mask = icmp eq <32 x i16> %x, %y
4666 %1 = zext <32 x i1> %mask to <32 x i16>
; Compares %x and %y for equality per word and zero-extends the <16 x i1> result
; to <16 x i16>.
; Expected codegen stays in ymm registers (no k-register): vpcmpeqw followed by
; vpsrlw $15.
4670 define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
4671 ; GENERIC-LABEL: zext_16xi1_to_16xi16:
4673 ; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4674 ; GENERIC-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00]
4675 ; GENERIC-NEXT: retq # sched: [1:1.00]
4677 ; SKX-LABEL: zext_16xi1_to_16xi16:
4679 ; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4680 ; SKX-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:0.50]
4681 ; SKX-NEXT: retq # sched: [7:1.00]
4682 %mask = icmp eq <16 x i16> %x, %y
4683 %1 = zext <16 x i1> %mask to <16 x i16>
; Compares two <32 x i16> vectors for equality and zero-extends the <32 x i1>
; result to <32 x i8> (result elements are narrower than the compared elements).
; Expected codegen: vpcmpeqw on zmm writes %k1, then a zero-masked vmovdqu8 loads
; a RIP-relative constant into %ymm0.
4688 define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
4689 ; GENERIC-LABEL: zext_32xi1_to_32xi8:
4691 ; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
4692 ; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [4:0.50]
4693 ; GENERIC-NEXT: retq # sched: [1:1.00]
4695 ; SKX-LABEL: zext_32xi1_to_32xi8:
4697 ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
4698 ; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
4699 ; SKX-NEXT: retq # sched: [7:1.00]
4700 %mask = icmp eq <32 x i16> %x, %y
4701 %1 = zext <32 x i1> %mask to <32 x i8>
; Compares two <4 x i8> vectors for equality and zero-extends the <4 x i1> result
; to <4 x i32>.
; Expected codegen: both inputs are ANDed with a [255,0,0,0,...] constant, compared
; with vpcmpeqd, and the result is shifted right by 31 (vpsrld).
4705 define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
4706 ; GENERIC-LABEL: zext_4xi1_to_4x32:
4708 ; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50]
4709 ; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
4710 ; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
4711 ; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
4712 ; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
4713 ; GENERIC-NEXT: retq # sched: [1:1.00]
4715 ; SKX-LABEL: zext_4xi1_to_4x32:
4717 ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50]
4718 ; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
4719 ; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
4720 ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
4721 ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
4722 ; SKX-NEXT: retq # sched: [7:1.00]
4723 %mask = icmp eq <4 x i8> %x, %y
4724 %1 = zext <4 x i1> %mask to <4 x i32>
; Compares two <2 x i8> vectors for equality and zero-extends the <2 x i1> result
; to <2 x i64>.
; Expected codegen: both inputs are ANDed with a [255,0,0,0,0,0,0,0,...] constant,
; compared with vpcmpeqq, and the result is shifted right by 63 (vpsrlq).
4728 define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
4729 ; GENERIC-LABEL: zext_2xi1_to_2xi64:
4731 ; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50]
4732 ; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
4733 ; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
4734 ; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
4735 ; GENERIC-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:1.00]
4736 ; GENERIC-NEXT: retq # sched: [1:1.00]
4738 ; SKX-LABEL: zext_2xi1_to_2xi64:
4740 ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50]
4741 ; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
4742 ; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
4743 ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
4744 ; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:0.50]
4745 ; SKX-NEXT: retq # sched: [7:1.00]
4746 %mask = icmp eq <2 x i8> %x, %y
4747 %1 = zext <2 x i1> %mask to <2 x i64>
; Computes (%a0 * %a1) + %a2 on <16 x float> with separate fmul and fadd.
; Both prefixes expect two instructions (vmulps, vaddps), not a fused multiply-add.
4751 define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
4752 ; GENERIC-LABEL: test_x86_fmadd_ps_z:
4754 ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4755 ; GENERIC-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
4756 ; GENERIC-NEXT: retq # sched: [1:1.00]
4758 ; SKX-LABEL: test_x86_fmadd_ps_z:
4760 ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
4761 ; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.33]
4762 ; SKX-NEXT: retq # sched: [7:1.00]
4763 %x = fmul <16 x float> %a0, %a1
4764 %res = fadd <16 x float> %x, %a2
4765 ret <16 x float> %res
; Computes (%a0 * %a1) - %a2 on <16 x float> with separate fmul and fsub.
; Both prefixes expect two instructions (vmulps, vsubps), not a fused multiply-subtract.
4768 define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
4769 ; GENERIC-LABEL: test_x86_fmsub_ps_z:
4771 ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4772 ; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
4773 ; GENERIC-NEXT: retq # sched: [1:1.00]
4775 ; SKX-LABEL: test_x86_fmsub_ps_z:
4777 ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
4778 ; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.33]
4779 ; SKX-NEXT: retq # sched: [7:1.00]
4780 %x = fmul <16 x float> %a0, %a1
4781 %res = fsub <16 x float> %x, %a2
4782 ret <16 x float> %res
; Computes %a2 - (%a0 * %a1) on <16 x float> with separate fmul and fsub.
; Both prefixes expect vmulps followed by vsubps with the product as the subtrahend.
4785 define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
4786 ; GENERIC-LABEL: test_x86_fnmadd_ps_z:
4788 ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4789 ; GENERIC-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [3:1.00]
4790 ; GENERIC-NEXT: retq # sched: [1:1.00]
4792 ; SKX-LABEL: test_x86_fnmadd_ps_z:
4794 ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
4795 ; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.33]
4796 ; SKX-NEXT: retq # sched: [7:1.00]
4797 %x = fmul <16 x float> %a0, %a1
4798 %res = fsub <16 x float> %a2, %x
4799 ret <16 x float> %res
; Computes (-0.0 - (%a0 * %a1)) - %a2 on <16 x float>, i.e. the negated product
; minus %a2.
; Both prefixes expect vmulps, a vxorps with a {1to16} broadcast constant for the
; negation, and a final vsubps.
4802 define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
4803 ; GENERIC-LABEL: test_x86_fnmsub_ps_z:
4805 ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4806 ; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
4807 ; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
4808 ; GENERIC-NEXT: retq # sched: [1:1.00]
4810 ; SKX-LABEL: test_x86_fnmsub_ps_z:
4812 ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
4813 ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
4814 ; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.33]
4815 ; SKX-NEXT: retq # sched: [7:1.00]
4816 %x = fmul <16 x float> %a0, %a1
4817 %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
4818 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
4819 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
4820 float -0.000000e+00>, %x
4821 %res = fsub <16 x float> %y, %a2
4822 ret <16 x float> %res
; Computes (%a0 * %a1) + %a2 on <8 x double> with separate fmul and fadd.
; Both prefixes expect two instructions (vmulpd, vaddpd), not a fused multiply-add.
4825 define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
4826 ; GENERIC-LABEL: test_x86_fmadd_pd_z:
4828 ; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4829 ; GENERIC-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
4830 ; GENERIC-NEXT: retq # sched: [1:1.00]
4832 ; SKX-LABEL: test_x86_fmadd_pd_z:
4834 ; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
4835 ; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.33]
4836 ; SKX-NEXT: retq # sched: [7:1.00]
4837 %x = fmul <8 x double> %a0, %a1
4838 %res = fadd <8 x double> %x, %a2
4839 ret <8 x double> %res
; Computes (%a0 * %a1) - %a2 on <8 x double> with separate fmul and fsub.
; Both prefixes expect two instructions (vmulpd, vsubpd), not a fused multiply-subtract.
4842 define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
4843 ; GENERIC-LABEL: test_x86_fmsub_pd_z:
4845 ; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4846 ; GENERIC-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
4847 ; GENERIC-NEXT: retq # sched: [1:1.00]
4849 ; SKX-LABEL: test_x86_fmsub_pd_z:
4851 ; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
4852 ; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.33]
4853 ; SKX-NEXT: retq # sched: [7:1.00]
4854 %x = fmul <8 x double> %a0, %a1
4855 %res = fsub <8 x double> %x, %a2
4856 ret <8 x double> %res
; Scalar double version of multiply-then-subtract: (%a0 * %a1) - %a2.
; Both prefixes expect separate vmulsd and vsubsd on xmm registers.
4859 define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
4860 ; GENERIC-LABEL: test_x86_fmsub_213:
4862 ; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
4863 ; GENERIC-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
4864 ; GENERIC-NEXT: retq # sched: [1:1.00]
4866 ; SKX-LABEL: test_x86_fmsub_213:
4868 ; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
4869 ; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.33]
4870 ; SKX-NEXT: retq # sched: [7:1.00]
4871 %x = fmul double %a0, %a1
4872 %res = fsub double %x, %a2
; Scalar (%a0 * %a1) - %a2 where %a2 is loaded from %a2_ptr.
; Both prefixes expect the load to be folded into the vsubsd memory operand.
4876 define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
4877 ; GENERIC-LABEL: test_x86_fmsub_213_m:
4879 ; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
4880 ; GENERIC-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
4881 ; GENERIC-NEXT: retq # sched: [1:1.00]
4883 ; SKX-LABEL: test_x86_fmsub_213_m:
4885 ; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
4886 ; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
4887 ; SKX-NEXT: retq # sched: [7:1.00]
4888 %a2 = load double , double *%a2_ptr
4889 %x = fmul double %a0, %a1
4890 %res = fsub double %x, %a2
; Scalar double fmul followed by fsub where one multiplicand is loaded from
; %a2_ptr. The checks expect the load to be folded into vmulsd as (%rdi).
4894 define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
4895 ; GENERIC-LABEL: test_x86_fmsub_231_m:
4897 ; GENERIC-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
4898 ; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
4899 ; GENERIC-NEXT: retq # sched: [1:1.00]
4901 ; SKX-LABEL: test_x86_fmsub_231_m:
4903 ; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
4904 ; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
4905 ; SKX-NEXT: retq # sched: [7:1.00]
4906 %a2 = load double , double *%a2_ptr
4907 %x = fmul double %a0, %a2
4908 %res = fsub double %x, %a1
; Multiplies %a1 by a splat float constant and then adds %a2. The checks expect
; the constant to appear as a {1to16} broadcast memory operand of vmulps.
4912 define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
4913 ; GENERIC-LABEL: test231_br:
4915 ; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [9:1.00]
4916 ; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
4917 ; GENERIC-NEXT: retq # sched: [1:1.00]
4919 ; SKX-LABEL: test231_br:
4921 ; SKX-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
4922 ; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
4923 ; SKX-NEXT: retq # sched: [7:1.00]
4924 %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
4925 %b2 = fadd <16 x float> %b1, %a2
4926 ret <16 x float> %b2
; Multiplies %a1 by %a2 and then adds a splat float constant. The checks expect
; the constant to appear as a {1to16} broadcast memory operand of vaddps.
4929 define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
4930 ; GENERIC-LABEL: test213_br:
4932 ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
4933 ; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
4934 ; GENERIC-NEXT: retq # sched: [1:1.00]
4936 ; SKX-LABEL: test213_br:
4938 ; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33]
4939 ; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
4940 ; SKX-NEXT: retq # sched: [7:1.00]
4941 %b1 = fmul <16 x float> %a1, %a2
4942 %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
4943 ret <16 x float> %b2
; Masked multiply-add: %a0 times a vector loaded from %a2_ptrt, plus %a1, with
; lanes whose %mask bit is clear keeping %a0. The checks expect %k1 to be built
; from the <16 x i1> argument (vpsllw + vpmovb2m), the load folded into vmulps,
; and a merge-masked vaddps writing zmm0.
4947 define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
4948 ; GENERIC-LABEL: test_x86_fmadd132_ps:
4950 ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
4951 ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33]
4952 ; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [9:1.00]
4953 ; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
4954 ; GENERIC-NEXT: retq # sched: [1:1.00]
4956 ; SKX-LABEL: test_x86_fmadd132_ps:
4958 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
4959 ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00]
4960 ; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [11:0.50]
4961 ; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.33]
4962 ; SKX-NEXT: retq # sched: [7:1.00]
4963 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
4964 %x = fmul <16 x float> %a0, %a2
4965 %y = fadd <16 x float> %x, %a1
4966 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
4967 ret <16 x float> %res
; Masked multiply-add: %a0 times a vector loaded from %a2_ptrt, plus %a1, with
; lanes whose %mask bit is clear keeping %a1. The checks expect the merge-masked
; vaddps to write zmm1, followed by a vmovaps copy into zmm0.
4971 define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
4972 ; GENERIC-LABEL: test_x86_fmadd231_ps:
4974 ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
4975 ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33]
4976 ; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [9:1.00]
4977 ; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00]
4978 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
4979 ; GENERIC-NEXT: retq # sched: [1:1.00]
4981 ; SKX-LABEL: test_x86_fmadd231_ps:
4983 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
4984 ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00]
4985 ; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
4986 ; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.33]
4987 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
4988 ; SKX-NEXT: retq # sched: [7:1.00]
4989 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
4990 %x = fmul <16 x float> %a0, %a2
4991 %y = fadd <16 x float> %x, %a1
4992 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
4993 ret <16 x float> %res
; Masked multiply-add: %a1 times %a0, plus a vector loaded from %a2_ptrt, with
; lanes whose %mask bit is clear keeping %a1. The checks expect the load to be
; folded into the merge-masked vaddps (writing zmm1), then a vmovaps into zmm0.
4997 define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
4998 ; GENERIC-LABEL: test_x86_fmadd213_ps:
5000 ; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
5001 ; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33]
5002 ; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
5003 ; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [7:1.00]
5004 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
5005 ; GENERIC-NEXT: retq # sched: [1:1.00]
5007 ; SKX-LABEL: test_x86_fmadd213_ps:
5009 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
5010 ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00]
5011 ; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
5012 ; SKX-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [11:0.50]
5013 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
5014 ; SKX-NEXT: retq # sched: [7:1.00]
5015 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
5016 %x = fmul <16 x float> %a1, %a0
5017 %y = fadd <16 x float> %x, %a2
5018 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
5019 ret <16 x float> %res
; <16 x i32> and, preceded by an add of splat 2. The checks expect a broadcast
; vpaddd followed by vpandq (the q-form is what the expected output shows).
5022 define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
5023 ; GENERIC-LABEL: vpandd:
5024 ; GENERIC: # %bb.0: # %entry
5025 ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
5026 ; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
5027 ; GENERIC-NEXT: retq # sched: [1:1.00]
5029 ; SKX-LABEL: vpandd:
5030 ; SKX: # %bb.0: # %entry
5031 ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
5032 ; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5033 ; SKX-NEXT: retq # sched: [7:1.00]
5035 ; Force the execution domain with an add.
5036 %a2 = add <16 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2,
5037 i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
5038 %x = and <16 x i32> %a2, %b
; <16 x i32> and-not: %b is inverted with xor -1 and then and-ed with %a plus
; splat 3. The checks expect a broadcast vpaddd followed by vpandnq.
5042 define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
5043 ; GENERIC-LABEL: vpandnd:
5044 ; GENERIC: # %bb.0: # %entry
5045 ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
5046 ; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
5047 ; GENERIC-NEXT: retq # sched: [1:1.00]
5049 ; SKX-LABEL: vpandnd:
5050 ; SKX: # %bb.0: # %entry
5051 ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
5052 ; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
5053 ; SKX-NEXT: retq # sched: [7:1.00]
5055 ; Force the execution domain with an add.
5056 %a2 = add <16 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
5057 i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
5058 %b2 = xor <16 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1,
5059 i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
5060 %x = and <16 x i32> %a2, %b2
; <16 x i32> or, preceded by an add of splat 4. The checks expect a broadcast
; vpaddd followed by vporq.
; NOTE(review): the per-function label directive for the SKX prefix is not
; visible in this chunk, unlike the sibling tests - confirm it is present.
5064 define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
5065 ; GENERIC-LABEL: vpord:
5066 ; GENERIC: # %bb.0: # %entry
5067 ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
5068 ; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
5069 ; GENERIC-NEXT: retq # sched: [1:1.00]
5072 ; SKX: # %bb.0: # %entry
5073 ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
5074 ; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5075 ; SKX-NEXT: retq # sched: [7:1.00]
5077 ; Force the execution domain with an add.
5078 %a2 = add <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4,
5079 i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
5080 %x = or <16 x i32> %a2, %b
; <16 x i32> xor, preceded by an add of splat 5. The checks expect a broadcast
; vpaddd followed by vpxorq.
5084 define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
5085 ; GENERIC-LABEL: vpxord:
5086 ; GENERIC: # %bb.0: # %entry
5087 ; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
5088 ; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
5089 ; GENERIC-NEXT: retq # sched: [1:1.00]
5091 ; SKX-LABEL: vpxord:
5092 ; SKX: # %bb.0: # %entry
5093 ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
5094 ; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5095 ; SKX-NEXT: retq # sched: [7:1.00]
5097 ; Force the execution domain with an add.
5098 %a2 = add <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5,
5099 i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
5100 %x = xor <16 x i32> %a2, %b
; <8 x i64> and, preceded by an add of splat 6. The checks expect a {1to8}
; broadcast vpaddq followed by vpandq.
5104 define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
5105 ; GENERIC-LABEL: vpandq:
5106 ; GENERIC: # %bb.0: # %entry
5107 ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
5108 ; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
5109 ; GENERIC-NEXT: retq # sched: [1:1.00]
5111 ; SKX-LABEL: vpandq:
5112 ; SKX: # %bb.0: # %entry
5113 ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
5114 ; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5115 ; SKX-NEXT: retq # sched: [7:1.00]
5117 ; Force the execution domain with an add.
5118 %a2 = add <8 x i64> %a, <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>
5119 %x = and <8 x i64> %a2, %b
; <8 x i64> and-not: %b is inverted with xor -1 and then and-ed with %a plus
; splat 7. The checks expect a {1to8} broadcast vpaddq followed by vpandnq.
5123 define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
5124 ; GENERIC-LABEL: vpandnq:
5125 ; GENERIC: # %bb.0: # %entry
5126 ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
5127 ; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
5128 ; GENERIC-NEXT: retq # sched: [1:1.00]
5130 ; SKX-LABEL: vpandnq:
5131 ; SKX: # %bb.0: # %entry
5132 ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
5133 ; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
5134 ; SKX-NEXT: retq # sched: [7:1.00]
5136 ; Force the execution domain with an add.
5137 %a2 = add <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
5138 %b2 = xor <8 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
5139 %x = and <8 x i64> %a2, %b2
; <8 x i64> or, preceded by an add of splat 8. The checks expect a {1to8}
; broadcast vpaddq followed by vporq.
; NOTE(review): the per-function label directive for the SKX prefix is not
; visible in this chunk, unlike the sibling tests - confirm it is present.
5143 define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
5144 ; GENERIC-LABEL: vporq:
5145 ; GENERIC: # %bb.0: # %entry
5146 ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
5147 ; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
5148 ; GENERIC-NEXT: retq # sched: [1:1.00]
5151 ; SKX: # %bb.0: # %entry
5152 ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
5153 ; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5154 ; SKX-NEXT: retq # sched: [7:1.00]
5156 ; Force the execution domain with an add.
5157 %a2 = add <8 x i64> %a, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
5158 %x = or <8 x i64> %a2, %b
; <8 x i64> xor, preceded by an add of splat 9. The checks expect a {1to8}
; broadcast vpaddq followed by vpxorq.
5162 define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
5163 ; GENERIC-LABEL: vpxorq:
5164 ; GENERIC: # %bb.0: # %entry
5165 ; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
5166 ; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
5167 ; GENERIC-NEXT: retq # sched: [1:1.00]
5169 ; SKX-LABEL: vpxorq:
5170 ; SKX: # %bb.0: # %entry
5171 ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
5172 ; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5173 ; SKX-NEXT: retq # sched: [7:1.00]
5175 ; Force the execution domain with an add.
5176 %a2 = add <8 x i64> %a, <i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9>
5177 %x = xor <8 x i64> %a2, %b
; Plain and of two <64 x i8> vectors with no preceding integer add. The checks
; expect a single vandps on zmm registers.
5181 define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
5182 ; GENERIC-LABEL: and_v64i8:
5184 ; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
5185 ; GENERIC-NEXT: retq # sched: [1:1.00]
5187 ; SKX-LABEL: and_v64i8:
5189 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5190 ; SKX-NEXT: retq # sched: [7:1.00]
5191 %res = and <64 x i8> %a, %b
; And-not on <64 x i8>: %b is inverted with xor -1 and then and-ed with %a.
; The checks expect a single vandnps on zmm registers.
5195 define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) {
5196 ; GENERIC-LABEL: andn_v64i8:
5198 ; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
5199 ; GENERIC-NEXT: retq # sched: [1:1.00]
5201 ; SKX-LABEL: andn_v64i8:
5203 ; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
5204 ; SKX-NEXT: retq # sched: [7:1.00]
5205 %b2 = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
5206 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
5207 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
5208 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
5209 %res = and <64 x i8> %a, %b2
; Plain or of two <64 x i8> vectors. The checks expect a single vorps on zmm
; registers.
5213 define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
5214 ; GENERIC-LABEL: or_v64i8:
5216 ; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
5217 ; GENERIC-NEXT: retq # sched: [1:1.00]
5219 ; SKX-LABEL: or_v64i8:
5221 ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5222 ; SKX-NEXT: retq # sched: [7:1.00]
5223 %res = or <64 x i8> %a, %b
; Plain xor of two <64 x i8> vectors. The checks expect a single vxorps on zmm
; registers.
5227 define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
5228 ; GENERIC-LABEL: xor_v64i8:
5230 ; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
5231 ; GENERIC-NEXT: retq # sched: [1:1.00]
5233 ; SKX-LABEL: xor_v64i8:
5235 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5236 ; SKX-NEXT: retq # sched: [7:1.00]
5237 %res = xor <64 x i8> %a, %b
; Plain and of two <32 x i16> vectors. The checks expect a single vandps on zmm
; registers.
5241 define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
5242 ; GENERIC-LABEL: and_v32i16:
5244 ; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
5245 ; GENERIC-NEXT: retq # sched: [1:1.00]
5247 ; SKX-LABEL: and_v32i16:
5249 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5250 ; SKX-NEXT: retq # sched: [7:1.00]
5251 %res = and <32 x i16> %a, %b
; And-not on <32 x i16>: %b is inverted with xor -1 and then and-ed with %a.
; The checks expect a single vandnps on zmm registers.
5255 define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) {
5256 ; GENERIC-LABEL: andn_v32i16:
5258 ; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
5259 ; GENERIC-NEXT: retq # sched: [1:1.00]
5261 ; SKX-LABEL: andn_v32i16:
5263 ; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
5264 ; SKX-NEXT: retq # sched: [7:1.00]
5265 %b2 = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
5266 i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
5267 %res = and <32 x i16> %a, %b2
; Plain or of two <32 x i16> vectors. The checks expect a single vorps on zmm
; registers.
5271 define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
5272 ; GENERIC-LABEL: or_v32i16:
5274 ; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
5275 ; GENERIC-NEXT: retq # sched: [1:1.00]
5277 ; SKX-LABEL: or_v32i16:
5279 ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5280 ; SKX-NEXT: retq # sched: [7:1.00]
5281 %res = or <32 x i16> %a, %b
; Plain xor of two <32 x i16> vectors. The checks expect a single vxorps on zmm
; registers.
5285 define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
5286 ; GENERIC-LABEL: xor_v32i16:
5288 ; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
5289 ; GENERIC-NEXT: retq # sched: [1:1.00]
5291 ; SKX-LABEL: xor_v32i16:
5293 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
5294 ; SKX-NEXT: retq # sched: [7:1.00]
5295 %res = xor <32 x i16> %a, %b
; Bitwise and of %a and %b (viewed as <16 x i32>), selected per lane against
; %passThru by the i16 %mask, then added to %c as floats. The checks expect a
; merge-masked vandps into zmm2 under %k1 followed by vaddps.
5299 define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
5300 ; GENERIC-LABEL: masked_and_v16f32:
5302 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5303 ; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
5304 ; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
5305 ; GENERIC-NEXT: retq # sched: [1:1.00]
5307 ; SKX-LABEL: masked_and_v16f32:
5309 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5310 ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33]
5311 ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33]
5312 ; SKX-NEXT: retq # sched: [7:1.00]
5313 %a1 = bitcast <16 x float> %a to <16 x i32>
5314 %b1 = bitcast <16 x float> %b to <16 x i32>
5315 %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
5316 %mask1 = bitcast i16 %mask to <16 x i1>
5317 %op = and <16 x i32> %a1, %b1
5318 %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
5319 %cast = bitcast <16 x i32> %select to <16 x float>
5320 %add = fadd <16 x float> %c, %cast
5321 ret <16 x float> %add
; Bitwise or of %a and %b (viewed as <16 x i32>), selected per lane against
; %passThru by the i16 %mask, then added to %c as floats. The checks expect a
; merge-masked vorps into zmm2 under %k1 followed by vaddps.
5324 define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
5325 ; GENERIC-LABEL: masked_or_v16f32:
5327 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5328 ; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
5329 ; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
5330 ; GENERIC-NEXT: retq # sched: [1:1.00]
5332 ; SKX-LABEL: masked_or_v16f32:
5334 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5335 ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33]
5336 ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33]
5337 ; SKX-NEXT: retq # sched: [7:1.00]
5338 %a1 = bitcast <16 x float> %a to <16 x i32>
5339 %b1 = bitcast <16 x float> %b to <16 x i32>
5340 %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
5341 %mask1 = bitcast i16 %mask to <16 x i1>
5342 %op = or <16 x i32> %a1, %b1
5343 %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
5344 %cast = bitcast <16 x i32> %select to <16 x float>
5345 %add = fadd <16 x float> %c, %cast
5346 ret <16 x float> %add
; Bitwise xor of %a and %b (viewed as <16 x i32>), selected per lane against
; %passThru by the i16 %mask, then added to %c as floats. The checks expect a
; merge-masked vxorps into zmm2 under %k1 followed by vaddps.
5349 define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
5350 ; GENERIC-LABEL: masked_xor_v16f32:
5352 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5353 ; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
5354 ; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
5355 ; GENERIC-NEXT: retq # sched: [1:1.00]
5357 ; SKX-LABEL: masked_xor_v16f32:
5359 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5360 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33]
5361 ; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33]
5362 ; SKX-NEXT: retq # sched: [7:1.00]
5363 %a1 = bitcast <16 x float> %a to <16 x i32>
5364 %b1 = bitcast <16 x float> %b to <16 x i32>
5365 %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
5366 %mask1 = bitcast i16 %mask to <16 x i1>
5367 %op = xor <16 x i32> %a1, %b1
5368 %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
5369 %cast = bitcast <16 x i32> %select to <16 x float>
5370 %add = fadd <16 x float> %c, %cast
5371 ret <16 x float> %add
; Bitwise and of %a and %b (viewed as <8 x i64>), selected per lane against
; %passThru by the i8 %mask, then added to %c as doubles. The checks expect a
; merge-masked vandpd into zmm2 under %k1 followed by vaddpd.
5374 define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
5375 ; GENERIC-LABEL: masked_and_v8f64:
5377 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5378 ; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
5379 ; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
5380 ; GENERIC-NEXT: retq # sched: [1:1.00]
5382 ; SKX-LABEL: masked_and_v8f64:
5384 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5385 ; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33]
5386 ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33]
5387 ; SKX-NEXT: retq # sched: [7:1.00]
5388 %a1 = bitcast <8 x double> %a to <8 x i64>
5389 %b1 = bitcast <8 x double> %b to <8 x i64>
5390 %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
5391 %mask1 = bitcast i8 %mask to <8 x i1>
5392 %op = and <8 x i64> %a1, %b1
5393 %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
5394 %cast = bitcast <8 x i64> %select to <8 x double>
5395 %add = fadd <8 x double> %c, %cast
5396 ret <8 x double> %add
; Bitwise or of %a and %b (viewed as <8 x i64>), selected per lane against
; %passThru by the i8 %mask, then added to %c as doubles. The checks expect a
; merge-masked vorpd into zmm2 under %k1 followed by vaddpd.
5399 define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
5400 ; GENERIC-LABEL: masked_or_v8f64:
5402 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5403 ; GENERIC-NEXT: vorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
5404 ; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
5405 ; GENERIC-NEXT: retq # sched: [1:1.00]
5407 ; SKX-LABEL: masked_or_v8f64:
5409 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5410 ; SKX-NEXT: vorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33]
5411 ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33]
5412 ; SKX-NEXT: retq # sched: [7:1.00]
5413 %a1 = bitcast <8 x double> %a to <8 x i64>
5414 %b1 = bitcast <8 x double> %b to <8 x i64>
5415 %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
5416 %mask1 = bitcast i8 %mask to <8 x i1>
5417 %op = or <8 x i64> %a1, %b1
5418 %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
5419 %cast = bitcast <8 x i64> %select to <8 x double>
5420 %add = fadd <8 x double> %c, %cast
5421 ret <8 x double> %add
; Bitwise xor of %a and %b (viewed as <8 x i64>), selected per lane against
; %passThru by the i8 %mask, then added to %c as doubles. The checks expect a
; merge-masked vxorpd into zmm2 under %k1 followed by vaddpd.
5424 define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
5425 ; GENERIC-LABEL: masked_xor_v8f64:
5427 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5428 ; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
5429 ; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
5430 ; GENERIC-NEXT: retq # sched: [1:1.00]
5432 ; SKX-LABEL: masked_xor_v8f64:
5434 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5435 ; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33]
5436 ; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33]
5437 ; SKX-NEXT: retq # sched: [7:1.00]
5438 %a1 = bitcast <8 x double> %a to <8 x i64>
5439 %b1 = bitcast <8 x double> %b to <8 x i64>
5440 %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
5441 %mask1 = bitcast i8 %mask to <8 x i1>
5442 %op = xor <8 x i64> %a1, %b1
5443 %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
5444 %cast = bitcast <8 x i64> %select to <8 x double>
5445 %add = fadd <8 x double> %c, %cast
5446 ret <8 x double> %add
; And of %__a and %__b as <8 x i64>, reinterpreted as <16 x i32> and selected
; per lane against %__src by the i16 %__k. The checks expect kmovd into %k1 and
; a merge-masked vandps writing zmm0.
5449 define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
5450 ; GENERIC-LABEL: test_mm512_mask_and_epi32:
5451 ; GENERIC: # %bb.0: # %entry
5452 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5453 ; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
5454 ; GENERIC-NEXT: retq # sched: [1:1.00]
5456 ; SKX-LABEL: test_mm512_mask_and_epi32:
5457 ; SKX: # %bb.0: # %entry
5458 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5459 ; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
5460 ; SKX-NEXT: retq # sched: [7:1.00]
5462 %and1.i.i = and <8 x i64> %__a, %__b
5463 %0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
5464 %1 = bitcast <8 x i64> %__src to <16 x i32>
5465 %2 = bitcast i16 %__k to <16 x i1>
5466 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
5467 %4 = bitcast <16 x i32> %3 to <8 x i64>
; Or of %__a and %__b as <8 x i64>, reinterpreted as <16 x i32> and selected
; per lane against %__src by the i16 %__k. The checks expect kmovd into %k1 and
; a merge-masked vorps writing zmm0.
5471 define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
5472 ; GENERIC-LABEL: test_mm512_mask_or_epi32:
5473 ; GENERIC: # %bb.0: # %entry
5474 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5475 ; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
5476 ; GENERIC-NEXT: retq # sched: [1:1.00]
5478 ; SKX-LABEL: test_mm512_mask_or_epi32:
5479 ; SKX: # %bb.0: # %entry
5480 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5481 ; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
5482 ; SKX-NEXT: retq # sched: [7:1.00]
5484 %or1.i.i = or <8 x i64> %__a, %__b
5485 %0 = bitcast <8 x i64> %or1.i.i to <16 x i32>
5486 %1 = bitcast <8 x i64> %__src to <16 x i32>
5487 %2 = bitcast i16 %__k to <16 x i1>
5488 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
5489 %4 = bitcast <16 x i32> %3 to <8 x i64>
; Xor of %__a and %__b as <8 x i64>, reinterpreted as <16 x i32> and selected
; per lane against %__src by the i16 %__k. The checks expect kmovd into %k1 and
; a merge-masked vxorps writing zmm0.
5493 define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
5494 ; GENERIC-LABEL: test_mm512_mask_xor_epi32:
5495 ; GENERIC: # %bb.0: # %entry
5496 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5497 ; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
5498 ; GENERIC-NEXT: retq # sched: [1:1.00]
5500 ; SKX-LABEL: test_mm512_mask_xor_epi32:
5501 ; SKX: # %bb.0: # %entry
5502 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5503 ; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
5504 ; SKX-NEXT: retq # sched: [7:1.00]
5506 %xor1.i.i = xor <8 x i64> %__a, %__b
5507 %0 = bitcast <8 x i64> %xor1.i.i to <16 x i32>
5508 %1 = bitcast <8 x i64> %__src to <16 x i32>
5509 %2 = bitcast i16 %__k to <16 x i1>
5510 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
5511 %4 = bitcast <16 x i32> %3 to <8 x i64>
; Xor of %__A and %__B through <8 x i64> bitcasts, selected per lane against
; %__W by the i8 %__U. The checks expect kmovd into %k1 and a merge-masked
; vxorpd writing zmm0.
5515 define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5516 ; GENERIC-LABEL: test_mm512_mask_xor_pd:
5517 ; GENERIC: # %bb.0: # %entry
5518 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5519 ; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
5520 ; GENERIC-NEXT: retq # sched: [1:1.00]
5522 ; SKX-LABEL: test_mm512_mask_xor_pd:
5523 ; SKX: # %bb.0: # %entry
5524 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5525 ; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
5526 ; SKX-NEXT: retq # sched: [7:1.00]
5528 %0 = bitcast <8 x double> %__A to <8 x i64>
5529 %1 = bitcast <8 x double> %__B to <8 x i64>
5530 %xor.i.i = xor <8 x i64> %0, %1
5531 %2 = bitcast <8 x i64> %xor.i.i to <8 x double>
5532 %3 = bitcast i8 %__U to <8 x i1>
5533 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
; Xor of %__A and %__B through <8 x i64> bitcasts, selected per lane against
; zeroinitializer by the i8 %__U. The checks expect a zero-masked vxorpd
; ({%k1} {z}).
5537 define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5538 ; GENERIC-LABEL: test_mm512_maskz_xor_pd:
5539 ; GENERIC: # %bb.0: # %entry
5540 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5541 ; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
5542 ; GENERIC-NEXT: retq # sched: [1:1.00]
5544 ; SKX-LABEL: test_mm512_maskz_xor_pd:
5545 ; SKX: # %bb.0: # %entry
5546 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5547 ; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
5548 ; SKX-NEXT: retq # sched: [7:1.00]
5550 %0 = bitcast <8 x double> %__A to <8 x i64>
5551 %1 = bitcast <8 x double> %__B to <8 x i64>
5552 %xor.i.i = xor <8 x i64> %0, %1
5553 %2 = bitcast <8 x i64> %xor.i.i to <8 x double>
5554 %3 = bitcast i8 %__U to <8 x i1>
5555 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
; Xor of %__A and %__B through <16 x i32> bitcasts, selected per lane against
; %__W by the i16 %__U. The checks expect kmovd into %k1 and a merge-masked
; vxorps writing zmm0.
5559 define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5560 ; GENERIC-LABEL: test_mm512_mask_xor_ps:
5561 ; GENERIC: # %bb.0: # %entry
5562 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5563 ; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
5564 ; GENERIC-NEXT: retq # sched: [1:1.00]
5566 ; SKX-LABEL: test_mm512_mask_xor_ps:
5567 ; SKX: # %bb.0: # %entry
5568 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5569 ; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
5570 ; SKX-NEXT: retq # sched: [7:1.00]
5572 %0 = bitcast <16 x float> %__A to <16 x i32>
5573 %1 = bitcast <16 x float> %__B to <16 x i32>
5574 %xor.i.i = xor <16 x i32> %0, %1
5575 %2 = bitcast <16 x i32> %xor.i.i to <16 x float>
5576 %3 = bitcast i16 %__U to <16 x i1>
5577 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
; Xor of %__A and %__B through <16 x i32> bitcasts, selected per lane against
; zeroinitializer by the i16 %__U. The checks expect a zero-masked vxorps
; ({%k1} {z}).
5581 define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5582 ; GENERIC-LABEL: test_mm512_maskz_xor_ps:
5583 ; GENERIC: # %bb.0: # %entry
5584 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5585 ; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
5586 ; GENERIC-NEXT: retq # sched: [1:1.00]
5588 ; SKX-LABEL: test_mm512_maskz_xor_ps:
5589 ; SKX: # %bb.0: # %entry
5590 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5591 ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
5592 ; SKX-NEXT: retq # sched: [7:1.00]
5594 %0 = bitcast <16 x float> %__A to <16 x i32>
5595 %1 = bitcast <16 x float> %__B to <16 x i32>
5596 %xor.i.i = xor <16 x i32> %0, %1
5597 %2 = bitcast <16 x i32> %xor.i.i to <16 x float>
5598 %3 = bitcast i16 %__U to <16 x i1>
5599 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
; Or of %__B and %__A (in that operand order) through <8 x i64> bitcasts,
; selected per lane against %__W by the i8 %__U. The checks expect kmovd into
; %k1 and a merge-masked vorpd writing zmm0.
5603 define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5604 ; GENERIC-LABEL: test_mm512_mask_or_pd:
5605 ; GENERIC: # %bb.0: # %entry
5606 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5607 ; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
5608 ; GENERIC-NEXT: retq # sched: [1:1.00]
5610 ; SKX-LABEL: test_mm512_mask_or_pd:
5611 ; SKX: # %bb.0: # %entry
5612 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5613 ; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33]
5614 ; SKX-NEXT: retq # sched: [7:1.00]
5616 %0 = bitcast <8 x double> %__A to <8 x i64>
5617 %1 = bitcast <8 x double> %__B to <8 x i64>
5618 %or.i.i = or <8 x i64> %1, %0
5619 %2 = bitcast <8 x i64> %or.i.i to <8 x double>
5620 %3 = bitcast i8 %__U to <8 x i1>
5621 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
; Or of %__B and %__A through <8 x i64> bitcasts, selected per lane against
; zeroinitializer by the i8 %__U. The checks expect a zero-masked vorpd
; ({%k1} {z}).
5625 define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5626 ; GENERIC-LABEL: test_mm512_maskz_or_pd:
5627 ; GENERIC: # %bb.0: # %entry
5628 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5629 ; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
5630 ; GENERIC-NEXT: retq # sched: [1:1.00]
5632 ; SKX-LABEL: test_mm512_maskz_or_pd:
5633 ; SKX: # %bb.0: # %entry
5634 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5635 ; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33]
5636 ; SKX-NEXT: retq # sched: [7:1.00]
5638 %0 = bitcast <8 x double> %__A to <8 x i64>
5639 %1 = bitcast <8 x double> %__B to <8 x i64>
5640 %or.i.i = or <8 x i64> %1, %0
5641 %2 = bitcast <8 x i64> %or.i.i to <8 x double>
5642 %3 = bitcast i8 %__U to <8 x i1>
5643 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
; Or of %__B and %__A through <16 x i32> bitcasts, selected per lane against
; %__W by the i16 %__U. The checks expect kmovd into %k1 and a merge-masked
; vorps writing zmm0.
5647 define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5648 ; GENERIC-LABEL: test_mm512_mask_or_ps:
5649 ; GENERIC: # %bb.0: # %entry
5650 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5651 ; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
5652 ; GENERIC-NEXT: retq # sched: [1:1.00]
5654 ; SKX-LABEL: test_mm512_mask_or_ps:
5655 ; SKX: # %bb.0: # %entry
5656 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5657 ; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33]
5658 ; SKX-NEXT: retq # sched: [7:1.00]
5660 %0 = bitcast <16 x float> %__A to <16 x i32>
5661 %1 = bitcast <16 x float> %__B to <16 x i32>
5662 %or.i.i = or <16 x i32> %1, %0
5663 %2 = bitcast <16 x i32> %or.i.i to <16 x float>
5664 %3 = bitcast i16 %__U to <16 x i1>
5665 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
; Zero-masked 512-bit OR on floats: the <16 x i32> OR of the two bitcast
; inputs is bitcast back to <16 x float> and selected against zeroinitializer
; under the i16 mask %__U. Both runs expect kmovd into %k1 followed by one
; vorps carrying {%k1} {z}.
5669 define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5670 ; GENERIC-LABEL: test_mm512_maskz_or_ps:
5671 ; GENERIC: # %bb.0: # %entry
5672 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5673 ; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
5674 ; GENERIC-NEXT: retq # sched: [1:1.00]
5676 ; SKX-LABEL: test_mm512_maskz_or_ps:
5677 ; SKX: # %bb.0: # %entry
5678 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5679 ; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33]
5680 ; SKX-NEXT: retq # sched: [7:1.00]
5682 %0 = bitcast <16 x float> %__A to <16 x i32>
5683 %1 = bitcast <16 x float> %__B to <16 x i32>
5684 %or.i.i = or <16 x i32> %1, %0
5685 %2 = bitcast <16 x i32> %or.i.i to <16 x float>
5686 %3 = bitcast i16 %__U to <16 x i1>
5687 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
; Merge-masked 512-bit AND on doubles: both inputs are bitcast to <8 x i64>,
; AND'ed, bitcast back to <8 x double>, and selected against the pass-through
; value %__W under the i8 mask %__U (bitcast to <8 x i1>). Both runs expect
; kmovd into %k1 followed by one vandpd writing %zmm0 under {%k1}.
5691 define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5692 ; GENERIC-LABEL: test_mm512_mask_and_pd:
5693 ; GENERIC: # %bb.0: # %entry
5694 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5695 ; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
5696 ; GENERIC-NEXT: retq # sched: [1:1.00]
5698 ; SKX-LABEL: test_mm512_mask_and_pd:
5699 ; SKX: # %bb.0: # %entry
5700 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5701 ; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33]
5702 ; SKX-NEXT: retq # sched: [7:1.00]
5704 %0 = bitcast <8 x double> %__A to <8 x i64>
5705 %1 = bitcast <8 x double> %__B to <8 x i64>
5706 %and.i.i = and <8 x i64> %1, %0
5707 %2 = bitcast <8 x i64> %and.i.i to <8 x double>
5708 %3 = bitcast i8 %__U to <8 x i1>
5709 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
; Zero-masked 512-bit AND on doubles: the <8 x i64> AND of the two bitcast
; inputs is bitcast back to <8 x double> and selected against zeroinitializer
; under the i8 mask %__U. Both runs expect kmovd into %k1 followed by one
; vandpd carrying {%k1} {z}.
5713 define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5714 ; GENERIC-LABEL: test_mm512_maskz_and_pd:
5715 ; GENERIC: # %bb.0: # %entry
5716 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5717 ; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
5718 ; GENERIC-NEXT: retq # sched: [1:1.00]
5720 ; SKX-LABEL: test_mm512_maskz_and_pd:
5721 ; SKX: # %bb.0: # %entry
5722 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5723 ; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33]
5724 ; SKX-NEXT: retq # sched: [7:1.00]
5726 %0 = bitcast <8 x double> %__A to <8 x i64>
5727 %1 = bitcast <8 x double> %__B to <8 x i64>
5728 %and.i.i = and <8 x i64> %1, %0
5729 %2 = bitcast <8 x i64> %and.i.i to <8 x double>
5730 %3 = bitcast i8 %__U to <8 x i1>
5731 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
; Merge-masked 512-bit AND on floats: both inputs are bitcast to <16 x i32>,
; AND'ed, bitcast back to <16 x float>, and selected against the pass-through
; value %__W under the i16 mask %__U (bitcast to <16 x i1>). Both runs expect
; kmovd into %k1 followed by one vandps writing %zmm0 under {%k1}.
5735 define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5736 ; GENERIC-LABEL: test_mm512_mask_and_ps:
5737 ; GENERIC: # %bb.0: # %entry
5738 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5739 ; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
5740 ; GENERIC-NEXT: retq # sched: [1:1.00]
5742 ; SKX-LABEL: test_mm512_mask_and_ps:
5743 ; SKX: # %bb.0: # %entry
5744 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5745 ; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33]
5746 ; SKX-NEXT: retq # sched: [7:1.00]
5748 %0 = bitcast <16 x float> %__A to <16 x i32>
5749 %1 = bitcast <16 x float> %__B to <16 x i32>
5750 %and.i.i = and <16 x i32> %1, %0
5751 %2 = bitcast <16 x i32> %and.i.i to <16 x float>
5752 %3 = bitcast i16 %__U to <16 x i1>
5753 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
; Zero-masked 512-bit AND on floats: the <16 x i32> AND of the two bitcast
; inputs is bitcast back to <16 x float> and selected against zeroinitializer
; under the i16 mask %__U. Both runs expect kmovd into %k1 followed by one
; vandps carrying {%k1} {z}.
5757 define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5758 ; GENERIC-LABEL: test_mm512_maskz_and_ps:
5759 ; GENERIC: # %bb.0: # %entry
5760 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5761 ; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
5762 ; GENERIC-NEXT: retq # sched: [1:1.00]
5764 ; SKX-LABEL: test_mm512_maskz_and_ps:
5765 ; SKX: # %bb.0: # %entry
5766 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5767 ; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33]
5768 ; SKX-NEXT: retq # sched: [7:1.00]
5770 %0 = bitcast <16 x float> %__A to <16 x i32>
5771 %1 = bitcast <16 x float> %__B to <16 x i32>
5772 %and.i.i = and <16 x i32> %1, %0
5773 %2 = bitcast <16 x i32> %and.i.i to <16 x float>
5774 %3 = bitcast i16 %__U to <16 x i1>
5775 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
; Merge-masked 512-bit AND-NOT on doubles: %__A is bitcast to <8 x i64> and
; inverted by an xor with all-ones, then AND'ed with the bitcast of %__B. The
; result is bitcast back and selected against the pass-through %__W under the
; i8 mask %__U. Both runs expect the xor+and pair to appear as a single
; vandnpd under {%k1}, preceded by kmovd into %k1.
5779 define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5780 ; GENERIC-LABEL: test_mm512_mask_andnot_pd:
5781 ; GENERIC: # %bb.0: # %entry
5782 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5783 ; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
5784 ; GENERIC-NEXT: retq # sched: [1:1.00]
5786 ; SKX-LABEL: test_mm512_mask_andnot_pd:
5787 ; SKX: # %bb.0: # %entry
5788 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5789 ; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
5790 ; SKX-NEXT: retq # sched: [7:1.00]
5792 %0 = bitcast <8 x double> %__A to <8 x i64>
5793 %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
5794 %1 = bitcast <8 x double> %__B to <8 x i64>
5795 %and.i.i = and <8 x i64> %1, %neg.i.i
5796 %2 = bitcast <8 x i64> %and.i.i to <8 x double>
5797 %3 = bitcast i8 %__U to <8 x i1>
5798 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
; Zero-masked 512-bit AND-NOT on doubles: %__A (as <8 x i64>) is inverted by
; an xor with all-ones and AND'ed with %__B. The result is bitcast back and
; selected against zeroinitializer under the i8 mask %__U. Both runs expect
; kmovd into %k1 followed by a single vandnpd carrying {%k1} {z}.
5802 define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
5803 ; GENERIC-LABEL: test_mm512_maskz_andnot_pd:
5804 ; GENERIC: # %bb.0: # %entry
5805 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5806 ; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
5807 ; GENERIC-NEXT: retq # sched: [1:1.00]
5809 ; SKX-LABEL: test_mm512_maskz_andnot_pd:
5810 ; SKX: # %bb.0: # %entry
5811 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5812 ; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
5813 ; SKX-NEXT: retq # sched: [7:1.00]
5815 %0 = bitcast <8 x double> %__A to <8 x i64>
5816 %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
5817 %1 = bitcast <8 x double> %__B to <8 x i64>
5818 %and.i.i = and <8 x i64> %1, %neg.i.i
5819 %2 = bitcast <8 x i64> %and.i.i to <8 x double>
5820 %3 = bitcast i8 %__U to <8 x i1>
5821 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
; Merge-masked 512-bit AND-NOT on floats: %__A is bitcast to <16 x i32> and
; inverted by an xor with all-ones, then AND'ed with the bitcast of %__B. The
; result is bitcast back and selected against the pass-through %__W under the
; i16 mask %__U. Both runs expect kmovd into %k1 followed by a single vandnps
; under {%k1}.
5825 define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5826 ; GENERIC-LABEL: test_mm512_mask_andnot_ps:
5827 ; GENERIC: # %bb.0: # %entry
5828 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5829 ; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
5830 ; GENERIC-NEXT: retq # sched: [1:1.00]
5832 ; SKX-LABEL: test_mm512_mask_andnot_ps:
5833 ; SKX: # %bb.0: # %entry
5834 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5835 ; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33]
5836 ; SKX-NEXT: retq # sched: [7:1.00]
5838 %0 = bitcast <16 x float> %__A to <16 x i32>
5839 %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
5840 %1 = bitcast <16 x float> %__B to <16 x i32>
5841 %and.i.i = and <16 x i32> %1, %neg.i.i
5842 %2 = bitcast <16 x i32> %and.i.i to <16 x float>
5843 %3 = bitcast i16 %__U to <16 x i1>
5844 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
; Zero-masked 512-bit AND-NOT on floats: %__A (as <16 x i32>) is inverted by
; an xor with all-ones and AND'ed with %__B. The result is bitcast back and
; selected against zeroinitializer under the i16 mask %__U. Both runs expect
; kmovd into %k1 followed by a single vandnps carrying {%k1} {z}.
5848 define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
5849 ; GENERIC-LABEL: test_mm512_maskz_andnot_ps:
5850 ; GENERIC: # %bb.0: # %entry
5851 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
5852 ; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
5853 ; GENERIC-NEXT: retq # sched: [1:1.00]
5855 ; SKX-LABEL: test_mm512_maskz_andnot_ps:
5856 ; SKX: # %bb.0: # %entry
5857 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
5858 ; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
5859 ; SKX-NEXT: retq # sched: [7:1.00]
5861 %0 = bitcast <16 x float> %__A to <16 x i32>
5862 %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
5863 %1 = bitcast <16 x float> %__B to <16 x i32>
5864 %and.i.i = and <16 x i32> %1, %neg.i.i
5865 %2 = bitcast <16 x i32> %and.i.i to <16 x float>
5866 %3 = bitcast i16 %__U to <16 x i1>
5867 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
; Scalar float -> i32 bitcast. Both runs expect a single vmovd from %xmm0 to
; %eax; only the sched annotation differs between the two runs.
5871 define i32 @mov_test1(float %x) {
5872 ; GENERIC-LABEL: mov_test1:
5874 ; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [1:0.33]
5875 ; GENERIC-NEXT: retq # sched: [1:1.00]
5877 ; SKX-LABEL: mov_test1:
5879 ; SKX-NEXT: vmovd %xmm0, %eax # sched: [1:0.25]
5880 ; SKX-NEXT: retq # sched: [7:1.00]
5881 %res = bitcast float %x to i32
; Inserts the i32 argument into lane 0 of an undef <4 x i32>. Both runs
; expect a single vmovd from %edi to %xmm0.
5885 define <4 x i32> @mov_test2(i32 %x) {
5886 ; GENERIC-LABEL: mov_test2:
5888 ; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00]
5889 ; GENERIC-NEXT: retq # sched: [1:1.00]
5891 ; SKX-LABEL: mov_test2:
5893 ; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00]
5894 ; SKX-NEXT: retq # sched: [7:1.00]
5895 %res = insertelement <4 x i32>undef, i32 %x, i32 0
; Inserts the i64 argument into lane 0 of an undef <2 x i64>. Both runs
; expect a single vmovq from %rdi to %xmm0.
5899 define <2 x i64> @mov_test3(i64 %x) {
5900 ; GENERIC-LABEL: mov_test3:
5902 ; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00]
5903 ; GENERIC-NEXT: retq # sched: [1:1.00]
5905 ; SKX-LABEL: mov_test3:
5907 ; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00]
5908 ; SKX-NEXT: retq # sched: [7:1.00]
5909 %res = insertelement <2 x i64>undef, i64 %x, i32 0
; Loads an i32 from %x and inserts it into lane 0 of an undef <4 x i32>.
; Both runs expect the load and insert to become one vmovss load whose
; printed shuffle comment is mem[0],zero,zero,zero.
5913 define <4 x i32> @mov_test4(i32* %x) {
5914 ; GENERIC-LABEL: mov_test4:
5916 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
5917 ; GENERIC-NEXT: retq # sched: [1:1.00]
5919 ; SKX-LABEL: mov_test4:
5921 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
5922 ; SKX-NEXT: retq # sched: [7:1.00]
5923 %y = load i32, i32* %x
5924 %res = insertelement <4 x i32>undef, i32 %y, i32 0
; Stores the scalar float argument to %y (align 4). Both runs expect a single
; vmovss from %xmm0 to (%rdi).
5928 define void @mov_test5(float %x, float* %y) {
5929 ; GENERIC-LABEL: mov_test5:
5931 ; GENERIC-NEXT: vmovss %xmm0, (%rdi) # sched: [5:1.00]
5932 ; GENERIC-NEXT: retq # sched: [1:1.00]
5934 ; SKX-LABEL: mov_test5:
5936 ; SKX-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00]
5937 ; SKX-NEXT: retq # sched: [7:1.00]
5938 store float %x, float* %y, align 4
; Stores the scalar double argument to %y (align 8). Both runs expect a
; single vmovsd from %xmm0 to (%rdi).
5942 define void @mov_test6(double %x, double* %y) {
5943 ; GENERIC-LABEL: mov_test6:
5945 ; GENERIC-NEXT: vmovsd %xmm0, (%rdi) # sched: [5:1.00]
5946 ; GENERIC-NEXT: retq # sched: [1:1.00]
5948 ; SKX-LABEL: mov_test6:
5950 ; SKX-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00]
5951 ; SKX-NEXT: retq # sched: [7:1.00]
5952 store double %x, double* %y, align 8
; Loads an i32 from %x and bitcasts it to float. Both runs expect the
; load and bitcast to become one vmovss load into %xmm0.
5956 define float @mov_test7(i32* %x) {
5957 ; GENERIC-LABEL: mov_test7:
5959 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
5960 ; GENERIC-NEXT: retq # sched: [1:1.00]
5962 ; SKX-LABEL: mov_test7:
5964 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
5965 ; SKX-NEXT: retq # sched: [7:1.00]
5966 %y = load i32, i32* %x
5967 %res = bitcast i32 %y to float
; Extracts lane 0 of a <4 x i32>. Both runs expect a single vmovd from %xmm0
; to %eax.
; NOTE(review): the printed instruction matches mov_test1, yet the sched
; annotation here is [2:1.00]; presumably a different opcode is selected for
; the vector-extract form -- confirm against the scheduling model.
5971 define i32 @mov_test8(<4 x i32> %x) {
5972 ; GENERIC-LABEL: mov_test8:
5974 ; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
5975 ; GENERIC-NEXT: retq # sched: [1:1.00]
5977 ; SKX-LABEL: mov_test8:
5979 ; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
5980 ; SKX-NEXT: retq # sched: [7:1.00]
5981 %res = extractelement <4 x i32> %x, i32 0
; Extracts lane 0 of a <2 x i64>. Both runs expect a single vmovq from %xmm0
; to %rax.
5985 define i64 @mov_test9(<2 x i64> %x) {
5986 ; GENERIC-LABEL: mov_test9:
5988 ; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
5989 ; GENERIC-NEXT: retq # sched: [1:1.00]
5991 ; SKX-LABEL: mov_test9:
5993 ; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
5994 ; SKX-NEXT: retq # sched: [7:1.00]
5995 %res = extractelement <2 x i64> %x, i32 0
; Loads an i32 (align 4) and inserts it into lane 0 of a zeroinitializer
; <4 x i32>. Both runs expect one vmovss load whose printed shuffle comment is
; mem[0],zero,zero,zero, i.e. the zeroing needs no extra instruction.
5999 define <4 x i32> @mov_test10(i32* %x) {
6000 ; GENERIC-LABEL: mov_test10:
6002 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
6003 ; GENERIC-NEXT: retq # sched: [1:1.00]
6005 ; SKX-LABEL: mov_test10:
6007 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
6008 ; SKX-NEXT: retq # sched: [7:1.00]
6009 %y = load i32, i32* %x, align 4
6010 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
; Loads a float (align 4) and inserts it into lane 0 of a zeroinitializer
; <4 x float>. Both runs expect one vmovss load with the upper lanes printed
; as zero.
6014 define <4 x float> @mov_test11(float* %x) {
6015 ; GENERIC-LABEL: mov_test11:
6017 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
6018 ; GENERIC-NEXT: retq # sched: [1:1.00]
6020 ; SKX-LABEL: mov_test11:
6022 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
6023 ; SKX-NEXT: retq # sched: [7:1.00]
6024 %y = load float, float* %x, align 4
6025 %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
; Loads a double (align 8), inserts it into lane 0 of a zeroinitializer
; <2 x double> and returns the vector. Both runs expect one vmovsd load whose
; printed shuffle comment is mem[0],zero.
6029 define <2 x double> @mov_test12(double* %x) {
6030 ; GENERIC-LABEL: mov_test12:
6032 ; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
6033 ; GENERIC-NEXT: retq # sched: [1:1.00]
6035 ; SKX-LABEL: mov_test12:
6037 ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
6038 ; SKX-NEXT: retq # sched: [7:1.00]
6039 %y = load double, double* %x, align 8
6040 %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
6041 ret <2 x double>%res
; Inserts the i64 argument into lane 0 of a zeroinitializer <2 x i64>. Both
; runs expect a single vmovq from %rdi to %xmm0, the same instruction checked
; for the undef-base variant in mov_test3.
6044 define <2 x i64> @mov_test13(i64 %x) {
6045 ; GENERIC-LABEL: mov_test13:
6047 ; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00]
6048 ; GENERIC-NEXT: retq # sched: [1:1.00]
6050 ; SKX-LABEL: mov_test13:
6052 ; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00]
6053 ; SKX-NEXT: retq # sched: [7:1.00]
6054 %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
; Inserts the i32 argument into lane 0 of a zeroinitializer <4 x i32>. Both
; runs expect a single vmovd from %edi to %xmm0, the same instruction checked
; for the undef-base variant in mov_test2.
6058 define <4 x i32> @mov_test14(i32 %x) {
6059 ; GENERIC-LABEL: mov_test14:
6061 ; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00]
6062 ; GENERIC-NEXT: retq # sched: [1:1.00]
6064 ; SKX-LABEL: mov_test14:
6066 ; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00]
6067 ; SKX-NEXT: retq # sched: [7:1.00]
6068 %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
; Loads an i32 (align 4) and inserts it into lane 0 of a zeroinitializer
; <4 x i32>. Both runs expect one vmovss load with zeroed upper lanes.
; NOTE(review): the visible IR and expected output are the same as in
; mov_test10; this looks like a duplicate test -- confirm before removing.
6072 define <4 x i32> @mov_test15(i32* %x) {
6073 ; GENERIC-LABEL: mov_test15:
6075 ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
6076 ; GENERIC-NEXT: retq # sched: [1:1.00]
6078 ; SKX-LABEL: mov_test15:
6080 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
6081 ; SKX-NEXT: retq # sched: [7:1.00]
6082 %y = load i32, i32* %x, align 4
6083 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
; Unaligned (align 1) 512-bit load of <16 x i32> through a bitcast i8*.
; Both runs expect a single vmovups from (%rdi) into %zmm0.
6087 define <16 x i32> @mov_test16(i8 * %addr) {
6088 ; GENERIC-LABEL: mov_test16:
6090 ; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [4:0.50]
6091 ; GENERIC-NEXT: retq # sched: [1:1.00]
6093 ; SKX-LABEL: mov_test16:
6095 ; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50]
6096 ; SKX-NEXT: retq # sched: [7:1.00]
6097 %vaddr = bitcast i8* %addr to <16 x i32>*
6098 %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
; 64-byte-aligned 512-bit load of <16 x i32> through a bitcast i8*.
; Both runs expect a single vmovaps from (%rdi) into %zmm0.
6102 define <16 x i32> @mov_test17(i8 * %addr) {
6103 ; GENERIC-LABEL: mov_test17:
6105 ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [4:0.50]
6106 ; GENERIC-NEXT: retq # sched: [1:1.00]
6108 ; SKX-LABEL: mov_test17:
6110 ; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50]
6111 ; SKX-NEXT: retq # sched: [7:1.00]
6112 %vaddr = bitcast i8* %addr to <16 x i32>*
6113 %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
; 64-byte-aligned 512-bit store of <8 x i64> through a bitcast i8*.
; Both runs expect vmovaps of %zmm0 to (%rdi), then vzeroupper before retq.
6117 define void @mov_test18(i8 * %addr, <8 x i64> %data) {
6118 ; GENERIC-LABEL: mov_test18:
6120 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6121 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
6122 ; GENERIC-NEXT: retq # sched: [1:1.00]
6124 ; SKX-LABEL: mov_test18:
6126 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6127 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
6128 ; SKX-NEXT: retq # sched: [7:1.00]
6129 %vaddr = bitcast i8* %addr to <8 x i64>*
6130 store <8 x i64>%data, <8 x i64>* %vaddr, align 64
; Unaligned (align 1) 512-bit store of <16 x i32> through a bitcast i8*.
; Both runs expect vmovups of %zmm0 to (%rdi), then vzeroupper before retq.
6134 define void @mov_test19(i8 * %addr, <16 x i32> %data) {
6135 ; GENERIC-LABEL: mov_test19:
6137 ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
6138 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
6139 ; GENERIC-NEXT: retq # sched: [1:1.00]
6141 ; SKX-LABEL: mov_test19:
6143 ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
6144 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
6145 ; SKX-NEXT: retq # sched: [7:1.00]
6146 %vaddr = bitcast i8* %addr to <16 x i32>*
6147 store <16 x i32>%data, <16 x i32>* %vaddr, align 1
; 64-byte-aligned 512-bit store of <16 x i32> through a bitcast i8*.
; Both runs expect vmovaps of %zmm0 to (%rdi), then vzeroupper before retq.
6151 define void @mov_test20(i8 * %addr, <16 x i32> %data) {
6152 ; GENERIC-LABEL: mov_test20:
6154 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6155 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
6156 ; GENERIC-NEXT: retq # sched: [1:1.00]
6158 ; SKX-LABEL: mov_test20:
6160 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6161 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
6162 ; SKX-NEXT: retq # sched: [7:1.00]
6163 %vaddr = bitcast i8* %addr to <16 x i32>*
6164 store <16 x i32>%data, <16 x i32>* %vaddr, align 64
; 64-byte-aligned 512-bit load of <8 x i64> through a bitcast i8*.
; Both runs expect a single vmovaps from (%rdi) into %zmm0.
6168 define <8 x i64> @mov_test21(i8 * %addr) {
6169 ; GENERIC-LABEL: mov_test21:
6171 ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [4:0.50]
6172 ; GENERIC-NEXT: retq # sched: [1:1.00]
6174 ; SKX-LABEL: mov_test21:
6176 ; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50]
6177 ; SKX-NEXT: retq # sched: [7:1.00]
6178 %vaddr = bitcast i8* %addr to <8 x i64>*
6179 %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
; Unaligned (align 1) 512-bit store of <8 x i64> through a bitcast i8*.
; Both runs expect vmovups of %zmm0 to (%rdi), then vzeroupper before retq.
6183 define void @mov_test22(i8 * %addr, <8 x i64> %data) {
6184 ; GENERIC-LABEL: mov_test22:
6186 ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
6187 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
6188 ; GENERIC-NEXT: retq # sched: [1:1.00]
6190 ; SKX-LABEL: mov_test22:
6192 ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
6193 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
6194 ; SKX-NEXT: retq # sched: [7:1.00]
6195 %vaddr = bitcast i8* %addr to <8 x i64>*
6196 store <8 x i64>%data, <8 x i64>* %vaddr, align 1
; Unaligned (align 1) 512-bit load of <8 x i64> through a bitcast i8*.
; Both runs expect a single vmovups from (%rdi) into %zmm0.
6200 define <8 x i64> @mov_test23(i8 * %addr) {
6201 ; GENERIC-LABEL: mov_test23:
6203 ; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [4:0.50]
6204 ; GENERIC-NEXT: retq # sched: [1:1.00]
6206 ; SKX-LABEL: mov_test23:
6208 ; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50]
6209 ; SKX-NEXT: retq # sched: [7:1.00]
6210 %vaddr = bitcast i8* %addr to <8 x i64>*
6211 %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
; 64-byte-aligned 512-bit store of <8 x double> through a bitcast i8*.
; Both runs expect vmovaps of %zmm0 to (%rdi), then vzeroupper before retq.
6215 define void @mov_test24(i8 * %addr, <8 x double> %data) {
6216 ; GENERIC-LABEL: mov_test24:
6218 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6219 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
6220 ; GENERIC-NEXT: retq # sched: [1:1.00]
6222 ; SKX-LABEL: mov_test24:
6224 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6225 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
6226 ; SKX-NEXT: retq # sched: [7:1.00]
6227 %vaddr = bitcast i8* %addr to <8 x double>*
6228 store <8 x double>%data, <8 x double>* %vaddr, align 64
; 64-byte-aligned 512-bit load of <8 x double> through a bitcast i8*; the
; loaded vector is returned. Both runs expect a single vmovaps from (%rdi)
; into %zmm0.
6232 define <8 x double> @mov_test25(i8 * %addr) {
6233 ; GENERIC-LABEL: mov_test25:
6235 ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [4:0.50]
6236 ; GENERIC-NEXT: retq # sched: [1:1.00]
6238 ; SKX-LABEL: mov_test25:
6240 ; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50]
6241 ; SKX-NEXT: retq # sched: [7:1.00]
6242 %vaddr = bitcast i8* %addr to <8 x double>*
6243 %res = load <8 x double>, <8 x double>* %vaddr, align 64
6244 ret <8 x double>%res
; 64-byte-aligned 512-bit store of <16 x float> through a bitcast i8*.
; Both runs expect vmovaps of %zmm0 to (%rdi), then vzeroupper before retq.
6247 define void @mov_test26(i8 * %addr, <16 x float> %data) {
6248 ; GENERIC-LABEL: mov_test26:
6250 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6251 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
6252 ; GENERIC-NEXT: retq # sched: [1:1.00]
6254 ; SKX-LABEL: mov_test26:
6256 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
6257 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
6258 ; SKX-NEXT: retq # sched: [7:1.00]
6259 %vaddr = bitcast i8* %addr to <16 x float>*
6260 store <16 x float>%data, <16 x float>* %vaddr, align 64
; 64-byte-aligned 512-bit load of <16 x float> through a bitcast i8*; the
; loaded vector is returned. Both runs expect a single vmovaps from (%rdi)
; into %zmm0.
6264 define <16 x float> @mov_test27(i8 * %addr) {
6265 ; GENERIC-LABEL: mov_test27:
6267 ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [4:0.50]
6268 ; GENERIC-NEXT: retq # sched: [1:1.00]
6270 ; SKX-LABEL: mov_test27:
6272 ; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50]
6273 ; SKX-NEXT: retq # sched: [7:1.00]
6274 %vaddr = bitcast i8* %addr to <16 x float>*
6275 %res = load <16 x float>, <16 x float>* %vaddr, align 64
6276 ret <16 x float>%res
; Unaligned (align 1) 512-bit store of <8 x double> through a bitcast i8*.
; Both runs expect vmovups of %zmm0 to (%rdi), then vzeroupper before retq.
6279 define void @mov_test28(i8 * %addr, <8 x double> %data) {
6280 ; GENERIC-LABEL: mov_test28:
6282 ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
6283 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
6284 ; GENERIC-NEXT: retq # sched: [1:1.00]
6286 ; SKX-LABEL: mov_test28:
6288 ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
6289 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
6290 ; SKX-NEXT: retq # sched: [7:1.00]
6291 %vaddr = bitcast i8* %addr to <8 x double>*
6292 store <8 x double>%data, <8 x double>* %vaddr, align 1
; Unaligned (align 1) 512-bit load of <8 x double> through a bitcast i8*; the
; loaded vector is returned. Both runs expect a single vmovups from (%rdi)
; into %zmm0.
6296 define <8 x double> @mov_test29(i8 * %addr) {
6297 ; GENERIC-LABEL: mov_test29:
6299 ; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [4:0.50]
6300 ; GENERIC-NEXT: retq # sched: [1:1.00]
6302 ; SKX-LABEL: mov_test29:
6304 ; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50]
6305 ; SKX-NEXT: retq # sched: [7:1.00]
6306 %vaddr = bitcast i8* %addr to <8 x double>*
6307 %res = load <8 x double>, <8 x double>* %vaddr, align 1
6308 ret <8 x double>%res
; Unaligned (align 1) 512-bit store of <16 x float> through a bitcast i8*.
; Both runs expect vmovups of %zmm0 to (%rdi), then vzeroupper before retq.
6311 define void @mov_test30(i8 * %addr, <16 x float> %data) {
6312 ; GENERIC-LABEL: mov_test30:
6314 ; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
6315 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
6316 ; GENERIC-NEXT: retq # sched: [1:1.00]
6318 ; SKX-LABEL: mov_test30:
6320 ; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
6321 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
6322 ; SKX-NEXT: retq # sched: [7:1.00]
6323 %vaddr = bitcast i8* %addr to <16 x float>*
6324 store <16 x float>%data, <16 x float>* %vaddr, align 1
; Unaligned (align 1) 512-bit load of <16 x float> through a bitcast i8*; the
; loaded vector is returned. Both runs expect a single vmovups from (%rdi)
; into %zmm0.
6328 define <16 x float> @mov_test31(i8 * %addr) {
6329 ; GENERIC-LABEL: mov_test31:
6331 ; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [4:0.50]
6332 ; GENERIC-NEXT: retq # sched: [1:1.00]
6334 ; SKX-LABEL: mov_test31:
6336 ; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50]
6337 ; SKX-NEXT: retq # sched: [7:1.00]
6338 %vaddr = bitcast i8* %addr to <16 x float>*
6339 %res = load <16 x float>, <16 x float>* %vaddr, align 1
6340 ret <16 x float>%res
; Merge-masked aligned load of <16 x i32>: the mask is (%mask1 != 0) per
; lane, the vector is loaded with align 64, and the select falls back to %old
; where the mask is false. Both runs expect vptestmd to build %k1, then a
; vmovdqa32 load into %zmm0 under {%k1}.
6343 define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
6344 ; GENERIC-LABEL: mov_test32:
6346 ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
6347 ; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [4:0.50]
6348 ; GENERIC-NEXT: retq # sched: [1:1.00]
6350 ; SKX-LABEL: mov_test32:
6352 ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
6353 ; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
6354 ; SKX-NEXT: retq # sched: [7:1.00]
6355 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
6356 %vaddr = bitcast i8* %addr to <16 x i32>*
6357 %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
6358 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
; Merge-masked unaligned load of <16 x i32>: the mask is (%mask1 != 0) per
; lane, the vector is loaded with align 1, and the select falls back to %old.
; Both runs expect vptestmd to build %k1, then a vmovdqu32 load into %zmm0
; under {%k1}.
6362 define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
6363 ; GENERIC-LABEL: mov_test33:
6365 ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
6366 ; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [4:0.50]
6367 ; GENERIC-NEXT: retq # sched: [1:1.00]
6369 ; SKX-LABEL: mov_test33:
6371 ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
6372 ; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
6373 ; SKX-NEXT: retq # sched: [7:1.00]
6374 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
6375 %vaddr = bitcast i8* %addr to <16 x i32>*
6376 %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
6377 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
; Zero-masked aligned load of <16 x i32>: the mask is (%mask1 != 0) per lane,
; the vector is loaded with align 64, and the select falls back to
; zeroinitializer. Both runs expect vptestmd to build %k1, then a vmovdqa32
; load carrying {%k1} {z}.
6381 define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) {
6382 ; GENERIC-LABEL: mov_test34:
6384 ; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
6385 ; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
6386 ; GENERIC-NEXT: retq # sched: [1:1.00]
6388 ; SKX-LABEL: mov_test34:
6390 ; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
6391 ; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
6392 ; SKX-NEXT: retq # sched: [7:1.00]
6393 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
6394 %vaddr = bitcast i8* %addr to <16 x i32>*
6395 %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
6396 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
; Zero-masked unaligned load of <16 x i32>: the mask is (%mask1 != 0) per
; lane, the vector is loaded with align 1, and the select falls back to
; zeroinitializer. Both runs expect vptestmd to build %k1, then a vmovdqu32
; load carrying {%k1} {z}.
6400 define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) {
6401 ; GENERIC-LABEL: mov_test35:
6403 ; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
6404 ; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
6405 ; GENERIC-NEXT: retq # sched: [1:1.00]
6407 ; SKX-LABEL: mov_test35:
6409 ; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
6410 ; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
6411 ; SKX-NEXT: retq # sched: [7:1.00]
6412 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
6413 %vaddr = bitcast i8* %addr to <16 x i32>*
6414 %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
6415 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
; Merge-masked aligned load of <8 x i64>: the mask is (%mask1 != 0) per lane,
; the vector is loaded with align 64, and the select falls back to %old.
; Both runs expect vptestmq to build %k1, then a vmovdqa64 load into %zmm0
; under {%k1}.
6419 define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
6420 ; GENERIC-LABEL: mov_test36:
6422 ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
6423 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [4:0.50]
6424 ; GENERIC-NEXT: retq # sched: [1:1.00]
6426 ; SKX-LABEL: mov_test36:
6428 ; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
6429 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
6430 ; SKX-NEXT: retq # sched: [7:1.00]
6431 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
6432 %vaddr = bitcast i8* %addr to <8 x i64>*
6433 %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
6434 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
; Merge-masked unaligned load of <8 x i64>: the mask is (%mask1 != 0) per
; lane, the vector is loaded with align 1, and the select falls back to %old.
; Both runs expect vptestmq to build %k1, then a vmovdqu64 load into %zmm0
; under {%k1}.
6438 define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
6439 ; GENERIC-LABEL: mov_test37:
6441 ; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
6442 ; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [4:0.50]
6443 ; GENERIC-NEXT: retq # sched: [1:1.00]
6445 ; SKX-LABEL: mov_test37:
6447 ; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
6448 ; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
6449 ; SKX-NEXT: retq # sched: [7:1.00]
6450 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
6451 %vaddr = bitcast i8* %addr to <8 x i64>*
6452 %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
6453 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
; Zero-masked aligned load of <8 x i64>: the mask is (%mask1 != 0) per lane,
; the vector is loaded with align 64, and the select falls back to
; zeroinitializer. Both runs expect vptestmq to build %k1, then a vmovdqa64
; load carrying {%k1} {z}.
6457 define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) {
6458 ; GENERIC-LABEL: mov_test38:
6460 ; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
6461 ; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
6462 ; GENERIC-NEXT: retq # sched: [1:1.00]
6464 ; SKX-LABEL: mov_test38:
6466 ; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
6467 ; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
6468 ; SKX-NEXT: retq # sched: [7:1.00]
6469 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
6470 %vaddr = bitcast i8* %addr to <8 x i64>*
6471 %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
6472 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
; Zero-masked unaligned load of <8 x i64>: the mask is (%mask1 != 0) per
; lane, the vector is loaded with align 1, and the select falls back to
; zeroinitializer. Both runs expect vptestmq to build %k1, then a vmovdqu64
; load carrying {%k1} {z}.
6476 define <8 x i64> @mov_test39(i8 * %addr, <8 x i64> %mask1) {
6477 ; GENERIC-LABEL: mov_test39:
6479 ; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
6480 ; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
6481 ; GENERIC-NEXT: retq # sched: [1:1.00]
6483 ; SKX-LABEL: mov_test39:
6485 ; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
6486 ; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
6487 ; SKX-NEXT: retq # sched: [7:1.00]
6488 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
6489 %vaddr = bitcast i8* %addr to <8 x i64>*
6490 %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
6491 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
; Merge-masked aligned load of <16 x float>: the mask is an ordered
; not-equal compare (fcmp one) of %mask1 against zero, the vector is loaded
; with align 64, and the select falls back to %old. Both runs expect vxorps
; to zero a register, vcmpneq_oqps to build %k1, then a vmovaps load into
; %zmm0 under {%k1}.
6495 define <16 x float> @mov_test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
6496 ; GENERIC-LABEL: mov_test40:
6498 ; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
6499 ; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
6500 ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [4:0.50]
6501 ; GENERIC-NEXT: retq # sched: [1:1.00]
6503 ; SKX-LABEL: mov_test40:
6505 ; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
6506 ; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
6507 ; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [8:0.50]
6508 ; SKX-NEXT: retq # sched: [7:1.00]
6509 %mask = fcmp one <16 x float> %mask1, zeroinitializer
6510 %vaddr = bitcast i8* %addr to <16 x float>*
6511 %r = load <16 x float>, <16 x float>* %vaddr, align 64
6512 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
6513 ret <16 x float>%res
; Merge-masked unaligned load of <16 x float>: the mask is fcmp one of %mask1
; against zero, the vector is loaded with align 1, and the select falls back
; to %old. Both runs expect vxorps, vcmpneq_oqps into %k1, then a vmovups
; load into %zmm0 under {%k1}.
6516 define <16 x float> @mov_test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
6517 ; GENERIC-LABEL: mov_test41:
6519 ; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
6520 ; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
6521 ; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [4:0.50]
6522 ; GENERIC-NEXT: retq # sched: [1:1.00]
6524 ; SKX-LABEL: mov_test41:
6526 ; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
6527 ; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
6528 ; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [8:0.50]
6529 ; SKX-NEXT: retq # sched: [7:1.00]
6530 %mask = fcmp one <16 x float> %mask1, zeroinitializer
6531 %vaddr = bitcast i8* %addr to <16 x float>*
6532 %r = load <16 x float>, <16 x float>* %vaddr, align 1
6533 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
6534 ret <16 x float>%res
; Zero-masked aligned load of <16 x float>: the mask is fcmp one of %mask1
; against zero, the vector is loaded with align 64, and the select falls back
; to zeroinitializer. Both runs expect vxorps, vcmpneq_oqps into %k1, then a
; vmovaps load carrying {%k1} {z}.
6537 define <16 x float> @mov_test42(i8 * %addr, <16 x float> %mask1) {
6538 ; GENERIC-LABEL: mov_test42:
6540 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
6541 ; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
6542 ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
6543 ; GENERIC-NEXT: retq # sched: [1:1.00]
6545 ; SKX-LABEL: mov_test42:
6547 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
6548 ; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
6549 ; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
6550 ; SKX-NEXT: retq # sched: [7:1.00]
6551 %mask = fcmp one <16 x float> %mask1, zeroinitializer
6552 %vaddr = bitcast i8* %addr to <16 x float>*
6553 %r = load <16 x float>, <16 x float>* %vaddr, align 64
6554 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
6555 ret <16 x float>%res
; Zero-masked unaligned load of <16 x float>: the mask is fcmp one of %mask1
; against zero, the vector is loaded with align 1, and the select falls back
; to zeroinitializer. Both runs expect vxorps, vcmpneq_oqps into %k1, then a
; vmovups load carrying {%k1} {z}.
6558 define <16 x float> @mov_test43(i8 * %addr, <16 x float> %mask1) {
6559 ; GENERIC-LABEL: mov_test43:
6561 ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
6562 ; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
6563 ; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
6564 ; GENERIC-NEXT: retq # sched: [1:1.00]
6566 ; SKX-LABEL: mov_test43:
6568 ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
6569 ; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
6570 ; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
6571 ; SKX-NEXT: retq # sched: [7:1.00]
6572 %mask = fcmp one <16 x float> %mask1, zeroinitializer
6573 %vaddr = bitcast i8* %addr to <16 x float>*
6574 %r = load <16 x float>, <16 x float>* %vaddr, align 1
6575 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
6576 ret <16 x float>%res
; Merge-masked aligned load of <8 x double>: the mask is fcmp one of %mask1
; against zero, the vector is loaded with align 64, and the select falls back
; to %old. Both runs expect vxorpd, vcmpneq_oqpd into %k1, then a vmovapd
; load into %zmm0 under {%k1}.
6579 define <8 x double> @mov_test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
6580 ; GENERIC-LABEL: mov_test44:
6582 ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
6583 ; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
6584 ; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [4:0.50]
6585 ; GENERIC-NEXT: retq # sched: [1:1.00]
6587 ; SKX-LABEL: mov_test44:
6589 ; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
6590 ; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
6591 ; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
6592 ; SKX-NEXT: retq # sched: [7:1.00]
6593 %mask = fcmp one <8 x double> %mask1, zeroinitializer
6594 %vaddr = bitcast i8* %addr to <8 x double>*
6595 %r = load <8 x double>, <8 x double>* %vaddr, align 64
6596 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
6597 ret <8 x double>%res
; mov_test45: merge-masked load of <8 x double> with `align 1`.
; Lanes selected by `fcmp one %mask1, zeroinitializer` take the loaded value,
; the rest keep %old. With byte alignment the expected instruction is
; vmovupd with a {%k1} mask.
6600 define <8 x double> @mov_test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
6601 ; GENERIC-LABEL: mov_test45:
6603 ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
6604 ; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
6605 ; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [4:0.50]
6606 ; GENERIC-NEXT: retq # sched: [1:1.00]
6608 ; SKX-LABEL: mov_test45:
6610 ; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
6611 ; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
6612 ; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
6613 ; SKX-NEXT: retq # sched: [7:1.00]
6614 %mask = fcmp one <8 x double> %mask1, zeroinitializer
6615 %vaddr = bitcast i8* %addr to <8 x double>*
6616 %r = load <8 x double>, <8 x double>* %vaddr, align 1
6617 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
6618 ret <8 x double>%res
; mov_test46: zero-masked load of <8 x double> with `align 64`.
; Lanes where `fcmp one %mask1, zeroinitializer` is true take the loaded
; value, all others are zero. Expected instruction: vmovapd with {%k1} {z}.
6621 define <8 x double> @mov_test46(i8 * %addr, <8 x double> %mask1) {
6622 ; GENERIC-LABEL: mov_test46:
6624 ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
6625 ; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
6626 ; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
6627 ; GENERIC-NEXT: retq # sched: [1:1.00]
6629 ; SKX-LABEL: mov_test46:
6631 ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
6632 ; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
6633 ; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
6634 ; SKX-NEXT: retq # sched: [7:1.00]
6635 %mask = fcmp one <8 x double> %mask1, zeroinitializer
6636 %vaddr = bitcast i8* %addr to <8 x double>*
6637 %r = load <8 x double>, <8 x double>* %vaddr, align 64
6638 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
6639 ret <8 x double>%res
; mov_test47: zero-masked load of <8 x double> with `align 1`.
; Lanes where `fcmp one %mask1, zeroinitializer` is true take the loaded
; value, all others are zero. Expected instruction: vmovupd with {%k1} {z}.
6642 define <8 x double> @mov_test47(i8 * %addr, <8 x double> %mask1) {
6643 ; GENERIC-LABEL: mov_test47:
6645 ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
6646 ; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
6647 ; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
6648 ; GENERIC-NEXT: retq # sched: [1:1.00]
6650 ; SKX-LABEL: mov_test47:
6652 ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
6653 ; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
6654 ; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
6655 ; SKX-NEXT: retq # sched: [7:1.00]
6656 %mask = fcmp one <8 x double> %mask1, zeroinitializer
6657 %vaddr = bitcast i8* %addr to <8 x double>*
6658 %r = load <8 x double>, <8 x double>* %vaddr, align 1
6659 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
6660 ret <8 x double>%res
; mask16: bitwise NOT of an i16 expressed on the mask type.
; %x is bitcast to <16 x i1>, xor'ed with an all-ones vector, and bitcast
; back to i16. The expected code stays in general-purpose registers
; (notl + movl) rather than using k-registers.
6663 define i16 @mask16(i16 %x) {
6664 ; GENERIC-LABEL: mask16:
6666 ; GENERIC-NEXT: notl %edi # sched: [1:0.33]
6667 ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
6668 ; GENERIC-NEXT: retq # sched: [1:1.00]
6670 ; SKX-LABEL: mask16:
6672 ; SKX-NEXT: notl %edi # sched: [1:0.25]
6673 ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
6674 ; SKX-NEXT: retq # sched: [7:1.00]
6675 %m0 = bitcast i16 %x to <16 x i1>
6676 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
6677 %ret = bitcast <16 x i1> %m1 to i16
; mask16_zext: NOT of an i16 via <16 x i1>, then zero-extended to i32.
; The inverted mask is bitcast back to i16 and widened with `zext`; the
; expected code is notl followed by movzwl.
6681 define i32 @mask16_zext(i16 %x) {
6682 ; GENERIC-LABEL: mask16_zext:
6684 ; GENERIC-NEXT: notl %edi # sched: [1:0.33]
6685 ; GENERIC-NEXT: movzwl %di, %eax # sched: [1:0.33]
6686 ; GENERIC-NEXT: retq # sched: [1:1.00]
6688 ; SKX-LABEL: mask16_zext:
6690 ; SKX-NEXT: notl %edi # sched: [1:0.25]
6691 ; SKX-NEXT: movzwl %di, %eax # sched: [1:0.25]
6692 ; SKX-NEXT: retq # sched: [7:1.00]
6693 %m0 = bitcast i16 %x to <16 x i1>
6694 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
6695 %m2 = bitcast <16 x i1> %m1 to i16
6696 %ret = zext i16 %m2 to i32
; mask8: bitwise NOT of an i8 expressed on the mask type.
; %x is bitcast to <8 x i1>, xor'ed with an all-ones vector, and bitcast
; back to i8. The expected code is notb on %dil followed by a movl.
6700 define i8 @mask8(i8 %x) {
6701 ; GENERIC-LABEL: mask8:
6703 ; GENERIC-NEXT: notb %dil # sched: [1:0.33]
6704 ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
6705 ; GENERIC-NEXT: retq # sched: [1:1.00]
6709 ; SKX-NEXT: notb %dil # sched: [1:0.25]
6710 ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
6711 ; SKX-NEXT: retq # sched: [7:1.00]
6712 %m0 = bitcast i8 %x to <8 x i1>
6713 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
6714 %ret = bitcast <8 x i1> %m1 to i8
; mask8_zext: NOT of an i8 via <8 x i1>, then zero-extended to i32.
; The inverted mask is bitcast back to i8 and widened with `zext`; the
; expected code is notb followed by movzbl.
6718 define i32 @mask8_zext(i8 %x) {
6719 ; GENERIC-LABEL: mask8_zext:
6721 ; GENERIC-NEXT: notb %dil # sched: [1:0.33]
6722 ; GENERIC-NEXT: movzbl %dil, %eax # sched: [1:0.33]
6723 ; GENERIC-NEXT: retq # sched: [1:1.00]
6725 ; SKX-LABEL: mask8_zext:
6727 ; SKX-NEXT: notb %dil # sched: [1:0.25]
6728 ; SKX-NEXT: movzbl %dil, %eax # sched: [1:0.25]
6729 ; SKX-NEXT: retq # sched: [7:1.00]
6730 %m0 = bitcast i8 %x to <8 x i1>
6731 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
6732 %m2 = bitcast <8 x i1> %m1 to i8
6733 %ret = zext i8 %m2 to i32
; mask16_mem: in-place NOT of an i16 in memory via <16 x i1>.
; The value is loaded from %ptr, inverted as a <16 x i1> xor with all-ones,
; and stored back to the same pointer. Here the expected code uses mask
; registers: kmovw load, knotw, kmovw store.
6737 define void @mask16_mem(i16* %ptr) {
6738 ; GENERIC-LABEL: mask16_mem:
6740 ; GENERIC-NEXT: kmovw (%rdi), %k0
6741 ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
6742 ; GENERIC-NEXT: kmovw %k0, (%rdi)
6743 ; GENERIC-NEXT: retq # sched: [1:1.00]
6745 ; SKX-LABEL: mask16_mem:
6747 ; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00]
6748 ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00]
6749 ; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00]
6750 ; SKX-NEXT: retq # sched: [7:1.00]
6751 %x = load i16, i16* %ptr, align 4
6752 %m0 = bitcast i16 %x to <16 x i1>
6753 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
6754 %ret = bitcast <16 x i1> %m1 to i16
6755 store i16 %ret, i16* %ptr, align 4
; mask8_mem: in-place NOT of an i8 in memory via <8 x i1>.
; The byte is loaded from %ptr, inverted as an <8 x i1> xor with all-ones,
; and stored back. Expected code: kmovb load, knotb, kmovb store.
6759 define void @mask8_mem(i8* %ptr) {
6760 ; GENERIC-LABEL: mask8_mem:
6762 ; GENERIC-NEXT: kmovb (%rdi), %k0
6763 ; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00]
6764 ; GENERIC-NEXT: kmovb %k0, (%rdi)
6765 ; GENERIC-NEXT: retq # sched: [1:1.00]
6767 ; SKX-LABEL: mask8_mem:
6769 ; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00]
6770 ; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00]
6771 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
6772 ; SKX-NEXT: retq # sched: [7:1.00]
6773 %x = load i8, i8* %ptr, align 4
6774 %m0 = bitcast i8 %x to <8 x i1>
6775 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
6776 %ret = bitcast <8 x i1> %m1 to i8
6777 store i8 %ret, i8* %ptr, align 4
; mand16: computes (a & b) | (a ^ b) on two i16 values viewed as <16 x i1>.
; Both arguments are bitcast to <16 x i1>, combined with and/xor/or, and the
; result is bitcast back to i16. The expected code performs the logic in
; general-purpose registers (xorl, andl, orl).
6781 define i16 @mand16(i16 %x, i16 %y) {
6782 ; GENERIC-LABEL: mand16:
6784 ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
6785 ; GENERIC-NEXT: xorl %esi, %eax # sched: [1:0.33]
6786 ; GENERIC-NEXT: andl %esi, %edi # sched: [1:0.33]
6787 ; GENERIC-NEXT: orl %eax, %edi # sched: [1:0.33]
6788 ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
6789 ; GENERIC-NEXT: retq # sched: [1:1.00]
6791 ; SKX-LABEL: mand16:
6793 ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
6794 ; SKX-NEXT: xorl %esi, %eax # sched: [1:0.25]
6795 ; SKX-NEXT: andl %esi, %edi # sched: [1:0.25]
6796 ; SKX-NEXT: orl %eax, %edi # sched: [1:0.25]
6797 ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
6798 ; SKX-NEXT: retq # sched: [7:1.00]
6799 %ma = bitcast i16 %x to <16 x i1>
6800 %mb = bitcast i16 %y to <16 x i1>
6801 %mc = and <16 x i1> %ma, %mb
6802 %md = xor <16 x i1> %ma, %mb
6803 %me = or <16 x i1> %mc, %md
6804 %ret = bitcast <16 x i1> %me to i16
; mand16_mem: computes (a & b) | (a ^ b) on two <16 x i1> values from memory.
; Both operands are loaded directly as <16 x i1>; the result is bitcast to
; i16. The expected code uses mask registers throughout (kmovw loads, kandw,
; kxorw, korw) and a final kmovd into %eax.
6808 define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
6809 ; GENERIC-LABEL: mand16_mem:
6811 ; GENERIC-NEXT: kmovw (%rdi), %k0
6812 ; GENERIC-NEXT: kmovw (%rsi), %k1
6813 ; GENERIC-NEXT: kandw %k1, %k0, %k2 # sched: [1:1.00]
6814 ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
6815 ; GENERIC-NEXT: korw %k0, %k2, %k0 # sched: [1:1.00]
6816 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
6817 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
6818 ; GENERIC-NEXT: retq # sched: [1:1.00]
6820 ; SKX-LABEL: mand16_mem:
6822 ; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00]
6823 ; SKX-NEXT: kmovw (%rsi), %k1 # sched: [7:1.00]
6824 ; SKX-NEXT: kandw %k1, %k0, %k2 # sched: [1:1.00]
6825 ; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
6826 ; SKX-NEXT: korw %k0, %k2, %k0 # sched: [1:1.00]
6827 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
6828 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
6829 ; SKX-NEXT: retq # sched: [7:1.00]
6830 %ma = load <16 x i1>, <16 x i1>* %x
6831 %mb = load <16 x i1>, <16 x i1>* %y
6832 %mc = and <16 x i1> %ma, %mb
6833 %md = xor <16 x i1> %ma, %mb
6834 %me = or <16 x i1> %mc, %md
6835 %ret = bitcast <16 x i1> %me to i16
; shuf_test1: extracts the upper eight mask bits of an i16.
; %v is bitcast to <16 x i1>, a shufflevector picks elements 8..15 into an
; <8 x i1>, and that is bitcast to i8. The expected code is a kshiftrw by 8
; between two kmovd transfers.
6839 define i8 @shuf_test1(i16 %v) nounwind {
6840 ; GENERIC-LABEL: shuf_test1:
6842 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
6843 ; GENERIC-NEXT: kshiftrw $8, %k0, %k0 # sched: [1:1.00]
6844 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
6845 ; GENERIC-NEXT: # kill: def $al killed $al killed $eax
6846 ; GENERIC-NEXT: retq # sched: [1:1.00]
6848 ; SKX-LABEL: shuf_test1:
6850 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
6851 ; SKX-NEXT: kshiftrw $8, %k0, %k0 # sched: [3:1.00]
6852 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
6853 ; SKX-NEXT: # kill: def $al killed $al killed $eax
6854 ; SKX-NEXT: retq # sched: [7:1.00]
6855 %v1 = bitcast i16 %v to <16 x i1>
6856 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6857 %mask1 = bitcast <8 x i1> %mask to i8
; zext_test1: zero-extends one lane of a vector compare to i32.
; `icmp ugt` on <16 x i32> yields a <16 x i1>; element 5 is extracted and
; zext'ed to i32. The expected code shifts the mask right by 5 (kshiftrw),
; moves it to %eax and masks with 1.
6861 define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
6862 ; GENERIC-LABEL: zext_test1:
6864 ; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
6865 ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
6866 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
6867 ; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
6868 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
6869 ; GENERIC-NEXT: retq # sched: [1:1.00]
6871 ; SKX-LABEL: zext_test1:
6873 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
6874 ; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00]
6875 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
6876 ; SKX-NEXT: andl $1, %eax # sched: [1:0.25]
6877 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
6878 ; SKX-NEXT: retq # sched: [7:1.00]
6879 %cmp_res = icmp ugt <16 x i32> %a, %b
6880 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
6881 %res = zext i1 %cmp_res.i1 to i32
; zext_test2: zero-extends one lane of a vector compare to i16.
; Same compare and element-5 extraction as the i32 variant, but the i1 is
; zext'ed to i16, so the expected code ends with a `kill` note narrowing
; %eax to %ax.
6885 define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
6886 ; GENERIC-LABEL: zext_test2:
6888 ; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
6889 ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
6890 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
6891 ; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
6892 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
6893 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
6894 ; GENERIC-NEXT: retq # sched: [1:1.00]
6896 ; SKX-LABEL: zext_test2:
6898 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
6899 ; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00]
6900 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
6901 ; SKX-NEXT: andl $1, %eax # sched: [1:0.25]
6902 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
6903 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
6904 ; SKX-NEXT: retq # sched: [7:1.00]
6905 %cmp_res = icmp ugt <16 x i32> %a, %b
6906 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
6907 %res = zext i1 %cmp_res.i1 to i16
; zext_test3: zero-extends one lane of a vector compare to i8.
; Same compare and element-5 extraction as the wider variants, but the i1 is
; zext'ed to i8; the expected code masks with `andb $1, %al`.
6911 define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
6912 ; GENERIC-LABEL: zext_test3:
6914 ; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
6915 ; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
6916 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
6917 ; GENERIC-NEXT: andb $1, %al # sched: [1:0.33]
6918 ; GENERIC-NEXT: # kill: def $al killed $al killed $eax
6919 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
6920 ; GENERIC-NEXT: retq # sched: [1:1.00]
6922 ; SKX-LABEL: zext_test3:
6924 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
6925 ; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00]
6926 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
6927 ; SKX-NEXT: andb $1, %al # sched: [1:0.25]
6928 ; SKX-NEXT: # kill: def $al killed $al killed $eax
6929 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
6930 ; SKX-NEXT: retq # sched: [7:1.00]
6931 %cmp_res = icmp ugt <16 x i32> %a, %b
6932 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
6933 %res = zext i1 %cmp_res.i1 to i8
; conv1: stores constant <8 x i1> masks and converts one back to i8.
; An all-ones <8 x i1> is stored to %R. A second constant whose element 0 is
; 0 and whose other elements are 1 is stored to a stack slot, reloaded, and
; bitcast to i8. The expected code returns the immediate -2 in %al and uses
; kxnorw + kmovb for the store to %R.
6937 define i8 @conv1(<8 x i1>* %R) {
6938 ; GENERIC-LABEL: conv1:
6939 ; GENERIC: # %bb.0: # %entry
6940 ; GENERIC-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
6941 ; GENERIC-NEXT: kmovb %k0, (%rdi)
6942 ; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
6943 ; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33]
6944 ; GENERIC-NEXT: retq # sched: [1:1.00]
6947 ; SKX: # %bb.0: # %entry
6948 ; SKX-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
6949 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
6950 ; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
6951 ; SKX-NEXT: movb $-2, %al # sched: [1:0.25]
6952 ; SKX-NEXT: retq # sched: [7:1.00]
6954 store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
6956 %maskPtr = alloca <8 x i1>
6957 store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
6958 %mask = load <8 x i1>, <8 x i1>* %maskPtr
6959 %mask_convert = bitcast <8 x i1> %mask to i8
6960 ret i8 %mask_convert
; test4: signed compare of two <4 x i1> compare results.
; Two `icmp sgt` on <4 x i64> produce <4 x i1> masks; those masks are then
; compared with `icmp sgt` and the result is sign-extended to <4 x i32>.
; The expected code is a vpcmpleq feeding a masked vpcmpgtq, then vpmovm2d.
6963 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
6964 ; GENERIC-LABEL: test4:
6966 ; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00]
6967 ; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00]
6968 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
6969 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
6970 ; GENERIC-NEXT: retq # sched: [1:1.00]
6974 ; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00]
6975 ; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00]
6976 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
6977 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
6978 ; SKX-NEXT: retq # sched: [7:1.00]
6979 %x_gt_y = icmp sgt <4 x i64> %x, %y
6980 %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
6981 %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
6982 %resse = sext <4 x i1>%res to <4 x i32>
6983 ret <4 x i32> %resse
; vcmp_test5: signed compare of two <2 x i1> compare results.
; Note that %x_gt_y actually holds an `icmp slt` result despite its name.
; The two <2 x i1> masks are compared with `icmp slt` and the result is
; sign-extended to <2 x i64>. The expected code is a vpcmpleq feeding a
; masked vpcmpgtq, then vpmovm2q.
6986 define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
6987 ; GENERIC-LABEL: vcmp_test5:
6989 ; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00]
6990 ; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00]
6991 ; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
6992 ; GENERIC-NEXT: retq # sched: [1:1.00]
6994 ; SKX-LABEL: vcmp_test5:
6996 ; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00]
6997 ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00]
6998 ; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
6999 ; SKX-NEXT: retq # sched: [7:1.00]
7000 %x_gt_y = icmp slt <2 x i64> %x, %y
7001 %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
7002 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
7003 %resse = sext <2 x i1>%res to <2 x i64>
7004 ret <2 x i64> %resse
7005 }define void @vcmp_test6(<16 x i1> %mask) {
; vcmp_test6: ANDs the <16 x i1> argument with an alternating true/false
; constant, bitcasts the result to i16, and branches on whether it is zero.
7007 %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
7008 %b = bitcast <16 x i1> %a to i16
7009 %c = icmp eq i16 %b, 0
7010 br i1 %c, label %true, label %false
; vcmp_test7: ORs the <8 x i1> argument with an alternating true/false
; constant, bitcasts the result to i8, and branches on whether it is zero.
; The expected code converts the vector to a mask (vpsllw + vpmovw2m), moves
; it to %eax and applies `orb $85`.
7018 define void @vcmp_test7(<8 x i1> %mask) {
7019 ; GENERIC-LABEL: vcmp_test7:
7020 ; GENERIC: # %bb.0: # %allocas
7021 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
7022 ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
7023 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
7024 ; GENERIC-NEXT: orb $85, %al # sched: [1:0.33]
7025 ; GENERIC-NEXT: retq # sched: [1:1.00]
7027 ; SKX-LABEL: vcmp_test7:
7028 ; SKX: # %bb.0: # %allocas
7029 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
7030 ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00]
7031 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
7032 ; SKX-NEXT: orb $85, %al # sched: [1:0.25]
7033 ; SKX-NEXT: retq # sched: [7:1.00]
7035 %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
7036 %b = bitcast <8 x i1> %a to i8
7037 %c = icmp eq i8 %b, 0
7038 br i1 %c, label %true, label %false
; vcmp_test8: scalar-controlled select between two vector compare masks.
; %cond (`icmp sgt %a1, %b1`) chooses between `icmp sgt %a, 0` and
; `icmp ult %b, 0`; the chosen <16 x i1> is sign-extended to <16 x i8>.
; The expected code branches on the scalar compare and emits one vector
; compare plus vpmovm2b on each path.
7046 define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
7047 ; GENERIC-LABEL: vcmp_test8:
7049 ; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
7050 ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33]
7051 ; GENERIC-NEXT: jg .LBB386_1 # sched: [1:1.00]
7052 ; GENERIC-NEXT: # %bb.2:
7053 ; GENERIC-NEXT: vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00]
7054 ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
7055 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
7056 ; GENERIC-NEXT: retq # sched: [1:1.00]
7057 ; GENERIC-NEXT: .LBB386_1:
7058 ; GENERIC-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00]
7059 ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
7060 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
7061 ; GENERIC-NEXT: retq # sched: [1:1.00]
7063 ; SKX-LABEL: vcmp_test8:
7065 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
7066 ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25]
7067 ; SKX-NEXT: jg .LBB386_1 # sched: [1:0.50]
7068 ; SKX-NEXT: # %bb.2:
7069 ; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00]
7070 ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
7071 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
7072 ; SKX-NEXT: retq # sched: [7:1.00]
7073 ; SKX-NEXT: .LBB386_1:
7074 ; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00]
7075 ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
7076 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
7077 ; SKX-NEXT: retq # sched: [7:1.00]
7078 %cond = icmp sgt i32 %a1, %b1
7079 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
7080 %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
7081 %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
7082 %res = sext <16 x i1> %mix to <16 x i8>
; vpmov_test9: scalar-controlled select between two <16 x i1> arguments.
; `icmp sgt %a1, %b1` picks %a or %b. The expected code branches, shifts the
; chosen vector with vpsllw $7, and round-trips it through a mask register
; (vpmovb2m then vpmovm2b).
7085 define <16 x i1> @vpmov_test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
7086 ; GENERIC-LABEL: vpmov_test9:
7088 ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33]
7089 ; GENERIC-NEXT: jg .LBB387_1 # sched: [1:1.00]
7090 ; GENERIC-NEXT: # %bb.2:
7091 ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:1.00]
7092 ; GENERIC-NEXT: jmp .LBB387_3 # sched: [1:1.00]
7093 ; GENERIC-NEXT: .LBB387_1:
7094 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
7095 ; GENERIC-NEXT: .LBB387_3:
7096 ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
7097 ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
7098 ; GENERIC-NEXT: retq # sched: [1:1.00]
7100 ; SKX-LABEL: vpmov_test9:
7102 ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25]
7103 ; SKX-NEXT: jg .LBB387_1 # sched: [1:0.50]
7104 ; SKX-NEXT: # %bb.2:
7105 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:0.50]
7106 ; SKX-NEXT: jmp .LBB387_3 # sched: [1:0.50]
7107 ; SKX-NEXT: .LBB387_1:
7108 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
7109 ; SKX-NEXT: .LBB387_3:
7110 ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00]
7111 ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
7112 ; SKX-NEXT: retq # sched: [7:1.00]
7113 %mask = icmp sgt i32 %a1, %b1
7114 %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
7116 }define <8 x i1> @vpmov_test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
; vpmov_test10: the same scalar-controlled select, on <8 x i1> operands.
7117 %mask = icmp sgt i32 %a1, %b1
7118 %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
; vmov_test11: scalar-controlled select between two <4 x i1> arguments.
; `icmp sgt %a1, %b1` picks %a or %b. The expected code branches, shifts the
; chosen vector with vpslld $31, and converts it through a mask register
; (vptestmd then vpmovm2d).
7122 define <4 x i1> @vmov_test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
7123 ; GENERIC-LABEL: vmov_test11:
7125 ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33]
7126 ; GENERIC-NEXT: jg .LBB389_1 # sched: [1:1.00]
7127 ; GENERIC-NEXT: # %bb.2:
7128 ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00]
7129 ; GENERIC-NEXT: jmp .LBB389_3 # sched: [1:1.00]
7130 ; GENERIC-NEXT: .LBB389_1:
7131 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
7132 ; GENERIC-NEXT: .LBB389_3:
7133 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
7134 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
7135 ; GENERIC-NEXT: retq # sched: [1:1.00]
7137 ; SKX-LABEL: vmov_test11:
7139 ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25]
7140 ; SKX-NEXT: jg .LBB389_1 # sched: [1:0.50]
7141 ; SKX-NEXT: # %bb.2:
7142 ; SKX-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:0.50]
7143 ; SKX-NEXT: jmp .LBB389_3 # sched: [1:0.50]
7144 ; SKX-NEXT: .LBB389_1:
7145 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
7146 ; SKX-NEXT: .LBB389_3:
7147 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00]
7148 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
7149 ; SKX-NEXT: retq # sched: [7:1.00]
7150 %mask = icmp sgt i32 %a1, %b1
7151 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
; vmov_test12: select driven by a bit of a constant mask.
; The constant 21845 is bitcast to <16 x i1> and element 0 is used as the
; select condition between %x and %y. The expected code contains no compare
; or mask instruction; it only copies %edi to %eax.
7155 define i32 @vmov_test12(i32 %x, i32 %y) {
7156 ; GENERIC-LABEL: vmov_test12:
7158 ; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
7159 ; GENERIC-NEXT: retq # sched: [1:1.00]
7161 ; SKX-LABEL: vmov_test12:
7163 ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
7164 ; SKX-NEXT: retq # sched: [7:1.00]
7165 %a = bitcast i16 21845 to <16 x i1>
7166 %b = extractelement <16 x i1> %a, i32 0
7167 %c = select i1 %b, i32 %x, i32 %y
; vmov_test13: select driven by a bit of a constant mask.
; The constant 21845 is bitcast to <16 x i1> and element 3 is used as the
; select condition between %x and %y. The expected code only copies %esi to
; %eax.
7171 define i32 @vmov_test13(i32 %x, i32 %y) {
7172 ; GENERIC-LABEL: vmov_test13:
7174 ; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33]
7175 ; GENERIC-NEXT: retq # sched: [1:1.00]
7177 ; SKX-LABEL: vmov_test13:
7179 ; SKX-NEXT: movl %esi, %eax # sched: [1:0.25]
7180 ; SKX-NEXT: retq # sched: [7:1.00]
7181 %a = bitcast i16 21845 to <16 x i1>
7182 %b = extractelement <16 x i1> %a, i32 3
7183 %c = select i1 %b, i32 %x, i32 %y
7185 }define <4 x i1> @vmov_test14() {
; vmov_test14: takes element 2 of the constant 21845 viewed as <16 x i1> and
; inserts it at index 1 of the constant <4 x i1> <true, false, false, true>.
7186 %a = bitcast i16 21845 to <16 x i1>
7187 %b = extractelement <16 x i1> %a, i32 2
7188 %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
; vmov_test15: scalar-controlled select between two constant <16 x i1> masks.
; The masks are the i16 constants 21845 (0x5555) and 1 bitcast to <16 x i1>;
; `icmp sgt %x, %y` chooses between them. The expected code selects the
; 16-bit immediate with cmovgw, moves it into %k0, and expands it with
; vpmovm2b.
7192 define <16 x i1> @vmov_test15(i32 %x, i32 %y) {
7193 ; GENERIC-LABEL: vmov_test15:
7195 ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33]
7196 ; GENERIC-NEXT: movw $21845, %ax # imm = 0x5555
7197 ; GENERIC-NEXT: # sched: [1:0.33]
7198 ; GENERIC-NEXT: movw $1, %cx # sched: [1:0.33]
7199 ; GENERIC-NEXT: cmovgw %ax, %cx # sched: [2:0.67]
7200 ; GENERIC-NEXT: kmovd %ecx, %k0 # sched: [1:0.33]
7201 ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
7202 ; GENERIC-NEXT: retq # sched: [1:1.00]
7204 ; SKX-LABEL: vmov_test15:
7206 ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25]
7207 ; SKX-NEXT: movw $21845, %ax # imm = 0x5555
7208 ; SKX-NEXT: # sched: [1:0.25]
7209 ; SKX-NEXT: movw $1, %cx # sched: [1:0.25]
7210 ; SKX-NEXT: cmovgw %ax, %cx # sched: [1:0.50]
7211 ; SKX-NEXT: kmovd %ecx, %k0 # sched: [1:1.00]
7212 ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
7213 ; SKX-NEXT: retq # sched: [7:1.00]
7214 %a = bitcast i16 21845 to <16 x i1>
7215 %b = bitcast i16 1 to <16 x i1>
7216 %mask = icmp sgt i32 %x, %y
7217 %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
; vmov_test16: inserts a constant `true` into one lane of a 64-bit mask.
; %x is bitcast to <64 x i1>, element 5 is replaced with true, and the result
; is sign-extended to <64 x i8>. The expected code builds the insertion from
; k-register shifts and xors (kshiftrq $5, kxorq, kshiftlq $63,
; kshiftrq $58, kxorq) before vpmovm2b.
7221 define <64 x i8> @vmov_test16(i64 %x) {
7223 ; GENERIC-LABEL: vmov_test16:
7225 ; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33]
7226 ; GENERIC-NEXT: movb $1, %al # sched: [1:0.33]
7227 ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
7228 ; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00]
7229 ; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
7230 ; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00]
7231 ; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00]
7232 ; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00]
7233 ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
7234 ; GENERIC-NEXT: retq # sched: [1:1.00]
7236 ; SKX-LABEL: vmov_test16:
7238 ; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00]
7239 ; SKX-NEXT: movb $1, %al # sched: [1:0.25]
7240 ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
7241 ; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00]
7242 ; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
7243 ; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00]
7244 ; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00]
7245 ; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00]
7246 ; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
7247 ; SKX-NEXT: retq # sched: [7:1.00]
7248 %a = bitcast i64 %x to <64 x i1>
7249 %b = insertelement <64 x i1>%a, i1 true, i32 5
7250 %c = sext <64 x i1>%b to <64 x i8>
; vmov_test17: inserts a computed i1 into one lane of a 64-bit mask.
; %x is bitcast to <64 x i1>; element 5 is replaced with the result of
; `icmp sgt %y, %z`, and the mask is sign-extended to <64 x i8>. The expected
; code materializes the bit with cmpl/setg and then uses k-register shifts
; and xors for the insertion.
7254 define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) {
7256 ; GENERIC-LABEL: vmov_test17:
7258 ; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33]
7259 ; GENERIC-NEXT: cmpl %edx, %esi # sched: [1:0.33]
7260 ; GENERIC-NEXT: setg %al # sched: [1:0.50]
7261 ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
7262 ; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00]
7263 ; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
7264 ; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00]
7265 ; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00]
7266 ; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00]
7267 ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
7268 ; GENERIC-NEXT: retq # sched: [1:1.00]
7270 ; SKX-LABEL: vmov_test17:
7272 ; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00]
7273 ; SKX-NEXT: cmpl %edx, %esi # sched: [1:0.25]
7274 ; SKX-NEXT: setg %al # sched: [1:0.50]
7275 ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
7276 ; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00]
7277 ; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
7278 ; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00]
7279 ; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00]
7280 ; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00]
7281 ; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
7282 ; SKX-NEXT: retq # sched: [7:1.00]
7283 %a = bitcast i64 %x to <64 x i1>
7284 %b = icmp sgt i32 %y, %z
7285 %c = insertelement <64 x i1>%a, i1 %b, i32 5
7286 %d = sext <64 x i1>%c to <64 x i8>
; vmov_test18: moves two bits from a 16-bit mask into an 8-bit mask.
; %a is bitcast to <8 x i1> and %y to <16 x i1>. Elements 8 and 9 of the
; 16-bit mask are extracted and inserted at indices 7 and 6 of the 8-bit
; mask, in that order. The expected code is a chain of k-register shifts,
; xors and a final korb, followed by vpmovm2w.
7290 define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
7291 ; GENERIC-LABEL: vmov_test18:
7293 ; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
7294 ; GENERIC-NEXT: kmovd %esi, %k2 # sched: [1:0.33]
7295 ; GENERIC-NEXT: kshiftrw $8, %k2, %k0 # sched: [1:1.00]
7296 ; GENERIC-NEXT: kshiftrw $9, %k2, %k2 # sched: [1:1.00]
7297 ; GENERIC-NEXT: kshiftrb $6, %k1, %k3 # sched: [1:1.00]
7298 ; GENERIC-NEXT: kxorb %k2, %k3, %k2 # sched: [1:1.00]
7299 ; GENERIC-NEXT: kshiftlb $7, %k2, %k2 # sched: [1:1.00]
7300 ; GENERIC-NEXT: kshiftrb $1, %k2, %k2 # sched: [1:1.00]
7301 ; GENERIC-NEXT: kxorb %k2, %k1, %k1 # sched: [1:1.00]
7302 ; GENERIC-NEXT: kshiftlb $1, %k1, %k1 # sched: [1:1.00]
7303 ; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00]
7304 ; GENERIC-NEXT: kshiftlb $7, %k0, %k0 # sched: [1:1.00]
7305 ; GENERIC-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00]
7306 ; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
7307 ; GENERIC-NEXT: retq # sched: [1:1.00]
7309 ; SKX-LABEL: vmov_test18:
7311 ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
7312 ; SKX-NEXT: kmovd %esi, %k2 # sched: [1:1.00]
7313 ; SKX-NEXT: kshiftrw $8, %k2, %k0 # sched: [3:1.00]
7314 ; SKX-NEXT: kshiftrw $9, %k2, %k2 # sched: [3:1.00]
7315 ; SKX-NEXT: kshiftrb $6, %k1, %k3 # sched: [3:1.00]
7316 ; SKX-NEXT: kxorb %k2, %k3, %k2 # sched: [1:1.00]
7317 ; SKX-NEXT: kshiftlb $7, %k2, %k2 # sched: [3:1.00]
7318 ; SKX-NEXT: kshiftrb $1, %k2, %k2 # sched: [3:1.00]
7319 ; SKX-NEXT: kxorb %k2, %k1, %k1 # sched: [1:1.00]
7320 ; SKX-NEXT: kshiftlb $1, %k1, %k1 # sched: [3:1.00]
7321 ; SKX-NEXT: kshiftrb $1, %k1, %k1 # sched: [3:1.00]
7322 ; SKX-NEXT: kshiftlb $7, %k0, %k0 # sched: [3:1.00]
7323 ; SKX-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00]
7324 ; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
7325 ; SKX-NEXT: retq # sched: [7:1.00]
7326 %b = bitcast i8 %a to <8 x i1>
7327 %b1 = bitcast i16 %y to <16 x i1>
7328 %el1 = extractelement <16 x i1>%b1, i32 8
7329 %el2 = extractelement <16 x i1>%b1, i32 9
7330 %c = insertelement <8 x i1>%b, i1 %el1, i32 7
7331 %d = insertelement <8 x i1>%c, i1 %el2, i32 6
; vmov_test21: zero-masking register move of <32 x i16>.
; `select` keeps lanes of %x where the <32 x i1> %mask is true and zeroes the
; rest. The expected code converts the mask vector to %k1 (vpsllw $7 +
; vpmovb2m) and applies vmovdqu16 with {%k1} {z}.
7334 define <32 x i16> @vmov_test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
7335 ; GENERIC-LABEL: vmov_test21:
7337 ; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
7338 ; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33]
7339 ; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
7340 ; GENERIC-NEXT: retq # sched: [1:1.00]
7342 ; SKX-LABEL: vmov_test21:
7344 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
7345 ; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00]
7346 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
7347 ; SKX-NEXT: retq # sched: [7:1.00]
7348 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; vmov_test22: stores a <4 x i1> argument to memory.
; The expected code converts the vector to a mask (vpslld $31 + vptestmd)
; and writes it with a byte-sized kmovb store.
7352 define void @vmov_test22(<4 x i1> %a, <4 x i1>* %addr) {
7353 ; GENERIC-LABEL: vmov_test22:
7355 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
7356 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
7357 ; GENERIC-NEXT: kmovb %k0, (%rdi)
7358 ; GENERIC-NEXT: retq # sched: [1:1.00]
7360 ; SKX-LABEL: vmov_test22:
7362 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
7363 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00]
7364 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
7365 ; SKX-NEXT: retq # sched: [7:1.00]
7366 store <4 x i1> %a, <4 x i1>* %addr
; vmov_test23: stores a <2 x i1> argument to memory.
; The expected code converts the vector to a mask (vpsllq $63 + vptestmq)
; and writes it with a byte-sized kmovb store.
7370 define void @vmov_test23(<2 x i1> %a, <2 x i1>* %addr) {
7371 ; GENERIC-LABEL: vmov_test23:
7373 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
7374 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
7375 ; GENERIC-NEXT: kmovb %k0, (%rdi)
7376 ; GENERIC-NEXT: retq # sched: [1:1.00]
7378 ; SKX-LABEL: vmov_test23:
7380 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
7381 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00]
7382 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
7383 ; SKX-NEXT: retq # sched: [7:1.00]
7384 store <2 x i1> %a, <2 x i1>* %addr
; store_v1i1: inverts a <1 x i1> argument and stores it with `align 4`.
; The inversion is an xor with <i1 1>. The expected code moves the argument
; into %k0, builds all-ones with kxnorw, xors with kxorw, and stores with
; kmovb.
7388 define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
7389 ; GENERIC-LABEL: store_v1i1:
7391 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
7392 ; GENERIC-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:1.00]
7393 ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
7394 ; GENERIC-NEXT: kmovb %k0, (%rsi)
7395 ; GENERIC-NEXT: retq # sched: [1:1.00]
7397 ; SKX-LABEL: store_v1i1:
7399 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
7400 ; SKX-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:1.00]
7401 ; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
7402 ; SKX-NEXT: kmovb %k0, (%rsi) # sched: [1:1.00]
7403 ; SKX-NEXT: retq # sched: [7:1.00]
7404 %x = xor <1 x i1> %c, <i1 1>
7405 store <1 x i1> %x, <1 x i1>* %ptr, align 4
; store_v2i1: inverts a <2 x i1> argument and stores it with `align 4`.
; The inversion is an xor with an all-ones <2 x i1>. The expected code
; converts the vector to a mask (vpsllq $63 + vptestmq), inverts it with
; knotw, and stores with kmovb.
7409 define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
7410 ; GENERIC-LABEL: store_v2i1:
7412 ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
7413 ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
7414 ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
7415 ; GENERIC-NEXT: kmovb %k0, (%rdi)
7416 ; GENERIC-NEXT: retq # sched: [1:1.00]
7418 ; SKX-LABEL: store_v2i1:
7420 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
7421 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00]
7422 ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00]
7423 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
7424 ; SKX-NEXT: retq # sched: [7:1.00]
7425 %x = xor <2 x i1> %c, <i1 1, i1 1>
7426 store <2 x i1> %x, <2 x i1>* %ptr, align 4
; store_v4i1: inverts a <4 x i1> argument and stores it with `align 4`.
; The inversion is an xor with an all-ones <4 x i1>. The expected code
; converts the vector to a mask (vpslld $31 + vptestmd), inverts it with
; knotw, and stores with kmovb.
7430 define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
7431 ; GENERIC-LABEL: store_v4i1:
7433 ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
7434 ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
7435 ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
7436 ; GENERIC-NEXT: kmovb %k0, (%rdi)
7437 ; GENERIC-NEXT: retq # sched: [1:1.00]
7439 ; SKX-LABEL: store_v4i1:
7441 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
7442 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00]
7443 ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00]
7444 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
7445 ; SKX-NEXT: retq # sched: [7:1.00]
7446 %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
7447 store <4 x i1> %x, <4 x i1>* %ptr, align 4
; Inverts a <8 x i1> vector (xor with all-ones) and stores it through %ptr.
; Expected lowering: vpsllw $15 + vpmovw2m to build the mask from %xmm0,
; knotb to invert it, kmovb to write it out.
7451 define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
7452 ; GENERIC-LABEL: store_v8i1:
7454 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
7455 ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
7456 ; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00]
7457 ; GENERIC-NEXT: kmovb %k0, (%rdi)
7458 ; GENERIC-NEXT: retq # sched: [1:1.00]
7460 ; SKX-LABEL: store_v8i1:
7462 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
7463 ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00]
7464 ; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00]
7465 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
7466 ; SKX-NEXT: retq # sched: [7:1.00]
7467 %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
7468 store <8 x i1> %x, <8 x i1>* %ptr, align 4
; Inverts a <16 x i1> vector (xor with all-ones) and stores it through %ptr.
; Expected lowering: vpsllw $7 + vpmovb2m to build the mask from %xmm0,
; knotw to invert it, kmovw to write the 16-bit mask out.
7472 define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
7473 ; GENERIC-LABEL: store_v16i1:
7475 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
7476 ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
7477 ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
7478 ; GENERIC-NEXT: kmovw %k0, (%rdi)
7479 ; GENERIC-NEXT: retq # sched: [1:1.00]
7481 ; SKX-LABEL: store_v16i1:
7483 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
7484 ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00]
7485 ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00]
7486 ; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00]
7487 ; SKX-NEXT: retq # sched: [7:1.00]
7488 %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
7489 store <16 x i1> %x, <16 x i1>* %ptr, align 4
; Scalar i1 global toggled by @f1 below.
7504 @f1.v = internal unnamed_addr global i1 false, align 4
; Loads the i1 global @f1.v, flips it (xor with true), stores it back, then
; zero-extends the new value to i32 and tail-calls @f2 with it. The checks
; expect plain scalar code (movzbl / xorl $1 / movb) and a `jmp f2` tail call,
; i.e. no mask registers for a scalar i1.
7506 define void @f1(i32 %c) {
7507 ; GENERIC-LABEL: f1:
7508 ; GENERIC: # %bb.0: # %entry
7509 ; GENERIC-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50]
7510 ; GENERIC-NEXT: xorl $1, %edi # sched: [1:0.33]
7511 ; GENERIC-NEXT: movb %dil, {{.*}}(%rip) # sched: [5:1.00]
7512 ; GENERIC-NEXT: jmp f2 # TAILCALL
7515 ; SKX: # %bb.0: # %entry
7516 ; SKX-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50]
7517 ; SKX-NEXT: xorl $1, %edi # sched: [1:0.25]
7518 ; SKX-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00]
7519 ; SKX-NEXT: jmp f2 # TAILCALL
7521 %.b1 = load i1, i1* @f1.v, align 4
7522 %not..b1 = xor i1 %.b1, true
7523 store i1 %not..b1, i1* @f1.v, align 4
7524 %0 = zext i1 %not..b1 to i32
7525 tail call void @f2(i32 %0) #2
; External callee used as the tail-call target of @f1.
7529 declare void @f2(i32) #1
; Truncates the i16 argument %x to i1. The checks expect the low bit to be
; isolated with `andl $1, %edi` and written as a single byte to (%rsi).
7531 define void @store_i16_i1(i16 %x, i1 *%y) {
7532 ; GENERIC-LABEL: store_i16_i1:
7534 ; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33]
7535 ; GENERIC-NEXT: movb %dil, (%rsi) # sched: [5:1.00]
7536 ; GENERIC-NEXT: retq # sched: [1:1.00]
7538 ; SKX-LABEL: store_i16_i1:
7540 ; SKX-NEXT: andl $1, %edi # sched: [1:0.25]
7541 ; SKX-NEXT: movb %dil, (%rsi) # sched: [1:1.00]
7542 ; SKX-NEXT: retq # sched: [7:1.00]
7543 %c = trunc i16 %x to i1
; Truncates the i8 argument %x to i1. The checks expect the low bit to be
; isolated with `andl $1, %edi` and written as a single byte to (%rsi).
7548 define void @store_i8_i1(i8 %x, i1 *%y) {
7549 ; GENERIC-LABEL: store_i8_i1:
7551 ; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33]
7552 ; GENERIC-NEXT: movb %dil, (%rsi) # sched: [5:1.00]
7553 ; GENERIC-NEXT: retq # sched: [1:1.00]
7555 ; SKX-LABEL: store_i8_i1:
7557 ; SKX-NEXT: andl $1, %edi # sched: [1:0.25]
7558 ; SKX-NEXT: movb %dil, (%rsi) # sched: [1:1.00]
7559 ; SKX-NEXT: retq # sched: [7:1.00]
7560 %c = trunc i8 %x to i1
; Selects between %x and zero using a constant <32 x i1> mask. The checks
; expect the mask as the 32-bit immediate 0x59455495 (lane 0 is the least
; significant bit), moved into %k1 and applied with a zero-masking vmovdqu16.
7565 define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
7566 ; GENERIC-LABEL: test_build_vec_v32i1:
7568 ; GENERIC-NEXT: movl $1497715861, %eax # imm = 0x59455495
7569 ; GENERIC-NEXT: # sched: [1:0.33]
7570 ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
7571 ; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
7572 ; GENERIC-NEXT: retq # sched: [1:1.00]
7574 ; SKX-LABEL: test_build_vec_v32i1:
7576 ; SKX-NEXT: movl $1497715861, %eax # imm = 0x59455495
7577 ; SKX-NEXT: # sched: [1:0.25]
7578 ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
7579 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
7580 ; SKX-NEXT: retq # sched: [7:1.00]
7581 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
; Selects between %x and zero using a constant <64 x i1> mask. Unlike the
; 32-lane variant, the checks expect no mask register at all: the select
; becomes a single vpshufb that keeps the selected bytes in place and zeroes
; the others.
7585 define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
7586 ; GENERIC-LABEL: test_build_vec_v64i1:
7588 ; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [5:1.00]
7589 ; GENERIC-NEXT: retq # sched: [1:1.00]
7591 ; SKX-LABEL: test_build_vec_v64i1:
7593 ; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:1.00]
7594 ; SKX-NEXT: retq # sched: [7:1.00]
7595 %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
; Branches on whether an 8-lane compare mask is all zero.
; Two unaligned <8 x double> vectors are loaded from %base and %base + 1
; element. %sel1 (in > val1) selects val2-or-zero, %sel2 compares %in against
; that result, and the AND of both masks is bitcast to i8 and tested against 0.
; The checks expect the AND to be folded into the second compare via {%k1}
; write-masking, followed by kortestb + je; each successor stores %in through
; one of the two vector pointers.
7599 define void @ktest_1(<8 x double> %in, double * %base) {
7600 ; GENERIC-LABEL: ktest_1:
7602 ; GENERIC-NEXT: vmovupd (%rdi), %zmm1 # sched: [4:0.50]
7603 ; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
7604 ; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [4:0.50]
7605 ; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
7606 ; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:1.00]
7607 ; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00]
7608 ; GENERIC-NEXT: # %bb.1: # %L1
7609 ; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00]
7610 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
7611 ; GENERIC-NEXT: retq # sched: [1:1.00]
7612 ; GENERIC-NEXT: .LBB410_2: # %L2
7613 ; GENERIC-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00]
7614 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
7615 ; GENERIC-NEXT: retq # sched: [1:1.00]
7617 ; SKX-LABEL: ktest_1:
7619 ; SKX-NEXT: vmovupd (%rdi), %zmm1 # sched: [8:0.50]
7620 ; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
7621 ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50]
7622 ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
7623 ; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00]
7624 ; SKX-NEXT: je .LBB410_2 # sched: [1:0.50]
7625 ; SKX-NEXT: # %bb.1: # %L1
7626 ; SKX-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00]
7627 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
7628 ; SKX-NEXT: retq # sched: [7:1.00]
7629 ; SKX-NEXT: .LBB410_2: # %L2
7630 ; SKX-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00]
7631 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
7632 ; SKX-NEXT: retq # sched: [7:1.00]
7633 %addr1 = getelementptr double, double * %base, i64 0
7634 %addr2 = getelementptr double, double * %base, i64 1
7636 %vaddr1 = bitcast double* %addr1 to <8 x double>*
7637 %vaddr2 = bitcast double* %addr2 to <8 x double>*
7639 %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
7640 %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1
7642 %sel1 = fcmp ogt <8 x double>%in, %val1
7643 %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
7644 %sel2 = fcmp olt <8 x double> %in, %val3
7645 %sel3 = and <8 x i1> %sel1, %sel2
7647 %int_sel3 = bitcast <8 x i1> %sel3 to i8
7648 %res = icmp eq i8 %int_sel3, zeroinitializer
7649 br i1 %res, label %L2, label %L1
7651 store <8 x double> %in, <8 x double>* %vaddr1
7654 store <8 x double> %in, <8 x double>* %vaddr2
; 32-lane counterpart of the 8-lane ktest: branches on whether a <32 x i1>
; mask is all zero. The <32 x float> operands are split across two zmm
; registers, so the checks expect each 16-lane half to be compared separately
; and the halves joined with kunpckwd. Here the two masks are combined with
; OR (not AND), and the checks expect the OR plus zero test to be done by a
; single `kortestd %k1, %k0` before the je.
7660 define void @ktest_2(<32 x float> %in, float * %base) {
7662 ; GENERIC-LABEL: ktest_2:
7664 ; GENERIC-NEXT: vmovups (%rdi), %zmm2 # sched: [4:0.50]
7665 ; GENERIC-NEXT: vmovups 64(%rdi), %zmm3 # sched: [4:0.50]
7666 ; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
7667 ; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
7668 ; GENERIC-NEXT: kunpckwd %k1, %k2, %k0 # sched: [1:1.00]
7669 ; GENERIC-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [4:0.50]
7670 ; GENERIC-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [4:0.50]
7671 ; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
7672 ; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
7673 ; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
7674 ; GENERIC-NEXT: kortestd %k1, %k0 # sched: [1:1.00]
7675 ; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00]
7676 ; GENERIC-NEXT: # %bb.1: # %L1
7677 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
7678 ; GENERIC-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00]
7679 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
7680 ; GENERIC-NEXT: retq # sched: [1:1.00]
7681 ; GENERIC-NEXT: .LBB411_2: # %L2
7682 ; GENERIC-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00]
7683 ; GENERIC-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00]
7684 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
7685 ; GENERIC-NEXT: retq # sched: [1:1.00]
7687 ; SKX-LABEL: ktest_2:
7689 ; SKX-NEXT: vmovups (%rdi), %zmm2 # sched: [8:0.50]
7690 ; SKX-NEXT: vmovups 64(%rdi), %zmm3 # sched: [8:0.50]
7691 ; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
7692 ; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
7693 ; SKX-NEXT: kunpckwd %k1, %k2, %k0 # sched: [3:1.00]
7694 ; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [8:0.50]
7695 ; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [8:0.50]
7696 ; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
7697 ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
7698 ; SKX-NEXT: kunpckwd %k1, %k2, %k1 # sched: [3:1.00]
7699 ; SKX-NEXT: kortestd %k1, %k0 # sched: [3:1.00]
7700 ; SKX-NEXT: je .LBB411_2 # sched: [1:0.50]
7701 ; SKX-NEXT: # %bb.1: # %L1
7702 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
7703 ; SKX-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00]
7704 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
7705 ; SKX-NEXT: retq # sched: [7:1.00]
7706 ; SKX-NEXT: .LBB411_2: # %L2
7707 ; SKX-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00]
7708 ; SKX-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00]
7709 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
7710 ; SKX-NEXT: retq # sched: [7:1.00]
7711 %addr1 = getelementptr float, float * %base, i64 0
7712 %addr2 = getelementptr float, float * %base, i64 1
7714 %vaddr1 = bitcast float* %addr1 to <32 x float>*
7715 %vaddr2 = bitcast float* %addr2 to <32 x float>*
7717 %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
7718 %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1
7720 %sel1 = fcmp ogt <32 x float>%in, %val1
7721 %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
7722 %sel2 = fcmp olt <32 x float> %in, %val3
7723 %sel3 = or <32 x i1> %sel1, %sel2
7725 %int_sel3 = bitcast <32 x i1> %sel3 to i32
7726 %res = icmp eq i32 %int_sel3, zeroinitializer
7727 br i1 %res, label %L2, label %L1
7729 store <32 x float> %in, <32 x float>* %vaddr1
7732 store <32 x float> %in, <32 x float>* %vaddr2
; Loads a <8 x i1> mask from memory and sign-extends it to <8 x i64>.
; Expected lowering: kmovb from (%rdi) into %k0, then vpmovm2q into %zmm0.
7738 define <8 x i64> @load_8i1(<8 x i1>* %a) {
7739 ; GENERIC-LABEL: load_8i1:
7741 ; GENERIC-NEXT: kmovb (%rdi), %k0
7742 ; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33]
7743 ; GENERIC-NEXT: retq # sched: [1:1.00]
7745 ; SKX-LABEL: load_8i1:
7747 ; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00]
7748 ; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25]
7749 ; SKX-NEXT: retq # sched: [7:1.00]
7750 %b = load <8 x i1>, <8 x i1>* %a
7751 %c = sext <8 x i1> %b to <8 x i64>
; Loads a <16 x i1> mask from memory and sign-extends it to <16 x i32>.
; Expected lowering: kmovw from (%rdi) into %k0, then vpmovm2d into %zmm0.
7755 define <16 x i32> @load_16i1(<16 x i1>* %a) {
7756 ; GENERIC-LABEL: load_16i1:
7758 ; GENERIC-NEXT: kmovw (%rdi), %k0
7759 ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
7760 ; GENERIC-NEXT: retq # sched: [1:1.00]
7762 ; SKX-LABEL: load_16i1:
7764 ; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00]
7765 ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
7766 ; SKX-NEXT: retq # sched: [7:1.00]
7767 %b = load <16 x i1>, <16 x i1>* %a
7768 %c = sext <16 x i1> %b to <16 x i32>
; Loads a <2 x i1> mask from memory and sign-extends it to <2 x i16>.
; Expected lowering: a byte-sized kmovb load, then vpmovm2q into %xmm0
; (the checks show the result materialized with 64-bit elements).
7772 define <2 x i16> @load_2i1(<2 x i1>* %a) {
7773 ; GENERIC-LABEL: load_2i1:
7775 ; GENERIC-NEXT: kmovb (%rdi), %k0
7776 ; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
7777 ; GENERIC-NEXT: retq # sched: [1:1.00]
7779 ; SKX-LABEL: load_2i1:
7781 ; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00]
7782 ; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
7783 ; SKX-NEXT: retq # sched: [7:1.00]
7784 %b = load <2 x i1>, <2 x i1>* %a
7785 %c = sext <2 x i1> %b to <2 x i16>
; Loads a <4 x i1> mask from memory and sign-extends it to <4 x i16>.
; Expected lowering: a byte-sized kmovb load, then vpmovm2d into %xmm0
; (the checks show the result materialized with 32-bit elements).
7789 define <4 x i16> @load_4i1(<4 x i1>* %a) {
7790 ; GENERIC-LABEL: load_4i1:
7792 ; GENERIC-NEXT: kmovb (%rdi), %k0
7793 ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
7794 ; GENERIC-NEXT: retq # sched: [1:1.00]
7796 ; SKX-LABEL: load_4i1:
7798 ; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00]
7799 ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
7800 ; SKX-NEXT: retq # sched: [7:1.00]
7801 %b = load <4 x i1>, <4 x i1>* %a
7802 %c = sext <4 x i1> %b to <4 x i16>
; Loads a <32 x i1> mask from memory and sign-extends it to <32 x i16>.
; Expected lowering: kmovd from (%rdi) into %k0, then vpmovm2w into %zmm0.
7806 define <32 x i16> @load_32i1(<32 x i1>* %a) {
7807 ; GENERIC-LABEL: load_32i1:
7809 ; GENERIC-NEXT: kmovd (%rdi), %k0
7810 ; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33]
7811 ; GENERIC-NEXT: retq # sched: [1:1.00]
7813 ; SKX-LABEL: load_32i1:
7815 ; SKX-NEXT: kmovd (%rdi), %k0 # sched: [7:1.00]
7816 ; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25]
7817 ; SKX-NEXT: retq # sched: [7:1.00]
7818 %b = load <32 x i1>, <32 x i1>* %a
7819 %c = sext <32 x i1> %b to <32 x i16>
; Loads a <64 x i1> mask from memory and sign-extends it to <64 x i8>.
; Expected lowering: kmovq from (%rdi) into %k0, then vpmovm2b into %zmm0.
7823 define <64 x i8> @load_64i1(<64 x i1>* %a) {
7824 ; GENERIC-LABEL: load_64i1:
7826 ; GENERIC-NEXT: kmovq (%rdi), %k0
7827 ; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
7828 ; GENERIC-NEXT: retq # sched: [1:1.00]
7830 ; SKX-LABEL: load_64i1:
7832 ; SKX-NEXT: kmovq (%rdi), %k0 # sched: [7:1.00]
7833 ; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
7834 ; SKX-NEXT: retq # sched: [7:1.00]
7835 %b = load <64 x i1>, <64 x i1>* %a
7836 %c = sext <64 x i1> %b to <64 x i8>
; Stores a <8 x i1> argument (passed in %xmm0) to memory unchanged.
; Expected lowering: vpsllw $15 + vpmovw2m to form the mask, kmovb to store.
7840 define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
7841 ; GENERIC-LABEL: store_8i1:
7843 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
7844 ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
7845 ; GENERIC-NEXT: kmovb %k0, (%rdi)
7846 ; GENERIC-NEXT: retq # sched: [1:1.00]
7848 ; SKX-LABEL: store_8i1:
7850 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
7851 ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00]
7852 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
7853 ; SKX-NEXT: retq # sched: [7:1.00]
7854 store <8 x i1> %v, <8 x i1>* %a
; Truncates a <8 x i16> vector to <8 x i1> and stores the mask.
; The checks expect the same sequence as storing a <8 x i1> argument directly:
; vpsllw $15, vpmovw2m, kmovb.
7858 define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
7859 ; GENERIC-LABEL: store_8i1_1:
7861 ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
7862 ; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
7863 ; GENERIC-NEXT: kmovb %k0, (%rdi)
7864 ; GENERIC-NEXT: retq # sched: [1:1.00]
7866 ; SKX-LABEL: store_8i1_1:
7868 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
7869 ; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00]
7870 ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
7871 ; SKX-NEXT: retq # sched: [7:1.00]
7872 %v1 = trunc <8 x i16> %v to <8 x i1>
7873 store <8 x i1> %v1, <8 x i1>* %a
; Stores a <16 x i1> argument (passed in %xmm0) to memory unchanged.
; Expected lowering: vpsllw $7 + vpmovb2m to form the mask, kmovw to store.
7877 define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
7878 ; GENERIC-LABEL: store_16i1:
7880 ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
7881 ; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
7882 ; GENERIC-NEXT: kmovw %k0, (%rdi)
7883 ; GENERIC-NEXT: retq # sched: [1:1.00]
7885 ; SKX-LABEL: store_16i1:
7887 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
7888 ; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00]
7889 ; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00]
7890 ; SKX-NEXT: retq # sched: [7:1.00]
7891 store <16 x i1> %v, <16 x i1>* %a
; Stores a <32 x i1> argument (passed in %ymm0) to memory unchanged.
; Expected lowering: vpsllw $7 + vpmovb2m on %ymm0, kmovd to store, and a
; vzeroupper before returning.
7895 define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
7896 ; GENERIC-LABEL: store_32i1:
7898 ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
7899 ; GENERIC-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:0.33]
7900 ; GENERIC-NEXT: kmovd %k0, (%rdi)
7901 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
7902 ; GENERIC-NEXT: retq # sched: [1:1.00]
7904 ; SKX-LABEL: store_32i1:
7906 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
7907 ; SKX-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:1.00]
7908 ; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00]
7909 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
7910 ; SKX-NEXT: retq # sched: [7:1.00]
7911 store <32 x i1> %v, <32 x i1>* %a
; Truncates a <32 x i16> vector to <32 x i1> and stores the mask.
; Expected lowering: vpsllw $15 + vpmovw2m on %zmm0, kmovd to store, and a
; vzeroupper before returning.
7915 define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
7916 ; GENERIC-LABEL: store_32i1_1:
7918 ; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [3:1.00]
7919 ; GENERIC-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:0.33]
7920 ; GENERIC-NEXT: kmovd %k0, (%rdi)
7921 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
7922 ; GENERIC-NEXT: retq # sched: [1:1.00]
7924 ; SKX-LABEL: store_32i1_1:
7926 ; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:0.50]
7927 ; SKX-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:1.00]
7928 ; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00]
7929 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
7930 ; SKX-NEXT: retq # sched: [7:1.00]
7931 %v1 = trunc <32 x i16> %v to <32 x i1>
7932 store <32 x i1> %v1, <32 x i1>* %a
; Stores a <64 x i1> argument (passed in %zmm0) to memory unchanged.
; Expected lowering: vpsllw $7 + vpmovb2m on %zmm0, kmovq to store the 64-bit
; mask, and a vzeroupper before returning.
7937 define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
7939 ; GENERIC-LABEL: store_64i1:
7941 ; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [3:1.00]
7942 ; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
7943 ; GENERIC-NEXT: kmovq %k0, (%rdi)
7944 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
7945 ; GENERIC-NEXT: retq # sched: [1:1.00]
7947 ; SKX-LABEL: store_64i1:
7949 ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:0.50]
7950 ; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00]
7951 ; SKX-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00]
7952 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
7953 ; SKX-NEXT: retq # sched: [7:1.00]
7954 store <64 x i1> %v, <64 x i1>* %a
; Compares <16 x i32> %a against zero, keeps the low 8 lanes of the resulting
; mask (shufflevector 0..7), bitcasts them to i8, zero-extends to i32 and
; doubles the value. The checks expect vptestnmd for the compare and a
; `kmovb %k0, %eax` for the move to a GPR, with no separate zero-extension
; instruction before the addl.
7958 define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
7959 ; GENERIC-LABEL: test_bitcast_v8i1_zext:
7961 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:1.00]
7962 ; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33]
7963 ; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33]
7964 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
7965 ; GENERIC-NEXT: retq # sched: [1:1.00]
7967 ; SKX-LABEL: test_bitcast_v8i1_zext:
7969 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00]
7970 ; SKX-NEXT: kmovb %k0, %eax # sched: [3:1.00]
7971 ; SKX-NEXT: addl %eax, %eax # sched: [1:0.25]
7972 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
7973 ; SKX-NEXT: retq # sched: [7:1.00]
7974 %v1 = icmp eq <16 x i32> %a, zeroinitializer
7975 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7976 %mask1 = bitcast <8 x i1> %mask to i8
7977 %val = zext i8 %mask1 to i32
7978 %val1 = add i32 %val, %val
; Compares <16 x i32> %a against zero, bitcasts the full <16 x i1> mask to
; i16, zero-extends to i32 and doubles the value. The checks expect
; vptestnmd for the compare and a `kmovw %k0, %eax` for the move to a GPR,
; with no separate zero-extension instruction before the addl.
7982 define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
7983 ; GENERIC-LABEL: test_bitcast_v16i1_zext:
7985 ; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:1.00]
7986 ; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33]
7987 ; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33]
7988 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
7989 ; GENERIC-NEXT: retq # sched: [1:1.00]
7991 ; SKX-LABEL: test_bitcast_v16i1_zext:
7993 ; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00]
7994 ; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00]
7995 ; SKX-NEXT: addl %eax, %eax # sched: [1:0.25]
7996 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
7997 ; SKX-NEXT: retq # sched: [7:1.00]
7998 %v1 = icmp eq <16 x i32> %a, zeroinitializer
7999 %mask1 = bitcast <16 x i1> %v1 to i16
8000 %val = zext i16 %mask1 to i32
8001 %val1 = add i32 %val, %val
; Adds two <16 x i1> vectors obtained by bitcasting the i16 arguments.
; The checks expect the 1-bit lane-wise add to be lowered to kxorw.
8005 define i16 @test_v16i1_add(i16 %x, i16 %y) {
8006 ; GENERIC-LABEL: test_v16i1_add:
8008 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
8009 ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
8010 ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
8011 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
8012 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
8013 ; GENERIC-NEXT: retq # sched: [1:1.00]
8015 ; SKX-LABEL: test_v16i1_add:
8017 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
8018 ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
8019 ; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
8020 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
8021 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
8022 ; SKX-NEXT: retq # sched: [7:1.00]
8023 %m0 = bitcast i16 %x to <16 x i1>
8024 %m1 = bitcast i16 %y to <16 x i1>
8025 %m2 = add <16 x i1> %m0, %m1
8026 %ret = bitcast <16 x i1> %m2 to i16
; Subtracts two <16 x i1> vectors obtained by bitcasting the i16 arguments.
; The checks expect the 1-bit lane-wise sub to be lowered to kxorw, the same
; instruction as the add variant.
8030 define i16 @test_v16i1_sub(i16 %x, i16 %y) {
8031 ; GENERIC-LABEL: test_v16i1_sub:
8033 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
8034 ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
8035 ; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
8036 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
8037 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
8038 ; GENERIC-NEXT: retq # sched: [1:1.00]
8040 ; SKX-LABEL: test_v16i1_sub:
8042 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
8043 ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
8044 ; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
8045 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
8046 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
8047 ; SKX-NEXT: retq # sched: [7:1.00]
8048 %m0 = bitcast i16 %x to <16 x i1>
8049 %m1 = bitcast i16 %y to <16 x i1>
8050 %m2 = sub <16 x i1> %m0, %m1
8051 %ret = bitcast <16 x i1> %m2 to i16
; Multiplies two <16 x i1> vectors obtained by bitcasting the i16 arguments.
; The checks expect the 1-bit lane-wise mul to be lowered to kandw.
8055 define i16 @test_v16i1_mul(i16 %x, i16 %y) {
8056 ; GENERIC-LABEL: test_v16i1_mul:
8058 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
8059 ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
8060 ; GENERIC-NEXT: kandw %k1, %k0, %k0 # sched: [1:1.00]
8061 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
8062 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
8063 ; GENERIC-NEXT: retq # sched: [1:1.00]
8065 ; SKX-LABEL: test_v16i1_mul:
8067 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
8068 ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
8069 ; SKX-NEXT: kandw %k1, %k0, %k0 # sched: [1:1.00]
8070 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
8071 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax
8072 ; SKX-NEXT: retq # sched: [7:1.00]
8073 %m0 = bitcast i16 %x to <16 x i1>
8074 %m1 = bitcast i16 %y to <16 x i1>
8075 %m2 = mul <16 x i1> %m0, %m1
8076 %ret = bitcast <16 x i1> %m2 to i16
; Adds two <8 x i1> vectors obtained by bitcasting the i8 arguments.
; The checks expect the 1-bit lane-wise add to be lowered to kxorb.
8080 define i8 @test_v8i1_add(i8 %x, i8 %y) {
8081 ; GENERIC-LABEL: test_v8i1_add:
8083 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
8084 ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
8085 ; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00]
8086 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
8087 ; GENERIC-NEXT: # kill: def $al killed $al killed $eax
8088 ; GENERIC-NEXT: retq # sched: [1:1.00]
8090 ; SKX-LABEL: test_v8i1_add:
8092 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
8093 ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
8094 ; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00]
8095 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
8096 ; SKX-NEXT: # kill: def $al killed $al killed $eax
8097 ; SKX-NEXT: retq # sched: [7:1.00]
8098 %m0 = bitcast i8 %x to <8 x i1>
8099 %m1 = bitcast i8 %y to <8 x i1>
8100 %m2 = add <8 x i1> %m0, %m1
8101 %ret = bitcast <8 x i1> %m2 to i8
; Subtracts two <8 x i1> vectors obtained by bitcasting the i8 arguments.
; The checks expect the 1-bit lane-wise sub to be lowered to kxorb, the same
; instruction as the add variant.
8105 define i8 @test_v8i1_sub(i8 %x, i8 %y) {
8106 ; GENERIC-LABEL: test_v8i1_sub:
8108 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
8109 ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
8110 ; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00]
8111 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
8112 ; GENERIC-NEXT: # kill: def $al killed $al killed $eax
8113 ; GENERIC-NEXT: retq # sched: [1:1.00]
8115 ; SKX-LABEL: test_v8i1_sub:
8117 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
8118 ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
8119 ; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00]
8120 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
8121 ; SKX-NEXT: # kill: def $al killed $al killed $eax
8122 ; SKX-NEXT: retq # sched: [7:1.00]
8123 %m0 = bitcast i8 %x to <8 x i1>
8124 %m1 = bitcast i8 %y to <8 x i1>
8125 %m2 = sub <8 x i1> %m0, %m1
8126 %ret = bitcast <8 x i1> %m2 to i8
; Multiplies two <8 x i1> vectors obtained by bitcasting the i8 arguments.
; The checks expect the 1-bit lane-wise mul to be lowered to kandb.
8130 define i8 @test_v8i1_mul(i8 %x, i8 %y) {
8131 ; GENERIC-LABEL: test_v8i1_mul:
8133 ; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
8134 ; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
8135 ; GENERIC-NEXT: kandb %k1, %k0, %k0 # sched: [1:1.00]
8136 ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
8137 ; GENERIC-NEXT: # kill: def $al killed $al killed $eax
8138 ; GENERIC-NEXT: retq # sched: [1:1.00]
8140 ; SKX-LABEL: test_v8i1_mul:
8142 ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
8143 ; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
8144 ; SKX-NEXT: kandb %k1, %k0, %k0 # sched: [1:1.00]
8145 ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
8146 ; SKX-NEXT: # kill: def $al killed $al killed $eax
8147 ; SKX-NEXT: retq # sched: [7:1.00]
8148 %m0 = bitcast i8 %x to <8 x i1>
8149 %m1 = bitcast i8 %y to <8 x i1>
8150 %m2 = mul <8 x i1> %m0, %m1
8151 %ret = bitcast <8 x i1> %m2 to i8
; Splats the scalar i32 %a across all 16 lanes (insertelement into lane 0,
; then an all-zero shuffle mask). The checks expect a single vpbroadcastd
; taken directly from the GPR %edi.
8155 define <16 x i32> @_inreg16xi32(i32 %a) {
8156 ; GENERIC-LABEL: _inreg16xi32:
8158 ; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00]
8159 ; GENERIC-NEXT: retq # sched: [1:1.00]
8161 ; SKX-LABEL: _inreg16xi32:
8163 ; SKX-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00]
8164 ; SKX-NEXT: retq # sched: [7:1.00]
8165 %b = insertelement <16 x i32> undef, i32 %a, i32 0
8166 %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
; Splats the scalar i64 %a across all 8 lanes (insertelement into lane 0,
; then an all-zero shuffle mask). The checks expect a single vpbroadcastq
; taken directly from the GPR %rdi.
8170 define <8 x i64> @_inreg8xi64(i64 %a) {
8171 ; GENERIC-LABEL: _inreg8xi64:
8173 ; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00]
8174 ; GENERIC-NEXT: retq # sched: [1:1.00]
8176 ; SKX-LABEL: _inreg8xi64:
8178 ; SKX-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00]
8179 ; SKX-NEXT: retq # sched: [7:1.00]
8180 %b = insertelement <8 x i64> undef, i64 %a, i32 0
8181 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
; Broadcasts lane 0 of a <4 x float> vector to all 16 lanes using a single
; shufflevector with an all-zero mask. The checks expect one vbroadcastss
; from %xmm0 to %zmm0.
8185 define <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
8186 ; GENERIC-LABEL: _ss16xfloat_v4:
8188 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8189 ; GENERIC-NEXT: retq # sched: [1:1.00]
8191 ; SKX-LABEL: _ss16xfloat_v4:
8193 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8194 ; SKX-NEXT: retq # sched: [7:1.00]
8195 %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer
; Splats the scalar float %a across all 16 lanes (insertelement into lane 0,
; then an all-zero shuffle mask). The checks expect one vbroadcastss from
; %xmm0 to %zmm0.
8199 define <16 x float> @_inreg16xfloat(float %a) {
8200 ; GENERIC-LABEL: _inreg16xfloat:
8202 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8203 ; GENERIC-NEXT: retq # sched: [1:1.00]
8205 ; SKX-LABEL: _inreg16xfloat:
8207 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8208 ; SKX-NEXT: retq # sched: [7:1.00]
8209 %b = insertelement <16 x float> undef, float %a, i32 0
8210 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
; Merge-masked splat: lanes where %mask1 is non-zero take the broadcast of
; %a, the remaining lanes keep the value from %i. The checks expect vptestmd
; to build %k1, a write-masked vbroadcastss into the register holding %i
; (%zmm1), and a vmovaps to move the result into %zmm0.
8214 define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
8215 ; GENERIC-LABEL: _ss16xfloat_mask:
8217 ; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
8218 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [1:1.00]
8219 ; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
8220 ; GENERIC-NEXT: retq # sched: [1:1.00]
8222 ; SKX-LABEL: _ss16xfloat_mask:
8224 ; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
8225 ; SKX-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [3:1.00]
8226 ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
8227 ; SKX-NEXT: retq # sched: [7:1.00]
8228 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
8229 %b = insertelement <16 x float> undef, float %a, i32 0
8230 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
8231 %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
; Zero-masked splat: lanes where %mask1 is non-zero take the broadcast of %a,
; the remaining lanes are zero. The checks expect vptestmd to build %k1 and a
; single vbroadcastss with {%k1} {z}.
8235 define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
8236 ; GENERIC-LABEL: _ss16xfloat_maskz:
8238 ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
8239 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
8240 ; GENERIC-NEXT: retq # sched: [1:1.00]
8242 ; SKX-LABEL: _ss16xfloat_maskz:
8244 ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
8245 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
8246 ; SKX-NEXT: retq # sched: [7:1.00]
8247 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
8248 %b = insertelement <16 x float> undef, float %a, i32 0
8249 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
8250 %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
; Loads a scalar float from %a.ptr and splats it across all 16 lanes.
; The checks expect the load to be folded into vbroadcastss as a memory
; operand.
8254 define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
8255 ; GENERIC-LABEL: _ss16xfloat_load:
8257 ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [5:1.00]
8258 ; GENERIC-NEXT: retq # sched: [1:1.00]
8260 ; SKX-LABEL: _ss16xfloat_load:
8262 ; SKX-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:0.50]
8263 ; SKX-NEXT: retq # sched: [7:1.00]
8264 %a = load float, float* %a.ptr
8265 %b = insertelement <16 x float> undef, float %a, i32 0
8266 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
; Merge-masked splat of a float loaded from %a.ptr: lanes where %mask1 is
; non-zero take the broadcast value, the remaining lanes keep %i. The checks
; expect vptestmd to build %k1 and a write-masked vbroadcastss with the load
; folded in, writing directly over %i in %zmm0.
8270 define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
8271 ; GENERIC-LABEL: _ss16xfloat_mask_load:
8273 ; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
8274 ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [5:1.00]
8275 ; GENERIC-NEXT: retq # sched: [1:1.00]
8277 ; SKX-LABEL: _ss16xfloat_mask_load:
8279 ; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
8280 ; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:0.50]
8281 ; SKX-NEXT: retq # sched: [7:1.00]
8282 %a = load float, float* %a.ptr
8283 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
8284 %b = insertelement <16 x float> undef, float %a, i32 0
8285 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
8286 %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
; Zero-masked splat of a float loaded from %a.ptr: lanes where %mask1 is
; non-zero take the broadcast value, the remaining lanes are zero. The checks
; expect vptestmd to build %k1 and a vbroadcastss with {%k1} {z} and the load
; folded in.
8290 define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
8291 ; GENERIC-LABEL: _ss16xfloat_maskz_load:
8293 ; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
8294 ; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
8295 ; GENERIC-NEXT: retq # sched: [1:1.00]
8297 ; SKX-LABEL: _ss16xfloat_maskz_load:
8299 ; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
8300 ; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
8301 ; SKX-NEXT: retq # sched: [7:1.00]
8302 %a = load float, float* %a.ptr
8303 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
8304 %b = insertelement <16 x float> undef, float %a, i32 0
8305 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
8306 %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
; Replicates the scalar double argument %a into every lane of an
; <8 x double>. The checked assembly is a register-to-register vbroadcastsd
; from %xmm0 into %zmm0.
8310 define <8 x double> @_inreg8xdouble(double %a) {
8311 ; GENERIC-LABEL: _inreg8xdouble:
8313 ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
8314 ; GENERIC-NEXT: retq # sched: [1:1.00]
8316 ; SKX-LABEL: _inreg8xdouble:
8318 ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
8319 ; SKX-NEXT: retq # sched: [7:1.00]
8320 %b = insertelement <8 x double> undef, double %a, i32 0
8321 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
; Merge-masked register broadcast of the double argument %a: lanes where
; %mask1 is non-zero take %a, the remaining lanes keep %i.
; The checked assembly builds %k1 with vptestmd on the ymm mask, broadcasts
; into %zmm1 under {%k1}, and then copies %zmm1 to %zmm0 with vmovapd.
8325 define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
8326 ; GENERIC-LABEL: _sd8xdouble_mask:
8328 ; GENERIC-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
8329 ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [1:1.00]
8330 ; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
8331 ; GENERIC-NEXT: retq # sched: [1:1.00]
8333 ; SKX-LABEL: _sd8xdouble_mask:
8335 ; SKX-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
8336 ; SKX-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [3:1.00]
8337 ; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
8338 ; SKX-NEXT: retq # sched: [7:1.00]
8339 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
8340 %b = insertelement <8 x double> undef, double %a, i32 0
8341 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
8342 %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
; Zero-masked register broadcast of the double argument %a: lanes where
; %mask1 is non-zero take %a, the remaining lanes are zero.
; The checked assembly builds %k1 with vptestmd and broadcasts under
; {%k1} {z}.
8346 define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
8347 ; GENERIC-LABEL: _sd8xdouble_maskz:
8349 ; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
8350 ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
8351 ; GENERIC-NEXT: retq # sched: [1:1.00]
8353 ; SKX-LABEL: _sd8xdouble_maskz:
8355 ; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
8356 ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
8357 ; SKX-NEXT: retq # sched: [7:1.00]
8358 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
8359 %b = insertelement <8 x double> undef, double %a, i32 0
8360 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
8361 %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
; Replicates a double loaded from %a.ptr into every lane of an <8 x double>.
; The checked assembly is a single vbroadcastsd with a memory source operand.
8365 define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
8366 ; GENERIC-LABEL: _sd8xdouble_load:
8368 ; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [5:1.00]
8369 ; GENERIC-NEXT: retq # sched: [1:1.00]
8371 ; SKX-LABEL: _sd8xdouble_load:
8373 ; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:0.50]
8374 ; SKX-NEXT: retq # sched: [7:1.00]
8375 %a = load double, double* %a.ptr
8376 %b = insertelement <8 x double> undef, double %a, i32 0
8377 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
; Merge-masked variant of the double load broadcast: lanes where %mask1 is
; non-zero take the replicated loaded value, the remaining lanes keep %i.
; The checked assembly forms %k1 with vptestmd and then issues a
; vbroadcastsd from memory under {%k1}.
8381 define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
8382 ; GENERIC-LABEL: _sd8xdouble_mask_load:
8384 ; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
8385 ; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [5:1.00]
8386 ; GENERIC-NEXT: retq # sched: [1:1.00]
8388 ; SKX-LABEL: _sd8xdouble_mask_load:
8390 ; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
8391 ; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
8392 ; SKX-NEXT: retq # sched: [7:1.00]
8393 %a = load double, double* %a.ptr
8394 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
8395 %b = insertelement <8 x double> undef, double %a, i32 0
8396 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
8397 %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
; Zero-masked variant of the double load broadcast: lanes where %mask1 is
; non-zero take the replicated loaded value, the remaining lanes are zero.
; The checked assembly forms %k1 with vptestmd and then issues a
; vbroadcastsd from memory under {%k1} {z}.
8401 define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
8402 ; GENERIC-LABEL: _sd8xdouble_maskz_load:
8404 ; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
8405 ; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
8406 ; GENERIC-NEXT: retq # sched: [1:1.00]
8408 ; SKX-LABEL: _sd8xdouble_maskz_load:
8410 ; SKX-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
8411 ; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
8412 ; SKX-NEXT: retq # sched: [7:1.00]
8413 %a = load double, double* %a.ptr
8414 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
8415 %b = insertelement <8 x double> undef, double %a, i32 0
8416 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
8417 %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
; Replicates element 0 of the <16 x i32> argument into all 16 lanes via a
; shuffle with an all-zero mask. The checked instruction for this integer
; vector is vbroadcastss from %xmm0.
8421 define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
8422 ; GENERIC-LABEL: _xmm16xi32:
8424 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8425 ; GENERIC-NEXT: retq # sched: [1:1.00]
8427 ; SKX-LABEL: _xmm16xi32:
8429 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8430 ; SKX-NEXT: retq # sched: [7:1.00]
8431 %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
; Replicates element 0 of the <16 x float> argument into all 16 lanes via a
; shuffle with an all-zero mask. The checked instruction is vbroadcastss
; from %xmm0.
8435 define <16 x float> @_xmm16xfloat(<16 x float> %a) {
8436 ; GENERIC-LABEL: _xmm16xfloat:
8438 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8439 ; GENERIC-NEXT: retq # sched: [1:1.00]
8441 ; SKX-LABEL: _xmm16xfloat:
8443 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8444 ; SKX-NEXT: retq # sched: [7:1.00]
8445 %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
; Argument-less test built from constants and undef: an unordered fcmp of an
; undef <16 x float> against zero yields a <16 x i1> mask, which selects
; between a sign-extended all-false vector (%0) and its own sign extension
; (%2).
; The checked sequence is vxorps, vcmpunordps, vpmovm2d, knotw, and a
; zero-masked vmovdqa32; note that it contains no broadcast instruction.
8449 define <16 x i32> @test_vbroadcast() {
8450 ; GENERIC-LABEL: test_vbroadcast:
8451 ; GENERIC: # %bb.0: # %entry
8452 ; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
8453 ; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
8454 ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
8455 ; GENERIC-NEXT: knotw %k0, %k1 # sched: [1:1.00]
8456 ; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
8457 ; GENERIC-NEXT: retq # sched: [1:1.00]
8459 ; SKX-LABEL: test_vbroadcast:
8460 ; SKX: # %bb.0: # %entry
8461 ; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
8462 ; SKX-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
8463 ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
8464 ; SKX-NEXT: knotw %k0, %k1 # sched: [1:1.00]
8465 ; SKX-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
8466 ; SKX-NEXT: retq # sched: [7:1.00]
8468 %0 = sext <16 x i1> zeroinitializer to <16 x i32>
8469 %1 = fcmp uno <16 x float> undef, zeroinitializer
8470 %2 = sext <16 x i1> %1 to <16 x i32>
8471 %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2
8475 ; We implement the set1 intrinsics with vector initializers. Verify that the
8476 ; IR generated will produce broadcasts at the end.
; This case inserts the same double %d into each of the 8 lanes with a chain
; of insertelement instructions; the checked assembly collapses the chain to
; one vbroadcastsd from %xmm0.
8477 define <8 x double> @test_set1_pd(double %d) #2 {
8478 ; GENERIC-LABEL: test_set1_pd:
8479 ; GENERIC: # %bb.0: # %entry
8480 ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
8481 ; GENERIC-NEXT: retq # sched: [1:1.00]
8483 ; SKX-LABEL: test_set1_pd:
8484 ; SKX: # %bb.0: # %entry
8485 ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
8486 ; SKX-NEXT: retq # sched: [7:1.00]
8488 %vecinit.i = insertelement <8 x double> undef, double %d, i32 0
8489 %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1
8490 %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %d, i32 2
8491 %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %d, i32 3
8492 %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %d, i32 4
8493 %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %d, i32 5
8494 %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %d, i32 6
8495 %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %d, i32 7
8496 ret <8 x double> %vecinit7.i
; Inserts the same i64 %d into each of the 8 lanes with a chain of
; insertelement instructions. The checked assembly collapses the chain to one
; vpbroadcastq taken directly from the general-purpose register %rdi.
8499 define <8 x i64> @test_set1_epi64(i64 %d) #2 {
8500 ; GENERIC-LABEL: test_set1_epi64:
8501 ; GENERIC: # %bb.0: # %entry
8502 ; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00]
8503 ; GENERIC-NEXT: retq # sched: [1:1.00]
8505 ; SKX-LABEL: test_set1_epi64:
8506 ; SKX: # %bb.0: # %entry
8507 ; SKX-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00]
8508 ; SKX-NEXT: retq # sched: [7:1.00]
8510 %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0
8511 %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1
8512 %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %d, i32 2
8513 %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %d, i32 3
8514 %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %d, i32 4
8515 %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %d, i32 5
8516 %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %d, i32 6
8517 %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %d, i32 7
8518 ret <8 x i64> %vecinit7.i
; Inserts the same float %f into each of the 16 lanes with a chain of
; insertelement instructions. The checked assembly collapses the chain to one
; vbroadcastss from %xmm0.
8521 define <16 x float> @test_set1_ps(float %f) #2 {
8522 ; GENERIC-LABEL: test_set1_ps:
8523 ; GENERIC: # %bb.0: # %entry
8524 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8525 ; GENERIC-NEXT: retq # sched: [1:1.00]
8527 ; SKX-LABEL: test_set1_ps:
8528 ; SKX: # %bb.0: # %entry
8529 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8530 ; SKX-NEXT: retq # sched: [7:1.00]
8532 %vecinit.i = insertelement <16 x float> undef, float %f, i32 0
8533 %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1
8534 %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %f, i32 2
8535 %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %f, i32 3
8536 %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %f, i32 4
8537 %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %f, i32 5
8538 %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %f, i32 6
8539 %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %f, i32 7
8540 %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %f, i32 8
8541 %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %f, i32 9
8542 %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %f, i32 10
8543 %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %f, i32 11
8544 %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %f, i32 12
8545 %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %f, i32 13
8546 %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %f, i32 14
8547 %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %f, i32 15
8548 ret <16 x float> %vecinit15.i
; Inserts the same i32 %f into each of the 16 lanes with a chain of
; insertelement instructions. The checked assembly collapses the chain to one
; vpbroadcastd taken directly from the general-purpose register %edi.
8551 define <16 x i32> @test_set1_epi32(i32 %f) #2 {
8552 ; GENERIC-LABEL: test_set1_epi32:
8553 ; GENERIC: # %bb.0: # %entry
8554 ; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00]
8555 ; GENERIC-NEXT: retq # sched: [1:1.00]
8557 ; SKX-LABEL: test_set1_epi32:
8558 ; SKX: # %bb.0: # %entry
8559 ; SKX-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00]
8560 ; SKX-NEXT: retq # sched: [7:1.00]
8562 %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0
8563 %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1
8564 %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %f, i32 2
8565 %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %f, i32 3
8566 %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %f, i32 4
8567 %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %f, i32 5
8568 %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %f, i32 6
8569 %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %f, i32 7
8570 %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %f, i32 8
8571 %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %f, i32 9
8572 %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %f, i32 10
8573 %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %f, i32 11
8574 %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %f, i32 12
8575 %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %f, i32 13
8576 %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %f, i32 14
8577 %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %f, i32 15
8578 ret <16 x i32> %vecinit15.i
8581 ; We implement the scalar broadcast intrinsics with vector initializers.
8582 ; Verify that the IR generated will produce the broadcast at the end.
; This case extracts element 0 of the <2 x double> argument and inserts it
; into each of the 8 result lanes; the checked assembly is one vbroadcastsd
; from %xmm0.
8583 define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
8584 ; GENERIC-LABEL: test_mm512_broadcastsd_pd:
8585 ; GENERIC: # %bb.0: # %entry
8586 ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
8587 ; GENERIC-NEXT: retq # sched: [1:1.00]
8589 ; SKX-LABEL: test_mm512_broadcastsd_pd:
8590 ; SKX: # %bb.0: # %entry
8591 ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
8592 ; SKX-NEXT: retq # sched: [7:1.00]
8594 %0 = extractelement <2 x double> %a, i32 0
8595 %vecinit.i = insertelement <8 x double> undef, double %0, i32 0
8596 %vecinit1.i = insertelement <8 x double> %vecinit.i, double %0, i32 1
8597 %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %0, i32 2
8598 %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %0, i32 3
8599 %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %0, i32 4
8600 %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %0, i32 5
8601 %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %0, i32 6
8602 %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %0, i32 7
8603 ret <8 x double> %vecinit7.i
; Widening shuffle: element 0 of the <8 x float> argument is replicated into
; all 16 lanes of the <16 x float> result (all-zero shuffle mask).
; The checked assembly is one vbroadcastss from %xmm0.
8606 define <16 x float> @suff_test1(<8 x float>%a) {
8607 ; GENERIC-LABEL: suff_test1:
8609 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8610 ; GENERIC-NEXT: retq # sched: [1:1.00]
8612 ; SKX-LABEL: suff_test1:
8614 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8615 ; SKX-NEXT: retq # sched: [7:1.00]
8616 %res = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> zeroinitializer
8617 ret <16 x float>%res
; Widening shuffle: element 0 of the <4 x double> argument is replicated into
; all 8 lanes of the <8 x double> result (all-zero shuffle mask).
; The checked assembly is one vbroadcastsd from %xmm0.
8620 define <8 x double> @suff_test2(<4 x double>%a) {
8621 ; GENERIC-LABEL: suff_test2:
8623 ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
8624 ; GENERIC-NEXT: retq # sched: [1:1.00]
8626 ; SKX-LABEL: suff_test2:
8628 ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
8629 ; SKX-NEXT: retq # sched: [7:1.00]
8630 %res = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer
8631 ret <8 x double>%res
; Widening shuffle: element 0 of the <32 x i8> argument is replicated into
; all 64 byte lanes (all-zero shuffle mask).
; The checked assembly is one vpbroadcastb from %xmm0.
8634 define <64 x i8> @_invec32xi8(<32 x i8>%a) {
8635 ; GENERIC-LABEL: _invec32xi8:
8637 ; GENERIC-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [1:1.00]
8638 ; GENERIC-NEXT: retq # sched: [1:1.00]
8640 ; SKX-LABEL: _invec32xi8:
8642 ; SKX-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [3:1.00]
8643 ; SKX-NEXT: retq # sched: [7:1.00]
8644 %res = shufflevector <32 x i8> %a, <32 x i8> undef, <64 x i32> zeroinitializer
; Widening shuffle: element 0 of the <16 x i16> argument is replicated into
; all 32 word lanes (all-zero shuffle mask).
; The checked assembly is one vpbroadcastw from %xmm0.
8648 define <32 x i16> @_invec16xi16(<16 x i16>%a) {
8649 ; GENERIC-LABEL: _invec16xi16:
8651 ; GENERIC-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [1:1.00]
8652 ; GENERIC-NEXT: retq # sched: [1:1.00]
8654 ; SKX-LABEL: _invec16xi16:
8656 ; SKX-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [3:1.00]
8657 ; SKX-NEXT: retq # sched: [7:1.00]
8658 %res = shufflevector <16 x i16> %a, <16 x i16> undef, <32 x i32> zeroinitializer
; Widening shuffle: element 0 of the <8 x i32> argument is replicated into
; all 16 dword lanes (all-zero shuffle mask).
; The checked instruction for this integer vector is vbroadcastss from %xmm0.
8662 define <16 x i32> @_invec8xi32(<8 x i32>%a) {
8663 ; GENERIC-LABEL: _invec8xi32:
8665 ; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
8666 ; GENERIC-NEXT: retq # sched: [1:1.00]
8668 ; SKX-LABEL: _invec8xi32:
8670 ; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
8671 ; SKX-NEXT: retq # sched: [7:1.00]
8672 %res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer
; Widening shuffle: element 0 of the <4 x i64> argument is replicated into
; all 8 qword lanes (all-zero shuffle mask).
; The checked instruction for this integer vector is vbroadcastsd from %xmm0.
8676 define <8 x i64> @_invec4xi64(<4 x i64>%a) {
8677 ; GENERIC-LABEL: _invec4xi64:
8679 ; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
8680 ; GENERIC-NEXT: retq # sched: [1:1.00]
8682 ; SKX-LABEL: _invec4xi64:
8684 ; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
8685 ; SKX-NEXT: retq # sched: [7:1.00]
8686 %res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer
; Broadcast of a float that is live across a call: %a = %x + %x is passed to
; the external @func_f32 and afterwards replicated into a <16 x float>.
; Per the checked assembly, %a is spilled to the stack before the call
; (vmovaps to (%rsp)) and the reload is folded into the broadcast
; (vbroadcastss (%rsp)), so no separate reload instruction is expected.
8690 declare void @func_f32(float)
8691 define <16 x float> @broadcast_ss_spill(float %x) {
8692 ; GENERIC-LABEL: broadcast_ss_spill:
8694 ; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33]
8695 ; GENERIC-NEXT: .cfi_def_cfa_offset 32
8696 ; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
8697 ; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [5:1.00]
8698 ; GENERIC-NEXT: callq func_f32
8699 ; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00]
8700 ; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
8701 ; GENERIC-NEXT: retq # sched: [1:1.00]
8703 ; SKX-LABEL: broadcast_ss_spill:
8705 ; SKX-NEXT: subq $24, %rsp # sched: [1:0.25]
8706 ; SKX-NEXT: .cfi_def_cfa_offset 32
8707 ; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
8708 ; SKX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
8709 ; SKX-NEXT: callq func_f32
8710 ; SKX-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
8711 ; SKX-NEXT: addq $24, %rsp # sched: [1:0.25]
8712 ; SKX-NEXT: retq # sched: [7:1.00]
8713 %a = fadd float %x, %x
8714 call void @func_f32(float %a)
8715 %b = insertelement <16 x float> undef, float %a, i32 0
8716 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
; Broadcast of a double that is live across a call: %a = %x + %x is passed to
; the external @func_f64 and afterwards replicated into an <8 x double>.
; Per the checked assembly, %a is spilled to the stack before the call
; (vmovapd to (%rsp)) and the reload is folded into the broadcast
; (vbroadcastsd (%rsp)), so no separate reload instruction is expected.
8720 declare void @func_f64(double)
8721 define <8 x double> @broadcast_sd_spill(double %x) {
8722 ; GENERIC-LABEL: broadcast_sd_spill:
8724 ; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33]
8725 ; GENERIC-NEXT: .cfi_def_cfa_offset 32
8726 ; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
8727 ; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [5:1.00]
8728 ; GENERIC-NEXT: callq func_f64
8729 ; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00]
8730 ; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
8731 ; GENERIC-NEXT: retq # sched: [1:1.00]
8733 ; SKX-LABEL: broadcast_sd_spill:
8735 ; SKX-NEXT: subq $24, %rsp # sched: [1:0.25]
8736 ; SKX-NEXT: .cfi_def_cfa_offset 32
8737 ; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
8738 ; SKX-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
8739 ; SKX-NEXT: callq func_f64
8740 ; SKX-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
8741 ; SKX-NEXT: addq $24, %rsp # sched: [1:0.25]
8742 ; SKX-NEXT: retq # sched: [7:1.00]
8743 %a = fadd double %x, %x
8744 call void @func_f64(double %a)
8745 %b = insertelement <8 x double> undef, double %a, i32 0
8746 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer