1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 AVX512 instruction set, defining the
11 // instructions, and properties of the instructions which are needed for code
12 // generation, machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 // Group template arguments that can be derived from the vector type (EltNum x
17 // EltVT). These are things like the register class for the writemask, etc.
18 // The idea is to pass one of these as the template argument rather than the
19 // individual arguments.
20 // The template is also used for scalar types, in this case numelts is 1.
// Carrier class: bundles everything derivable from (NumElts x EltVT) — the
// vector/mask register classes, memory operands, load fragments, execution
// domain, etc. — so the instruction multiclasses below can take a single
// template argument instead of a long list.
// NOTE(review): this chunk appears to have interior lines elided (the
// embedded original line numbers skip, e.g. the `suffix` template parameter
// and `Size` field referenced below are declared on lines not visible
// here); the body is reproduced as-is — confirm against the full file.
21 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
23 RegisterClass RC = rc;
24 ValueType EltVT = eltvt;
25 int NumElts = numelts;
27 // Corresponding mask register class.
28 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
30 // Corresponding write-mask register class.
31 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
// The mask value type, e.g. v16i1 for a 16-element vector.
34 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
36 // Suffix used in the instruction mnemonic.
37 string Suffix = suffix;
39 // VTName is a string name for vector VT. For vector types it will be
40 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
41 // It is a little bit complex for scalar types, where NumElts = 1.
42 // In this case we build v4f32 or v2f64
43 string VTName = "v" # !if (!eq (NumElts, 1),
44 !if (!eq (EltVT.Size, 32), 4,
45 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
// The actual ValueType record named by VTName.
48 ValueType VT = !cast<ValueType>(VTName);
50 string EltTypeName = !cast<string>(EltVT);
51 // Size of the element type in bits, e.g. 32 for v16i32.
52 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
53 int EltSize = EltVT.Size;
55 // "i" for integer types and "f" for floating-point types
56 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
58 // Size of RC in bits, e.g. 512 for VR512.
61 // The corresponding memory operand, e.g. i512mem for VR512.
62 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
63 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
64 // FP scalar memory operand for intrinsics - ssmem/sdmem.
65 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
66 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
69 // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
70 // due to load promotion during legalization
71 PatFrag LdFrag = !cast<PatFrag>("load" #
72 !if (!eq (TypeVariantName, "i"),
73 !if (!eq (Size, 128), "v2i64",
74 !if (!eq (Size, 256), "v4i64",
75 !if (!eq (Size, 512), "v8i64",
// Same i64-promotion scheme for aligned loads.
78 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
79 !if (!eq (TypeVariantName, "i"),
80 !if (!eq (Size, 128), "v2i64",
81 !if (!eq (Size, 256), "v4i64",
82 !if (!eq (Size, 512), "v8i64",
85 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
// Complex pattern matching a scalar FP memory operand for intrinsics.
87 ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
88 !cast<ComplexPattern>("sse_load_f32"),
89 !if (!eq (EltTypeName, "f64"),
90 !cast<ComplexPattern>("sse_load_f64"),
93 // The string to specify embedded broadcast in assembly.
94 string BroadcastStr = "{1to" # NumElts # "}";
96 // 8-bit compressed displacement tuple/subvector format. This is only
97 // defined for NumElts <= 8.
98 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
99 !cast<CD8VForm>("CD8VT" # NumElts), ?);
// Sub-register index for extracting the low 128/256 bits of a wider vector.
101 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
102 !if (!eq (Size, 256), sub_ymm, ?));
104 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
105 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
// Scalar FP register class matching the element type.
108 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
110 // A vector type of the same width with element type i64. This is used to
111 // create patterns for logic ops.
112 ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
114 // A vector type of the same width with element type i32. This is used to
115 // create the canonical constant zero node ImmAllZerosV.
116 ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
117 dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
// Instruction-name suffix selecting the EVEX vector length (Z128/Z256/Z).
119 string ZSuffix = !if (!eq (Size, 128), "Z128",
120 !if (!eq (Size, 256), "Z256", "Z"));
// Concrete X86VectorVTInfo instances: 512-bit (VR512/ZMM), then 256-bit
// ("x" suffix, VR256X/YMM), then 128-bit (VR128X/XMM), then scalars.
123 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
124 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
125 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
126 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
127 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
128 def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
130 // "x" in v32i8x_info means RC = VR256X
131 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
132 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
133 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
134 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
135 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
136 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
138 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
139 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
140 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
141 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
142 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
143 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
145 // We map scalar types to the smallest (128-bit) vector type
146 // with the appropriate element type. This allows us to use the same masking logic.
147 def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
148 def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
149 def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
150 def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
// Bundles the 512/256/128-bit X86VectorVTInfo records for one element type,
// so multiclasses that emit all three EVEX vector lengths (AVX512VL) can
// take a single argument.
// NOTE(review): the continuation lines carrying each def's third (128-bit)
// argument are not visible in this chunk — confirm against the full file.
152 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
153 X86VectorVTInfo i128> {
154 X86VectorVTInfo info512 = i512;
155 X86VectorVTInfo info256 = i256;
156 X86VectorVTInfo info128 = i128;
159 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
161 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
163 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
165 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
167 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
169 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
// Carrier class for mask-register (k-register) vector types: the plain and
// write-mask register classes for a given vNi1 type.
172 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
174 RegisterClass KRC = _krc;
175 RegisterClass KRCWM = _krcwm;
// One instance per supported mask width (1 through 64 bits).
179 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
180 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
181 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
182 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
183 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
184 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
185 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
187 // This multiclass generates the masking variants from the non-masking
188 // variant. It only provides the assembly pieces for the masking variants.
189 // It assumes custom ISel patterns for masking which can be provided as
190 // template arguments.
// Emits three records: NAME (unmasked), NAME#k (merge-masking, writes only
// elements selected by ${mask}) and NAME#kz (zero-masking, {z} in asm).
191 multiclass AVX512_maskable_custom<bits<8> O, Format F,
193 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
195 string AttSrcAsm, string IntelSrcAsm,
197 list<dag> MaskingPattern,
198 list<dag> ZeroMaskingPattern,
199 string MaskingConstraint = "",
200 bit IsCommutable = 0,
201 bit IsKCommutable = 0> {
202 let isCommutable = IsCommutable in
203 def NAME: AVX512<O, F, Outs, Ins,
204 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
205 "$dst, "#IntelSrcAsm#"}",
208 // Prefer over VMOV*rrk Pat<>
209 let isCommutable = IsKCommutable in
210 def NAME#k: AVX512<O, F, Outs, MaskingIns,
211 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
212 "$dst {${mask}}, "#IntelSrcAsm#"}",
215 // In case of the 3src subclass this is overridden with a let.
216 string Constraints = MaskingConstraint;
219 // Zero mask does not add any restrictions to commute operands transformation.
220 // So, it is Ok to use IsCommutable instead of IsKCommutable.
221 let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
222 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
223 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
224 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
230 // Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the set-patterns from RHS/MaskingRHS dags and synthesizes the
// zero-masking pattern as (Select mask, RHS, ImmAllZerosV).
231 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
233 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
235 string AttSrcAsm, string IntelSrcAsm,
236 dag RHS, dag MaskingRHS,
237 SDNode Select = vselect,
238 string MaskingConstraint = "",
239 bit IsCommutable = 0,
240 bit IsKCommutable = 0> :
241 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
242 AttSrcAsm, IntelSrcAsm,
243 [(set _.RC:$dst, RHS)],
244 [(set _.RC:$dst, MaskingRHS)],
246 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
247 MaskingConstraint, IsCommutable,
250 // This multiclass generates the unconditional/non-masking, the masking and
251 // the zero-masking variant of the vector instruction. In the masking case, the
252 // preserved vector elements come from a new dummy input operand tied to $dst.
253 // This version uses a separate dag for non-masking and masking.
254 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
255 dag Outs, dag Ins, string OpcodeStr,
256 string AttSrcAsm, string IntelSrcAsm,
257 dag RHS, dag MaskRHS,
258 bit IsCommutable = 0, bit IsKCommutable = 0,
259 SDNode Select = vselect> :
// Masking variants prepend $src0 (tied pass-through) and $mask to the ins.
260 AVX512_maskable_custom<O, F, Outs, Ins,
261 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
262 !con((ins _.KRCWM:$mask), Ins),
263 OpcodeStr, AttSrcAsm, IntelSrcAsm,
264 [(set _.RC:$dst, RHS)],
266 (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
268 (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
269 "$src0 = $dst", IsCommutable, IsKCommutable>;
271 // This multiclass generates the unconditional/non-masking, the masking and
272 // the zero-masking variant of the vector instruction. In the masking case, the
273 // preserved vector elements come from a new dummy input operand tied to $dst.
274 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
275 dag Outs, dag Ins, string OpcodeStr,
276 string AttSrcAsm, string IntelSrcAsm,
278 bit IsCommutable = 0, bit IsKCommutable = 0,
279 SDNode Select = vselect> :
// Like AVX512_maskable_split but with one RHS dag used for both the
// unmasked and masked patterns.
280 AVX512_maskable_common<O, F, _, Outs, Ins,
281 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
282 !con((ins _.KRCWM:$mask), Ins),
283 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
284 (Select _.KRCWM:$mask, RHS, _.RC:$src0),
285 Select, "$src0 = $dst", IsCommutable, IsKCommutable>;
287 // This multiclass generates the unconditional/non-masking, the masking and
288 // the zero-masking variant of the scalar instruction.
// Scalar ops select with X86selects (element 0 only) instead of vselect.
289 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
290 dag Outs, dag Ins, string OpcodeStr,
291 string AttSrcAsm, string IntelSrcAsm,
293 bit IsCommutable = 0> :
294 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
295 RHS, IsCommutable, 0, X86selects>;
297 // Similar to AVX512_maskable but in this case one of the source operands
298 // ($src1) is already tied to $dst so we just use that for the preserved
299 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1 (it is prepended to every variant's ins list below).
301 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
302 dag Outs, dag NonTiedIns, string OpcodeStr,
303 string AttSrcAsm, string IntelSrcAsm,
305 bit IsCommutable = 0,
306 bit IsKCommutable = 0,
307 SDNode Select = vselect,
309 AVX512_maskable_common<O, F, _, Outs,
310 !con((ins _.RC:$src1), NonTiedIns),
311 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
312 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
313 OpcodeStr, AttSrcAsm, IntelSrcAsm,
// MaskOnly suppresses the unmasked pattern via null_frag.
314 !if(MaskOnly, (null_frag), RHS),
315 (Select _.KRCWM:$mask, RHS, _.RC:$src1),
316 Select, "", IsCommutable, IsKCommutable>;
318 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
319 // operand differs from the output VT. This requires a bitconvert on
320 // the preserved vector going into the vselect.
321 // NOTE: The unmasked pattern is disabled.
322 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
323 X86VectorVTInfo InVT,
324 dag Outs, dag NonTiedIns, string OpcodeStr,
325 string AttSrcAsm, string IntelSrcAsm,
326 dag RHS, bit IsCommutable = 0> :
327 AVX512_maskable_common<O, F, OutVT, Outs,
328 !con((ins InVT.RC:$src1), NonTiedIns),
329 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
330 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
331 OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
332 (vselect InVT.KRCWM:$mask, RHS,
333 (bitconvert InVT.RC:$src1)),
334 vselect, "", IsCommutable>;
// Scalar flavor of AVX512_maskable_3src: same tied-$src1 scheme, but
// selecting with X86selects (element 0 only).
336 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
337 dag Outs, dag NonTiedIns, string OpcodeStr,
338 string AttSrcAsm, string IntelSrcAsm,
340 bit IsCommutable = 0,
341 bit IsKCommutable = 0,
343 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
344 IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
345 X86selects, MaskOnly>;
// Assembler-only variant: provides the masking/zero-masking assembly forms
// but no masked ISel patterns (only the caller-supplied Pattern list for
// the unmasked form).
347 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
350 string AttSrcAsm, string IntelSrcAsm,
352 AVX512_maskable_custom<O, F, Outs, Ins,
353 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
354 !con((ins _.KRCWM:$mask), Ins),
355 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
// Assembler-only variant of the 3src (tied $src1) scheme.
358 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
359 dag Outs, dag NonTiedIns,
361 string AttSrcAsm, string IntelSrcAsm,
363 AVX512_maskable_custom<O, F, Outs,
364 !con((ins _.RC:$src1), NonTiedIns),
365 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
366 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
367 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
370 // Instruction with mask that puts result in mask register,
371 // like "compare" and "vptest"
// Only two variants here (unmasked and merge-masked NAME#k); there is no
// zero-masking form for instructions that write a k-register.
372 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
374 dag Ins, dag MaskingIns,
376 string AttSrcAsm, string IntelSrcAsm,
378 list<dag> MaskingPattern,
379 bit IsCommutable = 0> {
380 let isCommutable = IsCommutable in
381 def NAME: AVX512<O, F, Outs, Ins,
382 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
383 "$dst, "#IntelSrcAsm#"}",
386 def NAME#k: AVX512<O, F, Outs, MaskingIns,
387 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
388 "$dst {${mask}}, "#IntelSrcAsm#"}",
389 MaskingPattern>, EVEX_K;
// Pattern-building layer over AVX512_maskable_custom_cmp: destination is
// the mask register class (_.KRC), not the vector register class.
392 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
394 dag Ins, dag MaskingIns,
396 string AttSrcAsm, string IntelSrcAsm,
397 dag RHS, dag MaskingRHS,
398 bit IsCommutable = 0> :
399 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
400 AttSrcAsm, IntelSrcAsm,
401 [(set _.KRC:$dst, RHS)],
402 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
// Standard compare: the masked result is the unmasked result ANDed with
// the write-mask.
404 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
405 dag Outs, dag Ins, string OpcodeStr,
406 string AttSrcAsm, string IntelSrcAsm,
407 dag RHS, bit IsCommutable = 0> :
408 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
409 !con((ins _.KRCWM:$mask), Ins),
410 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
411 (and _.KRCWM:$mask, RHS), IsCommutable>;
// Assembler-alias form: compare syntax with no ISel patterns at all.
413 multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
414 dag Outs, dag Ins, string OpcodeStr,
415 string AttSrcAsm, string IntelSrcAsm> :
416 AVX512_maskable_custom_cmp<O, F, Outs,
417 Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
418 AttSrcAsm, IntelSrcAsm, [], []>;
420 // This multiclass generates the unconditional/non-masking, the masking and
421 // the zero-masking variant of the vector instruction. In the masking case, the
422 // preserved vector elements come from a new dummy input operand tied to $dst.
// Used for logic ops: separate RHS/MaskedRHS dags, fed straight to
// AVX512_maskable_custom.
423 multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
424 dag Outs, dag Ins, string OpcodeStr,
425 string AttSrcAsm, string IntelSrcAsm,
426 dag RHS, dag MaskedRHS,
427 bit IsCommutable = 0, SDNode Select = vselect> :
428 AVX512_maskable_custom<O, F, Outs, Ins,
429 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
430 !con((ins _.KRCWM:$mask), Ins),
431 OpcodeStr, AttSrcAsm, IntelSrcAsm,
432 [(set _.RC:$dst, RHS)],
434 (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
436 (Select _.KRCWM:$mask, MaskedRHS,
438 "$src0 = $dst", IsCommutable>;
441 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
442 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
443 // swizzled by ExecutionDomainFix to pxor.
444 // We set canFoldAsLoad because this can be converted to a constant-pool
445 // load of an all-zeros value if folding it would be beneficial.
446 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
447 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
448 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
449 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
450 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
451 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
454 // Alias instructions that allow VPTERNLOG to be used with a mask to create
455 // a mix of all ones and all zeros elements. This is done this way to force
456 // the same register to be used as input for all three sources.
457 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
458 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
459 (ins VK16WM:$mask), "",
460 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
461 (v16i32 immAllOnesV),
462 (v16i32 immAllZerosV)))]>;
463 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
464 (ins VK8WM:$mask), "",
465 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
466 (bc_v8i64 (v16i32 immAllOnesV)),
467 (bc_v8i64 (v16i32 immAllZerosV))))]>;
// 128/256-bit zero pseudos, same expansion scheme as AVX512_512_SET0.
470 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
471 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
472 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
473 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
474 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
475 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
478 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
479 // This is expanded by ExpandPostRAPseudos.
480 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
481 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
482 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
483 [(set FR32X:$dst, fp32imm0)]>;
484 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
485 [(set FR64X:$dst, fpimm0)]>;
488 //===----------------------------------------------------------------------===//
489 // AVX-512 - VECTOR INSERT
492 // Supports two different pattern operators for mask and unmasked ops. Allows
493 // null_frag to be passed for one.
// Emits the register ("rr") and memory ("rm") forms of one VINSERT
// instruction inserting a From-sized subvector into a To-sized vector.
494 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
496 SDPatternOperator vinsert_insert,
497 SDPatternOperator vinsert_for_mask,
498 X86FoldableSchedWrite sched> {
499 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
500 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
501 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
502 "vinsert" # From.EltTypeName # "x" # From.NumElts,
503 "$src3, $src2, $src1", "$src1, $src2, $src3",
504 (vinsert_insert:$src3 (To.VT To.RC:$src1),
505 (From.VT From.RC:$src2),
507 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
508 (From.VT From.RC:$src2),
510 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
512 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
513 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
514 "vinsert" # From.EltTypeName # "x" # From.NumElts,
515 "$src3, $src2, $src1", "$src1, $src2, $src3",
516 (vinsert_insert:$src3 (To.VT To.RC:$src1),
517 (From.VT (bitconvert (From.LdFrag addr:$src2))),
519 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
520 (From.VT (bitconvert (From.LdFrag addr:$src2))),
521 (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
522 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
523 Sched<[sched.Folded, ReadAfterLd]>;
527 // Passes the same pattern operator for masked and unmasked ops.
528 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
530 SDPatternOperator vinsert_insert,
531 X86FoldableSchedWrite sched> :
532 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
// Lowering-only patterns: select an already-defined VINSERT instruction
// (named by InstrStr) for additional From/To type combinations, for both
// the register and the memory forms.
534 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
535 X86VectorVTInfo To, PatFrag vinsert_insert,
536 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
537 let Predicates = p in {
538 def : Pat<(vinsert_insert:$ins
539 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
540 (To.VT (!cast<Instruction>(InstrStr#"rr")
541 To.RC:$src1, From.RC:$src2,
// Translate the element index captured in $ins into the immediate field.
542 (INSERT_get_vinsert_imm To.RC:$ins)))>;
544 def : Pat<(vinsert_insert:$ins
546 (From.VT (bitconvert (From.LdFrag addr:$src2))),
548 (To.VT (!cast<Instruction>(InstrStr#"rm")
549 To.RC:$src1, addr:$src2,
550 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Instantiates the full family of VINSERT instructions for one FP/int pair
// of element types: the 32x4/64x4 forms (AVX512F/VL) and the DQI-only
// 64x2/32x8 forms (the latter kept pattern-less via null_frag so they are
// only selected for masking).
554 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
555 ValueType EltVT64, int Opcode256,
556 X86FoldableSchedWrite sched> {
558 let Predicates = [HasVLX] in
559 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
560 X86VectorVTInfo< 4, EltVT32, VR128X>,
561 X86VectorVTInfo< 8, EltVT32, VR256X>,
562 vinsert128_insert, sched>, EVEX_V256;
564 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
565 X86VectorVTInfo< 4, EltVT32, VR128X>,
566 X86VectorVTInfo<16, EltVT32, VR512>,
567 vinsert128_insert, sched>, EVEX_V512;
569 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
570 X86VectorVTInfo< 4, EltVT64, VR256X>,
571 X86VectorVTInfo< 8, EltVT64, VR512>,
572 vinsert256_insert, sched>, VEX_W, EVEX_V512;
574 // Even with DQI we'd like to only use these instructions for masking.
575 let Predicates = [HasVLX, HasDQI] in
576 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
577 X86VectorVTInfo< 2, EltVT64, VR128X>,
578 X86VectorVTInfo< 4, EltVT64, VR256X>,
579 null_frag, vinsert128_insert, sched>,
582 // Even with DQI we'd like to only use these instructions for masking.
583 let Predicates = [HasDQI] in {
584 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
585 X86VectorVTInfo< 2, EltVT64, VR128X>,
586 X86VectorVTInfo< 8, EltVT64, VR512>,
587 null_frag, vinsert128_insert, sched>,
590 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
591 X86VectorVTInfo< 8, EltVT32, VR256X>,
592 X86VectorVTInfo<16, EltVT32, VR512>,
593 null_frag, vinsert256_insert, sched>,
598 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
599 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
600 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
602 // Codegen pattern with the alternative types,
603 // Even with AVX512DQ we'll still use these for unmasked operations.
604 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
605 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
606 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
607 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
609 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
610 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
611 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
612 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
614 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
615 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
616 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
617 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
619 // Codegen pattern with the alternative types insert VEC128 into VEC256
620 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
621 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
622 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
623 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
624 // Codegen pattern with the alternative types insert VEC128 into VEC512
625 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
626 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
627 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
628 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
629 // Codegen pattern with the alternative types insert VEC256 into VEC512
630 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
631 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
632 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
633 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// Masked-insert patterns where the vselect operates in a different ("Cast")
// type than the insert itself: matches merge-masking ("rrk"/"rmk") and
// zero-masking ("rrkz"/"rmkz") forms of an existing VINSERT instruction.
636 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
637 X86VectorVTInfo To, X86VectorVTInfo Cast,
638 PatFrag vinsert_insert,
639 SDNodeXForm INSERT_get_vinsert_imm,
641 let Predicates = p in {
// Merge-masking, register form -> rrk.
643 (vselect Cast.KRCWM:$mask,
645 (vinsert_insert:$ins (To.VT To.RC:$src1),
646 (From.VT From.RC:$src2),
649 (!cast<Instruction>(InstrStr#"rrk")
650 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
651 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Merge-masking, memory form -> rmk.
653 (vselect Cast.KRCWM:$mask,
655 (vinsert_insert:$ins (To.VT To.RC:$src1),
658 (From.LdFrag addr:$src2))),
661 (!cast<Instruction>(InstrStr#"rmk")
662 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
663 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Zero-masking, register form -> rrkz.
666 (vselect Cast.KRCWM:$mask,
668 (vinsert_insert:$ins (To.VT To.RC:$src1),
669 (From.VT From.RC:$src2),
672 (!cast<Instruction>(InstrStr#"rrkz")
673 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
674 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Zero-masking, memory form -> rmkz.
676 (vselect Cast.KRCWM:$mask,
678 (vinsert_insert:$ins (To.VT To.RC:$src1),
681 (From.LdFrag addr:$src2))),
684 (!cast<Instruction>(InstrStr#"rmkz")
685 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
686 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Mask-cast pattern instantiations: 128-into-256 (VL), then 128-into-512,
// then 256-into-512; DQI predicates guard the 64x2/32x8 instruction forms.
690 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
691 v8f32x_info, vinsert128_insert,
692 INSERT_get_vinsert128_imm, [HasVLX]>;
693 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
694 v4f64x_info, vinsert128_insert,
695 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
697 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
698 v8i32x_info, vinsert128_insert,
699 INSERT_get_vinsert128_imm, [HasVLX]>;
700 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
701 v8i32x_info, vinsert128_insert,
702 INSERT_get_vinsert128_imm, [HasVLX]>;
703 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
704 v8i32x_info, vinsert128_insert,
705 INSERT_get_vinsert128_imm, [HasVLX]>;
706 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
707 v4i64x_info, vinsert128_insert,
708 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
709 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
710 v4i64x_info, vinsert128_insert,
711 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
712 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
713 v4i64x_info, vinsert128_insert,
714 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
716 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
717 v16f32_info, vinsert128_insert,
718 INSERT_get_vinsert128_imm, [HasAVX512]>;
719 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
720 v8f64_info, vinsert128_insert,
721 INSERT_get_vinsert128_imm, [HasDQI]>;
723 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
724 v16i32_info, vinsert128_insert,
725 INSERT_get_vinsert128_imm, [HasAVX512]>;
726 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
727 v16i32_info, vinsert128_insert,
728 INSERT_get_vinsert128_imm, [HasAVX512]>;
729 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
730 v16i32_info, vinsert128_insert,
731 INSERT_get_vinsert128_imm, [HasAVX512]>;
732 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
733 v8i64_info, vinsert128_insert,
734 INSERT_get_vinsert128_imm, [HasDQI]>;
735 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
736 v8i64_info, vinsert128_insert,
737 INSERT_get_vinsert128_imm, [HasDQI]>;
738 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
739 v8i64_info, vinsert128_insert,
740 INSERT_get_vinsert128_imm, [HasDQI]>;
742 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
743 v16f32_info, vinsert256_insert,
744 INSERT_get_vinsert256_imm, [HasDQI]>;
745 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
746 v8f64_info, vinsert256_insert,
747 INSERT_get_vinsert256_imm, [HasAVX512]>;
749 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
750 v16i32_info, vinsert256_insert,
751 INSERT_get_vinsert256_imm, [HasDQI]>;
752 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
753 v16i32_info, vinsert256_insert,
754 INSERT_get_vinsert256_imm, [HasDQI]>;
755 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
756 v16i32_info, vinsert256_insert,
757 INSERT_get_vinsert256_imm, [HasDQI]>;
758 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
759 v8i64_info, vinsert256_insert,
760 INSERT_get_vinsert256_imm, [HasAVX512]>;
761 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
762 v8i64_info, vinsert256_insert,
763 INSERT_get_vinsert256_imm, [HasAVX512]>;
764 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
765 v8i64_info, vinsert256_insert,
766 INSERT_get_vinsert256_imm, [HasAVX512]>;
768 // vinsertps - insert f32 to XMM
// EVEX-encoded VINSERTPS: register form (rr) and memory form (rm, folds a
// scalar f32 load via scalar_to_vector).
769 let ExeDomain = SSEPackedSingle in {
770 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
771 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
772 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
773 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
774 EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
775 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
776 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
777 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
778 [(set VR128X:$dst, (X86insertps VR128X:$src1,
779 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
781 EVEX_4V, EVEX_CD8<32, CD8VT1>,
782 Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
785 //===----------------------------------------------------------------------===//
786 // AVX-512 VECTOR EXTRACT
789 // Supports two different pattern operators for mask and unmasked ops. Allows
790 // null_frag to be passed for one.
791 multiclass vextract_for_size_split<int Opcode,
792 X86VectorVTInfo From, X86VectorVTInfo To,
793 SDPatternOperator vextract_extract,
794 SDPatternOperator vextract_for_mask,
795 SchedWrite SchedRR, SchedWrite SchedMR> {
797 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
// Register form: AVX512_maskable_split takes separate patterns for the
// unmasked (vextract_extract) and masked (vextract_for_mask) selections.
798 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
799 (ins From.RC:$src1, u8imm:$idx),
800 "vextract" # To.EltTypeName # "x" # To.NumElts,
801 "$idx, $src1", "$src1, $idx",
802 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
803 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
804 AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
// Unmasked extract-to-memory form.
806 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
807 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
808 "vextract" # To.EltTypeName # "x" # To.NumElts #
809 "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
810 [(store (To.VT (vextract_extract:$idx
811 (From.VT From.RC:$src1), (iPTR imm))),
// Masked extract-to-memory form; no ISel pattern (empty list), assembler /
// manual selection only.
815 let mayStore = 1, hasSideEffects = 0 in
816 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
817 (ins To.MemOp:$dst, To.KRCWM:$mask,
818 From.RC:$src1, u8imm:$idx),
819 "vextract" # To.EltTypeName # "x" # To.NumElts #
820 "\t{$idx, $src1, $dst {${mask}}|"
821 "$dst {${mask}}, $src1, $idx}", []>,
822 EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
826 // Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper around vextract_for_size_split for the common case.
827 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
829 SDPatternOperator vextract_extract,
830 SchedWrite SchedRR, SchedWrite SchedMR> :
831 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
833 // Codegen pattern for the alternative types
// Maps an extract of a From/To type pair that has no dedicated instruction
// onto an existing instruction named InstrStr (its "rr" and "mr" forms).
834 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
835 X86VectorVTInfo To, PatFrag vextract_extract,
836 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
837 let Predicates = p in {
// Register extract reuses InstrStr's register form.
838 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
839 (To.VT (!cast<Instruction>(InstrStr#"rr")
841 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Extract-and-store reuses InstrStr's memory form.
842 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
843 (iPTR imm))), addr:$dst),
844 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
845 (EXTRACT_get_vextract_imm To.RC:$ext))>;
// Instantiates every EVEX vector-extract flavour for one 32-bit/64-bit
// element-type pair (e.g. f32/f64 or i32/i64), gated on the right features.
849 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
850 ValueType EltVT64, int Opcode256,
851 SchedWrite SchedRR, SchedWrite SchedMR> {
852 let Predicates = [HasAVX512] in {
853 defm NAME # "32x4Z" : vextract_for_size<Opcode128,
854 X86VectorVTInfo<16, EltVT32, VR512>,
855 X86VectorVTInfo< 4, EltVT32, VR128X>,
856 vextract128_extract, SchedRR, SchedMR>,
857 EVEX_V512, EVEX_CD8<32, CD8VT4>;
858 defm NAME # "64x4Z" : vextract_for_size<Opcode256,
859 X86VectorVTInfo< 8, EltVT64, VR512>,
860 X86VectorVTInfo< 4, EltVT64, VR256X>,
861 vextract256_extract, SchedRR, SchedMR>,
862 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
864 let Predicates = [HasVLX] in
865 defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
866 X86VectorVTInfo< 8, EltVT32, VR256X>,
867 X86VectorVTInfo< 4, EltVT32, VR128X>,
868 vextract128_extract, SchedRR, SchedMR>,
869 EVEX_V256, EVEX_CD8<32, CD8VT4>;
871 // Even with DQI we'd like to only use these instructions for masking.
// null_frag as the unmasked operator disables the unmasked ISel pattern.
872 let Predicates = [HasVLX, HasDQI] in
873 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
874 X86VectorVTInfo< 4, EltVT64, VR256X>,
875 X86VectorVTInfo< 2, EltVT64, VR128X>,
876 null_frag, vextract128_extract, SchedRR, SchedMR>,
877 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
879 // Even with DQI we'd like to only use these instructions for masking.
880 let Predicates = [HasDQI] in {
881 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
882 X86VectorVTInfo< 8, EltVT64, VR512>,
883 X86VectorVTInfo< 2, EltVT64, VR128X>,
884 null_frag, vextract128_extract, SchedRR, SchedMR>,
885 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
886 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
887 X86VectorVTInfo<16, EltVT32, VR512>,
888 X86VectorVTInfo< 8, EltVT32, VR256X>,
889 null_frag, vextract256_extract, SchedRR, SchedMR>,
890 EVEX_V512, EVEX_CD8<32, CD8VT8>;
894 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
895 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
896 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
898 // extract_subvector codegen patterns with the alternative types.
899 // Even with AVX512DQ we'll still use these for unmasked operations.
// Extract VEC128 from VEC512, 64-bit element types via the 32x4 instruction.
900 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
901 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
902 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
903 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Extract VEC256 from VEC512, 32-bit element types via the 64x4 instruction.
905 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
906 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
907 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
908 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
// Extract VEC128 from VEC256, 64-bit element types (VLX forms).
910 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
911 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
912 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
913 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
915 // Codegen pattern with the alternative types extract VEC128 from VEC256
916 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
917 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
918 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
919 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
921 // Codegen pattern with the alternative types extract VEC128 from VEC512
922 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
923 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
924 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
925 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
926 // Codegen pattern with the alternative types extract VEC256 from VEC512
927 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
928 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
929 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
930 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
933 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
934 // smaller extract to enable EVEX->VEX.
// The subvector index (2/4/8/16) is the element count of a 128-bit chunk,
// i.e. it always addresses bit offset 128 of the ymm sub-register.
935 let Predicates = [NoVLX] in {
936 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
937 (v2i64 (VEXTRACTI128rr
938 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
940 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
941 (v2f64 (VEXTRACTF128rr
942 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
944 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
945 (v4i32 (VEXTRACTI128rr
946 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
948 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
949 (v4f32 (VEXTRACTF128rr
950 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
952 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
953 (v8i16 (VEXTRACTI128rr
954 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
956 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
957 (v16i8 (VEXTRACTI128rr
958 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
962 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
963 // smaller extract to enable EVEX->VEX.
// Same idea as above, but with VLX available the 256-bit EVEX extract is used.
964 let Predicates = [HasVLX] in {
965 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
966 (v2i64 (VEXTRACTI32x4Z256rr
967 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
969 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
970 (v2f64 (VEXTRACTF32x4Z256rr
971 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
973 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
974 (v4i32 (VEXTRACTI32x4Z256rr
975 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
977 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
978 (v4f32 (VEXTRACTF32x4Z256rr
979 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
981 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
982 (v8i16 (VEXTRACTI32x4Z256rr
983 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
985 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
986 (v16i8 (VEXTRACTI32x4Z256rr
987 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
992 // Additional patterns for handling a bitcast between the vselect and the
993 // extract_subvector.
// Cast is the type the mask/vselect operates in; From/To describe the actual
// extract. Selects the masked (rrk) / zero-masked (rrkz) forms of InstrStr.
994 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
995 X86VectorVTInfo To, X86VectorVTInfo Cast,
996 PatFrag vextract_extract,
997 SDNodeXForm EXTRACT_get_vextract_imm,
999 let Predicates = p in {
// Merge-masking: blend the extract with $src0 under $mask.
1000 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
1002 (To.VT (vextract_extract:$ext
1003 (From.VT From.RC:$src), (iPTR imm)))),
1005 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1006 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1007 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Zero-masking: blend the extract with an all-zeros vector.
1009 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
1011 (To.VT (vextract_extract:$ext
1012 (From.VT From.RC:$src), (iPTR imm)))),
1013 Cast.ImmAllZerosV)),
1014 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1015 Cast.KRCWM:$mask, From.RC:$src,
1016 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// 128-bit extracts from 256-bit sources (VLX; x2 forms also need DQI).
1020 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1021 v4f32x_info, vextract128_extract,
1022 EXTRACT_get_vextract128_imm, [HasVLX]>;
1023 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1024 v2f64x_info, vextract128_extract,
1025 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1027 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1028 v4i32x_info, vextract128_extract,
1029 EXTRACT_get_vextract128_imm, [HasVLX]>;
1030 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1031 v4i32x_info, vextract128_extract,
1032 EXTRACT_get_vextract128_imm, [HasVLX]>;
1033 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1034 v4i32x_info, vextract128_extract,
1035 EXTRACT_get_vextract128_imm, [HasVLX]>;
1036 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1037 v2i64x_info, vextract128_extract,
1038 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1039 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1040 v2i64x_info, vextract128_extract,
1041 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1042 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1043 v2i64x_info, vextract128_extract,
1044 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
// 128-bit extracts from 512-bit sources.
1046 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1047 v4f32x_info, vextract128_extract,
1048 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1049 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1050 v2f64x_info, vextract128_extract,
1051 EXTRACT_get_vextract128_imm, [HasDQI]>;
1053 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1054 v4i32x_info, vextract128_extract,
1055 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1056 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1057 v4i32x_info, vextract128_extract,
1058 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1059 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1060 v4i32x_info, vextract128_extract,
1061 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1062 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1063 v2i64x_info, vextract128_extract,
1064 EXTRACT_get_vextract128_imm, [HasDQI]>;
1065 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1066 v2i64x_info, vextract128_extract,
1067 EXTRACT_get_vextract128_imm, [HasDQI]>;
1068 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1069 v2i64x_info, vextract128_extract,
1070 EXTRACT_get_vextract128_imm, [HasDQI]>;
// 256-bit extracts from 512-bit sources.
1072 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1073 v8f32x_info, vextract256_extract,
1074 EXTRACT_get_vextract256_imm, [HasDQI]>;
1075 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1076 v4f64x_info, vextract256_extract,
1077 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1079 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1080 v8i32x_info, vextract256_extract,
1081 EXTRACT_get_vextract256_imm, [HasDQI]>;
1082 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1083 v8i32x_info, vextract256_extract,
1084 EXTRACT_get_vextract256_imm, [HasDQI]>;
1085 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1086 v8i32x_info, vextract256_extract,
1087 EXTRACT_get_vextract256_imm, [HasDQI]>;
1088 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1089 v4i64x_info, vextract256_extract,
1090 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1091 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1092 v4i64x_info, vextract256_extract,
1093 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1094 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1095 v4i64x_info, vextract256_extract,
1096 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1098 // vextractps - extract 32 bits from XMM
// Register form: moves one 32-bit element (selected by $src2) to a GPR.
1099 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1100 (ins VR128X:$src1, u8imm:$src2),
1101 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1102 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1103 EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
// Memory form: stores the selected 32-bit element directly.
1105 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1106 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1107 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1108 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1110 EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1112 //===---------------------------------------------------------------------===//
1113 // AVX-512 BROADCAST
1115 // broadcast with a scalar argument.
// Patterns that select the register broadcast instruction (Name + ZSuffix)
// when the broadcast source is a scalar FP register (FRC), for the unmasked,
// merge-masked (rk) and zero-masked (rkz) cases.
1116 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1118 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1119 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1120 (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
1121 (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
1122 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1123 (X86VBroadcast SrcInfo.FRC:$src),
1124 DestInfo.RC:$src0)),
1125 (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
1126 DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1127 (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
1128 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1129 (X86VBroadcast SrcInfo.FRC:$src),
1130 DestInfo.ImmAllZerosV)),
1131 (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
1132 DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
1135 // Split version to allow mask and broadcast node to be different types. This
1136 // helps support the 32x2 broadcasts.
1137 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1139 SchedWrite SchedRR, SchedWrite SchedRM,
1140 X86VectorVTInfo MaskInfo,
1141 X86VectorVTInfo DestInfo,
1142 X86VectorVTInfo SrcInfo,
1143 SDPatternOperator UnmaskedOp = X86VBroadcast> {
1144 let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
// Register broadcast; unmasked pattern uses UnmaskedOp, masked pattern
// always uses X86VBroadcast.
1145 defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
1146 (outs MaskInfo.RC:$dst),
1147 (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
1151 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
1155 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
1156 T8PD, EVEX, Sched<[SchedRR]>;
// Broadcast from a scalar memory operand (CD8 scaled by element size).
1158 defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
1159 (outs MaskInfo.RC:$dst),
1160 (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
1163 (DestInfo.VT (UnmaskedOp
1164 (SrcInfo.ScalarLdFrag addr:$src))))),
1167 (DestInfo.VT (X86VBroadcast
1168 (SrcInfo.ScalarLdFrag addr:$src)))))>,
1169 T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
// Extra patterns for broadcasts whose source is a scalar_to_vector of a load.
1173 def : Pat<(MaskInfo.VT
1175 (DestInfo.VT (UnmaskedOp
1176 (SrcInfo.VT (scalar_to_vector
1177 (SrcInfo.ScalarLdFrag addr:$src))))))),
1178 (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
// NOTE(review): this merge-mask pattern casts via DestInfo.ZSuffix while the
// sibling patterns use MaskInfo.ZSuffix; they coincide for all current
// instantiations (same register width) — confirm before diverging them.
1179 def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
1183 (SrcInfo.VT (scalar_to_vector
1184 (SrcInfo.ScalarLdFrag addr:$src)))))),
1185 MaskInfo.RC:$src0)),
1186 (!cast<Instruction>(Name#DestInfo.ZSuffix#mk)
1187 MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
1188 def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
1192 (SrcInfo.VT (scalar_to_vector
1193 (SrcInfo.ScalarLdFrag addr:$src)))))),
1194 MaskInfo.ImmAllZerosV)),
1195 (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
1196 MaskInfo.KRCWM:$mask, addr:$src)>;
1199 // Helper class to force mask and broadcast result to same type.
// Thin wrapper: instantiates the split version with MaskInfo == DestInfo.
1200 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1201 SchedWrite SchedRR, SchedWrite SchedRM,
1202 X86VectorVTInfo DestInfo,
1203 X86VectorVTInfo SrcInfo> :
1204 avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1205 DestInfo, DestInfo, SrcInfo>;
// vbroadcastsd: f64 broadcast from xmm/memory. 512-bit form needs AVX512,
// 256-bit form needs VLX; there is no 128-bit sd broadcast here.
1207 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1208 AVX512VLVectorVTInfo _> {
1209 let Predicates = [HasAVX512] in {
1210 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1211 WriteFShuffle256Ld, _.info512, _.info128>,
1212 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1217 let Predicates = [HasVLX] in {
1218 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1219 WriteFShuffle256Ld, _.info256, _.info128>,
1220 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
// vbroadcastss: f32 broadcast. Unlike the sd version this also provides a
// 128-bit (Z128) form under VLX.
1226 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1227 AVX512VLVectorVTInfo _> {
1228 let Predicates = [HasAVX512] in {
1229 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1230 WriteFShuffle256Ld, _.info512, _.info128>,
1231 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1236 let Predicates = [HasVLX] in {
1237 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1238 WriteFShuffle256Ld, _.info256, _.info128>,
1239 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1242 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1243 WriteFShuffle256Ld, _.info128, _.info128>,
1244 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
// FP broadcast instantiations; VEX_W1X on sd marks the 64-bit element width.
1249 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1251 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1252 avx512vl_f64_info>, VEX_W1X;
// Integer broadcast from a GPR (vpbroadcastd/q-style), masked via
// AVX512_maskable.
1254 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1255 X86VectorVTInfo _, SDPatternOperator OpNode,
1256 RegisterClass SrcRC> {
1257 let ExeDomain = _.ExeDomain in
1258 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1260 "vpbroadcast"##_.Suffix, "$src", "$src",
1261 (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
// Byte/word GPR broadcast. The instruction takes a GR32 operand (no pattern,
// AVX512_maskable_custom with empty pattern lists); the Pats below widen the
// byte/word source into GR32 via INSERT_SUBREG before selecting it.
1265 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1266 X86VectorVTInfo _, SDPatternOperator OpNode,
1267 RegisterClass SrcRC, SubRegIndex Subreg> {
1268 let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1269 defm r : AVX512_maskable_custom<opc, MRMSrcReg,
1270 (outs _.RC:$dst), (ins GR32:$src),
1271 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1272 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1273 "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
1274 "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
// Unmasked selection.
1276 def : Pat <(_.VT (OpNode SrcRC:$src)),
1277 (!cast<Instruction>(Name#r)
1278 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Merge-masked selection.
1280 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1281 (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
1282 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Zero-masked selection.
1284 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1285 (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
1286 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Instantiates the byte/word GPR broadcast at 512/256/128 bits, gated on the
// base predicate (prd) plus VLX for the narrower forms.
1289 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1290 AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1291 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1292 let Predicates = [prd] in
1293 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1294 OpNode, SrcRC, Subreg>, EVEX_V512;
1295 let Predicates = [prd, HasVLX] in {
1296 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1297 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1298 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1299 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
// Instantiates the dword/qword GPR broadcast at 512/256/128 bits.
1303 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1304 SDPatternOperator OpNode,
1305 RegisterClass SrcRC, Predicate prd> {
1306 let Predicates = [prd] in
1307 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1309 let Predicates = [prd, HasVLX] in {
1310 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1312 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
// GPR-source broadcasts. B/W forms require BWI; D/Q share opcode 0x7C and are
// distinguished by VEX_W on the Q form.
1317 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1318 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1319 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1320 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1322 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1323 X86VBroadcast, GR32, HasAVX512>;
1324 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1325 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1327 // Provide aliases for broadcast from the same register class that
1328 // automatically does the extract.
// Broadcasting a whole vector register broadcasts its low element: extract
// the xmm sub-register and reuse the vector-source broadcast instruction.
1329 multiclass avx512_int_broadcast_rm_lowering<string Name,
1330 X86VectorVTInfo DestInfo,
1331 X86VectorVTInfo SrcInfo> {
1332 def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
1333 (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
1334 (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
// Integer vector/memory broadcast at all three widths, plus the lowering
// aliases for ymm/zmm-register sources.
1337 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1338 AVX512VLVectorVTInfo _, Predicate prd> {
1339 let Predicates = [prd] in {
1340 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1341 WriteShuffle256Ld, _.info512, _.info128>,
1342 avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256>,
1344 // Defined separately to avoid redefinition.
1345 defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512>;
1347 let Predicates = [prd, HasVLX] in {
1348 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1349 WriteShuffle256Ld, _.info256, _.info128>,
1350 avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256>,
1352 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1353 WriteShuffleXLd, _.info128, _.info128>,
// Vector/memory-source integer broadcasts.
1358 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1359 avx512vl_i8_info, HasBWI>;
1360 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1361 avx512vl_i16_info, HasBWI>;
1362 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1363 avx512vl_i32_info, HasAVX512>;
1364 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1365 avx512vl_i64_info, HasAVX512>, VEX_W1X;
// Subvector broadcast (vbroadcastf32x4 etc.) from memory, with full
// masked/unmasked ISel patterns.
1367 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1368 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1369 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1370 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1371 (_Dst.VT (X86SubVBroadcast
1372 (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
1373 Sched<[SchedWriteShuffle.YMM.Folded]>,
1377 // This should be used for the AVX512DQ broadcast instructions. It disables
1378 // the unmasked patterns so that we only use the DQ instructions when masking
1380 // is required (the unmasked selection position is left empty in the split).
1381 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1382 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1383 let hasSideEffects = 0, mayLoad = 1 in
1384 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1385 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1386 (_Dst.VT (X86SubVBroadcast
1387 (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
1388 Sched<[SchedWriteShuffle.YMM.Folded]>,
1392 let Predicates = [HasAVX512] in {
1393 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1394 def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
1395 (VPBROADCASTQZm addr:$src)>;
1398 let Predicates = [HasVLX] in {
1399 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1400 def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
1401 (VPBROADCASTQZ128m addr:$src)>;
1402 def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
1403 (VPBROADCASTQZ256m addr:$src)>;
1405 let Predicates = [HasVLX, HasBWI] in {
1406 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1407 // This means we'll encounter truncated i32 loads; match that here.
1408 def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1409 (VPBROADCASTWZ128m addr:$src)>;
1410 def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1411 (VPBROADCASTWZ256m addr:$src)>;
// Same, but for a zero-extending i16 load that was widened to i32.
1412 def : Pat<(v8i16 (X86VBroadcast
1413 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1414 (VPBROADCASTWZ128m addr:$src)>;
1415 def : Pat<(v16i16 (X86VBroadcast
1416 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1417 (VPBROADCASTWZ256m addr:$src)>;
1420 //===----------------------------------------------------------------------===//
1421 // AVX-512 BROADCAST SUBVECTORS
// 128-bit and 256-bit subvector broadcasts into zmm (baseline AVX512 forms).
1424 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1425 v16i32_info, v4i32x_info>,
1426 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1427 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1428 v16f32_info, v4f32x_info>,
1429 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1430 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1431 v8i64_info, v4i64x_info>, VEX_W,
1432 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1433 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1434 v8f64_info, v4f64x_info>, VEX_W,
1435 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1437 let Predicates = [HasAVX512] in {
// 256-bit subvector broadcasts of alternative element types reuse the
// 64x4 instructions.
1438 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1439 (VBROADCASTF64X4rm addr:$src)>;
1440 def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
1441 (VBROADCASTI64X4rm addr:$src)>;
1442 def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
1443 (VBROADCASTI64X4rm addr:$src)>;
1444 def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
1445 (VBROADCASTI64X4rm addr:$src)>;
1447 // Provide fallback in case the load node that is used in the patterns above
1448 // is used by additional users, which prevents the pattern selection.
// Register-source fallback: duplicate the ymm into both halves of a zmm.
1449 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1450 (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1451 (v4f64 VR256X:$src), 1)>;
1452 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1453 (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1454 (v8f32 VR256X:$src), 1)>;
1455 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1456 (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1457 (v4i64 VR256X:$src), 1)>;
1458 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1459 (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1460 (v8i32 VR256X:$src), 1)>;
1461 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1462 (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1463 (v16i16 VR256X:$src), 1)>;
1464 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1465 (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1466 (v32i8 VR256X:$src), 1)>;
// 128-bit subvector broadcasts of 64-bit element types reuse the 32x4 forms.
1468 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1469 (VBROADCASTF32X4rm addr:$src)>;
1470 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1471 (VBROADCASTI32X4rm addr:$src)>;
1472 def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
1473 (VBROADCASTI32X4rm addr:$src)>;
1474 def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
1475 (VBROADCASTI32X4rm addr:$src)>;
1477 // Patterns for selects of bitcasted operations.
1478 def : Pat<(vselect VK16WM:$mask,
1479 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1480 (bc_v16f32 (v16i32 immAllZerosV))),
1481 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1482 def : Pat<(vselect VK16WM:$mask,
1483 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1485 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1486 def : Pat<(vselect VK16WM:$mask,
1487 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1488 (v16i32 immAllZerosV)),
1489 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1490 def : Pat<(vselect VK16WM:$mask,
1491 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1493 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1495 def : Pat<(vselect VK8WM:$mask,
1496 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1497 (bc_v8f64 (v16i32 immAllZerosV))),
1498 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1499 def : Pat<(vselect VK8WM:$mask,
1500 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1502 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1503 def : Pat<(vselect VK8WM:$mask,
1504 (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
1505 (bc_v8i64 (v16i32 immAllZerosV))),
1506 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1507 def : Pat<(vselect VK8WM:$mask,
1508 (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
1510 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1513 let Predicates = [HasVLX] in {
// 256-bit destination forms of the 32x4 subvector broadcasts (VLX).
1514 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1515 v8i32x_info, v4i32x_info>,
1516 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1517 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1518 v8f32x_info, v4f32x_info>,
1519 EVEX_V256, EVEX_CD8<32, CD8VT4>;
// Alternative element types reuse the 32x4 instructions.
1521 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1522 (VBROADCASTF32X4Z256rm addr:$src)>;
1523 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1524 (VBROADCASTI32X4Z256rm addr:$src)>;
1525 def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
1526 (VBROADCASTI32X4Z256rm addr:$src)>;
1527 def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
1528 (VBROADCASTI32X4Z256rm addr:$src)>;
1530 // Patterns for selects of bitcasted operations.
1531 def : Pat<(vselect VK8WM:$mask,
1532 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1533 (bc_v8f32 (v8i32 immAllZerosV))),
1534 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1535 def : Pat<(vselect VK8WM:$mask,
1536 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1538 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1539 def : Pat<(vselect VK8WM:$mask,
1540 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1541 (v8i32 immAllZerosV)),
1542 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1543 def : Pat<(vselect VK8WM:$mask,
1544 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1546 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1549 // Provide fallback in case the load node that is used in the patterns above
1550 // is used by additional users, which prevents the pattern selection.
// Register-source fallback: duplicate the xmm into both halves of a ymm.
1551 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1552 (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1553 (v2f64 VR128X:$src), 1)>;
1554 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1555 (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1556 (v4f32 VR128X:$src), 1)>;
1557 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1558 (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1559 (v2i64 VR128X:$src), 1)>;
1560 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1561 (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1562 (v4i32 VR128X:$src), 1)>;
1563 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1564 (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1565 (v8i16 VR128X:$src), 1)>;
1566 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1567 (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1568 (v16i8 VR128X:$src), 1)>;
// 256-bit vbroadcast[if]64x2 (broadcast a 128-bit pair of 64-bit elements),
// available with AVX512VL + AVX512DQ. VEX_W1X / EVEX_CD8<64, CD8VT2> set the
// W bit and compressed-displacement scaling for the 16-byte memory operand.
1571 let Predicates = [HasVLX, HasDQI] in {
1572 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1573 v4i64x_info, v2i64x_info>, VEX_W1X,
1574 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1575 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1576 v4f64x_info, v2f64x_info>, VEX_W1X,
1577 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1579 // Patterns for selects of bitcasted operations.
// Match masked selects of bitcast 64x2 broadcasts to the rmkz (zeroing) /
// rmk (merging) instruction forms.
// NOTE(review): the rmk patterns again appear to lack the passthru operand
// line (1586, 1594 absent) — presumably an extraction gap; confirm upstream.
1580 def : Pat<(vselect VK4WM:$mask,
1581 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1582 (bc_v4f64 (v8i32 immAllZerosV))),
1583 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1584 def : Pat<(vselect VK4WM:$mask,
1585 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1587 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1588 def : Pat<(vselect VK4WM:$mask,
1589 (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
1590 (bc_v4i64 (v8i32 immAllZerosV))),
1591 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1592 def : Pat<(vselect VK4WM:$mask,
1593 (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
1595 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
// 512-bit AVX512DQ subvector broadcasts: [if]64x2 (128-bit source) and
// [if]32x8 (256-bit source), plus masked-select patterns for each.
1598 let Predicates = [HasDQI] in {
1599 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1600 v8i64_info, v2i64x_info>, VEX_W,
1601 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1602 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1603 v16i32_info, v8i32x_info>,
1604 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1605 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1606 v8f64_info, v2f64x_info>, VEX_W,
1607 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1608 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1609 v16f32_info, v8f32x_info>,
1610 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1612 // Patterns for selects of bitcasted operations.
// Zero-masked (rmkz) and merge-masked (rmk) folds for 32x8 broadcasts seen
// through an f64/i64 bitcast.
// NOTE(review): each rmk pattern below is missing its passthru operand line
// (1619, 1627, 1636, 1644 absent) — likely an extraction gap; verify upstream.
1613 def : Pat<(vselect VK16WM:$mask,
1614 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1615 (bc_v16f32 (v16i32 immAllZerosV))),
1616 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1617 def : Pat<(vselect VK16WM:$mask,
1618 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1620 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1621 def : Pat<(vselect VK16WM:$mask,
1622 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1623 (v16i32 immAllZerosV)),
1624 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1625 def : Pat<(vselect VK16WM:$mask,
1626 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1628 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
// Same masked folds for the 64x2 broadcasts seen through an f32/i32 bitcast.
1630 def : Pat<(vselect VK8WM:$mask,
1631 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1632 (bc_v8f64 (v16i32 immAllZerosV))),
1633 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1634 def : Pat<(vselect VK8WM:$mask,
1635 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1637 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1638 def : Pat<(vselect VK8WM:$mask,
1639 (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
1640 (bc_v8i64 (v16i32 immAllZerosV))),
1641 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1642 def : Pat<(vselect VK8WM:$mask,
1643 (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
1645 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// vbroadcast[if]32x2: broadcast a pair of 32-bit elements. The base
// multiclass instantiates the 512-bit (HasDQI) and 256-bit (HasDQI+HasVLX)
// forms; the _i32x2 extension adds the 128-bit form, which only exists for
// the integer variant.
1648 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1649 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1650 let Predicates = [HasDQI] in
1651 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1652 WriteShuffle256Ld, _Dst.info512,
1653 _Src.info512, _Src.info128, null_frag>,
1655 let Predicates = [HasDQI, HasVLX] in
1656 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1657 WriteShuffle256Ld, _Dst.info256,
1658 _Src.info256, _Src.info128, null_frag>,
// 128-bit variant uses the single-lane shuffle scheduling classes
// (WriteShuffle / WriteShuffleXLd) instead of the 256-bit ones.
1662 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1663 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1664 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1666 let Predicates = [HasDQI, HasVLX] in
1667 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1668 WriteShuffleXLd, _Dst.info128,
1669 _Src.info128, _Src.info128, null_frag>,
// Instantiations: i32x2 gets all three widths, f32x2 only 256/512-bit.
1673 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1674 avx512vl_i32_info, avx512vl_i64_info>;
1675 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1676 avx512vl_f32_info, avx512vl_f64_info>;
// Broadcast of element 0 of a full-width vector register: select the
// register form of VBROADCASTSS/SD on the extracted low xmm subregister.
1678 let Predicates = [HasVLX] in {
1679 def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
1680 (VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1681 def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
1682 (VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
// 512-bit destinations: source may be a zmm or a ymm register; either way
// only the low xmm lane feeds the broadcast.
1685 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
1686 (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
1687 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
1688 (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1690 def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
1691 (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
1692 def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
1693 (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1695 //===----------------------------------------------------------------------===//
1696 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// VPBROADCASTM{W2D,B2Q}: broadcast a mask register into every vector
// element. Register-only form (no memory operand); requires AVX512CD.
1698 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1699 X86VectorVTInfo _, RegisterClass KRC> {
1700 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1701 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1702 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1703 EVEX, Sched<[WriteShuffle]>;
// Instantiate 512-bit under HasCDI and 256/128-bit under HasCDI+HasVLX.
1706 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1707 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1708 let Predicates = [HasCDI] in
1709 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1710 let Predicates = [HasCDI, HasVLX] in {
1711 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1712 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1716 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1717 avx512vl_i32_info, VK16>;
1718 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1719 avx512vl_i64_info, VK8>, VEX_W;
1721 //===----------------------------------------------------------------------===//
1722 // -- VPERMI2 - 3 source operands form --
// VPERMI2*: two-table permute where $src1 is both the index vector and the
// destination (tied via Constraints). The *_cast maskable helper is used
// because the index type (IdxVT) may differ from the data type for the FP
// variants.
1723 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1724 X86FoldableSchedWrite sched,
1725 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1726 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1727 hasSideEffects = 0 in {
1728 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1729 (ins _.RC:$src2, _.RC:$src3),
1730 OpcodeStr, "$src3, $src2", "$src2, $src3",
1731 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1732 EVEX_4V, AVX5128IBase, Sched<[sched]>;
// Memory form: $src3 comes from a full-width load.
1735 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1736 (ins _.RC:$src2, _.MemOp:$src3),
1737 OpcodeStr, "$src3, $src2", "$src2, $src3",
1738 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1739 (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
1740 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
// Broadcast-memory (EVEX.B) form: $src3 is a scalar load splat across the
// vector; only defined for 32/64-bit element variants.
1744 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1745 X86FoldableSchedWrite sched,
1746 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1747 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1748 hasSideEffects = 0, mayLoad = 1 in
1749 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1750 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1751 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1752 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1753 (_.VT (X86VPermt2 _.RC:$src2,
1754 IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1755 AVX5128IBase, EVEX_4V, EVEX_B,
1756 Sched<[sched.Folded, ReadAfterLd]>;
// Size expansion for VPERMI2 with broadcast forms (32/64-bit elements):
// 512-bit always, 128/256-bit gated on HasVLX.
1759 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1760 X86FoldableSchedWrite sched,
1761 AVX512VLVectorVTInfo VTInfo,
1762 AVX512VLVectorVTInfo ShuffleMask> {
1763 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1764 ShuffleMask.info512>,
1765 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1766 ShuffleMask.info512>, EVEX_V512;
1767 let Predicates = [HasVLX] in {
1768 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1769 ShuffleMask.info128>,
1770 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1771 ShuffleMask.info128>, EVEX_V128;
1772 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1773 ShuffleMask.info256>,
1774 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1775 ShuffleMask.info256>, EVEX_V256;
// Byte/word variant: no broadcast form, extra feature predicate
// (HasBWI for vpermi2w, HasVBMI for vpermi2b).
1779 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1780 X86FoldableSchedWrite sched,
1781 AVX512VLVectorVTInfo VTInfo,
1782 AVX512VLVectorVTInfo Idx,
1784 let Predicates = [Prd] in
1785 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1786 Idx.info512>, EVEX_V512;
1787 let Predicates = [Prd, HasVLX] in {
1788 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1789 Idx.info128>, EVEX_V128;
1790 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1791 Idx.info256>, EVEX_V256;
// VPERMI2 instantiations. Note D/Q share opcode 0x76 and W/B share 0x75
// (distinguished by the VEX.W bit); PS/PD use 0x77.
1795 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1796 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1797 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1798 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1799 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1800 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1801 VEX_W, EVEX_CD8<16, CD8VF>;
1802 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1803 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1805 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1806 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1807 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1808 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1810 // Extra patterns to deal with extra bitcasts due to passthru and index being
1811 // different types on the fp versions.
// The vselect passthru is the index operand $src1 viewed through a bitcast
// from CastVT (vXi64); match each of the rr/rm/rmb masked forms.
1812 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1813 X86VectorVTInfo IdxVT,
1814 X86VectorVTInfo CastVT> {
1815 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1816 (X86VPermt2 (_.VT _.RC:$src2),
1817 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1818 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1819 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1820 _.RC:$src2, _.RC:$src3)>;
// Memory-operand form.
1821 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1822 (X86VPermt2 _.RC:$src2,
1823 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1824 (_.LdFrag addr:$src3)),
1825 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1826 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1827 _.RC:$src2, addr:$src3)>;
// Broadcast-memory form.
1828 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1829 (X86VPermt2 _.RC:$src2,
1830 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1831 (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
1832 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1833 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1834 _.RC:$src2, addr:$src3)>;
1837 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1838 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1839 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1840 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
// VPERMT2*: like VPERMI2 but the tied/overwritten operand $src1 is a data
// table rather than the index vector ($src2 carries the indices here).
1843 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1844 X86FoldableSchedWrite sched,
1845 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1846 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1847 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1848 (ins IdxVT.RC:$src2, _.RC:$src3),
1849 OpcodeStr, "$src3, $src2", "$src2, $src3",
1850 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1851 EVEX_4V, AVX5128IBase, Sched<[sched]>;
// Memory form: second table loaded from memory.
1853 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1854 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1855 OpcodeStr, "$src3, $src2", "$src2, $src3",
1856 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1857 (bitconvert (_.LdFrag addr:$src3)))), 1>,
1858 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
// Broadcast-memory (EVEX.B) form for 32/64-bit element variants.
1861 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1862 X86FoldableSchedWrite sched,
1863 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1864 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1865 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1866 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1867 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1868 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1869 (_.VT (X86VPermt2 _.RC:$src1,
1870 IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1871 AVX5128IBase, EVEX_4V, EVEX_B,
1872 Sched<[sched.Folded, ReadAfterLd]>;
// Size expansion for VPERMT2, mirroring the VPERMI2 structure above:
// broadcast-capable variant for 32/64-bit elements, plain variant with a
// feature predicate for byte/word.
1875 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1876 X86FoldableSchedWrite sched,
1877 AVX512VLVectorVTInfo VTInfo,
1878 AVX512VLVectorVTInfo ShuffleMask> {
1879 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1880 ShuffleMask.info512>,
1881 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1882 ShuffleMask.info512>, EVEX_V512;
1883 let Predicates = [HasVLX] in {
1884 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1885 ShuffleMask.info128>,
1886 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1887 ShuffleMask.info128>, EVEX_V128;
1888 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1889 ShuffleMask.info256>,
1890 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1891 ShuffleMask.info256>, EVEX_V256;
1895 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1896 X86FoldableSchedWrite sched,
1897 AVX512VLVectorVTInfo VTInfo,
1898 AVX512VLVectorVTInfo Idx, Predicate Prd> {
1899 let Predicates = [Prd] in
1900 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1901 Idx.info512>, EVEX_V512;
1902 let Predicates = [Prd, HasVLX] in {
1903 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1904 Idx.info128>, EVEX_V128;
1905 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1906 Idx.info256>, EVEX_V256;
// VPERMT2 instantiations: D/Q share 0x7E, W/B share 0x7D (VEX.W selects),
// PS/PD use 0x7F.
1910 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1911 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1912 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1913 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1914 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1915 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1916 VEX_W, EVEX_CD8<16, CD8VF>;
1917 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1918 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1920 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1921 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1922 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1923 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1925 //===----------------------------------------------------------------------===//
1926 // AVX-512 - BLEND using mask
// V[P]BLENDM*: assembler-level definitions only (empty ISel pattern lists —
// blends are selected through vselect patterns elsewhere). hasSideEffects=0
// so the scheduler can treat them as pure. Register and full-memory forms,
// each in plain / merge-masked (rrk/rmk) / zero-masked (rrkz/rmkz) flavors.
1929 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1930 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1931 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1932 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1933 (ins _.RC:$src1, _.RC:$src2),
1934 !strconcat(OpcodeStr,
1935 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),  []>,
1936 EVEX_4V, Sched<[sched]>;
1937 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1938 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1939 !strconcat(OpcodeStr,
1940 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1941 []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1942 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1943 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1944 !strconcat(OpcodeStr,
1945 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1946 []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1947 let mayLoad = 1 in {
1948 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1949 (ins _.RC:$src1, _.MemOp:$src2),
1950 !strconcat(OpcodeStr,
1951 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1952 []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1953 Sched<[sched.Folded, ReadAfterLd]>;
1954 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1955 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1956 !strconcat(OpcodeStr,
1957 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1958 []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1959 Sched<[sched.Folded, ReadAfterLd]>;
1960 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1961 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1962 !strconcat(OpcodeStr,
1963 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1964 []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1965 Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
// Broadcast-memory (EVEX.B) blend forms: the memory operand is a scalar
// splat (${src2}{1to...}). Only defined for the 32/64-bit element blends
// (blendmask_dq below), never for byte/word.
1969 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1970 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1971 let mayLoad = 1, hasSideEffects = 0 in {
1972 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1973 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1974 !strconcat(OpcodeStr,
1975 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1976 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1977 EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1978 Sched<[sched.Folded, ReadAfterLd]>;
1980 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1981 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1982 !strconcat(OpcodeStr,
1983 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1984 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1985 EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1986 Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
1988 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1989 (ins _.RC:$src1, _.ScalarMemOp:$src2),
1990 !strconcat(OpcodeStr,
1991 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1992 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1993 EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1994 Sched<[sched.Folded, ReadAfterLd]>;
// Size expansion for the blend multiclasses: _dq (32/64-bit elements,
// includes broadcast forms) and _bw (byte/word, no broadcast, HasBWI-gated).
1998 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
1999 AVX512VLVectorVTInfo VTInfo> {
2000 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2001 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2004 let Predicates = [HasVLX] in {
2005 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2006 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2008 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2009 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2014 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2015 AVX512VLVectorVTInfo VTInfo> {
2016 let Predicates = [HasBWI] in
2017 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2020 let Predicates = [HasBWI, HasVLX] in {
2021 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2023 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
// Instantiations: FP blends share 0x65, integer D/Q share 0x64, and
// byte/word share 0x66 (VEX.W distinguishing the pairs).
2028 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2030 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2031 avx512vl_f64_info>, VEX_W;
2032 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2034 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2035 avx512vl_i64_info>, VEX_W;
2036 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2038 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2039 avx512vl_i16_info>, VEX_W;
2041 //===----------------------------------------------------------------------===//
2042 // Compare Instructions
2043 //===----------------------------------------------------------------------===//
2045 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compares into a mask register. Forms:
//  - rr_Int/rm_Int: intrinsic-style, condition code via the AVXCC pseudo
//    operand ("vcmp${cc}ss" mnemonic aliases);
//  - rrb_Int: register form with {sae} (suppress-all-exceptions), using the
//    rounding-aware node OpNodeRnd;
//  - *_alt: asm-parser-only variants taking an explicit u8imm condition code;
//  - rr/rm (isCodeGenOnly): FRC-register versions for non-intrinsic selection.
2047 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
2048 X86FoldableSchedWrite sched> {
2049 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2051 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2052 "vcmp${cc}"#_.Suffix,
2053 "$src2, $src1", "$src1, $src2",
2054 (OpNode (_.VT _.RC:$src1),
2056 imm:$cc)>, EVEX_4V, Sched<[sched]>;
2058 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2060 (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
2061 "vcmp${cc}"#_.Suffix,
2062 "$src2, $src1", "$src1, $src2",
2063 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2064 imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2065 Sched<[sched.Folded, ReadAfterLd]>;
2067 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2069 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2070 "vcmp${cc}"#_.Suffix,
2071 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2072 (OpNodeRnd (_.VT _.RC:$src1),
2075 (i32 FROUND_NO_EXC))>,
2076 EVEX_4V, EVEX_B, Sched<[sched]>;
2077 // Accept explicit immediate argument form instead of comparison code.
2078 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2079 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2081 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2083 "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V,
2084 Sched<[sched]>, NotMemoryFoldable;
2086 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2088 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2090 "$cc, $src2, $src1", "$src1, $src2, $cc">,
2091 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2092 Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
2094 defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2096 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2098 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
2099 EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable;
2100 }// let isAsmParserOnly = 1, hasSideEffects = 0
2102 let isCodeGenOnly = 1 in {
2103 let isCommutable = 1 in
2104 def rr : AVX512Ii8<0xC2, MRMSrcReg,
2105 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
2106 !strconcat("vcmp${cc}", _.Suffix,
2107 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2108 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2111 EVEX_4V, Sched<[sched]>;
2112 def rm : AVX512Ii8<0xC2, MRMSrcMem,
2114 (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2115 !strconcat("vcmp${cc}", _.Suffix,
2116 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2117 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2118 (_.ScalarLdFrag addr:$src2),
2120 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2121 Sched<[sched.Folded, ReadAfterLd]>;
// Instantiate the scalar compares: VCMPSS (f32, XS prefix) and VCMPSD
// (f64, XD prefix + VEX.W), both opcode 0xC2.
2125 let Predicates = [HasAVX512] in {
2126 let ExeDomain = SSEPackedSingle in
2127 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
2128 SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2129 let ExeDomain = SSEPackedDouble in
2130 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
2131 SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
// Fixed-predicate integer compares (VPCMPEQ*/VPCMPGT*) producing a mask.
// rr/rm plus masked rrk/rmk variants; the mask operand is folded as an
// explicit 'and' with the compare result in the pattern.
2134 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2135 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2137 let isCommutable = IsCommutable in
2138 def rr : AVX512BI<opc, MRMSrcReg,
2139 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2140 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2141 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
2142 EVEX_4V, Sched<[sched]>;
2143 def rm : AVX512BI<opc, MRMSrcMem,
2144 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2145 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2146 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2147 (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
2148 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
2149 let isCommutable = IsCommutable in
2150 def rrk : AVX512BI<opc, MRMSrcReg,
2151 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2152 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2153 "$dst {${mask}}, $src1, $src2}"),
2154 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2155 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
2156 EVEX_4V, EVEX_K, Sched<[sched]>;
2157 def rmk : AVX512BI<opc, MRMSrcMem,
2158 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2159 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2160 "$dst {${mask}}, $src1, $src2}"),
2161 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2162 (OpNode (_.VT _.RC:$src1),
2164 (_.LdFrag addr:$src2))))))]>,
2165 EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
// Extends avx512_icmp_packed with broadcast-memory (EVEX.B) forms, used by
// the 32/64-bit element compares (VPCMPEQD/Q, VPCMPGTD/Q).
2168 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2169 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2171 avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> {
2172 def rmb : AVX512BI<opc, MRMSrcMem,
2173 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2174 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2175 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2176 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2177 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
2178 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
// Masked broadcast form: mask folded as an 'and' like rrk/rmk above.
2179 def rmbk : AVX512BI<opc, MRMSrcMem,
2180 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2181 _.ScalarMemOp:$src2),
2182 !strconcat(OpcodeStr,
2183 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2184 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2185 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2186 (OpNode (_.VT _.RC:$src1),
2188 (_.ScalarLdFrag addr:$src2)))))]>,
2189 EVEX_4V, EVEX_K, EVEX_B,
2190 Sched<[sched.Folded, ReadAfterLd]>;
// Vector-length expansion for the fixed-predicate compares: 512-bit under
// the base predicate, 128/256-bit additionally under HasVLX. _rmb_vl is the
// same expansion over the broadcast-capable multiclass.
2193 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2194 X86SchedWriteWidths sched,
2195 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2196 bit IsCommutable = 0> {
2197 let Predicates = [prd] in
2198 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM,
2199 VTInfo.info512, IsCommutable>, EVEX_V512;
2201 let Predicates = [prd, HasVLX] in {
2202 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM,
2203 VTInfo.info256, IsCommutable>, EVEX_V256;
2204 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM,
2205 VTInfo.info128, IsCommutable>, EVEX_V128;
2209 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2210 PatFrag OpNode, X86SchedWriteWidths sched,
2211 AVX512VLVectorVTInfo VTInfo,
2212 Predicate prd, bit IsCommutable = 0> {
2213 let Predicates = [prd] in
2214 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
2215 VTInfo.info512, IsCommutable>, EVEX_V512;
2217 let Predicates = [prd, HasVLX] in {
2218 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM,
2219 VTInfo.info256, IsCommutable>, EVEX_V256;
2220 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM,
2221 VTInfo.info128, IsCommutable>, EVEX_V128;
2225 // This fragment treats X86cmpm as commutable to help match loads in both
2226 // operands for PCMPEQ.
2227 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2228 def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
2229 (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
2230 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2231 (setcc node:$src1, node:$src2, SETGT)>;
2233 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2234 // increase the pattern complexity the way an immediate would.
2235 let AddedComplexity = 2 in {
2236 // FIXME: Is there a better scheduler class for VPCMP?
// Equality compares are commutable (IsCommutable = 1); greater-than are not.
// B/W use the plain multiclass (HasBWI), D/Q the broadcast-capable one.
2237 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
2238 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2239 EVEX_CD8<8, CD8VF>, VEX_WIG;
2241 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
2242 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2243 EVEX_CD8<16, CD8VF>, VEX_WIG;
2245 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
2246 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2247 EVEX_CD8<32, CD8VF>;
2249 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
2250 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2251 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2253 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
2254 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2255 EVEX_CD8<8, CD8VF>, VEX_WIG;
2257 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
2258 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2259 EVEX_CD8<16, CD8VF>, VEX_WIG;
2261 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
2262 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2263 EVEX_CD8<32, CD8VF>;
2265 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
2266 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2267 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
// VPCMP{D,Q,B,W,UD,...} with an immediate condition code. Frag carries the
// condition as a complex-pattern operand ($cc); CommFrag is the same
// predicate with operands swapped, used by the trailing Pats to fold a load
// in the FIRST operand by commuting (CommFrag.OperandTransform rewrites the
// condition code accordingly).
2270 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2271 PatFrag CommFrag, X86FoldableSchedWrite sched,
2272 X86VectorVTInfo _, string Name> {
2273 let isCommutable = 1 in
2274 def rri : AVX512AIi8<opc, MRMSrcReg,
2275 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
2276 !strconcat("vpcmp${cc}", Suffix,
2277 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2278 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2281 EVEX_4V, Sched<[sched]>;
2282 def rmi : AVX512AIi8<opc, MRMSrcMem,
2283 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
2284 !strconcat("vpcmp${cc}", Suffix,
2285 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2286 [(set _.KRC:$dst, (_.KVT
2289 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2291 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
// Masked variants: mask folded as an 'and' with the compare result.
2292 let isCommutable = 1 in
2293 def rrik : AVX512AIi8<opc, MRMSrcReg,
2294 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2296 !strconcat("vpcmp${cc}", Suffix,
2297 "\t{$src2, $src1, $dst {${mask}}|",
2298 "$dst {${mask}}, $src1, $src2}"),
2299 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2300 (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2303 EVEX_4V, EVEX_K, Sched<[sched]>;
2304 def rmik : AVX512AIi8<opc, MRMSrcMem,
2305 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2307 !strconcat("vpcmp${cc}", Suffix,
2308 "\t{$src2, $src1, $dst {${mask}}|",
2309 "$dst {${mask}}, $src1, $src2}"),
2310 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2315 (_.LdFrag addr:$src2))),
2317 EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
2319 // Accept explicit immediate argument form instead of comparison code.
2320 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2321 def rri_alt : AVX512AIi8<opc, MRMSrcReg,
2322 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2323 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2324 "$dst, $src1, $src2, $cc}"), []>,
2325 EVEX_4V, Sched<[sched]>, NotMemoryFoldable;
2327 def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
2328 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2329 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2330 "$dst, $src1, $src2, $cc}"), []>,
2331 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
2332 def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
2333 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2335 !strconcat("vpcmp", Suffix,
2336 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2337 "$dst {${mask}}, $src1, $src2, $cc}"), []>,
2338 EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable;
2340 def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
2341 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2343 !strconcat("vpcmp", Suffix,
2344 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2345 "$dst {${mask}}, $src1, $src2, $cc}"), []>,
2346 EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>,
// Commuted-load patterns: load on the LHS is handled by selecting the rmi /
// rmik form with the condition code transformed by CommFrag.
2350 def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
2351 (_.VT _.RC:$src1), cond)),
2352 (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2353 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2355 def : Pat<(and _.KRCWM:$mask,
2356 (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
2357 (_.VT _.RC:$src1), cond))),
2358 (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2359 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2360 (CommFrag.OperandTransform $cc))>;
// Extends avx512_icmp_cc (inherited below) with broadcast-from-memory forms
// for element types that support embedded broadcast (D/Q): rmib/rmibk plus
// *_alt immediate forms and commuted broadcast-load folding patterns.
2363 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2364 PatFrag CommFrag, X86FoldableSchedWrite sched,
2365 X86VectorVTInfo _, string Name> :
2366 avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
2367 def rmib : AVX512AIi8<opc, MRMSrcMem,
2368 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2370 !strconcat("vpcmp${cc}", Suffix,
2371 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2372 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2373 [(set _.KRC:$dst, (_.KVT (Frag:$cc
2376 (_.ScalarLdFrag addr:$src2)),
2378 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2379 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2380 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2381 _.ScalarMemOp:$src2, AVX512ICC:$cc),
2382 !strconcat("vpcmp${cc}", Suffix,
2383 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2384 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2385 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2389 (_.ScalarLdFrag addr:$src2)),
2391 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2393 // Accept explicit immediate argument form instead of comparison code.
2394 let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
2395 def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
2396 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2398 !strconcat("vpcmp", Suffix,
2399 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2400 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
2401 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
2403 def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
2404 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2405 _.ScalarMemOp:$src2, u8imm:$cc),
2406 !strconcat("vpcmp", Suffix,
2407 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2408 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
2409 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
// Fold a broadcast load appearing as the FIRST operand by commuting the
// predicate (CommFrag.OperandTransform) and selecting the rmib/rmibk form.
2413 def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2414 (_.VT _.RC:$src1), cond)),
2415 (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2416 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2418 def : Pat<(and _.KRCWM:$mask,
2419 (_.KVT (CommFrag:$cc (X86VBroadcast
2420 (_.ScalarLdFrag addr:$src2)),
2421 (_.VT _.RC:$src1), cond))),
2422 (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2423 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2424 (CommFrag.OperandTransform $cc))>;
// Instantiates avx512_icmp_cc at 512/256/128-bit widths. The 512-bit form
// only needs `prd`; the 256/128-bit (VL) forms additionally require HasVLX.
2427 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2428 PatFrag CommFrag, X86SchedWriteWidths sched,
2429 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2430 let Predicates = [prd] in
2431 defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
2432 VTInfo.info512, NAME>, EVEX_V512;
2434 let Predicates = [prd, HasVLX] in {
2435 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
2436 VTInfo.info256, NAME>, EVEX_V256;
2437 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
2438 VTInfo.info128, NAME>, EVEX_V128;
// Same width expansion as avx512_icmp_cc_vl, but for the broadcast-capable
// variant (avx512_icmp_cc_rmb) used by the D/Q element sizes.
2442 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2443 PatFrag CommFrag, X86SchedWriteWidths sched,
2444 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2445 let Predicates = [prd] in
2446 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
2447 VTInfo.info512, NAME>, EVEX_V512;
2449 let Predicates = [prd, HasVLX] in {
2450 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
2451 VTInfo.info256, NAME>, EVEX_V256;
2452 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
2453 VTInfo.info128, NAME>, EVEX_V128;
// Transform an ISD::CondCode (operand 2 of a setcc node) into the VPCMP
// immediate encoding via X86::getVPCMPImmForCond.
2457 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2458 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2459 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2460 return getI8Imm(SSECC, SDLoc(N));
2463 // Swapped operand version of the above.
2464 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2465 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2466 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2467 SSECC = X86::getSwappedVPCMPImm(SSECC);
2468 return getI8Imm(SSECC, SDLoc(N));
// setcc fragment matching only SIGNED integer predicates -> VPCMP.
2471 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2472 (setcc node:$src1, node:$src2, node:$cc), [{
2473 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2474 return !ISD::isUnsignedIntSetCC(CC);
2477 // Same as above, but commutes immediate. Use for load folding.
2478 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2479 (setcc node:$src1, node:$src2, node:$cc), [{
2480 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2481 return !ISD::isUnsignedIntSetCC(CC);
2482 }], X86pcmpm_imm_commute>;
// setcc fragment matching only UNSIGNED integer predicates -> VPCMPU.
2484 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2485 (setcc node:$src1, node:$src2, node:$cc), [{
2486 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2487 return ISD::isUnsignedIntSetCC(CC);
2490 // Same as above, but commutes immediate. Use for load folding.
2491 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2492 (setcc node:$src1, node:$src2, node:$cc), [{
2493 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2494 return ISD::isUnsignedIntSetCC(CC);
2495 }], X86pcmpm_imm_commute>;
2497 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Signed (0x3F / X86pcmpm) and unsigned (0x3E / X86pcmpum) predicate
// compares. B/W forms need HasBWI and have no broadcast (_vl); D/Q forms
// use the broadcast-capable _rmb_vl variant under HasAVX512.
2498 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
2499 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2501 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
2502 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2505 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
2506 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2507 VEX_W, EVEX_CD8<16, CD8VF>;
2508 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
2509 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2510 VEX_W, EVEX_CD8<16, CD8VF>;
2512 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
2513 SchedWriteVecALU, avx512vl_i32_info,
2514 HasAVX512>, EVEX_CD8<32, CD8VF>;
2515 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
2516 SchedWriteVecALU, avx512vl_i32_info,
2517 HasAVX512>, EVEX_CD8<32, CD8VF>;
2519 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
2520 SchedWriteVecALU, avx512vl_i64_info,
2521 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2522 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
2523 SchedWriteVecALU, avx512vl_i64_info,
2524 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
// Floating-point compare (VCMPPS/VCMPPD, opcode 0xC2) for one vector width:
// rri (reg/reg), rmi (reg/mem) and rmbi (reg/broadcast-mem) maskable forms,
// asm-parser-only *_alt forms taking the raw u8 immediate, and patterns
// that fold a load/broadcast in the first operand when the predicate is
// commutable (CommutableCMPCC).
2526 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2528 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2529 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
2530 "vcmp${cc}"#_.Suffix,
2531 "$src2, $src1", "$src1, $src2",
2532 (X86cmpm (_.VT _.RC:$src1),
2537 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2538 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
2539 "vcmp${cc}"#_.Suffix,
2540 "$src2, $src1", "$src1, $src2",
2541 (X86cmpm (_.VT _.RC:$src1),
2542 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2544 Sched<[sched.Folded, ReadAfterLd]>;
2546 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2548 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2549 "vcmp${cc}"#_.Suffix,
2550 "${src2}"##_.BroadcastStr##", $src1",
2551 "$src1, ${src2}"##_.BroadcastStr,
2552 (X86cmpm (_.VT _.RC:$src1),
2553 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2555 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2556 // Accept explicit immediate argument form instead of comparison code.
2557 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2558 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2560 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2562 "$cc, $src2, $src1", "$src1, $src2, $cc">,
2563 Sched<[sched]>, NotMemoryFoldable;
2565 let mayLoad = 1 in {
2566 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2568 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2570 "$cc, $src2, $src1", "$src1, $src2, $cc">,
2571 Sched<[sched.Folded, ReadAfterLd]>,
2574 defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2576 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2578 "$cc, ${src2}"##_.BroadcastStr##", $src1",
2579 "$src1, ${src2}"##_.BroadcastStr##", $cc">,
2580 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
2585 // Patterns for selecting with loads in other operand.
// These rely on CommutableCMPCC: the predicate is symmetric, so the
// operands can be swapped without changing the immediate.
2586 def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2587 CommutableCMPCC:$cc),
2588 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2591 def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
2593 CommutableCMPCC:$cc)),
2594 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2595 _.RC:$src1, addr:$src2,
2598 def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2599 (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2600 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2603 def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
2604 (_.ScalarLdFrag addr:$src2)),
2606 CommutableCMPCC:$cc)),
2607 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2608 _.RC:$src1, addr:$src2,
// SAE ({sae}, suppress-all-exceptions) register-only variant of the FP
// compare; matched via X86cmpmRnd with FROUND_NO_EXC. 512-bit only.
2612 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2613 // comparison code form (VCMP[EQ/LT/LE/...]
2614 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2615 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2616 "vcmp${cc}"#_.Suffix,
2617 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2618 (X86cmpmRnd (_.VT _.RC:$src1),
2621 (i32 FROUND_NO_EXC))>,
2622 EVEX_B, Sched<[sched]>;
// Asm-parser-only form with an explicit u8 immediate predicate.
2624 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2625 defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2627 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2629 "$cc, {sae}, $src2, $src1",
2630 "$src1, $src2, {sae}, $cc">,
2631 EVEX_B, Sched<[sched]>, NotMemoryFoldable;
// Width expansion for VCMP: the 512-bit form also gets the SAE variant;
// 128/256-bit forms require HasVLX and have no SAE.
2635 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2636 let Predicates = [HasAVX512] in {
2637 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2638 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2641 let Predicates = [HasAVX512,HasVLX] in {
2642 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2643 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
// Packed FP compares (VCMPPD/VCMPPS), plus scalar patterns that fold a
// load in the first operand of VCMPSS/VCMPSD when the predicate commutes.
2647 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2648 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2649 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2650 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2652 // Patterns to select fp compares with load as first operand.
2653 let Predicates = [HasAVX512] in {
2654 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2655 CommutableCMPCC:$cc)),
2656 (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2658 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2659 CommutableCMPCC:$cc)),
2660 (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2663 // ----------------------------------------------------------------
2665 // Handle the fpclass instruction: mask = op(reg_scalar, imm)
2666 //                                  mask = op(mem_scalar, imm)
// Scalar VFPCLASSSS/SD: classify one FP element against the i32u8imm
// category mask, writing a 1-bit result to a mask register. rr/rm forms
// plus writemasked rrk/rmk variants (result ANDed with $mask).
2667 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2668 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2670 let Predicates = [prd], ExeDomain = _.ExeDomain in {
2671 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2672 (ins _.RC:$src1, i32u8imm:$src2),
2673 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2674 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2675 (i32 imm:$src2)))]>,
2677 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2678 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2679 OpcodeStr##_.Suffix#
2680 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2681 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2682 (OpNode (_.VT _.RC:$src1),
2683 (i32 imm:$src2))))]>,
2684 EVEX_K, Sched<[sched]>;
2685 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2686 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2687 OpcodeStr##_.Suffix##
2688 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2690 (OpNode _.ScalarIntMemCPat:$src1,
2691 (i32 imm:$src2)))]>,
2692 Sched<[sched.Folded, ReadAfterLd]>;
2693 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2694 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2695 OpcodeStr##_.Suffix##
2696 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2697 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2698 (OpNode _.ScalarIntMemCPat:$src1,
2699 (i32 imm:$src2))))]>,
2700 EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
// Handle the packed fpclass instruction:
//   mask = fpclass(reg_vec, reg_vec, imm)
//   mask = fpclass(reg_vec, mem_vec, imm)
//   mask = fpclass(reg_vec, broadcast(eltVt), imm)
// `mem`/`broadcast` are mnemonic suffix strings (e.g. "{x}"/"{l}") used to
// disambiguate the memory operand size for the assembler.
2707 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2708 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2709 string mem, string broadcast>{
2710 let ExeDomain = _.ExeDomain in {
2711 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2712 (ins _.RC:$src1, i32u8imm:$src2),
2713 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2714 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2715 (i32 imm:$src2)))]>,
2717 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2718 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2719 OpcodeStr##_.Suffix#
2720 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2721 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2722 (OpNode (_.VT _.RC:$src1),
2723 (i32 imm:$src2))))]>,
2724 EVEX_K, Sched<[sched]>;
2725 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2726 (ins _.MemOp:$src1, i32u8imm:$src2),
2727 OpcodeStr##_.Suffix##mem#
2728 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2729 [(set _.KRC:$dst,(OpNode
2730 (_.VT (bitconvert (_.LdFrag addr:$src1))),
2731 (i32 imm:$src2)))]>,
2732 Sched<[sched.Folded, ReadAfterLd]>;
2733 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2734 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2735 OpcodeStr##_.Suffix##mem#
2736 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2737 [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
2738 (_.VT (bitconvert (_.LdFrag addr:$src1))),
2739 (i32 imm:$src2))))]>,
2740 EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
// Broadcast-from-memory forms (EVEX_B): one scalar element splatted.
2741 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2742 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2743 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2744 _.BroadcastStr##", $dst|$dst, ${src1}"
2745 ##_.BroadcastStr##", $src2}",
2746 [(set _.KRC:$dst,(OpNode
2747 (_.VT (X86VBroadcast
2748 (_.ScalarLdFrag addr:$src1))),
2749 (i32 imm:$src2)))]>,
2750 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2751 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2752 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2753 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2754 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2755 _.BroadcastStr##", $src2}",
2756 [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
2757 (_.VT (X86VBroadcast
2758 (_.ScalarLdFrag addr:$src1))),
2759 (i32 imm:$src2))))]>,
2760 EVEX_B, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
// Width expansion for packed fpclass; "{z}"/"{y}"/"{x}" are the memory-size
// mnemonic suffixes passed through as `mem` above.
2764 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2765 bits<8> opc, SDNode OpNode,
2766 X86SchedWriteWidths sched, Predicate prd,
2768 let Predicates = [prd] in {
2769 defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
2770 _.info512, "{z}", broadcast>, EVEX_V512;
2772 let Predicates = [prd, HasVLX] in {
2773 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
2774 _.info128, "{x}", broadcast>, EVEX_V128;
2775 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
2776 _.info256, "{y}", broadcast>, EVEX_V256;
// Combines packed (PS/PD, opcVec) and scalar (SS/SD, opcScalar) fpclass
// variants; instantiated once below as VFPCLASS (0x66 packed, 0x67 scalar,
// gated on HasDQI).
2780 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2781 bits<8> opcScalar, SDNode VecOpNode,
2782 SDNode ScalarOpNode, X86SchedWriteWidths sched,
2784 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2785 VecOpNode, sched, prd, "{l}">,
2786 EVEX_CD8<32, CD8VF>;
2787 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2788 VecOpNode, sched, prd, "{q}">,
2789 EVEX_CD8<64, CD8VF> , VEX_W;
2790 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2791 sched.Scl, f32x_info, prd>,
2792 EVEX_CD8<32, CD8VT1>;
2793 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2794 sched.Scl, f64x_info, prd>,
2795 EVEX_CD8<64, CD8VT1>, VEX_W;
2798 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
2799 X86Vfpclasss, SchedWriteFCmp, HasDQI>,
2800 AVX512AIi8Base, EVEX;
2802 //-----------------------------------------------------------------
2803 // Mask register copy, including
2804 // - copy between mask registers
2805 // - load/store mask registers
2806 // - copy from GPR to mask register and vice versa
// KMOV k<->k (kk), load (km) and store (mk) forms for one mask width.
// The kk form is a pure register move (isMoveReg, no side effects).
2808 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2809 string OpcodeStr, RegisterClass KRC,
2810 ValueType vvt, X86MemOperand x86memop> {
2811 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2812 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2813 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2815 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2816 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2817 [(set KRC:$dst, (vvt (load addr:$src)))]>,
2819 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2820 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2821 [(store KRC:$src, addr:$dst)]>,
2822 Sched<[WriteStore]>;
// KMOV between a mask register and a general-purpose register, both
// directions (kr: GPR->mask, rk: mask->GPR). No patterns; selected via
// COPY_TO_REGCLASS patterns elsewhere.
2825 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2827 RegisterClass KRC, RegisterClass GRC> {
2828 let hasSideEffects = 0 in {
2829 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2830 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2832 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2833 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
// KMOVB needs DQI, KMOVW is baseline AVX-512, KMOVD/KMOVQ need BWI.
// Note KMOVB/KMOVW move through GR32 (no 8/16-bit GPR forms).
2838 let Predicates = [HasDQI] in
2839 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2840 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2843 let Predicates = [HasAVX512] in
2844 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2845 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2848 let Predicates = [HasBWI] in {
2849 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2851 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2853 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2855 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2859 // GR from/to mask register
// i16/i8 values are widened to i32 via INSERT_SUBREG before copying into
// the mask class, since KMOVB/KMOVW only have GR32 forms.
2860 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2861 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2862 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2863 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2865 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2866 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2867 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2868 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// zext of the bitconverted mask uses KMOV*rk, which zeroes the upper bits;
// anyext can use a plain cross-class copy.
2870 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2871 (KMOVWrk VK16:$src)>;
2872 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2873 (COPY_TO_REGCLASS VK16:$src, GR32)>;
2875 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2876 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2877 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2878 (COPY_TO_REGCLASS VK8:$src, GR32)>;
// 32/64-bit masks map 1:1 onto GR32/GR64 — plain register-class copies.
2880 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2881 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2882 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2883 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2884 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2885 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2886 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2887 (COPY_TO_REGCLASS VK64:$src, GR64)>;
// Sub-byte mask load/store goes through KMOVB (DQI); the narrow mask is
// copied into/out of VK8 around the 8-bit memory access.
2890 let Predicates = [HasDQI] in {
2891 def : Pat<(store VK1:$src, addr:$dst),
2892 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2894 def : Pat<(v1i1 (load addr:$src)),
2895 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2896 def : Pat<(v2i1 (load addr:$src)),
2897 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2898 def : Pat<(v4i1 (load addr:$src)),
2899 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
// Without DQI there is no KMOVB, so an i8 mask load is done with a
// zero-extending GPR load and a cross-class copy.
2902 let Predicates = [HasAVX512] in {
2903 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2904 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
// scalar_to_vector of a GPR into any mask width is just a register-class
// copy (GR8 is first widened to i32 via INSERT_SUBREG).
2907 let Predicates = [HasAVX512] in {
2908 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2909 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2910 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2912 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2913 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2916 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2917 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2918 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2919 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2920 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2921 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2922 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2924 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2925 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2928 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2932 // Mask unary operation
// One register-to-register mask unary op (e.g. KNOT) gated on `prd`.
2934 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2935 RegisterClass KRC, SDPatternOperator OpNode,
2936 X86FoldableSchedWrite sched, Predicate prd> {
2937 let Predicates = [prd] in
2938 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2939 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2940 [(set KRC:$dst, (OpNode KRC:$src))]>,
// B/W/D/Q width expansion for a mask unary op; B needs DQI, D/Q need BWI.
2944 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2945 SDPatternOperator OpNode,
2946 X86FoldableSchedWrite sched> {
2947 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2948 sched, HasDQI>, VEX, PD;
2949 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2950 sched, HasAVX512>, VEX, PS;
2951 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2952 sched, HasBWI>, VEX, PD, VEX_W;
2953 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2954 sched, HasBWI>, VEX, PS, VEX_W;
2957 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2958 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2960 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
2961 let Predicates = [HasAVX512, NoDQI] in
2962 def : Pat<(vnot VK8:$src),
2963 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
// Sub-byte masks are likewise widened to VK16 and inverted with KNOTW.
2965 def : Pat<(vnot VK4:$src),
2966 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2967 def : Pat<(vnot VK2:$src),
2968 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2970 // Mask binary operation
2971 // - KAND, KANDN, KOR, KXNOR, KXOR
// One reg/reg/reg mask binary op; IsCommutable propagates to isCommutable
// so the register allocator / two-address pass may swap the sources.
2972 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2973 RegisterClass KRC, SDPatternOperator OpNode,
2974 X86FoldableSchedWrite sched, Predicate prd,
2976 let Predicates = [prd], isCommutable = IsCommutable in
2977 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2978 !strconcat(OpcodeStr,
2979 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2980 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
// B/W/D/Q expansion for a mask binary op. The W predicate is overridable
// (prdW) so KADD below can require HasDQI instead of HasAVX512.
2984 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2985 SDPatternOperator OpNode,
2986 X86FoldableSchedWrite sched, bit IsCommutable,
2987 Predicate prdW = HasAVX512> {
2988 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2989 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2990 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2991 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2992 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2993 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2994 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2995 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
// Composite fragments for KANDN/KXNOR: scalar (not) and vector (vnot) forms.
2998 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
2999 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
3000 // These nodes use 'vnot' instead of 'not' to support vectors.
3001 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3002 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3004 // TODO - do we need a X86SchedWriteWidths::KMASK type?
// KANDN is the only non-commutable op (operand order matters for the NOT).
3005 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
3006 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
3007 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
3008 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
3009 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
3010 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
// Lower mask ops on narrow mask types (VK1/VK2/VK4, and VK8 without DQI)
// by widening both operands to VK16 and using the 16-bit instruction Inst.
// VOpNode is the vector-not fragment, OpNode the scalar-not equivalent.
3012 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
3014 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
3015 // for the DQI set, this type is legal and KxxxB instruction is used
3016 let Predicates = [NoDQI] in
3017 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3019 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3020 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3022 // All types smaller than 8 bits require conversion anyway
3023 def : Pat<(OpNode VK1:$src1, VK1:$src2),
3024 (COPY_TO_REGCLASS (Inst
3025 (COPY_TO_REGCLASS VK1:$src1, VK16),
3026 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3027 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3028 (COPY_TO_REGCLASS (Inst
3029 (COPY_TO_REGCLASS VK2:$src1, VK16),
3030 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>;
3031 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3032 (COPY_TO_REGCLASS (Inst
3033 (COPY_TO_REGCLASS VK4:$src1, VK16),
3034 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>;
3037 defm : avx512_binop_pat<and, and, KANDWrr>;
3038 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
3039 defm : avx512_binop_pat<or, or, KORWrr>;
3040 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
3041 defm : avx512_binop_pat<xor, xor, KXORWrr>;
// KUNPCK: concatenate two narrower masks into one wider mask. Note the
// pattern places $src2 first — concat_vectors' low half becomes the
// instruction's second source.
3044 multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
3045 RegisterClass KRCSrc, X86FoldableSchedWrite sched,
3047 let Predicates = [prd] in {
3048 let hasSideEffects = 0 in
3049 def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
3050 (ins KRC:$src1, KRC:$src2),
3051 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3052 VEX_4V, VEX_L, Sched<[sched]>;
3054 def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
3055 (!cast<Instruction>(NAME##rr)
3056 (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
3057 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
3061 defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD;
3062 defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS;
3063 defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W;
// Mask test instruction (KORTEST/KTEST): compares two masks and sets
// EFLAGS only — no destination register.
3066 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3067 SDNode OpNode, X86FoldableSchedWrite sched,
3069 let Predicates = [prd], Defs = [EFLAGS] in
3070 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3071 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3072 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
// B/W/D/Q expansion for mask test ops; prdW lets KTEST require HasDQI for
// its W form while KORTEST uses the baseline HasAVX512 default.
3076 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3077 X86FoldableSchedWrite sched,
3078 Predicate prdW = HasAVX512> {
3079 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3081 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3083 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3085 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3089 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3090 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3091 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
// Mask shift by immediate (KSHIFTL/KSHIFTR) for one mask width.
3094 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3095 SDNode OpNode, X86FoldableSchedWrite sched> {
3096 let Predicates = [HasAVX512] in
3097 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3098 !strconcat(OpcodeStr,
3099 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3100 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
// Width expansion for mask shifts: W/B share opcode opc1 (B gated on DQI),
// D/Q share opc2 (gated on BWI).
3104 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3105 SDNode OpNode, X86FoldableSchedWrite sched> {
3106 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3107 sched>, VEX, TAPD, VEX_W;
3108 let Predicates = [HasDQI] in
3109 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3111 let Predicates = [HasBWI] in {
3112 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3113 sched>, VEX, TAPD, VEX_W;
3114 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3119 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3120 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3122 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Used when VLX is unavailable: widen both sources into ZMM via
// INSERT_SUBREG over IMPLICIT_DEF, run the 512-bit compare, and the result
// is narrowed back to Narrow.KRC. (Name carries an "axv512" typo; it is
// kept because instantiation sites elsewhere in the file reference it.)
3123 multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
3124 X86VectorVTInfo Narrow,
3125 X86VectorVTInfo Wide> {
3126 def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
3127 (Narrow.VT Narrow.RC:$src2))),
3129 (!cast<Instruction>(InstStr#"Zrr")
3130 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3131 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
// Masked form: the narrow mask is widened to Wide.KRC for the k-variant.
3134 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3135 (Frag (Narrow.VT Narrow.RC:$src1),
3136 (Narrow.VT Narrow.RC:$src2)))),
3138 (!cast<Instruction>(InstStr#"Zrrk")
3139 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3140 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3141 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3145 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Same widening trick as above, but for the predicate (VPCMP) compares:
// the condition code is re-encoded via Frag.OperandTransform.
3146 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag,
3148 X86VectorVTInfo Narrow,
3149 X86VectorVTInfo Wide> {
3150 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3151 (Narrow.VT Narrow.RC:$src2), cond)),
3153 (!cast<Instruction>(InstStr##Zrri)
3154 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3155 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3156 (Frag.OperandTransform $cc)), Narrow.KRC)>;
3158 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3159 (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3160 (Narrow.VT Narrow.RC:$src2),
3162 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3163 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3164 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3165 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3166 (Frag.OperandTransform $cc)), Narrow.KRC)>;
3169 // Same as above, but for fp types which don't use PatFrags.
// FP compares (VCMPPS/VCMPPD) carry their condition code directly as an
// immediate (imm:$cc), so no OperandTransform is needed; otherwise this
// mirrors the integer widening lowering: widen operands into a 512-bit
// register, compare, shrink the result mask back to Narrow.KRC.
3170 multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
3171 X86VectorVTInfo Narrow,
3172 X86VectorVTInfo Wide> {
3173 def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
3174 (Narrow.VT Narrow.RC:$src2), imm:$cc)),
3176 (!cast<Instruction>(InstStr##Zrri)
3177 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3178 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3179 imm:$cc), Narrow.KRC)>;
// Masked form: fold (and $mask, (cmp ...)) into the "Zrrik" instruction.
3181 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3182 (OpNode (Narrow.VT Narrow.RC:$src1),
3183 (Narrow.VT Narrow.RC:$src2), imm:$cc))),
3184 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3185 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3186 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3187 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3188 imm:$cc), Narrow.KRC)>;
// Instantiate the widening compare lowerings for targets with AVX-512
// but no VLX: 128/256-bit d/q integer compares and ps/pd FP compares are
// all performed on the corresponding 512-bit instruction.
3191 let Predicates = [HasAVX512, NoVLX] in {
3192 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3193 // increase the pattern complexity the way an immediate would.
3194 let AddedComplexity = 2 in {
3195 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
3196 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;
3198 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>;
3199 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;
3201 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>;
3202 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;
3204 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
3205 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
// Immediate-CC integer compares (signed and unsigned variants).
3208 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v8i32x_info, v16i32_info>;
3209 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>;
3211 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v4i32x_info, v16i32_info>;
3212 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>;
3214 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
3215 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>;
3217 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
3218 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>;
// FP compares with immediate condition code.
3220 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
3221 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
3222 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
3223 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
// Same widening lowerings for the byte/word element compares, which
// require BWI for the 512-bit b/w compare instructions.
3226 let Predicates = [HasBWI, NoVLX] in {
3227 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3228 // increase the pattern complexity the way an immediate would.
3229 let AddedComplexity = 2 in {
3230 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
3231 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;
3233 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
3234 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;
3236 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
3237 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;
3239 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
3240 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
// Immediate-CC byte/word compares (signed and unsigned variants).
3243 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v32i8x_info, v64i8_info>;
3244 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>;
3246 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v16i8x_info, v64i8_info>;
3247 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>;
3249 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v16i16x_info, v32i16_info>;
3250 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>;
3252 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v8i16x_info, v32i16_info>;
3253 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>;
3256 // Mask setting all 0s or 1s
// Pseudo-instruction that materializes an all-zeros or all-ones mask
// register. Rematerializable and as cheap as a move, so the register
// allocator may recreate it instead of spilling; expanded later (it is
// isPseudo with opcode 0).
3257 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3258 let Predicates = [HasAVX512] in
3259 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3260 SchedRW = [WriteZero] in
3261 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3262 [(set KRC:$dst, (VT Val))]>;
// Instantiate the mask set-op pseudo for 16/32/64-bit mask widths.
3265 multiclass avx512_mask_setop_w<PatFrag Val> {
3266 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3267 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3268 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
// KSET0{W,D,Q} = all-zeros mask; KSET1{W,D,Q} = all-ones mask.
3271 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3272 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3274 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Sub-16-bit all-zeros/all-ones masks are produced with the 16-bit
// KSET0W/KSET1W pseudo and then reinterpreted in the narrower mask
// register class via COPY_TO_REGCLASS.
3275 let Predicates = [HasAVX512] in {
3276 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3277 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3278 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3279 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3280 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3281 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3282 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3283 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3286 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions at index 0 are register-class reinterpretations only —
// no instruction is emitted, just a COPY_TO_REGCLASS between the narrow
// and wide mask register classes.
3287 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3288 RegisterClass RC, ValueType VT> {
3289 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3290 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3292 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3293 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
// Instantiate the index-0 kmask insert/extract patterns for every
// (narrow, wide) mask-width pair from v1i1 up to v64i1.
3295 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3296 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3297 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3298 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3299 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3300 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3302 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3303 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3304 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3305 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3306 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3308 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3309 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3310 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3311 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3313 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3314 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3315 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3317 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3318 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3320 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3322 //===----------------------------------------------------------------------===//
3323 // AVX-512 - Aligned and unaligned load and store
// Vector load multiclass: defines the rr (reg-reg move), rm (load),
// and their masked (k) / zero-masked (kz) variants, plus patterns
// mapping masked_load (mload) onto the rmk/rmkz forms.
//   ld_frag     — unmasked load fragment (aligned or unaligned).
//   mload       — masked-load PatFrag matched by the trailing patterns.
//   EVEX2VEXOvrd— target VEX instruction name for EVEX->VEX compression.
//   NoRMPattern — suppress the unmasked load pattern when set.
//   SelectOprr  — select node used for the register-form masked patterns
//                 (vselect by default; null_frag disables them).
// NOTE(review): embedded numbering has gaps (e.g. 3342, 3345, 3350,
// 3361, 3373, 3381, 3384, 3387, 3391-3392) — several pattern operand
// lines and closing braces are missing from this listing.
3326 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3327 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3328 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3329 bit NoRMPattern = 0,
3330 SDPatternOperator SelectOprr = vselect> {
3331 let hasSideEffects = 0 in {
3332 let isMoveReg = 1 in
3333 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3334 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3335 _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3336 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3337 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3338 (ins _.KRCWM:$mask, _.RC:$src),
3339 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3340 "${dst} {${mask}} {z}, $src}"),
3341 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3343 _.ImmAllZerosV)))], _.ExeDomain>,
3344 EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3346 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3347 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3348 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3349 !if(NoRMPattern, [],
3351 (_.VT (bitconvert (ld_frag addr:$src))))]),
3352 _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3353 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Merge-masking forms tie $src0 to $dst (pass-through lanes).
3355 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3356 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3357 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3358 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3359 "${dst} {${mask}}, $src1}"),
3360 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3362 (_.VT _.RC:$src0))))], _.ExeDomain>,
3363 EVEX, EVEX_K, Sched<[Sched.RR]>;
3364 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3365 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3366 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3367 "${dst} {${mask}}, $src1}"),
3368 [(set _.RC:$dst, (_.VT
3369 (vselect _.KRCWM:$mask,
3370 (_.VT (bitconvert (ld_frag addr:$src1))),
3371 (_.VT _.RC:$src0))))], _.ExeDomain>,
3372 EVEX, EVEX_K, Sched<[Sched.RM]>;
3374 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3375 (ins _.KRCWM:$mask, _.MemOp:$src),
3376 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3377 "${dst} {${mask}} {z}, $src}",
3378 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3379 (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
3380 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
// Map masked_load with undef / zero / register pass-through onto the
// zero-masking (rmkz) and merge-masking (rmk) instructions.
3382 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3383 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3385 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3386 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3388 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3389 (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
3390 _.KRCWM:$mask, addr:$ptr)>;
// Aligned-load wrapper over avx512_load: the 512-bit (Z) form under the
// base predicate, and 256/128-bit (Z256/Z128) forms additionally gated
// on HasVLX, each using the aligned load fragment and the width-matched
// masked_load_aligned* PatFrag.
3393 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3394 AVX512VLVectorVTInfo _, Predicate prd,
3395 X86SchedWriteMoveLSWidths Sched,
3396 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3397 let Predicates = [prd] in
3398 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3399 _.info512.AlignedLdFrag, masked_load_aligned512,
3400 Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3402 let Predicates = [prd, HasVLX] in {
3403 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3404 _.info256.AlignedLdFrag, masked_load_aligned256,
3405 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3406 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3407 _.info128.AlignedLdFrag, masked_load_aligned128,
3408 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
// Unaligned-load wrapper over avx512_load: same Z/Z256/Z128 layout as
// the aligned variant, but uses the plain LdFrag and
// masked_load_unaligned, and forwards SelectOprr (so callers can pass
// null_frag to suppress the register-form masked patterns).
3412 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3413 AVX512VLVectorVTInfo _, Predicate prd,
3414 X86SchedWriteMoveLSWidths Sched,
3415 string EVEX2VEXOvrd, bit NoRMPattern = 0,
3416 SDPatternOperator SelectOprr = vselect> {
3417 let Predicates = [prd] in
3418 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3419 masked_load_unaligned, Sched.ZMM, "",
3420 NoRMPattern, SelectOprr>, EVEX_V512;
3422 let Predicates = [prd, HasVLX] in {
3423 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3424 masked_load_unaligned, Sched.YMM, EVEX2VEXOvrd#"Y",
3425 NoRMPattern, SelectOprr>, EVEX_V256;
3426 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3427 masked_load_unaligned, Sched.XMM, EVEX2VEXOvrd,
3428 NoRMPattern, SelectOprr>, EVEX_V128;
// Vector store multiclass: reversed-encoding register moves
// (rr_REV/rrk_REV/rrkz_REV, codegen-only, linked to the load forms via
// FoldGenData for unfolding), the mr store, the mrk masked store, a
// pattern mapping the masked-store PatFrag (mstore) onto mrk, and
// "{OpcodeStr}.s" assembler aliases that force the reversed encodings.
// NOTE(review): embedded numbering has gaps (3449, 3456-3458, 3470-3471,
// 3475, 3485-3486) — the Sched<...> annotations on rrk_REV/rrkz_REV/mrk
// and closing braces appear to be missing from this listing.
3432 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3433 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3434 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3435 bit NoMRPattern = 0> {
3436 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3437 let isMoveReg = 1 in
3438 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3439 OpcodeStr # "\t{$src, $dst|$dst, $src}",
3440 [], _.ExeDomain>, EVEX,
3441 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3442 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3443 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3444 (ins _.KRCWM:$mask, _.RC:$src),
3445 OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3446 "${dst} {${mask}}, $src}",
3447 [], _.ExeDomain>, EVEX, EVEX_K,
3448 FoldGenData<BaseName#_.ZSuffix#rrk>,
3450 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3451 (ins _.KRCWM:$mask, _.RC:$src),
3452 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3453 "${dst} {${mask}} {z}, $src}",
3454 [], _.ExeDomain>, EVEX, EVEX_KZ,
3455 FoldGenData<BaseName#_.ZSuffix#rrkz>,
// Unmasked store; pattern suppressed when NoMRPattern is set.
3459 let hasSideEffects = 0, mayStore = 1 in
3460 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3461 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3462 !if(NoMRPattern, [],
3463 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3464 _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3465 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3466 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3467 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3468 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3469 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
// Masked store is selected via this pattern (mrk itself has no pattern).
3472 def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
3473 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3474 _.KRCWM:$mask, _.RC:$src)>;
3476 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3477 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3478 _.RC:$dst, _.RC:$src), 0>;
3479 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3480 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3481 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3482 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3483 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3484 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
// Unaligned-store wrapper over avx512_store: 512-bit form under the base
// predicate, 256/128-bit forms additionally gated on HasVLX; all use the
// generic `store` fragment and masked_store_unaligned.
3487 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3488 AVX512VLVectorVTInfo _, Predicate prd,
3489 X86SchedWriteMoveLSWidths Sched,
3490 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3491 let Predicates = [prd] in
3492 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3493 masked_store_unaligned, Sched.ZMM, "",
3494 NoMRPattern>, EVEX_V512;
3495 let Predicates = [prd, HasVLX] in {
3496 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3497 masked_store_unaligned, Sched.YMM,
3498 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3499 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3500 masked_store_unaligned, Sched.XMM, EVEX2VEXOvrd,
3501 NoMRPattern>, EVEX_V128;
// Aligned-store wrapper over avx512_store: same Z/Z256/Z128 layout as
// the unaligned variant, but uses `alignedstore` and the width-matched
// masked_store_aligned* PatFrags.
3505 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3506 AVX512VLVectorVTInfo _, Predicate prd,
3507 X86SchedWriteMoveLSWidths Sched,
3508 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3509 let Predicates = [prd] in
3510 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3511 masked_store_aligned512, Sched.ZMM, "",
3512 NoMRPattern>, EVEX_V512;
3514 let Predicates = [prd, HasVLX] in {
3515 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3516 masked_store_aligned256, Sched.YMM,
3517 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3518 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3519 masked_store_aligned128, Sched.XMM, EVEX2VEXOvrd,
3520 NoMRPattern>, EVEX_V128;
// Concrete load/store instruction families. Loads are opcode 0x28/0x10
// (FP) or 0x6F (integer); stores are 0x29/0x11 (FP) or 0x7F (integer).
// Byte/word forms (VMOVDQU8/16) require BWI; the others only AVX-512.
// NOTE(review): embedded numbering gaps at 3550/3553 and 3558/3561
// suggest the EVEX2VEX override-name arguments for VMOVDQA32/VMOVDQA64
// are missing from this listing.
3524 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3525 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3526 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3527 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3528 PS, EVEX_CD8<32, CD8VF>;
3530 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3531 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3532 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3533 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3534 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3536 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3537 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3538 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3539 SchedWriteFMoveLS, "VMOVUPS">,
3540 PS, EVEX_CD8<32, CD8VF>;
3542 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3543 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3544 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3545 SchedWriteFMoveLS, "VMOVUPD">,
3546 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3548 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3549 HasAVX512, SchedWriteVecMoveLS,
3551 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3552 HasAVX512, SchedWriteVecMoveLS,
3554 PD, EVEX_CD8<32, CD8VF>;
3556 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3557 HasAVX512, SchedWriteVecMoveLS,
3559 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3560 HasAVX512, SchedWriteVecMoveLS,
3562 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3564 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3565 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3566 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3567 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3568 XD, EVEX_CD8<8, CD8VF>;
3570 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3571 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3572 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3573 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3574 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3576 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3577 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3578 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3579 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3580 XS, EVEX_CD8<32, CD8VF>;
3582 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3583 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3584 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3585 SchedWriteVecMoveLS, "VMOVDQU">,
3586 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3588 // Special instructions to help with spilling when we don't have VLX. We need
3589 // to load or store from a ZMM register instead. These are converted in
3590 // expandPostRAPseudos.
// Load pseudos: no patterns; rematerializable and foldable like real
// loads so the allocator treats them equivalently.
3591 let isReMaterializable = 1, canFoldAsLoad = 1,
3592 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3593 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3594 "", []>, Sched<[WriteFLoadX]>;
3595 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3596 "", []>, Sched<[WriteFLoadY]>;
3597 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3598 "", []>, Sched<[WriteFLoadX]>;
3599 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3600 "", []>, Sched<[WriteFLoadY]>;
// Matching store pseudos.
3603 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3604 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3605 "", []>, Sched<[WriteFStoreX]>;
3606 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3607 "", []>, Sched<[WriteFStoreY]>;
3608 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3609 "", []>, Sched<[WriteFStoreX]>;
3610 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3611 "", []>, Sched<[WriteFStoreY]>;
// vselect with the zero vector in the TRUE position: implement by
// inverting the mask with KNOT and using a zero-masked move, so the
// masked-off lanes become zero.
// NOTE(review): embedded numbering skips 3617-3618 — the remainder of
// the first pattern's result (the VK16WM copy and VR512 operand) is not
// visible in this listing.
3614 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
3615 (v8i64 VR512:$src))),
3616 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3619 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3620 (v16i32 VR512:$src))),
3621 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3623 // These patterns exist to prevent the above patterns from introducing a second
3624 // mask inversion when one already exists.
// (xor mask, all-ones) is already an inversion, so use the mask operand
// of the xor directly instead of emitting another KNOT.
3625 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3626 (bc_v8i64 (v16i32 immAllZerosV)),
3627 (v8i64 VR512:$src))),
3628 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3629 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3630 (v16i32 immAllZerosV),
3631 (v16i32 VR512:$src))),
3632 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lower a narrow masked vselect (blend) without VLX by widening both
// vector operands into 512-bit registers, performing a masked move
// (rrk: merge into $src0; rrkz: zero-mask), then extracting the narrow
// result. The narrow write-mask is moved into the wide mask class first.
// NOTE(review): embedded numbering skips 3638-3639, 3644-3645 and
// 3648-3649, 3653-3655 — the EXTRACT_SUBREG wrappers that shrink the
// wide result back to Narrow appear truncated from this listing.
3634 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3635 X86VectorVTInfo Wide> {
3636 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3637 Narrow.RC:$src1, Narrow.RC:$src0)),
3640 (!cast<Instruction>(InstrStr#"rrk")
3641 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3642 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3643 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3646 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3647 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3650 (!cast<Instruction>(InstrStr#"rrkz")
3651 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3652 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3656 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3657 // available. Use a 512-bit operation and extract.
// d/q-element blends need only AVX-512; b/w-element blends need BWI for
// the 512-bit VMOVDQU8/16 masked moves.
3658 let Predicates = [HasAVX512, NoVLX] in {
3659 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3660 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3661 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3662 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3664 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3665 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3666 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3667 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3670 let Predicates = [HasBWI, NoVLX] in {
3671 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3672 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3674 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3675 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
// Canonicalize unmasked integer vector stores of all element widths
// onto the 64-bit-element move instructions (VMOVDQA64/VMOVDQU64) —
// for a plain store the element size is irrelevant.
3678 let Predicates = [HasAVX512] in {
3680 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3681 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3682 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3683 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3684 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3685 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3686 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3687 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3688 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3689 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3690 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3691 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
// Same canonicalization for the 128/256-bit forms, available with VLX.
3694 let Predicates = [HasVLX] in {
3696 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3697 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3698 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3699 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3700 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3701 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3702 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3703 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3704 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3705 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3706 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3707 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3710 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3711 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3712 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3713 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3714 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3715 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3716 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3717 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3718 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3719 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3720 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3721 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
// Fold a masked select of (extract_subvector From, 0) into a masked
// register move: EXTRACT_SUBREG takes the low subregister of the source,
// then InstrStr's rrk (merge with $src0) or rrkz (zero-mask) applies the
// mask. Cast describes the value type the select operates on.
// NOTE(review): embedded numbering skips 3727, 3730, 3734, 3736, 3741,
// 3743-3745 — bitconvert wrappers / the $src0 operand and the closing
// brace appear truncated from this listing.
3724 multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
3725 X86VectorVTInfo To, X86VectorVTInfo Cast> {
3726 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
3728 (To.VT (extract_subvector
3729 (From.VT From.RC:$src), (iPTR 0)))),
3731 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
3732 Cast.RC:$src0, Cast.KRCWM:$mask,
3733 (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
3735 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
3737 (To.VT (extract_subvector
3738 (From.VT From.RC:$src), (iPTR 0)))),
3739 Cast.ImmAllZerosV)),
3740 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
3742 (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
// Instantiate masked-extract folding for every (source width, result
// width, cast type) combination: 256->128, 512->128, and 512->256, for
// both integer (VMOVDQA32/64) and FP (VMOVAPS/VMOVAPD) masked moves.
3746 let Predicates = [HasVLX] in {
3747 // A masked extract from the first 128-bits of a 256-bit vector can be
3748 // implemented with masked move.
3749 defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info, v2i64x_info, v2i64x_info>;
3750 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info, v4i32x_info, v2i64x_info>;
3751 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
3752 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info, v16i8x_info, v2i64x_info>;
3753 defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info, v2i64x_info, v4i32x_info>;
3754 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info, v4i32x_info, v4i32x_info>;
3755 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
3756 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info, v16i8x_info, v4i32x_info>;
3757 defm : masked_move_for_extract<"VMOVAPDZ128", v4f64x_info, v2f64x_info, v2f64x_info>;
3758 defm : masked_move_for_extract<"VMOVAPDZ128", v8f32x_info, v4f32x_info, v2f64x_info>;
3759 defm : masked_move_for_extract<"VMOVAPSZ128", v4f64x_info, v2f64x_info, v4f32x_info>;
3760 defm : masked_move_for_extract<"VMOVAPSZ128", v8f32x_info, v4f32x_info, v4f32x_info>;
3762 // A masked extract from the first 128-bits of a 512-bit vector can be
3763 // implemented with masked move.
3764 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info, v2i64x_info, v2i64x_info>;
3765 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
3766 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
3767 defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info, v16i8x_info, v2i64x_info>;
3768 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info, v2i64x_info, v4i32x_info>;
3769 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
3770 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
3771 defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info, v16i8x_info, v4i32x_info>;
3772 defm : masked_move_for_extract<"VMOVAPDZ128", v8f64_info, v2f64x_info, v2f64x_info>;
3773 defm : masked_move_for_extract<"VMOVAPDZ128", v16f32_info, v4f32x_info, v2f64x_info>;
3774 defm : masked_move_for_extract<"VMOVAPSZ128", v8f64_info, v2f64x_info, v4f32x_info>;
3775 defm : masked_move_for_extract<"VMOVAPSZ128", v16f32_info, v4f32x_info, v4f32x_info>;
3777 // A masked extract from the first 256-bits of a 512-bit vector can be
3778 // implemented with masked move.
3779 defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info, v4i64x_info, v4i64x_info>;
3780 defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info, v4i64x_info>;
3781 defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
3782 defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info, v32i8x_info, v4i64x_info>;
3783 defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info, v4i64x_info, v8i32x_info>;
3784 defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info, v8i32x_info>;
3785 defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
3786 defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info, v32i8x_info, v8i32x_info>;
3787 defm : masked_move_for_extract<"VMOVAPDZ256", v8f64_info, v4f64x_info, v4f64x_info>;
3788 defm : masked_move_for_extract<"VMOVAPDZ256", v16f32_info, v8f32x_info, v4f64x_info>;
3789 defm : masked_move_for_extract<"VMOVAPSZ256", v8f64_info, v4f64x_info, v8f32x_info>;
3790 defm : masked_move_for_extract<"VMOVAPSZ256", v16f32_info, v8f32x_info, v8f32x_info>;
3793 // Move Int Doubleword to Packed Double Int
// GPR <-> XMM / scalar-FP moves: VMOVD (32-bit) and VMOVQ (64-bit)
// forms, including codegen-only bitconvert helpers for FR64X.
// NOTE(review): embedded numbering skips 3798, 3803, 3808, 3813, 3834 —
// the first pattern line of several defs and the isCodeGenOnly closing
// brace are not visible in this listing.
3795 let ExeDomain = SSEPackedInt in {
3796 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3797 "vmovd\t{$src, $dst|$dst, $src}",
3799 (v4i32 (scalar_to_vector GR32:$src)))]>,
3800 EVEX, Sched<[WriteVecMoveFromGpr]>;
3801 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3802 "vmovd\t{$src, $dst|$dst, $src}",
3804 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3805 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3806 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3807 "vmovq\t{$src, $dst|$dst, $src}",
3809 (v2i64 (scalar_to_vector GR64:$src)))]>,
3810 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Load form kept for disassembly only (no pattern).
3811 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3812 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3814 "vmovq\t{$src, $dst|$dst, $src}", []>,
3815 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Bitcast moves between GR64 and the scalar FP register class.
3816 let isCodeGenOnly = 1 in {
3817 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3818 "vmovq\t{$src, $dst|$dst, $src}",
3819 [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3820 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3821 def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
3822 "vmovq\t{$src, $dst|$dst, $src}",
3823 [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
3824 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3825 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3826 "vmovq\t{$src, $dst|$dst, $src}",
3827 [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3828 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3829 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
3830 "vmovq\t{$src, $dst|$dst, $src}",
3831 [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>,
3832 EVEX, VEX_W, Sched<[WriteVecStore]>,
3833 EVEX_CD8<64, CD8VT1>;
3835 } // ExeDomain = SSEPackedInt
// Remaining scalar GPR<->XMM move forms: GR32<->FR32X bitcasts
// (codegen-only), XMM->GR32/GR64 extracts, XMM->mem stores, the
// mem->XMM vmovq load, and the "vmovd"-spelled aliases for the 64-bit
// forms accepted by some assemblers.
// NOTE(review): extraction dropped interior lines in several defs below
// (e.g. 3857, 3873, 3887, 3893, 3905, 3920, 3922 in the embedded
// numbering) -- verify the pattern/ins lists against upstream.
3837 // Move Int Doubleword to Single Scalar
3839 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3840 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3841 "vmovd\t{$src, $dst|$dst, $src}",
3842 [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3843 EVEX, Sched<[WriteVecMoveFromGpr]>;
3845 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
3846 "vmovd\t{$src, $dst|$dst, $src}",
3847 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
3848 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3849 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3851 // Move doubleword from xmm register to r/m32
3853 let ExeDomain = SSEPackedInt in {
3854 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3855 "vmovd\t{$src, $dst|$dst, $src}",
3856 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3858 EVEX, Sched<[WriteVecMoveToGpr]>;
3859 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3860 (ins i32mem:$dst, VR128X:$src),
3861 "vmovd\t{$src, $dst|$dst, $src}",
3862 [(store (i32 (extractelt (v4i32 VR128X:$src),
3863 (iPTR 0))), addr:$dst)]>,
3864 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3865 } // ExeDomain = SSEPackedInt
3867 // Move quadword from xmm1 register to r/m64
3869 let ExeDomain = SSEPackedInt in {
3870 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3871 "vmovq\t{$src, $dst|$dst, $src}",
3872 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3874 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3875 Requires<[HasAVX512]>;
3877 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3878 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3879 "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3880 EVEX, VEX_W, Sched<[WriteVecStore]>,
3881 Requires<[HasAVX512, In64BitMode]>;
3883 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3884 (ins i64mem:$dst, VR128X:$src),
3885 "vmovq\t{$src, $dst|$dst, $src}",
3886 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3888 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3889 Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3891 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3892 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3894 "vmovq\t{$src, $dst|$dst, $src}", []>,
3895 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3896 } // ExeDomain = SSEPackedInt
// ".s" suffix selects the store-form (reversed-operand) encoding.
3898 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3899 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3901 // Move Scalar Single to Double Int
3903 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3904 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3906 "vmovd\t{$src, $dst|$dst, $src}",
3907 [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3908 EVEX, Sched<[WriteVecMoveToGpr]>;
3909 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3910 (ins i32mem:$dst, FR32X:$src),
3911 "vmovd\t{$src, $dst|$dst, $src}",
3912 [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>,
3913 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3914 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3916 // Move Quadword Int to Packed Quadword Int
3918 let ExeDomain = SSEPackedInt in {
3919 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3921 "vmovq\t{$src, $dst|$dst, $src}",
3923 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3924 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3925 } // ExeDomain = SSEPackedInt
3927 // Allow "vmovd" but print "vmovq".
3928 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3929 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3930 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3931 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3933 //===----------------------------------------------------------------------===//
3934 // AVX-512 MOVSS, MOVSD
3935 //===----------------------------------------------------------------------===//
// Template for the EVEX scalar moves. Produces:
//   rr/rrk/rrkz  - register merge forms (unmasked / merge-masked /
//                  zero-masked) selected via X86selects on a VK1WM mask;
//   rm/rmk/rmkz  - scalar loads (masked forms have no patterns and are
//                  hasSideEffects = 0);
//   mr/mrk       - scalar stores (masked store has no pattern).
// NOTE(review): embedded numbering shows dropped lines (e.g. 3950 in the
// rrkz pattern, 3978, 3988-3990 near the mrk def) -- confirm the zero
// operand and trailing EVEX_CD8 adjuncts against upstream.
3937 multiclass avx512_move_scalar<string asm, SDNode OpNode,
3938 X86VectorVTInfo _> {
3939 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3940 (ins _.RC:$src1, _.RC:$src2),
3941 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3942 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3943 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
3944 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3945 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3946 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3947 "$dst {${mask}} {z}, $src1, $src2}"),
3948 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3949 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3951 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3952 let Constraints = "$src0 = $dst" in
3953 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3954 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3955 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3956 "$dst {${mask}}, $src1, $src2}"),
3957 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3958 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3959 (_.VT _.RC:$src0))))],
3960 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3961 let canFoldAsLoad = 1, isReMaterializable = 1 in
3962 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3963 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3964 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3965 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3966 let mayLoad = 1, hasSideEffects = 0 in {
3967 let Constraints = "$src0 = $dst" in
3968 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3969 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3970 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3971 "$dst {${mask}}, $src}"),
3972 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3973 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3974 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3975 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3976 "$dst {${mask}} {z}, $src}"),
3977 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3979 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3980 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3981 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
3982 EVEX, Sched<[WriteFStore]>;
3983 let mayStore = 1, hasSideEffects = 0 in
3984 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3985 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
3986 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3987 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
// Instantiations: vmovss (XS prefix, 32-bit CD8) and vmovsd (XD + VEX_W,
// 64-bit CD8), both VEX_LIG (length-ignored).
3991 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
3992 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3994 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
3995 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Lower (OpNode src0, scalar_to_vector(select mask, s1, s2)) onto the
// masked rrk form, and the variant whose false value is the FP zero
// immediate onto the zero-masking rrkz form.
// NOTE(review): embedded numbering shows lines 4008-4009 and 4018-4019
// missing (the VK1WM mask COPY_TO_REGCLASS operands of the output dags);
// verify against upstream before modifying.
3998 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3999 PatLeaf ZeroFP, X86VectorVTInfo _> {
4001 def : Pat<(_.VT (OpNode _.RC:$src0,
4002 (_.VT (scalar_to_vector
4003 (_.EltVT (X86selects VK1WM:$mask,
4004 (_.EltVT _.FRC:$src1),
4005 (_.EltVT _.FRC:$src2))))))),
4006 (!cast<Instruction>(InstrStr#rrk)
4007 (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
4010 (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
4012 def : Pat<(_.VT (OpNode _.RC:$src0,
4013 (_.VT (scalar_to_vector
4014 (_.EltVT (X86selects VK1WM:$mask,
4015 (_.EltVT _.FRC:$src1),
4016 (_.EltVT ZeroFP))))))),
4017 (!cast<Instruction>(InstrStr#rrkz)
4020 (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
// Masked scalar store lowerings. All three multiclasses map a
// masked_store of the low element (possibly widened to 512 bits by
// codegen) onto the InstrStr#mrk masked scalar store; the mask is
// materialized into VK1WM, for the _subreg variants via
// INSERT_SUBREG of a GPR mask into an i32.
// NOTE(review): embedded numbering shows gaps (4029, 4033-4035,
// 4044, 4048-4050, 4059-4060, 4064, 4068, 4074-4075) -- the iPTR
// index operands and multiclass closing braces were dropped by
// extraction; verify against upstream.
4023 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4024 dag Mask, RegisterClass MaskRC> {
4026 def : Pat<(masked_store addr:$dst, Mask,
4027 (_.info512.VT (insert_subvector undef,
4028 (_.info128.VT _.info128.RC:$src),
4030 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4031 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4032 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// Variant where the scalar mask register needs widening via subreg
// insertion before the VK1WM copy.
4036 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4037 AVX512VLVectorVTInfo _,
4038 dag Mask, RegisterClass MaskRC,
4039 SubRegIndex subreg> {
4041 def : Pat<(masked_store addr:$dst, Mask,
4042 (_.info512.VT (insert_subvector undef,
4043 (_.info128.VT _.info128.RC:$src),
4045 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4046 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4047 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
4051 // This matches the more recent codegen from clang that avoids emitting a 512
4052 // bit masked store directly. Codegen will widen 128-bit masked store to 512
4053 // bits on AVX512F only targets.
4054 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4055 AVX512VLVectorVTInfo _,
4056 dag Mask512, dag Mask128,
4057 RegisterClass MaskRC,
4058 SubRegIndex subreg> {
4061 def : Pat<(masked_store addr:$dst, Mask512,
4062 (_.info512.VT (insert_subvector undef,
4063 (_.info128.VT _.info128.RC:$src),
4065 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4066 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4067 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
4069 // AVX512VL pattern.
4070 def : Pat<(masked_store addr:$dst, Mask128, (_.info128.VT _.info128.RC:$src)),
4071 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4072 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4073 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// Masked scalar load lowerings, mirroring the store multiclasses above:
// masked_load with an all-zeros passthru maps to InstrStr#rmkz
// (zero-masking), and masked_load whose passthru is X86vzmovl of an
// existing register maps to InstrStr#rmk (merge-masking).
// NOTE(review): embedded numbering shows gaps throughout (e.g. 4083,
// 4086-4087, 4092-4093, 4096-4099, 4109, 4112-4113, 4118-4119,
// 4122-4125, 4139, 4142-4143, 4148-4149, 4152-4153, 4159-4160,
// 4165-4167) -- extract indices, addr operands and closing braces were
// dropped by extraction; verify against upstream.
4076 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4077 dag Mask, RegisterClass MaskRC> {
4079 def : Pat<(_.info128.VT (extract_subvector
4080 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4081 (_.info512.VT (bitconvert
4082 (v16i32 immAllZerosV))))),
4084 (!cast<Instruction>(InstrStr#rmkz)
4085 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4088 def : Pat<(_.info128.VT (extract_subvector
4089 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4090 (_.info512.VT (insert_subvector undef,
4091 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4094 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4095 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// Variant widening the GPR mask via INSERT_SUBREG before the VK1WM copy.
4100 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4101 AVX512VLVectorVTInfo _,
4102 dag Mask, RegisterClass MaskRC,
4103 SubRegIndex subreg> {
4105 def : Pat<(_.info128.VT (extract_subvector
4106 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4107 (_.info512.VT (bitconvert
4108 (v16i32 immAllZerosV))))),
4110 (!cast<Instruction>(InstrStr#rmkz)
4111 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4114 def : Pat<(_.info128.VT (extract_subvector
4115 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4116 (_.info512.VT (insert_subvector undef,
4117 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4120 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4121 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4126 // This matches the more recent codegen from clang that avoids emitting a 512
4127 // bit masked load directly. Codegen will widen 128-bit masked load to 512
4128 // bits on AVX512F only targets.
4129 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4130 AVX512VLVectorVTInfo _,
4131 dag Mask512, dag Mask128,
4132 RegisterClass MaskRC,
4133 SubRegIndex subreg> {
4134 // AVX512F patterns.
4135 def : Pat<(_.info128.VT (extract_subvector
4136 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4137 (_.info512.VT (bitconvert
4138 (v16i32 immAllZerosV))))),
4140 (!cast<Instruction>(InstrStr#rmkz)
4141 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4144 def : Pat<(_.info128.VT (extract_subvector
4145 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4146 (_.info512.VT (insert_subvector undef,
4147 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4150 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4151 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4154 // AVX512Vl patterns.
4155 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4156 (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
4157 (!cast<Instruction>(InstrStr#rmkz)
4158 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4161 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4162 (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4163 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4164 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Instantiations of the move/store/load scalar-lowering multiclasses for
// VMOVSSZ (f32, 16-bit masks) and VMOVSDZ (f64, 8-bit masks). The Mask
// dags mirror the exact bit-manipulation sequences clang's codegen
// produces for a single-lane masked scalar op.
// NOTE(review): embedded numbering shows gaps in the subreg2
// instantiations (e.g. 4183-4184, 4189-4192, 4196-4198, 4215-4216,
4221-4224, 4228-4230) -- some insert/extract index operands were
// dropped by extraction; verify against upstream.
4168 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4169 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4171 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4172 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4173 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4174 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4175 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4176 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4178 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4179 (v16i1 (insert_subvector
4180 (v16i1 immAllZerosV),
4181 (v4i1 (extract_subvector
4182 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4185 (v4i1 (extract_subvector
4186 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4187 (iPTR 0))), GR8, sub_8bit>;
4188 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4193 (v16i1 immAllZerosV),
4194 (v2i1 (extract_subvector
4195 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4199 (v2i1 (extract_subvector
4200 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4201 (iPTR 0))), GR8, sub_8bit>;
4203 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4204 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4205 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4206 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4207 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4208 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4210 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4211 (v16i1 (insert_subvector
4212 (v16i1 immAllZerosV),
4213 (v4i1 (extract_subvector
4214 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4217 (v4i1 (extract_subvector
4218 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4219 (iPTR 0))), GR8, sub_8bit>;
4220 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4225 (v16i1 immAllZerosV),
4226 (v2i1 (extract_subvector
4227 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4231 (v2i1 (extract_subvector
4232 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4233 (iPTR 0))), GR8, sub_8bit>;
// Scalar FP select on a 1-bit mask: route through the masked VMOVSS/VMOVSD
// rrk forms, copying the FR operands into VR128X and extracting the result
// back. The IMPLICIT_DEF src0 is safe because the mask always selects one
// of the two explicit operands.
4235 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4236 (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
4237 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4238 (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
4240 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4241 (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
4242 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4243 (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
// Reversed-operand (store-form, opcode 0x11) register-register encodings
// of VMOVSS/VMOVSD with their masked variants. These are
// disassembly-only (isCodeGenOnly + ForceDisassemble, no patterns);
// FoldGenData links each to its canonical 0x10 twin for the unfolding
// tables.
// NOTE(review): embedded numbering shows line 4290 missing (the second
// ins operand of VMOVSDZrrkz_REV, presumably "VR128X:$src2),") -- verify
// against upstream.
4245 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4246 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4247 (ins VR128X:$src1, VR128X:$src2),
4248 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4249 []>, XS, EVEX_4V, VEX_LIG,
4250 FoldGenData<"VMOVSSZrr">,
4251 Sched<[SchedWriteFShuffle.XMM]>;
4253 let Constraints = "$src0 = $dst" in
4254 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4255 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4256 VR128X:$src1, VR128X:$src2),
4257 "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4258 "$dst {${mask}}, $src1, $src2}",
4259 []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4260 FoldGenData<"VMOVSSZrrk">,
4261 Sched<[SchedWriteFShuffle.XMM]>;
4263 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4264 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4265 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4266 "$dst {${mask}} {z}, $src1, $src2}",
4267 []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4268 FoldGenData<"VMOVSSZrrkz">,
4269 Sched<[SchedWriteFShuffle.XMM]>;
4271 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4272 (ins VR128X:$src1, VR128X:$src2),
4273 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4274 []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4275 FoldGenData<"VMOVSDZrr">,
4276 Sched<[SchedWriteFShuffle.XMM]>;
4278 let Constraints = "$src0 = $dst" in
4279 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4280 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4281 VR128X:$src1, VR128X:$src2),
4282 "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4283 "$dst {${mask}}, $src1, $src2}",
4284 []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4285 VEX_W, FoldGenData<"VMOVSDZrrk">,
4286 Sched<[SchedWriteFShuffle.XMM]>;
4288 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4289 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4291 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4292 "$dst {${mask}} {z}, $src1, $src2}",
4293 []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4294 VEX_W, FoldGenData<"VMOVSDZrrkz">,
4295 Sched<[SchedWriteFShuffle.XMM]>;
// ".s"-suffixed assembler aliases selecting the reversed-operand (_REV)
// encodings of vmovss/vmovsd, including the {k} and {k}{z} masked forms.
// The trailing 0 marks each alias as parse-only (never used for printing).
4298 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4299 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4300 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4301 "$dst {${mask}}, $src1, $src2}",
4302 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4303 VR128X:$src1, VR128X:$src2), 0>;
4304 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4305 "$dst {${mask}} {z}, $src1, $src2}",
4306 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4307 VR128X:$src1, VR128X:$src2), 0>;
4308 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4309 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4310 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4311 "$dst {${mask}}, $src1, $src2}",
4312 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4313 VR128X:$src1, VR128X:$src2), 0>;
4314 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4315 "$dst {${mask}} {z}, $src1, $src2}",
4316 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4317 VR128X:$src1, VR128X:$src2), 0>;
// HasAVX512 selection patterns built on VMOVSS/VMOVSD: move-low-and-
// zero-high (X86vzmovl) at 128/256/512 bits, zero-extending scalar loads
// (X86vzload), extract-and-store of the low element, and plain
// Movss/Movsd shuffles. Higher AddedComplexity makes these win over the
// generic shuffle patterns.
// NOTE(review): embedded numbering shows gaps (e.g. 4325-4326, 4344,
// 4356, 4369, 4371, 4385, 4387, 4401, 4405, 4415, 4422, 4425, 4427,
// 4431, 4435, 4439, 4442-4443) -- blank/comment lines and some brace
// closers were dropped by extraction; verify nesting against upstream.
4319 let Predicates = [HasAVX512] in {
4320 let AddedComplexity = 15 in {
4321 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4322 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4323 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4324 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4327 // Move low f32 and clear high bits.
4328 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4329 (SUBREG_TO_REG (i32 0),
4330 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4331 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
4332 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4333 (SUBREG_TO_REG (i32 0),
4334 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4335 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
4336 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4337 (SUBREG_TO_REG (i32 0),
4338 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4339 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
4340 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4341 (SUBREG_TO_REG (i32 0),
4342 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4343 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
4345 let AddedComplexity = 20 in {
4346 // MOVSSrm zeros the high parts of the register; represent this
4347 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
4348 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
4349 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4350 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4351 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4352 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
4353 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4354 def : Pat<(v4f32 (X86vzload addr:$src)),
4355 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4357 // MOVSDrm zeros the high parts of the register; represent this
4358 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
4359 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
4360 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4361 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4362 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4363 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
4364 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4365 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
4366 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4367 def : Pat<(v2f64 (X86vzload addr:$src)),
4368 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4370 // Represent the same patterns above but in the form they appear for
4372 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4373 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4374 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4375 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
4376 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4377 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4378 def : Pat<(v8f32 (X86vzload addr:$src)),
4379 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4380 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
4381 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4382 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4383 def : Pat<(v4f64 (X86vzload addr:$src)),
4384 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4386 // Represent the same patterns above but in the form they appear for
4388 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4389 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4390 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4391 def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
4392 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4393 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4394 def : Pat<(v16f32 (X86vzload addr:$src)),
4395 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4396 def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
4397 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4398 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4399 def : Pat<(v8f64 (X86vzload addr:$src)),
4400 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4402 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4403 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
4404 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4406 // Move low f64 and clear high bits.
4407 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4408 (SUBREG_TO_REG (i32 0),
4409 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4410 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
4411 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4412 (SUBREG_TO_REG (i32 0),
4413 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4414 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
4416 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4417 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
4418 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
4419 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4420 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
4421 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
4423 // Extract and store.
4424 def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
4426 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
4428 // Shuffle with VMOVSS
4429 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
4430 (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;
4432 def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
4433 (VMOVSSZrr VR128X:$src1,
4434 (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
4436 // Shuffle with VMOVSD
4437 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
4438 (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
4440 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
4441 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
// VMOVZPQILo2PQIZrr: register form of vmovq that zeroes the upper qword
// (matches v2i64 X86vzmovl of a register), followed by the HasAVX512
// vzmovl/vzload patterns for GPR-sourced and memory-sourced scalars at
// 128/256/512-bit widths.
// NOTE(review): embedded numbering shows gaps (4447, 4451-4453, 4458,
// 4461, 4465, 4469, 4490-4491, 4499, 4505-4506) -- the (ins ...) line
// of VMOVZPQILo2PQIZrr and several brace closers were dropped by
// extraction; verify against upstream.
4444 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4445 let AddedComplexity = 15 in
4446 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4448 "vmovq\t{$src, $dst|$dst, $src}",
4449 [(set VR128X:$dst, (v2i64 (X86vzmovl
4450 (v2i64 VR128X:$src))))]>,
4454 let Predicates = [HasAVX512] in {
4455 let AddedComplexity = 15 in {
4456 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4457 (VMOVDI2PDIZrr GR32:$src)>;
4459 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4460 (VMOV64toPQIZrr GR64:$src)>;
4462 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4463 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4464 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
4466 def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
4467 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4468 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
4470 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4471 let AddedComplexity = 20 in {
4472 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4473 (VMOVDI2PDIZrm addr:$src)>;
4474 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
4475 (VMOVDI2PDIZrm addr:$src)>;
4476 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
4477 (VMOVDI2PDIZrm addr:$src)>;
4478 def : Pat<(v4i32 (X86vzload addr:$src)),
4479 (VMOVDI2PDIZrm addr:$src)>;
4480 def : Pat<(v8i32 (X86vzload addr:$src)),
4481 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4482 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4483 (VMOVQI2PQIZrm addr:$src)>;
4484 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4485 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4486 def : Pat<(v2i64 (X86vzload addr:$src)),
4487 (VMOVQI2PQIZrm addr:$src)>;
4488 def : Pat<(v4i64 (X86vzload addr:$src)),
4489 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4492 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
4493 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4494 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4495 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4496 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4497 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4498 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4500 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4501 def : Pat<(v16i32 (X86vzload addr:$src)),
4502 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4503 def : Pat<(v8i64 (X86vzload addr:$src)),
4504 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4507 //===----------------------------------------------------------------------===//
4508 // AVX-512 - Non-temporals
4509 //===----------------------------------------------------------------------===//
// Non-temporal loads (VMOVNTDQA, patternless) at 512/256/128 bits, and
// the avx512_movnt multiclasses generating the non-temporal stores
// (vmovntdq/vmovntpd/vmovntps) per vector width.
// NOTE(review): embedded numbering shows gaps (4515, 4518, 4522, 4524,
// 4528-4529, 4538-4539, 4545, 4549-4551, 4558) -- some (ins ...) lines
// and closing braces were dropped by extraction; verify against upstream.
4511 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4512 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4513 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4514 EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4516 let Predicates = [HasVLX] in {
4517 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4519 "vmovntdqa\t{$src, $dst|$dst, $src}",
4520 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4521 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4523 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4525 "vmovntdqa\t{$src, $dst|$dst, $src}",
4526 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4527 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
// Non-temporal store template: one mr def matching st_frag
// (alignednontemporalstore by default) with high AddedComplexity.
4530 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4531 X86SchedWriteMoveLS Sched,
4532 PatFrag st_frag = alignednontemporalstore> {
4533 let SchedRW = [Sched.MR], AddedComplexity = 400 in
4534 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4535 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4536 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4537 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
// Width-spread driver: Z (AVX512F) plus Z256/Z128 (VLX).
4540 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4541 AVX512VLVectorVTInfo VTInfo,
4542 X86SchedWriteMoveLSWidths Sched> {
4543 let Predicates = [HasAVX512] in
4544 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4546 let Predicates = [HasAVX512, HasVLX] in {
4547 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4548 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4552 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4553 SchedWriteVecMoveLSNT>, PD;
4554 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4555 SchedWriteFMoveLSNT>, PD, VEX_W;
4556 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4557 SchedWriteFMoveLSNT>, PS;
// Route additional element types onto the i64-typed VMOVNTDQ/VMOVNTDQA
// instructions: integer stores of other lane widths and FP/int
// non-temporal loads, for 512-bit (HasAVX512) and 256/128-bit (HasVLX).
// NOTE(review): embedded numbering shows gaps (4566, 4573-4574, 4582,
// 4589, 4596, 4603-4604) -- blank lines and brace closers dropped by
// extraction; verify nesting against upstream.
4559 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4560 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4561 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4562 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4563 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4564 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4565 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4567 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4568 (VMOVNTDQAZrm addr:$src)>;
4569 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4570 (VMOVNTDQAZrm addr:$src)>;
4571 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4572 (VMOVNTDQAZrm addr:$src)>;
4575 let Predicates = [HasVLX], AddedComplexity = 400 in {
4576 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4577 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4578 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4579 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4580 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4581 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4583 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4584 (VMOVNTDQAZ256rm addr:$src)>;
4585 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4586 (VMOVNTDQAZ256rm addr:$src)>;
4587 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4588 (VMOVNTDQAZ256rm addr:$src)>;
4590 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4591 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4592 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4593 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4594 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4595 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4597 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4598 (VMOVNTDQAZ128rm addr:$src)>;
4599 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4600 (VMOVNTDQAZ128rm addr:$src)>;
4601 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4602 (VMOVNTDQAZ128rm addr:$src)>;
4605 //===----------------------------------------------------------------------===//
4606 // AVX-512 - Integer arithmetic
// avx512_binop_rm: masked AVX-512 integer binary op with a reg-reg form
// ("rr") and a reg-mem form ("rm"). The memory operand is bitconverted
// through _.LdFrag; 'sched' supplies the scheduling class for both forms.
4608 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4609 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4610 bit IsCommutable = 0> {
4611 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4612 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4613 "$src2, $src1", "$src1, $src2",
4614 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4615 IsCommutable>, AVX512BIBase, EVEX_4V,
4618 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4619 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4620 "$src2, $src1", "$src1, $src2",
4621 (_.VT (OpNode _.RC:$src1,
4622 (bitconvert (_.LdFrag addr:$src2))))>,
4623 AVX512BIBase, EVEX_4V,
4624 Sched<[sched.Folded, ReadAfterLd]>;
// avx512_binop_rmb: extends avx512_binop_rm with the embedded-broadcast
// memory form ("rmb", EVEX.B), loading one scalar and broadcasting it
// across the vector.
4627 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4628 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4629 bit IsCommutable = 0> :
4630 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4631 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4632 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4633 "${src2}"##_.BroadcastStr##", $src1",
4634 "$src1, ${src2}"##_.BroadcastStr,
4635 (_.VT (OpNode _.RC:$src1,
4637 (_.ScalarLdFrag addr:$src2))))>,
4638 AVX512BIBase, EVEX_4V, EVEX_B,
4639 Sched<[sched.Folded, ReadAfterLd]>;
// avx512_binop_rm_vl: instantiates avx512_binop_rm at 512-bit width under
// 'prd', and at 256/128-bit widths when AVX512VL is also available.
4642 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4643 AVX512VLVectorVTInfo VTInfo,
4644 X86SchedWriteWidths sched, Predicate prd,
4645 bit IsCommutable = 0> {
4646 let Predicates = [prd] in
4647 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4648 IsCommutable>, EVEX_V512;
4650 let Predicates = [prd, HasVLX] in {
4651 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4652 sched.YMM, IsCommutable>, EVEX_V256;
4653 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4654 sched.XMM, IsCommutable>, EVEX_V128;
// avx512_binop_rmb_vl: like avx512_binop_rm_vl, but uses avx512_binop_rmb
// so each width also gets the embedded-broadcast form.
4658 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4659 AVX512VLVectorVTInfo VTInfo,
4660 X86SchedWriteWidths sched, Predicate prd,
4661 bit IsCommutable = 0> {
4662 let Predicates = [prd] in
4663 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4664 IsCommutable>, EVEX_V512;
4666 let Predicates = [prd, HasVLX] in {
4667 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4668 sched.YMM, IsCommutable>, EVEX_V256;
4669 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4670 sched.XMM, IsCommutable>, EVEX_V128;
// 64-bit (qword) element instantiation: broadcast forms enabled, VEX.W set,
// CD8 displacement scaling for 64-bit elements.
4674 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4675 X86SchedWriteWidths sched, Predicate prd,
4676 bit IsCommutable = 0> {
4677 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4678 sched, prd, IsCommutable>,
4679 VEX_W, EVEX_CD8<64, CD8VF>;
// 32-bit (dword) element instantiation with broadcast forms.
4682 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4683 X86SchedWriteWidths sched, Predicate prd,
4684 bit IsCommutable = 0> {
4685 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4686 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
// 16-bit (word) element instantiation. Uses avx512_binop_rm_vl (not the
// _rmb variant): AVX-512 has no embedded broadcast for 8/16-bit elements.
4689 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4690 X86SchedWriteWidths sched, Predicate prd,
4691 bit IsCommutable = 0> {
4692 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4693 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
// 8-bit (byte) element instantiation; no broadcast form (see word variant).
4697 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4698 X86SchedWriteWidths sched, Predicate prd,
4699 bit IsCommutable = 0> {
4700 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4701 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
// Pairs the dword and qword instantiations under "D"/"Q" suffixes, appending
// "d"/"q" to the mnemonic.
4705 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4706 SDNode OpNode, X86SchedWriteWidths sched,
4707 Predicate prd, bit IsCommutable = 0> {
4708 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4711 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
// Pairs the byte and word instantiations under "B"/"W" suffixes, appending
// "b"/"w" to the mnemonic.
4715 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4716 SDNode OpNode, X86SchedWriteWidths sched,
4717 Predicate prd, bit IsCommutable = 0> {
4718 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4721 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
// All four element widths: dword/qword gated on HasAVX512, byte/word gated
// on HasBWI (8/16-bit element ops require AVX512BW).
4725 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4726 bits<8> opc_d, bits<8> opc_q,
4727 string OpcodeStr, SDNode OpNode,
4728 X86SchedWriteWidths sched,
4729 bit IsCommutable = 0> {
4730 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4731 sched, HasAVX512, IsCommutable>,
4732 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4733 sched, HasBWI, IsCommutable>;
// avx512_binop_rm2: binary op whose source, destination, and broadcast
// operands may use *different* VT infos (_Src, _Dst, _Brdct) -- used for ops
// like vpmultishiftqb where the broadcast element width differs from the
// source element width. Provides rr, rm, and rmb (EVEX.B broadcast) forms.
4736 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4737 X86FoldableSchedWrite sched,
4738 SDNode OpNode,X86VectorVTInfo _Src,
4739 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4740 bit IsCommutable = 0> {
4741 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4742 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4743 "$src2, $src1","$src1, $src2",
4745 (_Src.VT _Src.RC:$src1),
4746 (_Src.VT _Src.RC:$src2))),
4748 AVX512BIBase, EVEX_4V, Sched<[sched]>;
4749 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4750 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4751 "$src2, $src1", "$src1, $src2",
4752 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4753 (bitconvert (_Src.LdFrag addr:$src2))))>,
4754 AVX512BIBase, EVEX_4V,
4755 Sched<[sched.Folded, ReadAfterLd]>;
4757 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4758 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4760 "${src2}"##_Brdct.BroadcastStr##", $src1",
4761 "$src1, ${src2}"##_Brdct.BroadcastStr,
4762 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4763 (_Brdct.VT (X86VBroadcast
4764 (_Brdct.ScalarLdFrag addr:$src2))))))>,
4765 AVX512BIBase, EVEX_4V, EVEX_B,
4766 Sched<[sched.Folded, ReadAfterLd]>;
// Integer add/sub (plain, signed-saturating, unsigned-saturating), multiply
// (low/high, signed/unsigned), average, and widening multiply instantiations.
// The trailing '1'/'0' argument marks whether the op is commutable.
4769 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4770 SchedWriteVecALU, 1>;
4771 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4772 SchedWriteVecALU, 0>;
4773 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
4774 SchedWriteVecALU, HasBWI, 1>;
4775 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
4776 SchedWriteVecALU, HasBWI, 0>;
4777 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
4778 SchedWriteVecALU, HasBWI, 1>;
4779 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
4780 SchedWriteVecALU, HasBWI, 0>;
4781 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4782 SchedWritePMULLD, HasAVX512, 1>, T8PD;
4783 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4784 SchedWriteVecIMul, HasBWI, 1>;
4785 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4786 SchedWriteVecIMul, HasDQI, 1>, T8PD,
4787 NotEVEX2VEXConvertible;
4788 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4790 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4792 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4793 SchedWriteVecIMul, HasBWI, 1>, T8PD;
4794 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4795 SchedWriteVecALU, HasBWI, 1>;
4796 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4797 SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4798 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4799 SchedWriteVecIMul, HasAVX512, 1>;
// avx512_binop_all: wraps avx512_binop_rm2 across 512/256/128-bit widths,
// always using a v*i64 info for the broadcast operand (VEX.W + 64-bit CD8).
4801 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4802 X86SchedWriteWidths sched,
4803 AVX512VLVectorVTInfo _SrcVTInfo,
4804 AVX512VLVectorVTInfo _DstVTInfo,
4805 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
4806 let Predicates = [prd] in
4807 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4808 _SrcVTInfo.info512, _DstVTInfo.info512,
4809 v8i64_info, IsCommutable>,
4810 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4811 let Predicates = [HasVLX, prd] in {
4812 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4813 _SrcVTInfo.info256, _DstVTInfo.info256,
4814 v4i64x_info, IsCommutable>,
4815 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4816 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4817 _SrcVTInfo.info128, _DstVTInfo.info128,
4818 v2i64x_info, IsCommutable>,
4819 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
// VPMULTISHIFTQB (AVX512VBMI): byte-element source/dest with qword broadcast.
4823 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4824 avx512vl_i8_info, avx512vl_i8_info,
4825 X86multishift, HasVBMI, 0>, T8PD;
// avx512_packs_rmb: broadcast memory form (EVEX.B) for pack instructions
// whose source and destination element types differ.
4827 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4828 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4829 X86FoldableSchedWrite sched> {
4830 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4831 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4833 "${src2}"##_Src.BroadcastStr##", $src1",
4834 "$src1, ${src2}"##_Src.BroadcastStr,
4835 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4836 (_Src.VT (X86VBroadcast
4837 (_Src.ScalarLdFrag addr:$src2))))))>,
4838 EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4839 Sched<[sched.Folded, ReadAfterLd]>;
// avx512_packs_rm: reg-reg and reg-mem forms for pack-style ops with
// distinct source (_Src) and destination (_Dst) VT infos.
4842 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4843 SDNode OpNode,X86VectorVTInfo _Src,
4844 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4845 bit IsCommutable = 0> {
4846 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4847 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4848 "$src2, $src1","$src1, $src2",
4850 (_Src.VT _Src.RC:$src1),
4851 (_Src.VT _Src.RC:$src2))),
4853 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4854 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4855 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4856 "$src2, $src1", "$src1, $src2",
4857 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4858 (bitconvert (_Src.LdFrag addr:$src2))))>,
4859 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4860 Sched<[sched.Folded, ReadAfterLd]>;
// i32 -> i16 pack instantiations (vpackssdw/vpackusdw); dword elements allow
// a broadcast form, so avx512_packs_rmb is included at every width.
4863 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4865 let Predicates = [HasBWI] in
4866 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4867 v32i16_info, SchedWriteShuffle.ZMM>,
4868 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4869 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4870 let Predicates = [HasBWI, HasVLX] in {
4871 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4872 v16i16x_info, SchedWriteShuffle.YMM>,
4873 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4874 v16i16x_info, SchedWriteShuffle.YMM>,
4876 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4877 v8i16x_info, SchedWriteShuffle.XMM>,
4878 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4879 v8i16x_info, SchedWriteShuffle.XMM>,
// i16 -> i8 pack instantiations (vpacksswb/vpackuswb); word elements have no
// embedded broadcast, so only avx512_packs_rm is used.
4883 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4885 let Predicates = [HasBWI] in
4886 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4887 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4888 let Predicates = [HasBWI, HasVLX] in {
4889 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4890 v32i8x_info, SchedWriteShuffle.YMM>,
4892 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4893 v16i8x_info, SchedWriteShuffle.XMM>,
// avx512_vpmadd: 512/256/128-bit instantiations for the multiply-add ops
// (vpmaddubsw, vpmaddwd), which narrow pairs of source elements into wider
// destination elements.
4898 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4899 SDNode OpNode, AVX512VLVectorVTInfo _Src,
4900 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4901 let Predicates = [HasBWI] in
4902 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4903 _Dst.info512, SchedWriteVecIMul.ZMM,
4904 IsCommutable>, EVEX_V512;
4905 let Predicates = [HasBWI, HasVLX] in {
4906 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4907 _Dst.info256, SchedWriteVecIMul.YMM,
4908 IsCommutable>, EVEX_V256;
4909 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4910 _Dst.info128, SchedWriteVecIMul.XMM,
4911 IsCommutable>, EVEX_V128;
// Pack (saturating narrow) and multiply-add instruction definitions.
4915 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4916 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4917 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4918 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4920 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4921 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4922 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4923 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
// Integer min/max definitions: signed and unsigned, all element widths.
// Byte/word variants require HasBWI; dword/qword require HasAVX512. The
// qword forms are marked NotEVEX2VEXConvertible (no VEX equivalent exists).
4925 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4926 SchedWriteVecALU, HasBWI, 1>, T8PD;
4927 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4928 SchedWriteVecALU, HasBWI, 1>;
4929 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4930 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4931 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4932 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4933 NotEVEX2VEXConvertible;
4935 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4936 SchedWriteVecALU, HasBWI, 1>;
4937 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4938 SchedWriteVecALU, HasBWI, 1>, T8PD;
4939 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4940 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4941 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4942 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4943 NotEVEX2VEXConvertible;
4945 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4946 SchedWriteVecALU, HasBWI, 1>, T8PD;
4947 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4948 SchedWriteVecALU, HasBWI, 1>;
4949 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4950 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4951 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4952 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4953 NotEVEX2VEXConvertible;
4955 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4956 SchedWriteVecALU, HasBWI, 1>;
4957 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4958 SchedWriteVecALU, HasBWI, 1>, T8PD;
4959 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4960 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4961 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4962 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4963 NotEVEX2VEXConvertible;
4965 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
4966 let Predicates = [HasDQI, NoVLX] in {
4967 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
// Widen the 256-bit operands to 512 bits via INSERT_SUBREG so the
// 512-bit VPMULLQ can be used when VLX is unavailable.
4970 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4971 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4974 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
// Same trick for 128-bit operands, inserting into the xmm subregister.
4977 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4978 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// NOTE(review): removed a byte-for-byte duplicate of the [HasDQI, NoVLX]
// PMULLQ widening pattern block immediately above (original lines
// 4965-4978). Defining the same selection patterns twice is redundant;
// the surviving copy above is unchanged.
// avx512_min_max_lowering: select the 512-bit min/max instruction for
// 256/128-bit v*i64 operations by widening the operands with INSERT_SUBREG
// (used below when VLX is unavailable).
4999 multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
5000 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5003 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5004 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5007 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5010 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5011 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Without VLX, lower 128/256-bit v*i64 min/max through the 512-bit forms.
5015 let Predicates = [HasAVX512, NoVLX] in {
5016 defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
5017 defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
5018 defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
5019 defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
5022 //===----------------------------------------------------------------------===//
5023 // AVX-512 Logical Instructions
5024 //===----------------------------------------------------------------------===//
5026 // OpNodeMsk is the OpNode to use when element size is important. OpNode will
5027 // be set to null_frag for 32-bit elements.
// avx512_logic_rm: bitwise logical op over i64-typed lanes. The unmasked
// select pattern uses OpNode (null_frag for 32-bit element variants, so only
// one width's pattern matches); the masked pattern always uses OpNodeMsk.
5028 multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
5029 SDPatternOperator OpNode,
5030 SDNode OpNodeMsk, X86FoldableSchedWrite sched,
5031 X86VectorVTInfo _, bit IsCommutable = 0> {
5032 let hasSideEffects = 0 in
5033 defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
5034 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5035 "$src2, $src1", "$src1, $src2",
5036 (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
5037 (bitconvert (_.VT _.RC:$src2)))),
5038 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
5040 IsCommutable>, AVX512BIBase, EVEX_4V,
5043 let hasSideEffects = 0, mayLoad = 1 in
5044 defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
5045 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5046 "$src2, $src1", "$src1, $src2",
5047 (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
5048 (bitconvert (_.LdFrag addr:$src2)))),
5049 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
5050 (bitconvert (_.LdFrag addr:$src2))))))>,
5051 AVX512BIBase, EVEX_4V,
5052 Sched<[sched.Folded, ReadAfterLd]>;
5055 // OpNodeMsk is the OpNode to use where element size is important. So use
5056 // for all of the broadcast patterns.
// avx512_logic_rmb: adds the embedded-broadcast form; broadcast patterns
// always use OpNodeMsk since the element size matters there.
5057 multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
5058 SDPatternOperator OpNode,
5059 SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _,
5060 bit IsCommutable = 0> :
5061 avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _,
5063 defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
5064 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5065 "${src2}"##_.BroadcastStr##", $src1",
5066 "$src1, ${src2}"##_.BroadcastStr,
5067 (_.i64VT (OpNodeMsk _.RC:$src1,
5069 (_.VT (X86VBroadcast
5070 (_.ScalarLdFrag addr:$src2)))))),
5071 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
5073 (_.VT (X86VBroadcast
5074 (_.ScalarLdFrag addr:$src2))))))))>,
5075 AVX512BIBase, EVEX_4V, EVEX_B,
5076 Sched<[sched.Folded, ReadAfterLd]>;
// avx512_logic_rmb_vl: 512-bit always; 256/128-bit when VLX is available.
5079 multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
5080 SDPatternOperator OpNode,
5081 SDNode OpNodeMsk, X86SchedWriteWidths sched,
5082 AVX512VLVectorVTInfo VTInfo,
5083 bit IsCommutable = 0> {
5084 let Predicates = [HasAVX512] in
5085 defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
5086 VTInfo.info512, IsCommutable>, EVEX_V512;
5088 let Predicates = [HasAVX512, HasVLX] in {
5089 defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
5090 VTInfo.info256, IsCommutable>, EVEX_V256;
5091 defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
5092 VTInfo.info128, IsCommutable>, EVEX_V128;
// Dword/qword pair: the D variant passes null_frag as the unmasked OpNode so
// only the Q patterns match plain (unmasked) i64 logic ops; OpNode is kept
// as the masked operator for both.
5096 multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
5097 SDNode OpNode, X86SchedWriteWidths sched,
5098 bit IsCommutable = 0> {
5099 defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
5100 avx512vl_i64_info, IsCommutable>,
5101 VEX_W, EVEX_CD8<64, CD8VF>;
5102 defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched,
5103 avx512vl_i32_info, IsCommutable>,
5104 EVEX_CD8<32, CD8VF>;
// Bitwise logical instruction definitions (and/or/xor commutable; andn not).
5107 defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5108 SchedWriteVecLogic, 1>;
5109 defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5110 SchedWriteVecLogic, 1>;
5111 defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5112 SchedWriteVecLogic, 1>;
5113 defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5114 SchedWriteVecLogic>;
5116 //===----------------------------------------------------------------------===//
5117 // AVX-512 FP arithmetic
5118 //===----------------------------------------------------------------------===//
// avx512_fp_scalar: scalar FP binary op. The _Int forms operate on the full
// vector register class with masking (VecNode, current rounding mode); the
// isCodeGenOnly rr/rm forms operate on the scalar FRC class and carry the
// plain DAG patterns (OpNode) used for scalar ISel.
5120 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5121 SDNode OpNode, SDNode VecNode,
5122 X86FoldableSchedWrite sched, bit IsCommutable> {
5123 let ExeDomain = _.ExeDomain in {
5124 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5125 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5126 "$src2, $src1", "$src1, $src2",
5127 (_.VT (VecNode _.RC:$src1, _.RC:$src2,
5128 (i32 FROUND_CURRENT)))>,
5131 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5132 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5133 "$src2, $src1", "$src1, $src2",
5134 (_.VT (VecNode _.RC:$src1,
5135 _.ScalarIntMemCPat:$src2,
5136 (i32 FROUND_CURRENT)))>,
5137 Sched<[sched.Folded, ReadAfterLd]>;
5138 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5139 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5140 (ins _.FRC:$src1, _.FRC:$src2),
5141 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5142 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5144 let isCommutable = IsCommutable;
5146 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5147 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5148 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5149 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5150 (_.ScalarLdFrag addr:$src2)))]>,
5151 Sched<[sched.Folded, ReadAfterLd]>;
// avx512_fp_scalar_round: adds the static-rounding form ("rrb_Int") taking
// an explicit rounding-control operand (EVEX.B + EVEX_RC).
5156 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5157 SDNode VecNode, X86FoldableSchedWrite sched,
5158 bit IsCommutable = 0> {
5159 let ExeDomain = _.ExeDomain in
5160 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5161 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5162 "$rc, $src2, $src1", "$src1, $src2, $rc",
5163 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5164 (i32 imm:$rc)), IsCommutable>,
5165 EVEX_B, EVEX_RC, Sched<[sched]>;
// avx512_fp_scalar_sae: like avx512_fp_scalar, but the exception-suppressing
// form ("rrb_Int") uses {sae} with SaeNode / FROUND_NO_EXC instead of a
// rounding-control immediate (used by min/max, which do not round).
5167 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5168 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5169 X86FoldableSchedWrite sched, bit IsCommutable> {
5170 let ExeDomain = _.ExeDomain in {
5171 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5172 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5173 "$src2, $src1", "$src1, $src2",
5174 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5177 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5178 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5179 "$src2, $src1", "$src1, $src2",
5180 (_.VT (VecNode _.RC:$src1,
5181 _.ScalarIntMemCPat:$src2))>,
5182 Sched<[sched.Folded, ReadAfterLd]>;
5184 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5185 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5186 (ins _.FRC:$src1, _.FRC:$src2),
5187 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5188 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5190 let isCommutable = IsCommutable;
5192 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5193 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5194 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5195 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5196 (_.ScalarLdFrag addr:$src2)))]>,
5197 Sched<[sched.Folded, ReadAfterLd]>;
5200 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5201 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5202 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5203 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5204 (i32 FROUND_NO_EXC))>, EVEX_B,
// Scalar single/double instantiation with rounding-control forms (SS/SD).
5209 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5210 SDNode VecNode, X86SchedWriteSizes sched,
5212 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5213 sched.PS.Scl, IsCommutable>,
5214 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
5215 sched.PS.Scl, IsCommutable>,
5216 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5217 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5218 sched.PD.Scl, IsCommutable>,
5219 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
5220 sched.PD.Scl, IsCommutable>,
5221 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Scalar single/double instantiation with {sae} forms (min/max family).
5224 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5225 SDNode VecNode, SDNode SaeNode,
5226 X86SchedWriteSizes sched, bit IsCommutable> {
5227 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5228 VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5229 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5230 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5231 VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5232 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Scalar FP add/mul/sub/div (rounding forms) and min/max ({sae} forms).
5234 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
5235 SchedWriteFAddSizes, 1>;
5236 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
5237 SchedWriteFMulSizes, 1>;
5238 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
5239 SchedWriteFAddSizes, 0>;
5240 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
5241 SchedWriteFDivSizes, 0>;
5242 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
5243 SchedWriteFCmpSizes, 0>;
5244 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
5245 SchedWriteFCmpSizes, 0>;
5247 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5248 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Commutable scalar min/max forms (X86fminc/X86fmaxc): codegen-only FRC
// register and memory variants with isCommutable set.
5249 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5250 X86VectorVTInfo _, SDNode OpNode,
5251 X86FoldableSchedWrite sched> {
5252 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5253 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5254 (ins _.FRC:$src1, _.FRC:$src2),
5255 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5256 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5258 let isCommutable = 1;
5260 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5261 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5262 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5263 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5264 (_.ScalarLdFrag addr:$src2)))]>,
5265 Sched<[sched.Folded, ReadAfterLd]>;
// Commutable min/max instantiations (same mnemonics as vmin/vmax; selected
// for the commutable X86fminc/X86fmaxc nodes).
5268 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5269 SchedWriteFCmp.Scl>, XS, EVEX_4V,
5270 VEX_LIG, EVEX_CD8<32, CD8VT1>;
5272 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5273 SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
5274 VEX_LIG, EVEX_CD8<64, CD8VT1>;
5276 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5277 SchedWriteFCmp.Scl>, XS, EVEX_4V,
5278 VEX_LIG, EVEX_CD8<32, CD8VT1>;
5280 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5281 SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
5282 VEX_LIG, EVEX_CD8<64, CD8VT1>;
// avx512_fp_packed: packed FP binary op with rr, rm, and broadcast rmb
// forms; the type suffix (_.Suffix) is appended to the mnemonic.
5284 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5285 X86VectorVTInfo _, X86FoldableSchedWrite sched,
5287 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5288 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5289 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5290 "$src2, $src1", "$src1, $src2",
5291 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable>,
5292 EVEX_4V, Sched<[sched]>;
5293 let mayLoad = 1 in {
5294 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5295 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5296 "$src2, $src1", "$src1, $src2",
5297 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5298 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
5299 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5300 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5301 "${src2}"##_.BroadcastStr##", $src1",
5302 "$src1, ${src2}"##_.BroadcastStr,
5303 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5304 (_.ScalarLdFrag addr:$src2))))>,
5306 Sched<[sched.Folded, ReadAfterLd]>;
// Packed FP op with an explicit static rounding-control operand (EVEX.B +
// EVEX_RC); only meaningful at 512-bit width.
5311 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5312 SDPatternOperator OpNodeRnd,
5313 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5314 let ExeDomain = _.ExeDomain in
5315 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5316 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
5317 "$rc, $src2, $src1", "$src1, $src2, $rc",
5318 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
5319 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed FP op with exception suppression ({sae}, FROUND_NO_EXC) instead of
// a rounding-control immediate.
5322 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5323 SDPatternOperator OpNodeRnd,
5324 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5325 let ExeDomain = _.ExeDomain in
5326 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5327 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5328 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5329 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
5330 EVEX_4V, EVEX_B, Sched<[sched]>;
// avx512_fp_binop_p: instantiates a packed FP op at PS/PD, 512-bit under
// 'prd', and all 128/256-bit variants when AVX512VL is also present.
5333 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5334 Predicate prd, X86SchedWriteSizes sched,
5335 bit IsCommutable = 0> {
5336 let Predicates = [prd] in {
5337 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
5338 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5339 EVEX_CD8<32, CD8VF>;
5340 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
5341 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5342 EVEX_CD8<64, CD8VF>;
5345 // Define only if AVX512VL feature is present.
5346 let Predicates = [prd, HasVLX] in {
5347 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
5348 sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5349 EVEX_CD8<32, CD8VF>;
5350 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
5351 sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5352 EVEX_CD8<32, CD8VF>;
5353 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
5354 sched.PD.XMM, IsCommutable>, EVEX_V128, PD, VEX_W,
5355 EVEX_CD8<64, CD8VF>;
5356 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
5357 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5358 EVEX_CD8<64, CD8VF>;
// 512-bit PS/PD instantiations of the rounding-control form.
5362 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5363 X86SchedWriteSizes sched> {
5364 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5366 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5367 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5369 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// 512-bit PS/PD instantiations of the {sae} form.
5372 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5373 X86SchedWriteSizes sched> {
5374 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5376 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5377 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5379 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// Packed FP arithmetic: add/mul/sub/div get rounding-control forms, min/max
// get {sae} forms. VMINC/VMAXC are codegen-only commutable variants, and the
// logical ops (VAND..VXOR, HasDQI) pass null_frag -- their selection
// patterns are defined separately.
5382 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
5383 SchedWriteFAddSizes, 1>,
5384 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5385 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
5386 SchedWriteFMulSizes, 1>,
5387 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5388 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
5389 SchedWriteFAddSizes>,
5390 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5391 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
5392 SchedWriteFDivSizes>,
5393 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5394 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
5395 SchedWriteFCmpSizes, 0>,
5396 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>;
5397 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5398 SchedWriteFCmpSizes, 0>,
5399 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>;
5400 let isCodeGenOnly = 1 in {
5401 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5402 SchedWriteFCmpSizes, 1>;
5403 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5404 SchedWriteFCmpSizes, 1>;
5406 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5407 SchedWriteFLogicSizes, 1>;
5408 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5409 SchedWriteFLogicSizes, 0>;
5410 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5411 SchedWriteFLogicSizes, 1>;
5412 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5413 SchedWriteFLogicSizes, 1>;
5415 // Patterns catch floating point selects with bitcasted integer logic ops.
// Selects (vselect with a write-mask) over an integer logic op whose result is
// bitcast to an FP vector type are folded into the masked forms (rrk/rrkz,
// rmk/rmkz, rmb/rmbk/rmbkz) of the instruction named by InstrStr.
5416 multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
5417 X86VectorVTInfo _, Predicate prd> {
5418 let Predicates = [prd] in {
5419 // Masked register-register logical operations (merge- then zero-masking).
5420 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5421 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
5423 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5424 _.RC:$src1, _.RC:$src2)>;
5425 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5426 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
5428 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5430 // Masked register-memory logical operations.
5431 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5432 (bitconvert (_.i64VT (OpNode _.RC:$src1,
5433 (load addr:$src2)))),
5435 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5436 _.RC:$src1, addr:$src2)>;
5437 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5438 (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
5440 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5442 // Register-broadcast logical operations (splatted scalar memory operand).
5443 def : Pat<(_.i64VT (OpNode _.RC:$src1,
5444 (bitconvert (_.VT (X86VBroadcast
5445 (_.ScalarLdFrag addr:$src2)))))),
5446 (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
5447 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5449 (_.i64VT (OpNode _.RC:$src1,
5452 (_.ScalarLdFrag addr:$src2))))))),
5454 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5455 _.RC:$src1, addr:$src2)>;
5456 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5458 (_.i64VT (OpNode _.RC:$src1,
5461 (_.ScalarLdFrag addr:$src2))))))),
5463 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
5464 _.RC:$src1, addr:$src2)>;
// Expands the FP-logic lowering patterns across every width/element size:
// 128/256-bit forms need VLX, 512-bit forms only need base AVX512.
// D suffix = 32-bit (float) elements, Q suffix = 64-bit (double) elements.
5468 multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
5469 defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
5470 defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
5471 defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
5472 defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
5473 defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
5474 defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
// Map the integer logic SDNodes onto the VPAND/VPOR/VPXOR/VPANDN families so
// masked FP selects of bitcasted integer logic fold into one instruction.
5477 defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
5478 defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
5479 defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
5480 defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
5482 let Predicates = [HasVLX,HasDQI] in {
5483 // Use packed logical operations for scalar ops.
// There is no scalar form of the FP logic instructions, so move the scalar
// into an XMM register, run the 128-bit packed op, and copy the low element
// back to the scalar register class.
5484 def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
5485 (COPY_TO_REGCLASS (VANDPDZ128rr
5486 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5487 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5488 def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
5489 (COPY_TO_REGCLASS (VORPDZ128rr
5490 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5491 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5492 def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
5493 (COPY_TO_REGCLASS (VXORPDZ128rr
5494 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5495 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5496 def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
5497 (COPY_TO_REGCLASS (VANDNPDZ128rr
5498 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5499 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
// Same four patterns for f32 scalars via the PS (single-precision) forms.
5501 def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
5502 (COPY_TO_REGCLASS (VANDPSZ128rr
5503 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5504 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5505 def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
5506 (COPY_TO_REGCLASS (VORPSZ128rr
5507 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5508 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5509 def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
5510 (COPY_TO_REGCLASS (VXORPSZ128rr
5511 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5512 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5513 def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
5514 (COPY_TO_REGCLASS (VANDNPSZ128rr
5515 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5516 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
// Packed VSCALEF-style op: reg-reg (rr), reg-mem (rm) and broadcast (rmb)
// maskable forms. The OpNode takes a trailing i32 rounding operand; these
// non-{er} forms always pass FROUND_CURRENT (use MXCSR rounding).
5519 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5520 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5521 let ExeDomain = _.ExeDomain in {
5522 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5523 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5524 "$src2, $src1", "$src1, $src2",
5525 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
5526 EVEX_4V, Sched<[sched]>;
5527 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5528 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5529 "$src2, $src1", "$src1, $src2",
5530 (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
5531 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
// Broadcast form: splat one scalar from memory across src2 (EVEX.b).
5532 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5533 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5534 "${src2}"##_.BroadcastStr##", $src1",
5535 "$src1, ${src2}"##_.BroadcastStr,
5536 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5537 (_.ScalarLdFrag addr:$src2))),
5538 (i32 FROUND_CURRENT))>,
5539 EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
// Scalar (SS/SD) counterpart of avx512_fp_scalef_p: rr and rm maskable
// scalar forms, again with an implicit FROUND_CURRENT rounding operand.
5543 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5544 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5545 let ExeDomain = _.ExeDomain in {
5546 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5547 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5548 "$src2, $src1", "$src1, $src2",
5549 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
// Memory form uses the intrinsic-style scalar memory operand/pattern so the
// load of just the scalar element can be folded.
5551 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5552 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5553 "$src2, $src1", "$src1, $src2",
5554 (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
5555 (i32 FROUND_CURRENT))>,
5556 Sched<[sched.Folded, ReadAfterLd]>;
// Instantiates the full VSCALEF family: 512-bit packed PS/PD (plus their
// {er} rounding forms), scalar SS/SD (plus scalar rounding forms), and the
// VLX-gated 128/256-bit packed forms.
5560 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5561 SDNode OpNode, SDNode OpNodeScal,
5562 X86SchedWriteWidths sched> {
5563 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
5564 avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
5565 EVEX_V512, EVEX_CD8<32, CD8VF>;
5566 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
5567 avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
5568 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Scalar forms use the separate scaler opcode and tuple-1 disp8 scaling.
5569 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>,
5570 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>,
5571 EVEX_4V,EVEX_CD8<32, CD8VT1>;
5572 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>,
5573 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>,
5574 EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
5576 // Define only if AVX512VL feature is present.
5577 let Predicates = [HasVLX] in {
5578 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>,
5579 EVEX_V128, EVEX_CD8<32, CD8VF>;
5580 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>,
5581 EVEX_V256, EVEX_CD8<32, CD8VF>;
5582 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>,
5583 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5584 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>,
5585 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// VSCALEFPS/PD/SS/SD (x * 2^floor(y)). 0x2C = packed, 0x2D = scalar opcode.
// EVEX-only instruction, so never convertible to a shorter VEX encoding.
5588 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs,
5589 SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5591 //===----------------------------------------------------------------------===//
5592 // AVX-512 VPTESTM instructions
5593 //===----------------------------------------------------------------------===//
// VPTESTM/VPTESTNM core: writes a mask register (KRC) from comparing
// (src1 & src2) against zero. rr and rm maskable-compare forms, plus
// compare-with-zero patterns that reuse one register for both sources.
5595 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5596 X86FoldableSchedWrite sched, X86VectorVTInfo _,
5598 let ExeDomain = _.ExeDomain in {
// The AND of two registers is commutable even though the compare is not.
5599 let isCommutable = 1 in
5600 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5601 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5602 "$src2, $src1", "$src1, $src2",
5603 (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
5605 EVEX_4V, Sched<[sched]>;
5606 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5607 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5608 "$src2, $src1", "$src1, $src2",
5610 (_.i64VT (and _.RC:$src1,
5611 (bitconvert (_.LdFrag addr:$src2))))),
5613 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5614 Sched<[sched.Folded, ReadAfterLd]>;
5617 // Patterns for compare with 0 that just use the same source twice.
// (x & x) == x, so "test x against zero" maps to the rr form with src twice.
5618 def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
5619 (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr")
5620 _.RC:$src, _.RC:$src))>;
// Same, with a mask AND folded into the instruction's write-mask (rrk).
5622 def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
5623 (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk")
5624 _.KRC:$mask, _.RC:$src, _.RC:$src))>;
// Broadcast-memory (rmb) form of VPTESTM/VPTESTNM: src2 is a single scalar
// element splatted across the vector (EVEX.b). Only valid for 32/64-bit
// element sizes, hence kept separate from avx512_vptest.
5627 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5628 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5629 let ExeDomain = _.ExeDomain in
5630 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5631 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5632 "${src2}"##_.BroadcastStr##", $src1",
5633 "$src1, ${src2}"##_.BroadcastStr,
5634 (OpNode (and _.RC:$src1,
5636 (_.ScalarLdFrag addr:$src2))),
5638 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5639 Sched<[sched.Folded, ReadAfterLd]>;
5642 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the narrow sources into a 512-bit register (INSERT_SUBREG into an
// IMPLICIT_DEF), run the Z-suffixed 512-bit instruction, and copy the
// resulting wide mask back down to the narrow mask class. The upper lanes
// are undef, but only the low mask bits are consumed.
5643 multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
5644 X86VectorVTInfo _, string Name> {
5645 def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
5647 (_.KVT (COPY_TO_REGCLASS
5648 (!cast<Instruction>(Name # "Zrr")
5649 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5650 _.RC:$src1, _.SubRegIdx),
5651 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5652 _.RC:$src2, _.SubRegIdx)),
// Masked variant: the narrow mask is widened too and applied via rrk.
5655 def : Pat<(_.KVT (and _.KRC:$mask,
5656 (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
5659 (!cast<Instruction>(Name # "Zrrk")
5660 (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
5661 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5662 _.RC:$src1, _.SubRegIdx),
5663 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5664 _.RC:$src2, _.SubRegIdx)),
// Compare-with-zero variants, using the same source for both operands.
5667 def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
5668 (_.KVT (COPY_TO_REGCLASS
5669 (!cast<Instruction>(Name # "Zrr")
5670 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5671 _.RC:$src, _.SubRegIdx),
5672 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5673 _.RC:$src, _.SubRegIdx)),
5676 def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
5678 (!cast<Instruction>(Name # "Zrrk")
5679 (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
5680 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5681 _.RC:$src, _.SubRegIdx),
5682 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5683 _.RC:$src, _.SubRegIdx)),
// VPTESTM/NM D/Q width expansion: 512-bit form under HasAVX512; native
// 128/256-bit forms under VLX; otherwise fall back to widened 512-bit
// patterns (the _Alt defms) when VLX is absent.
5687 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5688 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5689 let Predicates = [HasAVX512] in
5690 defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>,
5691 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5693 let Predicates = [HasAVX512, HasVLX] in {
5694 defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>,
5695 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5696 defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>,
5697 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5699 let Predicates = [HasAVX512, NoVLX] in {
5700 defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, NAME>;
5701 defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, NAME>;
// Dword + qword element variants; Q gets VEX_W (EVEX.W=1) to select 64-bit
// element size.
5705 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5706 X86SchedWriteWidths sched> {
5707 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
5709 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
5710 avx512vl_i64_info>, VEX_W;
// Word + byte element variants (require BWI). No broadcast (rmb) forms
// here: EVEX embedded broadcast only exists for 32/64-bit elements.
5713 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5714 PatFrag OpNode, X86SchedWriteWidths sched> {
5715 let Predicates = [HasBWI] in {
5716 defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
5717 v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5718 defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
5719 v64i8_info, NAME#"B">, EVEX_V512;
5721 let Predicates = [HasVLX, HasBWI] in {
5723 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
5724 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5725 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
5726 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5727 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
5728 v32i8x_info, NAME#"B">, EVEX_V256;
5729 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
5730 v16i8x_info, NAME#"B">, EVEX_V128;
// Without VLX, implement the narrow byte/word forms via the 512-bit ones.
// NOTE(review): predicate is [HasAVX512, NoVLX]; presumably BWI is implied
// by the 512-bit instructions referenced — confirm against Name lookup.
5733 let Predicates = [HasAVX512, NoVLX] in {
5734 defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
5735 defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
5736 defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
5737 defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">;
5741 // These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
5742 // as commutable here because we already canonicalized all zeros vectors to the
5743 // RHS during lowering.
// setcc-based fragments: EQ-with-zero selects VPTESTNM ((x&y)==0 sets mask),
// NE-with-zero selects VPTESTM ((x&y)!=0 sets mask).
5744 def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
5745 (setcc node:$src1, node:$src2, SETEQ)>;
5746 def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
5747 (setcc node:$src1, node:$src2, SETNE)>;
// Convenience multiclass covering all element sizes: opc_wb for byte/word
// (BWI), opc_dq for dword/qword.
5749 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5750 PatFrag OpNode, X86SchedWriteWidths sched> :
5751 avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
5752 avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;
// VPTESTM uses the T8PD prefix; VPTESTNM differs only in the T8XS prefix.
5754 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
5755 SchedWriteVecLogic>, T8PD;
5756 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
5757 SchedWriteVecLogic>, T8XS;
5759 //===----------------------------------------------------------------------===//
5760 // AVX-512 Shift instructions
5761 //===----------------------------------------------------------------------===//
// Shift/rotate by immediate: register (ri) and memory (mi) maskable forms.
// The immediate lives in the ModRM reg field, hence the Format parameters
// (e.g. MRM2r/MRM2m for VPSRL).
5763 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5764 string OpcodeStr, SDNode OpNode,
5765 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5766 let ExeDomain = _.ExeDomain in {
5767 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5768 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5769 "$src2, $src1", "$src1, $src2",
5770 (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
5772 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5773 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5774 "$src2, $src1", "$src1, $src2",
5775 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
5777 Sched<[sched.Folded]>;
// Broadcast-memory shift by immediate (mbi): the memory operand is one
// scalar element splatted across the vector (EVEX.b) before shifting.
5781 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5782 string OpcodeStr, SDNode OpNode,
5783 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5784 let ExeDomain = _.ExeDomain in
5785 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5786 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5787 "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5788 (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
5789 EVEX_B, Sched<[sched.Folded]>;
// Shift by a count held in an XMM register (or loaded from a 128-bit memory
// operand): all lanes shift by the same amount taken from the low element.
5792 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5793 X86FoldableSchedWrite sched, ValueType SrcVT,
5794 PatFrag bc_frag, X86VectorVTInfo _> {
5795 // src2 is always 128-bit
5796 let ExeDomain = _.ExeDomain in {
5797 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5798 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5799 "$src2, $src1", "$src1, $src2",
5800 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5801 AVX512BIBase, EVEX_4V, Sched<[sched]>;
5802 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5803 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5804 "$src2, $src1", "$src1, $src2",
5805 (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>,
5807 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
// Width expansion for count-in-register shifts. Note the disp8 CD8 tuple
// differs per width (CD8VQ/CD8VH/CD8VF) because the count operand is always
// a 128-bit memory access regardless of vector width.
5811 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5812 X86SchedWriteWidths sched, ValueType SrcVT,
5813 PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo,
5815 let Predicates = [prd] in
5816 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5817 bc_frag, VTInfo.info512>, EVEX_V512,
5818 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5819 let Predicates = [prd, HasVLX] in {
5820 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5821 bc_frag, VTInfo.info256>, EVEX_V256,
5822 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5823 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5824 bc_frag, VTInfo.info128>, EVEX_V128,
5825 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
// D/Q/W element-size expansion for count-in-register shifts. The Q form can
// be flagged not-EVEX2VEX-convertible (VPSRAQ has no VEX equivalent).
// W forms require BWI.
5829 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5830 string OpcodeStr, SDNode OpNode,
5831 X86SchedWriteWidths sched,
5832 bit NotEVEX2VEXConvertibleQ = 0> {
5833 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5834 bc_v4i32, avx512vl_i32_info, HasAVX512>;
5835 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5836 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5837 bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
5838 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5839 bc_v2i64, avx512vl_i16_info, HasBWI>;
// Width expansion for shift-by-immediate: each width gets the ri/mi forms
// plus the broadcast mbi form; 128/256-bit variants are VLX-gated.
5842 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5843 string OpcodeStr, SDNode OpNode,
5844 X86SchedWriteWidths sched,
5845 AVX512VLVectorVTInfo VTInfo> {
5846 let Predicates = [HasAVX512] in
5847 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5848 sched.ZMM, VTInfo.info512>,
5849 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5850 VTInfo.info512>, EVEX_V512;
5851 let Predicates = [HasAVX512, HasVLX] in {
5852 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5853 sched.YMM, VTInfo.info256>,
5854 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5855 VTInfo.info256>, EVEX_V256;
5856 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5857 sched.XMM, VTInfo.info128>,
5858 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5859 VTInfo.info128>, EVEX_V128;
// Word-element shift-by-immediate (BWI). No broadcast form — embedded
// broadcast doesn't exist for 16-bit elements. VEX_WIG: EVEX.W is ignored.
5863 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5864 string OpcodeStr, SDNode OpNode,
5865 X86SchedWriteWidths sched> {
5866 let Predicates = [HasBWI] in
5867 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5868 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5869 let Predicates = [HasVLX, HasBWI] in {
5870 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5871 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5872 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5873 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
// D/Q element-size expansion for shift-by-immediate, mirroring
// avx512_shift_types (Q may be EVEX-only, e.g. VPSRAQ).
5877 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5878 Format ImmFormR, Format ImmFormM,
5879 string OpcodeStr, SDNode OpNode,
5880 X86SchedWriteWidths sched,
5881 bit NotEVEX2VEXConvertibleQ = 0> {
5882 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5883 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5884 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5885 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5886 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
// Immediate shifts/rotates. The ModRM formats (MRM2/4/6 and MRM0/1) encode
// the sub-opcode in the ModRM reg field; VPSRA's qword form is marked
// EVEX-only (third arg 1 — VPSRAQ has no VEX encoding).
5889 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5890 SchedWriteVecShiftImm>,
5891 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5892 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5894 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5895 SchedWriteVecShiftImm>,
5896 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5897 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5899 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5900 SchedWriteVecShiftImm, 1>,
5901 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5902 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
// Rotates by immediate (AVX512 only — no VEX forms exist at all).
5904 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5905 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5906 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5907 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
// Count-in-XMM-register shift forms.
5909 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5910 SchedWriteVecShift>;
5911 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5912 SchedWriteVecShift, 1>;
5913 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5914 SchedWriteVecShift>;
5916 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// 64-bit arithmetic right shift only exists in AVX512; without VLX, widen
// the 128/256-bit source into a ZMM register, shift, and extract the low
// subregister (upper lanes are don't-care).
5917 let Predicates = [HasAVX512, NoVLX] in {
5918 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5919 (EXTRACT_SUBREG (v8i64
5921 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5922 VR128X:$src2)), sub_ymm)>;
5924 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5925 (EXTRACT_SUBREG (v8i64
5927 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5928 VR128X:$src2)), sub_xmm)>;
// Immediate-count versions (VPSRAI).
5930 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
5931 (EXTRACT_SUBREG (v8i64
5933 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5934 imm:$src2)), sub_ymm)>;
5936 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
5937 (EXTRACT_SUBREG (v8i64
5939 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5940 imm:$src2)), sub_xmm)>;
5943 //===-------------------------------------------------------------------===//
5944 // Variable Bit Shifts
5945 //===-------------------------------------------------------------------===//
// Variable (per-element count) shift: each destination lane is shifted by
// the count in the corresponding lane of src2. rr and rm maskable forms.
5947 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5948 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5949 let ExeDomain = _.ExeDomain in {
5950 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5951 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5952 "$src2, $src1", "$src1, $src2",
5953 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5954 AVX5128IBase, EVEX_4V, Sched<[sched]>;
5955 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5956 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5957 "$src2, $src1", "$src1, $src2",
5958 (_.VT (OpNode _.RC:$src1,
5959 (_.VT (bitconvert (_.LdFrag addr:$src2)))))>,
5960 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5961 Sched<[sched.Folded, ReadAfterLd]>;
// Broadcast-memory (rmb) form of the variable shifts: the per-lane counts
// come from a single scalar splatted across the vector (EVEX.b).
5965 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5966 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5967 let ExeDomain = _.ExeDomain in
5968 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5969 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5970 "${src2}"##_.BroadcastStr##", $src1",
5971 "$src1, ${src2}"##_.BroadcastStr,
5972 (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5973 (_.ScalarLdFrag addr:$src2)))))>,
5974 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5975 Sched<[sched.Folded, ReadAfterLd]>;
// Width expansion for variable shifts: Z under HasAVX512, Z256/Z128 under
// VLX, each combining the rr/rm forms with the broadcast rmb form.
5978 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5979 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5980 let Predicates = [HasAVX512] in
5981 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5982 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5984 let Predicates = [HasAVX512, HasVLX] in {
5985 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5986 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5987 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5988 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
// Dword + qword variable-shift variants; word forms are handled separately
// by avx512_var_shift_w (they need BWI and a different opcode).
5992 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5993 SDNode OpNode, X86SchedWriteWidths sched> {
5994 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
5996 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
5997 avx512vl_i64_info>, VEX_W;
6000 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen both narrow operands into ZMM via INSERT_SUBREG/IMPLICIT_DEF, run
// the Z-suffixed 512-bit instruction, then extract the low subregister.
6001 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6002 SDNode OpNode, list<Predicate> p> {
6003 let Predicates = p in {
6004 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6005 (_.info256.VT _.info256.RC:$src2))),
6007 (!cast<Instruction>(OpcodeStr#"Zrr")
6008 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6009 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6012 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6013 (_.info128.VT _.info128.RC:$src2))),
6015 (!cast<Instruction>(OpcodeStr#"Zrr")
6016 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6017 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Word-element variable shifts (VPSLLVW/VPSRAVW/VPSRLVW) — BWI only, no
// broadcast form (no embedded broadcast for 16-bit elements).
6021 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6022 SDNode OpNode, X86SchedWriteWidths sched> {
6023 let Predicates = [HasBWI] in
6024 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6026 let Predicates = [HasVLX, HasBWI] in {
6028 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6030 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
// Variable shifts map directly onto the generic shl/sra/srl SDNodes; the
// word-element forms use separate 0x1x opcodes.
6035 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>,
6036 avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>;
6038 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>,
6039 avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>;
6041 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>,
6042 avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>;
// Variable rotates (D/Q only — no word forms exist).
6044 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6045 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
// Narrow forms that only exist wide (VPSRAVQ) or under BWI+VLX (the vw
// variants) fall back to the widened 512-bit instruction when VLX is absent.
6047 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
6048 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
6049 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
6050 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
// Special handling for the VPSRAV intrinsics: the X86vsrav node is selected
// directly, including its masked (vselect merge/zero) and load-folded forms.
6053 multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
6054 list<Predicate> p> {
6055 let Predicates = p in {
6056 def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
6057 (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
6059 def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
6060 (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
6061 _.RC:$src1, addr:$src2)>;
// Merge-masking: select(mask, vsrav, src0) -> rrk/rmk.
6062 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6063 (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
6064 (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
6065 _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
6066 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6067 (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
6069 (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
6070 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
// Zero-masking: select(mask, vsrav, 0) -> rrkz/rmkz.
6071 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6072 (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
6073 (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
6074 _.RC:$src1, _.RC:$src2)>;
6075 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6076 (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
6078 (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
6079 _.RC:$src1, addr:$src2)>;
// Extends avx512_var_shift_int_lowering with the broadcast-memory (rmb)
// patterns; used only for 32/64-bit element widths, which support EVEX.b.
6083 multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
6084 list<Predicate> p> :
6085 avx512_var_shift_int_lowering<InstrStr, _, p> {
6086 let Predicates = p in {
6087 def : Pat<(_.VT (X86vsrav _.RC:$src1,
6088 (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
6089 (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
6090 _.RC:$src1, addr:$src2)>;
// Merge- and zero-masked broadcast forms (rmbk / rmbkz).
6091 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6092 (X86vsrav _.RC:$src1,
6093 (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
6095 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
6096 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
6097 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6098 (X86vsrav _.RC:$src1,
6099 (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
6101 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
6102 _.RC:$src1, addr:$src2)>;
// Word forms get the plain lowering (no broadcast); dword/qword forms get
// the broadcast-capable variant, at every width their features allow.
6106 defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
6107 defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
6108 defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
6109 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
6110 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
6111 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
6112 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
6113 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
6114 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
6116 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Rotates only exist under AVX512; without VLX, widen narrow operands into
// ZMM, rotate, and extract the low subregister (upper lanes are don't-care).
6117 let Predicates = [HasAVX512, NoVLX] in {
6118 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6119 (EXTRACT_SUBREG (v8i64
6121 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6122 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6124 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6125 (EXTRACT_SUBREG (v8i64
6127 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6128 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6131 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6132 (EXTRACT_SUBREG (v16i32
6134 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6135 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6137 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6138 (EXTRACT_SUBREG (v16i32
6140 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6141 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
// Immediate-count rotate-left (VPROLI) versions of the same widening trick.
6144 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
6145 (EXTRACT_SUBREG (v8i64
6147 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6148 imm:$src2)), sub_xmm)>;
6149 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
6150 (EXTRACT_SUBREG (v8i64
6152 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6153 imm:$src2)), sub_ymm)>;
6155 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
6156 (EXTRACT_SUBREG (v16i32
6158 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6159 imm:$src2)), sub_xmm)>;
6160 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
6161 (EXTRACT_SUBREG (v16i32
6163 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6164 imm:$src2)), sub_ymm)>;
// Mirror of the rotate-left widening block above, for rotate-right: under
// [HasAVX512, NoVLX] the 128/256-bit rotr / X86vrotri operations are widened
// to the 512-bit VPROR/VPRORI instructions via INSERT_SUBREG /
// EXTRACT_SUBREG round-trips.
// NOTE(review): interior lines (instruction names in the output patterns)
// are elided in this excerpt -- consult the full file before modifying.
6167 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6168 let Predicates = [HasAVX512, NoVLX] in {
6169 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6170 (EXTRACT_SUBREG (v8i64
6172 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6173 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6175 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6176 (EXTRACT_SUBREG (v8i64
6178 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6179 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6182 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6183 (EXTRACT_SUBREG (v16i32
6185 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6186 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6188 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6189 (EXTRACT_SUBREG (v16i32
6191 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6192 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6195 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
6196 (EXTRACT_SUBREG (v8i64
6198 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6199 imm:$src2)), sub_xmm)>;
6200 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
6201 (EXTRACT_SUBREG (v8i64
6203 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6204 imm:$src2)), sub_ymm)>;
6206 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
6207 (EXTRACT_SUBREG (v16i32
6209 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6210 imm:$src2)), sub_xmm)>;
6211 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
6212 (EXTRACT_SUBREG (v16i32
6214 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6215 imm:$src2)), sub_ymm)>;
6218 //===-------------------------------------------------------------------===//
6219 // 1-src variable permutation VPERMW/D/Q
6220 //===-------------------------------------------------------------------===//
// Variable permutation with dword/qword elements (VPERMD/Q/PS/PD). Reuses
// the var-shift multiclasses since the operand shape (vector plus a vector
// of per-element indices) is identical. Only 512-bit (HasAVX512) and
// 256-bit (additionally HasVLX) forms are defined here -- no 128-bit form.
// The "_mb" multiclass adds the broadcast-from-memory variant.
6222 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6223 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6224 let Predicates = [HasAVX512] in
6225 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6226 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6228 let Predicates = [HasAVX512, HasVLX] in
6229 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6230 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
// Immediate-controlled permutation of dword/qword elements (VPERMQ/VPERMPD
// with an imm8 selector). Built from the shift-by-immediate multiclasses:
// register/memory forms via avx512_shift_rmi and broadcast-memory forms via
// avx512_shift_rmbi. 512-bit and (with VLX) 256-bit only.
6233 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6234 string OpcodeStr, SDNode OpNode,
6235 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6236 let Predicates = [HasAVX512] in
6237 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6238 sched, VTInfo.info512>,
6239 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6240 sched, VTInfo.info512>, EVEX_V512;
6241 let Predicates = [HasAVX512, HasVLX] in
6242 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6243 sched, VTInfo.info256>,
6244 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6245 sched, VTInfo.info256>, EVEX_V256;
// Variable permutation with byte/word elements (VPERMB/VPERMW). The base
// feature predicate is a template parameter (HasVBMI for bytes, HasBWI for
// words); the 128/256-bit forms additionally require VLX. No broadcast
// forms -- byte/word elements cannot be broadcast-embedded.
// NOTE(review): the EVEX_V* suffix lines for each defm appear to be elided
// in this excerpt (file line numbers skip).
6248 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6249 Predicate prd, SDNode OpNode,
6250 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6251 let Predicates = [prd] in
6252 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6254 let Predicates = [HasVLX, prd] in {
6255 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6257 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
// Instantiate the one-source permutes. VEX_W selects the 64-bit/word
// opcode-map variants where required. Note VPERMQ/VPERMPD appear twice:
// once as variable permutes (vector index operand) and once as
// immediate-controlled permutes (imm8 selector) -- distinct opcodes.
6262 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6263 WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6264 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6265 WriteVarShuffle256, avx512vl_i8_info>;
6267 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6268 WriteVarShuffle256, avx512vl_i32_info>;
6269 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6270 WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6271 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6272 WriteFVarShuffle256, avx512vl_f32_info>;
6273 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6274 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6276 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6277 X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6278 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6279 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6280 X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6281 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6283 //===----------------------------------------------------------------------===//
6284 // AVX-512 - VPERMIL
6285 //===----------------------------------------------------------------------===//
// One vector width of the variable-control VPERMILPS/PD: register (rr),
// memory (rm), and broadcast-memory (rmb) forms, all maskable. "Ctrl" is
// the integer VT carrying the per-element selectors; "_" is the FP data VT.
// NOTE(review): one line of each of the rm/rmb pattern bodies appears to be
// elided in this excerpt.
6287 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6288 X86FoldableSchedWrite sched, X86VectorVTInfo _,
6289 X86VectorVTInfo Ctrl> {
6290 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6291 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6292 "$src2, $src1", "$src1, $src2",
6293 (_.VT (OpNode _.RC:$src1,
6294 (Ctrl.VT Ctrl.RC:$src2)))>,
6295 T8PD, EVEX_4V, Sched<[sched]>;
6296 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6297 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6298 "$src2, $src1", "$src1, $src2",
6301 (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
6302 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6303 Sched<[sched.Folded, ReadAfterLd]>;
6304 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6305 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6306 "${src2}"##_.BroadcastStr##", $src1",
6307 "$src1, ${src2}"##_.BroadcastStr,
6310 (Ctrl.VT (X86VBroadcast
6311 (Ctrl.ScalarLdFrag addr:$src2)))))>,
6312 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6313 Sched<[sched.Folded, ReadAfterLd]>;
// Stamp out the variable VPERMILP* forms for all three vector widths:
// 512-bit under HasAVX512, 128/256-bit additionally under HasVLX. Each
// width uses its own scheduling class (sched.ZMM/XMM/YMM).
6316 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6317 X86SchedWriteWidths sched,
6318 AVX512VLVectorVTInfo _,
6319 AVX512VLVectorVTInfo Ctrl> {
6320 let Predicates = [HasAVX512] in {
6321 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6322 _.info512, Ctrl.info512>, EVEX_V512;
6324 let Predicates = [HasAVX512, HasVLX] in {
6325 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6326 _.info128, Ctrl.info128>, EVEX_V128;
6327 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6328 _.info256, Ctrl.info256>, EVEX_V256;
// Top-level VPERMILP* multiclass: combines the variable-control forms
// (OpcVar, via avx512_permil_vec_common) with the immediate-control forms
// (OpcImm, via the shift-by-immediate multiclasses) under one NAME.
6332 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6333 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6334 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6336 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6337 X86VPermilpi, SchedWriteFShuffle, _>,
6338 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Instantiate VPERMILPS (imm opcode 0x04, variable 0x0C) and VPERMILPD
// (0x05 / 0x0D), each pinned to its FP execution domain.
6341 let ExeDomain = SSEPackedSingle in
6342 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6344 let ExeDomain = SSEPackedDouble in
6345 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6346 avx512vl_i64_info>, VEX_W1X;
6348 //===----------------------------------------------------------------------===//
6349 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6350 //===----------------------------------------------------------------------===//
// VPSHUFD / VPSHUFHW / VPSHUFLW: all share opcode 0x70 and differ only in
// the prefix-selected opcode map (BIi8 / XSIi8 / XDIi8). Modeled via the
// shift-by-immediate multiclasses since the operand shape matches.
6352 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6353 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6354 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6355 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6356 X86PShufhw, SchedWriteShuffle>,
6357 EVEX, AVX512XSIi8Base;
6358 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6359 X86PShuflw, SchedWriteShuffle>,
6360 EVEX, AVX512XDIi8Base;
6362 //===----------------------------------------------------------------------===//
6363 // AVX-512 - VPSHUFB
6364 //===----------------------------------------------------------------------===//
// VPSHUFB for all three widths: 512-bit under HasBWI, 128/256-bit
// additionally under HasVLX. Byte-element VTs are fixed here rather than
// passed in, since PSHUFB only exists for bytes.
// NOTE(review): the EVEX_V* suffix lines appear to be elided in this excerpt.
6366 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6367 X86SchedWriteWidths sched> {
6368 let Predicates = [HasBWI] in
6369 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6372 let Predicates = [HasVLX, HasBWI] in {
6373 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6375 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
// Instantiate VPSHUFB; VEX_WIG marks the W bit as ignored for this encoding.
6380 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6381 SchedWriteVarShuffle>, VEX_WIG;
6383 //===----------------------------------------------------------------------===//
6384 // Move Low to High and High to Low packed FP Instructions
6385 //===----------------------------------------------------------------------===//
// EVEX-encoded register-register VMOVLHPS/VMOVHLPS. NotMemoryFoldable on
// VMOVHLPS prevents the fold tables from turning it into a load form (the
// memory encoding of opcode 0x12 is VMOVLPS, a different operation).
6387 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6388 (ins VR128X:$src1, VR128X:$src2),
6389 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6390 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6391 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6392 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6393 (ins VR128X:$src1, VR128X:$src2),
6394 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6395 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6396 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6398 //===----------------------------------------------------------------------===//
6399 // VMOVHPS/PD VMOVLPS Instructions
6400 // All patterns was taken from SSS implementation.
6401 //===----------------------------------------------------------------------===//
// Load form of VMOVH/LPS/PD: merges a 64-bit memory operand into the high
// or low half of an XMM register, per OpNode. null_frag instantiations get
// no ISel pattern and are selected only via explicit Pat<>s elsewhere.
// NOTE(review): one line of the output pattern body is elided in this
// excerpt.
6403 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6404 SDPatternOperator OpNode,
6405 X86VectorVTInfo _> {
6406 let ExeDomain = _.ExeDomain in
6407 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6408 (ins _.RC:$src1, f64mem:$src2),
6409 !strconcat(OpcodeStr,
6410 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6414 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6415 Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V;
// Instantiate the four high/low packed moves. The VMOVLP* forms use
// null_frag (no auto-generated pattern); they are matched by the explicit
// patterns that follow.
6418 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
6419 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6420 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6421 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6422 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6423 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6424 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", null_frag,
6425 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
// Extra load-folding patterns for the hi/lo packed moves: match the DAG
// shapes (bitcast scalar loads, zero-extending loads, X86Movsd-with-load)
// that the multiclass's own pattern does not cover.
6427 let Predicates = [HasAVX512] in {
6429 def : Pat<(X86Movlhps VR128X:$src1,
6430 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
6431 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
6432 def : Pat<(X86Movlhps VR128X:$src1,
6433 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
6434 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
6436 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
6437 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6438 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6440 def : Pat<(v2f64 (X86Movsd VR128X:$src1,
6441 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
6442 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// Store forms: write the high (VMOVHP*) or low (VMOVLP*) 64 bits of an XMM
// register to memory. The high-half stores are modeled as storing element 0
// of an unpck-high shuffle of the source with itself.
6445 let SchedRW = [WriteFStore] in {
6446 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6447 (ins f64mem:$dst, VR128X:$src),
6448 "vmovhps\t{$src, $dst|$dst, $src}",
6449 [(store (f64 (extractelt
6450 (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
6451 (bc_v2f64 (v4f32 VR128X:$src))),
6452 (iPTR 0))), addr:$dst)]>,
6453 EVEX, EVEX_CD8<32, CD8VT2>;
6454 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6455 (ins f64mem:$dst, VR128X:$src),
6456 "vmovhpd\t{$src, $dst|$dst, $src}",
6457 [(store (f64 (extractelt
6458 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6459 (iPTR 0))), addr:$dst)]>,
6460 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6461 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6462 (ins f64mem:$dst, VR128X:$src),
6463 "vmovlps\t{$src, $dst|$dst, $src}",
6464 [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
6465 (iPTR 0))), addr:$dst)]>,
6466 EVEX, EVEX_CD8<32, CD8VT2>;
6467 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6468 (ins f64mem:$dst, VR128X:$src),
6469 "vmovlpd\t{$src, $dst|$dst, $src}",
6470 [(store (f64 (extractelt (v2f64 VR128X:$src),
6471 (iPTR 0))), addr:$dst)]>,
6472 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// Storing element 0 of a VPERMILPD with selector 1 (i.e. the high f64 lane)
// is exactly what VMOVHPD's store form does, so match it to that store.
6475 let Predicates = [HasAVX512] in {
6477 def : Pat<(store (f64 (extractelt
6478 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6479 (iPTR 0))), addr:$dst),
6480 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6482 //===----------------------------------------------------------------------===//
6483 // FMA - Fused Multiply Operations
// Packed FMA, 213 operand order, one vector width: register (r), memory
// (m), and broadcast (mb) forms. $src1 is tied to $dst. The OpNode operand
// order (src2, src1, src3) encodes the 213 multiply/add assignment.
// isCommutable-style flags are passed through the trailing 1/1 and 1/0
// arguments of AVX512_maskable_3src.
6486 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6487 X86FoldableSchedWrite sched,
6488 X86VectorVTInfo _, string Suff> {
6489 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6490 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6491 (ins _.RC:$src2, _.RC:$src3),
6492 OpcodeStr, "$src3, $src2", "$src2, $src3",
6493 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6494 AVX512FMA3Base, Sched<[sched]>;
6496 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6497 (ins _.RC:$src2, _.MemOp:$src3),
6498 OpcodeStr, "$src3, $src2", "$src2, $src3",
6499 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6500 AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
6502 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6503 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6504 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6505 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6507 _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
6508 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
// Embedded-rounding (rb) form of the packed 213 FMA: takes an explicit
// AVX512RC rounding-mode operand; EVEX_B + EVEX_RC select the static
// rounding encoding. Only the 512-bit width gets this form.
6512 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6513 X86FoldableSchedWrite sched,
6514 X86VectorVTInfo _, string Suff> {
6515 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6516 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6517 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6518 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6519 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
6520 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// All widths of a packed 213 FMA: 512-bit (plus its rounding form) under
// HasAVX512, 256/128-bit under VLX.
// NOTE(review): the VTInfo argument lines of each inner defm are elided in
// this excerpt (file line numbers skip).
6523 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6524 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6525 AVX512VLVectorVTInfo _, string Suff> {
6526 let Predicates = [HasAVX512] in {
6527 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6529 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6531 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6533 let Predicates = [HasVLX, HasAVX512] in {
6534 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
6536 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6537 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
6539 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6543 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6545 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6546 SchedWriteFMA, avx512vl_f32_info, "PS">;
6547 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6548 SchedWriteFMA, avx512vl_f64_info, "PD">,
// Instantiate the six packed 213-order FMA families.
6552 defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6553 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6554 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6555 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6556 defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6557 defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
// Packed FMA, 231 operand order: the tied $src1 is the addend (last OpNode
// operand) instead of the middle multiplicand. Otherwise parallel to the
// 213 multiclass: r / m / mb (broadcast) forms.
6560 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6561 X86FoldableSchedWrite sched,
6562 X86VectorVTInfo _, string Suff> {
6563 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6564 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6565 (ins _.RC:$src2, _.RC:$src3),
6566 OpcodeStr, "$src3, $src2", "$src2, $src3",
6567 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
6568 vselect, 1>, AVX512FMA3Base, Sched<[sched]>;
6570 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6571 (ins _.RC:$src2, _.MemOp:$src3),
6572 OpcodeStr, "$src3, $src2", "$src2, $src3",
6573 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6574 AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
6576 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6577 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6578 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6579 "$src2, ${src3}"##_.BroadcastStr,
6580 (_.VT (OpNode _.RC:$src2,
6581 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6582 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6583 Sched<[sched.Folded, ReadAfterLd]>;
// Embedded-rounding form of the packed 231 FMA (512-bit only), parallel to
// avx512_fma3_213_round but with 231 operand order.
6587 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6588 X86FoldableSchedWrite sched,
6589 X86VectorVTInfo _, string Suff> {
6590 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6591 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6592 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6593 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6594 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
6596 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// All widths of a packed 231 FMA; structure mirrors
// avx512_fma3p_213_common.
// NOTE(review): VTInfo argument lines of the inner defms are elided in this
// excerpt.
6599 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6600 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6601 AVX512VLVectorVTInfo _, string Suff> {
6602 let Predicates = [HasAVX512] in {
6603 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6605 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6607 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6609 let Predicates = [HasVLX, HasAVX512] in {
6610 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
6612 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6613 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
6615 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Single- and double-precision flavors of a 231 FMA opcode (PS/PD suffixes).
6619 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6620 SDNode OpNodeRnd > {
6621 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6622 SchedWriteFMA, avx512vl_f32_info, "PS">;
6623 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6624 SchedWriteFMA, avx512vl_f64_info, "PD">,
// Instantiate the six packed 231-order FMA families.
6628 defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
6629 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6630 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6631 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6632 defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6633 defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
// Packed FMA, 132 operand order. The register form uses the true 132
// operand assignment; the memory/broadcast patterns are deliberately
// written in the equivalent 312 order (see the in-body comments) so that
// tablegen does not flag them as duplicates of the 213/231 patterns.
6635 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6636 X86FoldableSchedWrite sched,
6637 X86VectorVTInfo _, string Suff> {
6638 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6639 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6640 (ins _.RC:$src2, _.RC:$src3),
6641 OpcodeStr, "$src3, $src2", "$src2, $src3",
6642 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
6643 AVX512FMA3Base, Sched<[sched]>;
6645 // Pattern is 312 order so that the load is in a different place from the
6646 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6647 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6648 (ins _.RC:$src2, _.MemOp:$src3),
6649 OpcodeStr, "$src3, $src2", "$src2, $src3",
6650 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6651 AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
6653 // Pattern is 312 order so that the load is in a different place from the
6654 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6655 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6656 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6657 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6658 "$src2, ${src3}"##_.BroadcastStr,
6659 (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6660 _.RC:$src1, _.RC:$src2)), 1, 0>,
6661 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
// Embedded-rounding form of the packed 132 FMA (512-bit only).
6665 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6666 X86FoldableSchedWrite sched,
6667 X86VectorVTInfo _, string Suff> {
6668 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6669 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6670 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6671 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6672 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
6674 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// All widths of a packed 132 FMA; structure mirrors
// avx512_fma3p_213_common.
// NOTE(review): VTInfo argument lines of the inner defms are elided in this
// excerpt.
6677 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6678 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6679 AVX512VLVectorVTInfo _, string Suff> {
6680 let Predicates = [HasAVX512] in {
6681 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6683 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6685 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6687 let Predicates = [HasVLX, HasAVX512] in {
6688 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
6690 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6691 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
6693 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Single- and double-precision flavors of a 132 FMA opcode (PS/PD suffixes).
6697 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6698 SDNode OpNodeRnd > {
6699 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6700 SchedWriteFMA, avx512vl_f32_info, "PS">;
6701 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6702 SchedWriteFMA, avx512vl_f64_info, "PD">,
// Instantiate the six packed 132-order FMA families.
6706 defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
6707 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6708 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6709 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6710 defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6711 defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA skeleton: the caller supplies the match/result DAGs. _Int
// forms operate on full XMM registers (intrinsic semantics, preserving
// upper elements); the isCodeGenOnly forms operate on scalar FRC registers
// and carry the RHS_r/RHS_m/RHS_b patterns. MaskOnlyReg suppresses the
// register patterns for operand orders where only the masked intrinsic
// form is needed.
6714 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6715 dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
6716 dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6717 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6718 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6719 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6720 "$src3, $src2", "$src2, $src3", RHS_VEC_r, 1, 1>,
6721 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;
6723 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6724 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6725 "$src3, $src2", "$src2, $src3", RHS_VEC_m, 1, 1>,
6726 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
6728 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6729 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6730 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb, 1, 1>,
6731 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6733 let isCodeGenOnly = 1, isCommutable = 1 in {
6734 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6735 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6736 !strconcat(OpcodeStr,
6737 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6738 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
6739 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6740 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6741 !strconcat(OpcodeStr,
6742 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6743 [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
6745 def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6746 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6747 !strconcat(OpcodeStr,
6748 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6749 !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6750 Sched<[SchedWriteFMA.Scl]>;
6751 }// isCodeGenOnly = 1
6752 }// Constraints = "$src1 = $dst"
// Instantiate the 213/231/132 scalar forms of one FMA family for one
// element type, supplying the concrete match/result DAGs to
// avx512_fma3s_common. OpNodes1/OpNodes3 are the intrinsic-order nodes
// (addend in operand 1 vs operand 3); OpNode is the plain scalar node.
// NOTE(review): several pattern-argument lines are elided in this excerpt
// (file line numbers skip).
6755 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6756 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
6757 SDNode OpNodes1, SDNode OpNodeRnds1, SDNode OpNodes3,
6758 SDNode OpNodeRnds3, X86VectorVTInfo _,
6760 let ExeDomain = _.ExeDomain in {
6761 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6762 // Operands for intrinsic are in 123 order to preserve passthru
6764 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
6765 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2,
6766 _.ScalarIntMemCPat:$src3)),
6767 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
6769 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6771 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6772 (_.ScalarLdFrag addr:$src3)))),
6773 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6774 _.FRC:$src3, (i32 imm:$rc)))), 0>;
6776 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6777 (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
6778 (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
6780 (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
6782 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6784 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6785 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6786 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6787 _.FRC:$src1, (i32 imm:$rc)))), 1>;
6789 // One pattern is 312 order so that the load is in a different place from the
6790 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6791 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6793 (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
6796 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6798 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6799 _.FRC:$src1, _.FRC:$src2))),
6800 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6801 _.FRC:$src2, (i32 imm:$rc)))), 1>;
// Top-level scalar FMA multiclass: expands one family for both f32 ("SS")
// and f64 ("SD") under HasAVX512. VEX_LIG marks the vector-length bits as
// ignored for scalar encodings.
6805 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6806 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
6807 SDNode OpNodes1, SDNode OpNodeRnds1, SDNode OpNodes3,
6808 SDNode OpNodeRnds3> {
6809 let Predicates = [HasAVX512] in {
6810 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6811 OpNodeRnd, OpNodes1, OpNodeRnds1, OpNodes3,
6812 OpNodeRnds3, f32x_info, "SS">,
6813 EVEX_CD8<32, CD8VT1>, VEX_LIG;
6814 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6815 OpNodeRnd, OpNodes1, OpNodeRnds1, OpNodes3,
6816 OpNodeRnds3, f64x_info, "SD">,
6817 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
// Instantiate the four scalar FMA families (each covers 213/231/132 forms).
// NOTE(review): the trailing Rnds3 argument line of each defm appears to be
// elided in this excerpt.
6821 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd,
6822 X86Fmadds1, X86FmaddRnds1, X86Fmadds3,
6824 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd,
6825 X86Fmsubs1, X86FmsubRnds1, X86Fmsubs3,
6827 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd,
6828 X86Fnmadds1, X86FnmaddRnds1, X86Fnmadds3,
6830 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd,
6831 X86Fnmsubs1, X86FnmsubRnds1, X86Fnmsubs3,
6834 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6835 string Suffix, SDNode Move,
6836 X86VectorVTInfo _, PatLeaf ZeroFP> {
6837 let Predicates = [HasAVX512] in {
6838 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6840 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6842 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6843 VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
6844 (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
6846 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6848 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6849 (_.ScalarLdFrag addr:$src3)))))),
6850 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6851 VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
6854 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6855 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6856 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6857 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6858 VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
6861 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6862 (X86selects VK1WM:$mask,
6864 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6866 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6867 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6868 VR128X:$src1, VK1WM:$mask,
6869 (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
6870 (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
6872 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6873 (X86selects VK1WM:$mask,
6875 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6876 (_.ScalarLdFrag addr:$src3)),
6877 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6878 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6879 VR128X:$src1, VK1WM:$mask,
6880 (COPY_TO_REGCLASS _.FRC:$src2, VR128X), addr:$src3)>;
6882 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6883 (X86selects VK1WM:$mask,
6884 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6885 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6886 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6887 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6888 VR128X:$src1, VK1WM:$mask,
6889 (COPY_TO_REGCLASS _.FRC:$src2, VR128X), addr:$src3)>;
6891 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6892 (X86selects VK1WM:$mask,
6893 (Op _.FRC:$src2, _.FRC:$src3,
6894 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6895 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6896 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6897 VR128X:$src1, VK1WM:$mask,
6898 (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
6899 (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
6901 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6902 (X86selects VK1WM:$mask,
6903 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6904 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6905 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6906 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6907 VR128X:$src1, VK1WM:$mask,
6908 (COPY_TO_REGCLASS _.FRC:$src2, VR128X), addr:$src3)>;
6910 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6911 (X86selects VK1WM:$mask,
6913 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6915 (_.EltVT ZeroFP)))))),
6916 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6917 VR128X:$src1, VK1WM:$mask,
6918 (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
6919 (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
6921 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6922 (X86selects VK1WM:$mask,
6923 (Op _.FRC:$src2, _.FRC:$src3,
6924 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6925 (_.EltVT ZeroFP)))))),
6926 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6927 VR128X:$src1, VK1WM:$mask,
6928 (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
6929 (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
6931 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6932 (X86selects VK1WM:$mask,
6934 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6935 (_.ScalarLdFrag addr:$src3)),
6936 (_.EltVT ZeroFP)))))),
6937 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6938 VR128X:$src1, VK1WM:$mask,
6939 (COPY_TO_REGCLASS _.FRC:$src2, VR128X), addr:$src3)>;
6941 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6942 (X86selects VK1WM:$mask,
6943 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6944 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6945 (_.EltVT ZeroFP)))))),
6946 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6947 VR128X:$src1, VK1WM:$mask,
6948 (COPY_TO_REGCLASS _.FRC:$src2, VR128X), addr:$src3)>;
6950 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6951 (X86selects VK1WM:$mask,
6952 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6953 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6954 (_.EltVT ZeroFP)))))),
6955 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6956 VR128X:$src1, VK1WM:$mask,
6957 (COPY_TO_REGCLASS _.FRC:$src2, VR128X), addr:$src3)>;
6959 // Patterns with rounding mode.
6960 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6962 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6963 _.FRC:$src3, (i32 imm:$rc)))))),
6964 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6965 VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
6966 (COPY_TO_REGCLASS _.FRC:$src3, VR128X), imm:$rc)>;
6968 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6969 (X86selects VK1WM:$mask,
6971 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6972 _.FRC:$src3, (i32 imm:$rc)),
6973 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6974 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6975 VR128X:$src1, VK1WM:$mask,
6976 (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
6977 (COPY_TO_REGCLASS _.FRC:$src3, VR128X), imm:$rc)>;
6979 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6980 (X86selects VK1WM:$mask,
6981 (RndOp _.FRC:$src2, _.FRC:$src3,
6982 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6984 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6985 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6986 VR128X:$src1, VK1WM:$mask,
6987 (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
6988 (COPY_TO_REGCLASS _.FRC:$src3, VR128X), imm:$rc)>;
6990 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6991 (X86selects VK1WM:$mask,
6993 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6994 _.FRC:$src3, (i32 imm:$rc)),
6995 (_.EltVT ZeroFP)))))),
6996 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6997 VR128X:$src1, VK1WM:$mask,
6998 (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
6999 (COPY_TO_REGCLASS _.FRC:$src3, VR128X), imm:$rc)>;
7001 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7002 (X86selects VK1WM:$mask,
7003 (RndOp _.FRC:$src2, _.FRC:$src3,
7004 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7006 (_.EltVT ZeroFP)))))),
7007 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7008 VR128X:$src1, VK1WM:$mask,
7009 (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
7010 (COPY_TO_REGCLASS _.FRC:$src3, VR128X), imm:$rc)>;
// Instantiate the scalar FMA select/move patterns for every FMA flavor.
// Each defm pairs the plain SDNode with its explicit-rounding variant and
// names the instruction prefix ("VFMADD" etc.) used to !cast the Z-form
// instructions. First the f32 (SS) forms:
7014 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
7015 X86Movss, v4f32x_info, fp32imm0>;
7016 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
7017 X86Movss, v4f32x_info, fp32imm0>;
7018 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
7019 X86Movss, v4f32x_info, fp32imm0>;
7020 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
7021 X86Movss, v4f32x_info, fp32imm0>;
// ...and the f64 (SD) forms, differing only in the move node, VT info and
// the zero-FP immediate used for the zero-masked patterns.
7023 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
7024 X86Movsd, v2f64x_info, fp64imm0>;
7025 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
7026 X86Movsd, v2f64x_info, fp64imm0>;
7027 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
7028 X86Movsd, v2f64x_info, fp64imm0>;
7029 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
7030 X86Movsd, v2f64x_info, fp64imm0>;
7032 //===----------------------------------------------------------------------===//
7033 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7034 //===----------------------------------------------------------------------===//
// IFMA register/memory/broadcast forms for one vector width. $src1 is tied
// to $dst (read-modify-write accumulator).
7035 let Constraints = "$src1 = $dst" in {
7036 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7037 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7038 // NOTE: The SDNode has the multiply operands first with the add last.
7039 // This enables commuted load patterns to be autogenerated by tablegen.
7040 let ExeDomain = _.ExeDomain in {
// Register-register form; the trailing 1, 1 flags allow commuting/masking
// handling in AVX512_maskable_3src.
7041 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7042 (ins _.RC:$src2, _.RC:$src3),
7043 OpcodeStr, "$src3, $src2", "$src2, $src3",
7044 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7045 AVX512FMA3Base, Sched<[sched]>;
// Full-vector memory operand form.
7047 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7048 (ins _.RC:$src2, _.MemOp:$src3),
7049 OpcodeStr, "$src3, $src2", "$src2, $src3",
7050 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7051 AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
// Scalar-memory broadcast form ({1toN}), selected via EVEX_B.
7053 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7054 (ins _.RC:$src2, _.ScalarMemOp:$src3),
7055 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
7056 !strconcat("$src2, ${src3}", _.BroadcastStr ),
7058 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
7060 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
7063 } // Constraints = "$src1 = $dst"
// Instantiate the IFMA forms at all three vector widths: ZMM requires only
// HasIFMA, while the YMM/XMM forms additionally require HasVLX.
7065 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7066 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7067 let Predicates = [HasIFMA] in {
7068 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7069 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7071 let Predicates = [HasVLX, HasIFMA] in {
7072 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7073 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7074 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7075 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// vpmadd52luq (0xB4) adds the low 52 bits of the 104-bit product;
// vpmadd52huq (0xB5) adds the high 52 bits.
7079 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7080 SchedWriteVecIMul, avx512vl_i64_info>,
7082 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7083 SchedWriteVecIMul, avx512vl_i64_info>,
7086 //===----------------------------------------------------------------------===//
7087 // AVX-512 Scalar convert from sign integer to float/double
7088 //===----------------------------------------------------------------------===//
// Scalar int -> fp conversion (cvtsi2ss/sd family). Defines the FRC-based
// rr/rm forms (no patterns; matched via the _Int forms or standalone Pats
// below) and the isCodeGenOnly rr_Int/rm_Int forms that carry the actual
// OpNode patterns with FROUND_CURRENT rounding.
7090 multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched,
7091 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7092 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
7093 let hasSideEffects = 0 in {
// Assembler-visible forms; no ISel patterns attached.
7094 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7095 (ins DstVT.FRC:$src1, SrcRC:$src),
7096 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7097 EVEX_4V, Sched<[sched]>;
7099 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7100 (ins DstVT.FRC:$src1, x86memop:$src),
7101 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7102 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
7103 } // hasSideEffects = 0
7104 let isCodeGenOnly = 1 in {
// Intrinsic (vector-register) forms with patterns.
7105 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7106 (ins DstVT.RC:$src1, SrcRC:$src2),
7107 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7108 [(set DstVT.RC:$dst,
7109 (OpNode (DstVT.VT DstVT.RC:$src1),
7111 (i32 FROUND_CURRENT)))]>,
7112 EVEX_4V, Sched<[sched]>;
7114 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7115 (ins DstVT.RC:$src1, x86memop:$src2),
7116 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7117 [(set DstVT.RC:$dst,
7118 (OpNode (DstVT.VT DstVT.RC:$src1),
7119 (ld_frag addr:$src2),
7120 (i32 FROUND_CURRENT)))]>,
7121 EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
7122 }//isCodeGenOnly = 1
// Register form with explicit static rounding control ({rn-sae} etc.),
// encoded via EVEX_B + EVEX_RC.
7125 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7126 X86FoldableSchedWrite sched, RegisterClass SrcRC,
7127 X86VectorVTInfo DstVT, string asm> {
7128 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7129 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7131 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7132 [(set DstVT.RC:$dst,
7133 (OpNode (DstVT.VT DstVT.RC:$src1),
7136 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
// Convenience wrapper: emits both the rounding-control and the plain
// current-rounding forms under one NAME, with VEX_LIG (L is ignored).
7139 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode,
7140 X86FoldableSchedWrite sched,
7141 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7142 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
7143 defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>,
7144 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7145 ld_frag, asm>, VEX_LIG;
7148 let Predicates = [HasAVX512] in {
// Signed 32/64-bit integer -> f32/f64 converts (all four combinations).
7149 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR32,
7150 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
7151 XS, EVEX_CD8<32, CD8VT1>;
7152 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR64,
7153 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
7154 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7155 defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR32,
7156 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
7157 XD, EVEX_CD8<32, CD8VT1>;
7158 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR64,
7159 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
7160 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// AT&T-syntax aliases for the memory forms without an explicit size suffix.
7162 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7163 (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
7164 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7165 (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
// Map plain sint_to_fp DAG nodes onto the FRC forms; the first (tied)
// source is irrelevant for the result, hence IMPLICIT_DEF.
7167 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
7168 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7169 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
7170 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7171 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
7172 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7173 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
7174 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7176 def : Pat<(f32 (sint_to_fp GR32:$src)),
7177 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7178 def : Pat<(f32 (sint_to_fp GR64:$src)),
7179 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7180 def : Pat<(f64 (sint_to_fp GR32:$src)),
7181 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7182 def : Pat<(f64 (sint_to_fp GR64:$src)),
7183 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned 32/64-bit integer -> f32/f64 converts. Note VCVTUSI2SDZ uses
// avx512_vcvtsi (no rounding variant): i32->f64 is always exact.
7185 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR32,
7186 v4f32x_info, i32mem, loadi32,
7187 "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
7188 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR64,
7189 v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
7190 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7191 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info,
7192 i32mem, loadi32, "cvtusi2sd{l}">,
7193 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7194 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR64,
7195 v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
7196 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7198 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7199 (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
7200 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7201 (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
// uint_to_fp DAG-node patterns, mirroring the signed ones above.
7203 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
7204 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7205 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
7206 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7207 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
7208 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7209 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
7210 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7212 def : Pat<(f32 (uint_to_fp GR32:$src)),
7213 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7214 def : Pat<(f32 (uint_to_fp GR64:$src)),
7215 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7216 def : Pat<(f64 (uint_to_fp GR32:$src)),
7217 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7218 def : Pat<(f64 (uint_to_fp GR64:$src)),
7219 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7222 //===----------------------------------------------------------------------===//
7223 // AVX-512 Scalar convert from float/double to integer
7224 //===----------------------------------------------------------------------===//
// Scalar fp -> int converts with rounding: plain (rr_Int), explicit RC
// (rrb_Int) and memory (rm_Int) forms, plus AT&T size-suffix aliases.
// CodeGenOnly controls whether rm_Int is assembler-visible; the aliases
// multiclass below clears it to expose the memory form.
7226 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7227 X86VectorVTInfo DstVT, SDNode OpNode,
7228 X86FoldableSchedWrite sched, string asm,
7230 bit CodeGenOnly = 1> {
7231 let Predicates = [HasAVX512] in {
7232 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7233 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7234 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
7235 EVEX, VEX_LIG, Sched<[sched]>;
7236 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7237 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7238 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
7239 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7241 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
7242 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7243 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7244 [(set DstVT.RC:$dst, (OpNode
7245 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
7246 (i32 FROUND_CURRENT)))]>,
7247 EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
7249 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7250 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7251 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7252 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7253 } // Predicates = [HasAVX512]
// Variant that makes the memory form assembler-visible (CodeGenOnly = 0)
// and adds the AT&T alias for it; used for the cvt*2usi instructions,
// which have no pre-AVX512 form to conflict with.
7256 multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
7257 X86VectorVTInfo DstVT, SDNode OpNode,
7258 X86FoldableSchedWrite sched, string asm,
7260 avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, sched, asm, aliasStr, 0> {
7261 let Predicates = [HasAVX512] in {
7262 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7263 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7264 SrcVT.IntScalarMemOp:$src), 0, "att">;
7265 } // Predicates = [HasAVX512]
7268 // Convert float/double to signed/unsigned int 32/64
7269 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
7270 X86cvts2si, WriteCvtSS2I, "cvtss2si", "{l}">,
7271 XS, EVEX_CD8<32, CD8VT1>;
7272 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
7273 X86cvts2si, WriteCvtSS2I, "cvtss2si", "{q}">,
7274 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
// Unsigned variants (0x79) use the _aliases multiclass so the memory form
// is assembler-visible.
7275 defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
7276 X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{l}">,
7277 XS, EVEX_CD8<32, CD8VT1>;
7278 defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
7279 X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{q}">,
7280 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7281 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
7282 X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{l}">,
7283 XD, EVEX_CD8<64, CD8VT1>;
7284 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
7285 X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{q}">,
7286 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7287 defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
7288 X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7289 XD, EVEX_CD8<64, CD8VT1>;
7290 defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
7291 X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7292 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7294 // The SSE version of these instructions are disabled for AVX512.
7295 // Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
7296 let Predicates = [HasAVX512] in {
// Select the legacy SSE cvt intrinsics to the EVEX-encoded instructions,
// since the VEX/legacy encodings are unavailable under AVX512 here.
7297 def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
7298 (VCVTSS2SIZrr_Int VR128X:$src)>;
7299 def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
7300 (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
7301 def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
7302 (VCVTSS2SI64Zrr_Int VR128X:$src)>;
7303 def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
7304 (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
7305 def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
7306 (VCVTSD2SIZrr_Int VR128X:$src)>;
7307 def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
7308 (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
7309 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
7310 (VCVTSD2SI64Zrr_Int VR128X:$src)>;
7311 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
7312 (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
7315 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7316 // which produce unnecessary vmovs{s,d} instructions
7317 let Predicates = [HasAVX512] in {
// Fold (Movs{s,d} dst, (scalar_to_vector (cvt x))) directly into the _Int
// instruction, whose tied first operand already provides the pass-through
// upper elements — eliminating the separate vmovs{s,d}.
7318 def : Pat<(v4f32 (X86Movss
7319 (v4f32 VR128X:$dst),
7320 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
7321 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7323 def : Pat<(v4f32 (X86Movss
7324 (v4f32 VR128X:$dst),
7325 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
7326 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7328 def : Pat<(v4f32 (X86Movss
7329 (v4f32 VR128X:$dst),
7330 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
7331 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7333 def : Pat<(v4f32 (X86Movss
7334 (v4f32 VR128X:$dst),
7335 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
7336 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7338 def : Pat<(v2f64 (X86Movsd
7339 (v2f64 VR128X:$dst),
7340 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
7341 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7343 def : Pat<(v2f64 (X86Movsd
7344 (v2f64 VR128X:$dst),
7345 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
7346 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7348 def : Pat<(v2f64 (X86Movsd
7349 (v2f64 VR128X:$dst),
7350 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
7351 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7353 def : Pat<(v2f64 (X86Movsd
7354 (v2f64 VR128X:$dst),
7355 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
7356 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
// Same folds for the unsigned converts.
7358 def : Pat<(v4f32 (X86Movss
7359 (v4f32 VR128X:$dst),
7360 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
7361 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7363 def : Pat<(v4f32 (X86Movss
7364 (v4f32 VR128X:$dst),
7365 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
7366 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7368 def : Pat<(v4f32 (X86Movss
7369 (v4f32 VR128X:$dst),
7370 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
7371 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7373 def : Pat<(v4f32 (X86Movss
7374 (v4f32 VR128X:$dst),
7375 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
7376 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7378 def : Pat<(v2f64 (X86Movsd
7379 (v2f64 VR128X:$dst),
7380 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
7381 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7383 def : Pat<(v2f64 (X86Movsd
7384 (v2f64 VR128X:$dst),
7385 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
7386 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7388 def : Pat<(v2f64 (X86Movsd
7389 (v2f64 VR128X:$dst),
7390 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
7391 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7393 def : Pat<(v2f64 (X86Movsd
7394 (v2f64 VR128X:$dst),
7395 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
7396 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7397 } // Predicates = [HasAVX512]
7399 // Convert float/double to signed/unsigned int 32/64 with truncation
// Truncating scalar fp -> int converts (vcvtt*): CodeGenOnly FRC forms with
// fp_to_[su]int patterns, intrinsic forms (rr_Int/rm_Int), an {sae} form
// (rrb_Int, suppresses exceptions), and AT&T size-suffix aliases.
7400 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7401 X86VectorVTInfo _DstRC, SDNode OpNode,
7402 SDNode OpNodeRnd, X86FoldableSchedWrite sched,
7403 string aliasStr, bit CodeGenOnly = 1>{
7404 let Predicates = [HasAVX512] in {
7405 let isCodeGenOnly = 1 in {
7406 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7407 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7408 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7409 EVEX, Sched<[sched]>;
7410 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7411 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7412 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7413 EVEX, Sched<[sched.Folded, ReadAfterLd]>;
7416 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7417 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7418 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
7419 (i32 FROUND_CURRENT)))]>,
7420 EVEX, VEX_LIG, Sched<[sched]>;
7421 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7422 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7423 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
7424 (i32 FROUND_NO_EXC)))]>,
7425 EVEX,VEX_LIG , EVEX_B, Sched<[sched]>;
7426 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
7427 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7428 (ins _SrcRC.IntScalarMemOp:$src),
7429 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7430 [(set _DstRC.RC:$dst, (OpNodeRnd
7431 (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
7432 (i32 FROUND_CURRENT)))]>,
7433 EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
7435 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7436 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7437 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7438 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
// Unsigned-truncating variant: exposes the memory intrinsic form in the
// assembler and adds its AT&T alias (mirrors avx512_cvt_s_int_round_aliases).
7442 multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
7443 X86VectorVTInfo _SrcRC,
7444 X86VectorVTInfo _DstRC, SDNode OpNode,
7445 SDNode OpNodeRnd, X86FoldableSchedWrite sched,
7447 avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, sched,
7449 let Predicates = [HasAVX512] in {
7450 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7451 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7452 _SrcRC.IntScalarMemOp:$src), 0, "att">;
// Truncating converts: signed (0x2C) and unsigned (0x78) for each of
// f32/f64 source x i32/i64 destination.
7456 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7457 fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{l}">,
7458 XS, EVEX_CD8<32, CD8VT1>;
7459 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7460 fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{q}">,
7461 VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7462 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7463 fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{l}">,
7464 XD, EVEX_CD8<64, CD8VT1>;
7465 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7466 fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{q}">,
7467 VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7469 defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
7470 fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{l}">,
7471 XS, EVEX_CD8<32, CD8VT1>;
7472 defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
7473 fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{q}">,
7474 XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7475 defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7476 fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{l}">,
7477 XD, EVEX_CD8<64, CD8VT1>;
7478 defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7479 fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{q}">,
7480 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7482 let Predicates = [HasAVX512] in {
// Map the legacy SSE truncating-convert intrinsics onto the EVEX-encoded
// instructions (the SSE encodings are not selected under AVX512 here).
7483 def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
7484 (VCVTTSS2SIZrr_Int VR128X:$src)>;
7485 def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
7486 (VCVTTSS2SIZrm_Int ssmem:$src)>;
7487 def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
7488 (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
7489 def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
7490 (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
7491 def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
7492 (VCVTTSD2SIZrr_Int VR128X:$src)>;
7493 def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
7494 (VCVTTSD2SIZrm_Int sdmem:$src)>;
7495 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
7496 (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
7497 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
7498 (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
7501 //===----------------------------------------------------------------------===//
7502 // AVX-512 Convert from float to double and back
7503 //===----------------------------------------------------------------------===//
// Scalar fp <-> fp conversion (cvtss2sd/cvtsd2ss): maskable intrinsic
// forms with patterns, plus pattern-less FRC forms for the assembler and
// for the standalone DAG patterns defined after the instantiations.
7505 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7506 X86VectorVTInfo _Src, SDNode OpNode,
7507 X86FoldableSchedWrite sched> {
7508 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7509 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7510 "$src2, $src1", "$src1, $src2",
7511 (_.VT (OpNode (_.VT _.RC:$src1),
7512 (_Src.VT _Src.RC:$src2),
7513 (i32 FROUND_CURRENT)))>,
7514 EVEX_4V, VEX_LIG, Sched<[sched]>;
7515 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7516 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7517 "$src2, $src1", "$src1, $src2",
7518 (_.VT (OpNode (_.VT _.RC:$src1),
7519 (_Src.VT _Src.ScalarIntMemCPat:$src2),
7520 (i32 FROUND_CURRENT)))>,
7522 Sched<[sched.Folded, ReadAfterLd]>;
7524 let isCodeGenOnly = 1, hasSideEffects = 0 in {
// FRC forms with no patterns; matched by the Pats below the defms.
7525 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7526 (ins _.FRC:$src1, _Src.FRC:$src2),
7527 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7528 EVEX_4V, VEX_LIG, Sched<[sched]>;
7530 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7531 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7532 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7533 EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
7537 // Scalar Conversion with SAE - suppress all exceptions
// {sae} form for conversions where rounding cannot change the result
// (ss2sd); encoded via EVEX_B with FROUND_NO_EXC.
7538 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7539 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7540 X86FoldableSchedWrite sched> {
7541 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7542 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7543 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7544 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7545 (_Src.VT _Src.RC:$src2),
7546 (i32 FROUND_NO_EXC)))>,
7547 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7550 // Scalar Conversion with rounding control (RC)
// Explicit rounding-control form for narrowing conversions (sd2ss).
7551 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7552 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7553 X86FoldableSchedWrite sched> {
7554 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7555 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7556 "$rc, $src2, $src1", "$src1, $src2, $rc",
7557 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7558 (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
7559 EVEX_4V, VEX_LIG, Sched<[sched]>,
// f64 -> f32: pairs the plain scalar forms with the rounding-control form
// (narrowing can round, so RC is meaningful).
7562 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7563 SDNode OpNodeRnd, X86FoldableSchedWrite sched,
7564 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7565 let Predicates = [HasAVX512] in {
7566 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
7567 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7568 OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
// f32 -> f64: pairs the plain scalar forms with the {sae} form (widening
// is exact, so only exception suppression applies, not RC).
7572 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
7573 X86FoldableSchedWrite sched,
7574 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7575 let Predicates = [HasAVX512] in {
7576 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
7577 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
7578 EVEX_CD8<32, CD8VT1>, XS;
// Both directions share opcode 0x5A; prefix bytes (XD vs XS, applied in
// the multiclasses above) distinguish them.
7581 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
7582 X86froundRnd, WriteCvtSD2SS, f64x_info,
7584 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
7585 X86fpextRnd, WriteCvtSS2SD, f32x_info,
// Map generic fpextend/fpround DAG nodes onto the scalar FRC forms; the
// tied first source is a don't-care, hence IMPLICIT_DEF.
7588 def : Pat<(f64 (fpextend FR32X:$src)),
7589 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7590 Requires<[HasAVX512]>;
7591 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
7592 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7593 Requires<[HasAVX512, OptForSize]>;
// For extending loads: fold the load when optimizing for size; when
// optimizing for speed do the load separately (VMOVSSZrm) then convert,
// avoiding a false dependency through the memory-form convert.
7595 def : Pat<(f64 (extloadf32 addr:$src)),
7596 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7597 Requires<[HasAVX512, OptForSize]>;
7599 def : Pat<(f64 (extloadf32 addr:$src)),
7600 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
7601 Requires<[HasAVX512, OptForSpeed]>;
7603 def : Pat<(f32 (fpround FR64X:$src)),
7604 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7605 Requires<[HasAVX512]>;
// Fold a Movs{s,d}-of-converted-element into the _Int form, which already
// passes through the upper elements of $dst.
7607 def : Pat<(v4f32 (X86Movss
7608 (v4f32 VR128X:$dst),
7609 (v4f32 (scalar_to_vector
7610 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7611 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7612 Requires<[HasAVX512]>;
7614 def : Pat<(v2f64 (X86Movsd
7615 (v2f64 VR128X:$dst),
7616 (v2f64 (scalar_to_vector
7617 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7618 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7619 Requires<[HasAVX512]>;
7621 //===----------------------------------------------------------------------===//
7622 // AVX-512 Vector convert from signed/unsigned integer to float/double
7623 // and from float/double to signed/unsigned integer
7624 //===----------------------------------------------------------------------===//
// Generic packed conversion: register (rr), full-vector memory (rm) and
// scalar-broadcast memory (rmb) forms. Broadcast/Alias/MemOp parameters
// let callers override the defaults for width-changing conversions
// (e.g. v2f64<-v4f32 uses "{1to2}", f64mem).
7626 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7627 X86VectorVTInfo _Src, SDNode OpNode,
7628 X86FoldableSchedWrite sched,
7629 string Broadcast = _.BroadcastStr,
7630 string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
7632 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7633 (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
7634 (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
7635 EVEX, Sched<[sched]>;
7637 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7638 (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
7639 (_.VT (OpNode (_Src.VT
7640 (bitconvert (_Src.LdFrag addr:$src)))))>,
7641 EVEX, Sched<[sched.Folded]>;
7643 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7644 (ins _Src.ScalarMemOp:$src), OpcodeStr,
7645 "${src}"##Broadcast, "${src}"##Broadcast,
7646 (_.VT (OpNode (_Src.VT
7647 (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
7649 Sched<[sched.Folded]>;
7651 // Conversion with SAE - suppress all exceptions
7652 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7653 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7654 X86FoldableSchedWrite sched> {
// Register-only form with EVEX.b set to request {sae} semantics.
7655 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7656 (ins _Src.RC:$src), OpcodeStr,
7657 "{sae}, $src", "$src, {sae}",
7658 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
7659 (i32 FROUND_NO_EXC)))>,
7660 EVEX, EVEX_B, Sched<[sched]>;
7663 // Conversion with rounding control (RC)
7664 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7665 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7666 X86FoldableSchedWrite sched> {
// Register-only form; EVEX.b + EVEX_RC encode the static rounding mode ($rc).
7667 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7668 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7669 "$rc, $src", "$src, $rc",
7670 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
7671 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7674 // Extend Float to Double
7675 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7676 X86SchedWriteWidths sched> {
7677 let Predicates = [HasAVX512] in {
7678 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
7679 fpextend, sched.ZMM>,
7680 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7681 X86vfpextRnd, sched.ZMM>, EVEX_V512;
7683 let Predicates = [HasVLX] in {
// 128-bit form reads only the low two f32 elements, hence the f64mem
// memory operand and the explicit "{1to2}" broadcast suffix.
7684 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7685 X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7686 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
7687 sched.YMM>, EVEX_V256;
7691 // Truncate Double to Float
7692 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7693 let Predicates = [HasAVX512] in {
7694 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
7695 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7696 X86vfproundRnd, sched.ZMM>, EVEX_V512;
7698 let Predicates = [HasVLX] in {
// The "{x}"/"{y}" mnemonic suffixes distinguish the 128- and 256-bit
// memory forms, which otherwise share the same destination type.
7699 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7700 X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
7701 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
7702 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// Accept the suffixed mnemonics for the register forms (and the Intel-syntax
// memory forms) in the assembler as well.
7704 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7705 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7706 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7707 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
7708 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7709 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7710 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7711 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
// Instantiate the PD<->PS conversion instructions.
7715 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7716 VEX_W, PD, EVEX_CD8<64, CD8VF>;
7717 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7718 PS, EVEX_CD8<32, CD8VH>;
// Fold an extending vector load directly into the conversion.
7720 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
7721 (VCVTPS2PDZrm addr:$src)>;
7723 let Predicates = [HasVLX] in {
7724 let AddedComplexity = 15 in {
// Zero-extended low-half results of 2-element double->float rounds.
7725 def : Pat<(X86vzmovl (v2f64 (bitconvert
7726 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
7727 (VCVTPD2PSZ128rr VR128X:$src)>;
7728 def : Pat<(X86vzmovl (v2f64 (bitconvert
7729 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
7730 (VCVTPD2PSZ128rm addr:$src)>;
7732 def : Pat<(v2f64 (extloadv2f32 addr:$src)),
7733 (VCVTPS2PDZ128rm addr:$src)>;
7734 def : Pat<(v4f64 (extloadv4f32 addr:$src)),
7735 (VCVTPS2PDZ256rm addr:$src)>;
7738 // Convert Signed/Unsigned Doubleword to Double
// OpNode128 is a separate node for the 128-bit form, which only consumes the
// low two i32 elements of its v4i32 source (loaded via i64mem).
7739 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7740 SDNode OpNode128, X86SchedWriteWidths sched> {
7741 // No rounding in this op
7742 let Predicates = [HasAVX512] in
7743 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7744 sched.ZMM>, EVEX_V512;
7746 let Predicates = [HasVLX] in {
7747 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7748 OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
7749 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7750 sched.YMM>, EVEX_V256;
7754 // Convert Signed/Unsigned Doubleword to Float
// The 512-bit form additionally gets a rounding-control variant (OpNodeRnd).
7755 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7756 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7757 let Predicates = [HasAVX512] in
7758 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7760 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7761 OpNodeRnd, sched.ZMM>, EVEX_V512;
7763 let Predicates = [HasVLX] in {
7764 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7765 sched.XMM>, EVEX_V128;
7766 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7767 sched.YMM>, EVEX_V256;
7771 // Convert Float to Signed/Unsigned Doubleword with truncation
// Truncating converts always round toward zero, so the 512-bit form pairs
// with the SAE variant (suppress exceptions) rather than rounding control.
7772 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7773 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7774 let Predicates = [HasAVX512] in {
7775 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7777 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7778 OpNodeRnd, sched.ZMM>, EVEX_V512;
7780 let Predicates = [HasVLX] in {
7781 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7782 sched.XMM>, EVEX_V128;
7783 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7784 sched.YMM>, EVEX_V256;
7788 // Convert Float to Signed/Unsigned Doubleword
// Non-truncating variant: the 512-bit form supports static rounding control.
7789 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7790 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7791 let Predicates = [HasAVX512] in {
7792 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7794 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7795 OpNodeRnd, sched.ZMM>, EVEX_V512;
7797 let Predicates = [HasVLX] in {
7798 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7799 sched.XMM>, EVEX_V128;
7800 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7801 sched.YMM>, EVEX_V256;
7805 // Convert Double to Signed/Unsigned Doubleword with truncation
7806 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7807 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7808 let Predicates = [HasAVX512] in {
7809 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7811 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7812 OpNodeRnd, sched.ZMM>, EVEX_V512;
7814 let Predicates = [HasVLX] in {
7815 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7816 // memory forms of these instructions in Asm Parser. They have the same
7817 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7818 // due to the same reason.
7819 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7820 OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
7821 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7822 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// Assembler aliases for the suffixed register forms and Intel-syntax
// memory forms.
7824 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7825 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7826 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7827 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
7828 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7829 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7830 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7831 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
7835 // Convert Double to Signed/Unsigned Doubleword
7836 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7837 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7838 let Predicates = [HasAVX512] in {
7839 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7841 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7842 OpNodeRnd, sched.ZMM>, EVEX_V512;
7844 let Predicates = [HasVLX] in {
7845 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7846 // memory forms of these instructions in Asm Parser. They have the same
7847 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7848 // due to the same reason.
7849 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
7850 sched.XMM, "{1to2}", "{x}">, EVEX_V128;
7851 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7852 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// Assembler aliases for the suffixed register forms and Intel-syntax
// memory forms.
7854 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7855 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7856 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7857 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
7858 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7859 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7860 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7861 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
7865 // Convert Double to Signed/Unsigned Quadword
7866 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7867 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7868 let Predicates = [HasDQI] in {
7869 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7871 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7872 OpNodeRnd, sched.ZMM>, EVEX_V512;
7874 let Predicates = [HasDQI, HasVLX] in {
7875 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7876 sched.XMM>, EVEX_V128;
7877 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7878 sched.YMM>, EVEX_V256;
7882 // Convert Double to Signed/Unsigned Quadword with truncation
7883 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7884 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7885 let Predicates = [HasDQI] in {
7886 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7888 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7889 OpNodeRnd, sched.ZMM>, EVEX_V512;
7891 let Predicates = [HasDQI, HasVLX] in {
7892 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7893 sched.XMM>, EVEX_V128;
7894 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7895 sched.YMM>, EVEX_V256;
7899 // Convert Signed/Unsigned Quadword to Double
7900 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7901 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7902 let Predicates = [HasDQI] in {
7903 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7905 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7906 OpNodeRnd, sched.ZMM>, EVEX_V512;
7908 let Predicates = [HasDQI, HasVLX] in {
// These VL forms have no VEX equivalent, so block EVEX->VEX compression.
7909 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7910 sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
7911 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7912 sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
7916 // Convert Float to Signed/Unsigned Quadword
7917 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7918 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7919 let Predicates = [HasDQI] in {
7920 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7922 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7923 OpNodeRnd, sched.ZMM>, EVEX_V512;
7925 let Predicates = [HasDQI, HasVLX] in {
7926 // Explicitly specified broadcast string, since we take only 2 elements
7927 // from v4f32x_info source
7928 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7929 sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7930 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7931 sched.YMM>, EVEX_V256;
7935 // Convert Float to Signed/Unsigned Quadword with truncation
7936 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7937 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7938 let Predicates = [HasDQI] in {
7939 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
7940 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7941 OpNodeRnd, sched.ZMM>, EVEX_V512;
7943 let Predicates = [HasDQI, HasVLX] in {
7944 // Explicitly specified broadcast string, since we take only 2 elements
7945 // from v4f32x_info source
7946 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7947 sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7948 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7949 sched.YMM>, EVEX_V256;
7953 // Convert Signed/Unsigned Quadword to Float
7954 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7955 SDNode OpNode128, SDNode OpNodeRnd,
7956 X86SchedWriteWidths sched> {
7957 let Predicates = [HasDQI] in {
7958 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
7960 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
7961 OpNodeRnd, sched.ZMM>, EVEX_V512;
7963 let Predicates = [HasDQI, HasVLX] in {
7964 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7965 // memory forms of these instructions in Asm Parser. They have the same
7966 // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
7967 // due to the same reason.
7968 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
7969 sched.XMM, "{1to2}", "{x}">, EVEX_V128,
7970 NotEVEX2VEXConvertible;
7971 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
7972 sched.YMM, "{1to4}", "{y}">, EVEX_V256,
7973 NotEVEX2VEXConvertible;
// Assembler aliases for the suffixed register forms and Intel-syntax
// memory forms.
7975 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7976 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7977 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7978 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
7979 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7980 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7981 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7982 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
// Instantiations of the integer<->FP conversion multiclasses above.
// Opcode, mnemonic, ISel nodes, scheduling class, and encoding prefixes
// (PS/PD/XS/XD, VEX_W, EVEX_CD8 tuple) per instruction family.
7986 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
7987 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
7989 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
7990 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
7991 PS, EVEX_CD8<32, CD8VF>;
7993 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
7994 X86cvttp2siRnd, SchedWriteCvtPS2DQ>,
7995 XS, EVEX_CD8<32, CD8VF>;
7997 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
7998 X86cvttp2siRnd, SchedWriteCvtPD2DQ>,
7999 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8001 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
8002 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS,
8003 EVEX_CD8<32, CD8VF>;
8005 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
8006 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
8007 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8009 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
8010 X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
8011 EVEX_CD8<32, CD8VH>;
8013 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
8014 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
8015 EVEX_CD8<32, CD8VF>;
8017 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
8018 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8019 EVEX_CD8<32, CD8VF>;
8021 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
8022 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8023 VEX_W, EVEX_CD8<64, CD8VF>;
8025 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
8026 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8027 PS, EVEX_CD8<32, CD8VF>;
8029 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
8030 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8031 PS, EVEX_CD8<64, CD8VF>;
// Quadword-element conversions below require AVX512DQ (HasDQI predicate is
// applied inside the multiclasses).
8033 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
8034 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8035 PD, EVEX_CD8<64, CD8VF>;
8037 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
8038 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8039 EVEX_CD8<32, CD8VH>;
8041 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
8042 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8043 PD, EVEX_CD8<64, CD8VF>;
8045 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
8046 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8047 EVEX_CD8<32, CD8VH>;
8049 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
8050 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W,
8051 PD, EVEX_CD8<64, CD8VF>;
8053 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
8054 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD,
8055 EVEX_CD8<32, CD8VH>;
8057 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
8058 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W,
8059 PD, EVEX_CD8<64, CD8VF>;
8061 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
8062 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD,
8063 EVEX_CD8<32, CD8VH>;
8065 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
8066 X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8067 EVEX_CD8<64, CD8VF>;
8069 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
8070 X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8071 EVEX_CD8<64, CD8VF>;
8073 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
8074 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
8075 EVEX_CD8<64, CD8VF>;
8077 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
8078 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
8079 EVEX_CD8<64, CD8VF>;
// Map generic fp_to_sint/fp_to_uint DAG nodes onto the 512-bit truncating
// conversion instructions (register and folded-load forms).
8081 let Predicates = [HasAVX512] in {
8082 def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))),
8083 (VCVTTPS2DQZrr VR512:$src)>;
8084 def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))),
8085 (VCVTTPS2DQZrm addr:$src)>;
8087 def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))),
8088 (VCVTTPS2UDQZrr VR512:$src)>;
8089 def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))),
8090 (VCVTTPS2UDQZrm addr:$src)>;
8092 def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))),
8093 (VCVTTPD2DQZrr VR512:$src)>;
8094 def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))),
8095 (VCVTTPD2DQZrm addr:$src)>;
8097 def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))),
8098 (VCVTTPD2UDQZrr VR512:$src)>;
8099 def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))),
8100 (VCVTTPD2UDQZrm addr:$src)>;
// 128/256-bit fp_to_sint/fp_to_uint patterns, available with AVX512VL.
8103 let Predicates = [HasVLX] in {
8104 def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))),
8105 (VCVTTPS2DQZ128rr VR128X:$src)>;
8106 def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
8107 (VCVTTPS2DQZ128rm addr:$src)>;
8109 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))),
8110 (VCVTTPS2UDQZ128rr VR128X:$src)>;
8111 def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))),
8112 (VCVTTPS2UDQZ128rm addr:$src)>;
8114 def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))),
8115 (VCVTTPS2DQZ256rr VR256X:$src)>;
8116 def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
8117 (VCVTTPS2DQZ256rm addr:$src)>;
8119 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))),
8120 (VCVTTPS2UDQZ256rr VR256X:$src)>;
8121 def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))),
8122 (VCVTTPS2UDQZ256rm addr:$src)>;
8124 def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))),
8125 (VCVTTPD2DQZ256rr VR256X:$src)>;
8126 def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
8127 (VCVTTPD2DQZ256rm addr:$src)>;
8129 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))),
8130 (VCVTTPD2UDQZ256rr VR256X:$src)>;
8131 def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
8132 (VCVTTPD2UDQZ256rm addr:$src)>;
// 512-bit fp -> i64 conversion patterns; quadword-element converts need
// AVX512DQ.
8135 let Predicates = [HasDQI] in {
8136 def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))),
8137 (VCVTTPS2QQZrr VR256X:$src)>;
8138 def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))),
8139 (VCVTTPS2QQZrm addr:$src)>;
8141 def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))),
8142 (VCVTTPS2UQQZrr VR256X:$src)>;
8143 def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))),
8144 (VCVTTPS2UQQZrm addr:$src)>;
8146 def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))),
8147 (VCVTTPD2QQZrr VR512:$src)>;
8148 def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))),
8149 (VCVTTPD2QQZrm addr:$src)>;
8151 def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))),
8152 (VCVTTPD2UQQZrr VR512:$src)>;
8153 def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))),
8154 (VCVTTPD2UQQZrm addr:$src)>;
// 128/256-bit fp -> i64 conversion patterns (AVX512DQ + AVX512VL).
8157 let Predicates = [HasDQI, HasVLX] in {
8158 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
8159 (VCVTTPS2QQZ256rr VR128X:$src)>;
8160 def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
8161 (VCVTTPS2QQZ256rm addr:$src)>;
8163 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))),
8164 (VCVTTPS2UQQZ256rr VR128X:$src)>;
8165 def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))),
8166 (VCVTTPS2UQQZ256rm addr:$src)>;
8168 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))),
8169 (VCVTTPD2QQZ128rr VR128X:$src)>;
8170 def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))),
8171 (VCVTTPD2QQZ128rm addr:$src)>;
8173 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
8174 (VCVTTPD2UQQZ128rr VR128X:$src)>;
8175 def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
8176 (VCVTTPD2UQQZ128rm addr:$src)>;
8178 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
8179 (VCVTTPD2QQZ256rr VR256X:$src)>;
8180 def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
8181 (VCVTTPD2QQZ256rm addr:$src)>;
8183 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
8184 (VCVTTPD2UQQZ256rr VR256X:$src)>;
8185 def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
8186 (VCVTTPD2UQQZ256rm addr:$src)>;
// Without AVX512VL, narrow unsigned conversions are legalized by widening:
// insert the operand into a ZMM register, run the 512-bit instruction, and
// extract the relevant subvector of the result.
8189 let Predicates = [HasAVX512, NoVLX] in {
8190 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
8191 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8192 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8193 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8195 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
8196 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8197 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8198 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8200 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
8201 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
8202 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8203 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8205 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
8206 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8207 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8208 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8210 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
8211 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8212 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8213 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8215 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
8216 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8217 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8218 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8220 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
8221 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8222 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8223 VR128X:$src1, sub_xmm)))), sub_xmm)>;
// VL patterns: zero-extended low-half conversion results and 64-bit
// partial-vector loads feeding 128-bit conversions.
8226 let Predicates = [HasAVX512, HasVLX] in {
8227 let AddedComplexity = 15 in {
8228 def : Pat<(X86vzmovl (v2i64 (bitconvert
8229 (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
8230 (VCVTPD2DQZ128rr VR128X:$src)>;
8231 def : Pat<(X86vzmovl (v2i64 (bitconvert
8232 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
8233 (VCVTPD2DQZ128rm addr:$src)>;
8234 def : Pat<(X86vzmovl (v2i64 (bitconvert
8235 (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
8236 (VCVTPD2UDQZ128rr VR128X:$src)>;
8237 def : Pat<(X86vzmovl (v2i64 (bitconvert
8238 (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
8239 (VCVTTPD2DQZ128rr VR128X:$src)>;
8240 def : Pat<(X86vzmovl (v2i64 (bitconvert
8241 (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
8242 (VCVTTPD2DQZ128rm addr:$src)>;
8243 def : Pat<(X86vzmovl (v2i64 (bitconvert
8244 (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
8245 (VCVTTPD2UDQZ128rr VR128X:$src)>;
// Fold a scalar i64 load (two i32 elements) into the 128-bit int->double
// conversions.
8248 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8249 (VCVTDQ2PDZ128rm addr:$src)>;
8250 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
8251 (VCVTDQ2PDZ128rm addr:$src)>;
8253 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8254 (VCVTUDQ2PDZ128rm addr:$src)>;
8255 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
8256 (VCVTUDQ2PDZ128rm addr:$src)>;
// Fold loads into the 512-bit round/extend conversions.
8259 let Predicates = [HasAVX512] in {
8260 def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
8261 (VCVTPD2PSZrm addr:$src)>;
8262 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
8263 (VCVTPS2PDZrm addr:$src)>;
// Zero-extended low-half results of 2-element i64->f32 conversions.
8266 let Predicates = [HasDQI, HasVLX] in {
8267 let AddedComplexity = 15 in {
8268 def : Pat<(X86vzmovl (v2f64 (bitconvert
8269 (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
8270 (VCVTQQ2PSZ128rr VR128X:$src)>;
8271 def : Pat<(X86vzmovl (v2f64 (bitconvert
8272 (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
8273 (VCVTUQQ2PSZ128rr VR128X:$src)>;
// With AVX512DQ but without AVX512VL, legalize narrow i64<->fp conversions
// by widening to the 512-bit instruction and extracting the subvector.
8277 let Predicates = [HasDQI, NoVLX] in {
8278 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
8279 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8280 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8281 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8283 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
8284 (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
8285 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8286 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8288 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
8289 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8290 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8291 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8293 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
8294 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8295 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8296 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8298 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
8299 (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
8300 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8301 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8303 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
8304 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8305 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8306 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8308 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
8309 (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
8310 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8311 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8313 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
8314 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8315 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8316 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8318 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
8319 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8320 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8321 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8323 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
8324 (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
8325 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8326 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8328 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
8329 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8330 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8331 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8333 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
8334 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8335 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8336 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8339 //===----------------------------------------------------------------------===//
8340 // Half precision conversion instructions
8341 //===----------------------------------------------------------------------===//
// Half -> single precision conversion: register and folded-load forms.
8343 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8344 X86MemOperand x86memop, PatFrag ld_frag,
8345 X86FoldableSchedWrite sched> {
8346 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8347 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8348 (X86cvtph2ps (_src.VT _src.RC:$src))>,
8349 T8PD, Sched<[sched]>;
8350 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8351 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8352 (X86cvtph2ps (_src.VT
8354 (ld_frag addr:$src))))>,
8355 T8PD, Sched<[sched.Folded]>;
// vcvtph2ps with {sae} (suppress-all-exceptions), register form only.
8358 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8359 X86FoldableSchedWrite sched> {
8360 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8361 (ins _src.RC:$src), "vcvtph2ps",
8362 "{sae}, $src", "$src, {sae}",
8363 (X86cvtph2psRnd (_src.VT _src.RC:$src),
8364 (i32 FROUND_NO_EXC))>,
8365 T8PD, EVEX_B, Sched<[sched]>;
// Instantiate vcvtph2ps for 512/256/128-bit vectors.
8368 let Predicates = [HasAVX512] in
8369 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
8371 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8372 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8374 let Predicates = [HasVLX] in {
8375 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8376 loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
8377 EVEX_CD8<32, CD8VH>;
8378 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8379 loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
8380 EVEX_CD8<32, CD8VH>;
8382 // Pattern match vcvtph2ps of a scalar i64 load.
8383 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
8384 (VCVTPH2PSZ128rm addr:$src)>;
8385 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
8386 (VCVTPH2PSZ128rm addr:$src)>;
8387 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
8388 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8389 (VCVTPH2PSZ128rm addr:$src)>;
// Single -> half precision conversion. $src2 is the immediate rounding-mode
// control. The store forms (mr/mrk) have no ISel pattern; they are matched
// by the explicit store patterns following the instantiations.
8392 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8393 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8394 defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
8395 (ins _src.RC:$src1, i32u8imm:$src2),
8396 "vcvtps2ph", "$src2, $src1", "$src1, $src2",
8397 (X86cvtps2ph (_src.VT _src.RC:$src1),
8398 (i32 imm:$src2)), 0, 0>,
8399 AVX512AIi8Base, Sched<[RR]>;
8400 let hasSideEffects = 0, mayStore = 1 in {
8401 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8402 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8403 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8405 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8406 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8407 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8408 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
// vcvtps2ph with {sae}; assembler-only form (no ISel pattern).
8412 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8414 let hasSideEffects = 0 in
8415 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8416 (outs _dest.RC:$dst),
8417 (ins _src.RC:$src1, i32u8imm:$src2),
8418 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8419 EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
// Instantiate vcvtps2ph for 512/256/128-bit vectors, then match stores of
// the conversion result onto the store (mr) forms.
8422 let Predicates = [HasAVX512] in {
8423 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8424 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8425 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8426 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8427 let Predicates = [HasVLX] in {
8428 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8429 WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8430 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8431 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8432 WriteCvtPS2PH, WriteCvtPS2PHSt>,
8433 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
// The 128-bit form produces only 64 bits of data, so its store is matched
// as a store of the low scalar element.
8436 def : Pat<(store (f64 (extractelt
8437 (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
8438 (iPTR 0))), addr:$dst),
8439 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
8440 def : Pat<(store (i64 (extractelt
8441 (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
8442 (iPTR 0))), addr:$dst),
8443 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
8444 def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
8445 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
8446 def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
8447 (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
8450 // Patterns for matching conversions from float to half-float and vice versa.
8451 let Predicates = [HasVLX] in {
8452 // Use MXCSR.RC for rounding instead of explicitly specifying the default
8453 // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
8454 // configurations we support (the default). However, falling back to MXCSR is
8455 // more consistent with other instructions, which are always controlled by it.
8456 // It's encoded as 0b100.
// fp32 -> fp16: convert in an XMM register, then move the low dword to a
// GPR and take its low 16 bits.
8457 def : Pat<(fp_to_f16 FR32X:$src),
8458 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (VCVTPS2PHZ128rr
8459 (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), sub_16bit))>;
// fp16 -> fp32: widen the 16-bit value to 32 bits in a GPR before moving
// it into an XMM register for the conversion (presumably only the low
// half-float element is consumed — NOTE(review): verify).
8461 def : Pat<(f16_to_fp GR16:$src),
8462 (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
8463 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), FR32X)) >;
// Round-trip f32 -> f16 -> f32 collapses to a convert-down/convert-up pair
// entirely in vector registers.
8465 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
8466 (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
8467 (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
8470 // Unordered/Ordered scalar fp compare with {sae} and set EFLAGS
// Assembler-only (empty pattern) register-register compare form with
// suppress-all-exceptions; EVEX.B selects the SAE semantics.
8471 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8472 string OpcodeStr, X86FoldableSchedWrite sched> {
8473 let hasSideEffects = 0 in
8474 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8475 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8476 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
// SAE variants of the (v)ucomis/(v)comis compares; they all write EFLAGS.
8479 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8480 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
8481 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8482 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
8483 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8484 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
8485 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8486 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
8487 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
// EVEX-encoded versions of the normal (MXCSR-controlled) scalar compares,
// reusing the shared sse12_ord_cmp templates.  All write EFLAGS.
8490 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8491 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
8492 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
8493 EVEX_CD8<32, CD8VT1>;
8494 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
8495 "ucomisd", WriteFCom>, PD, EVEX,
8496 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
// COMIS forms get no selection pattern (undef node, Pattern cleared) —
// they exist for the assembler/disassembler and the intrinsic forms below.
8497 let Pattern = []<dag> in {
8498 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
8499 "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
8500 EVEX_CD8<32, CD8VT1>;
8501 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
8502 "comisd", WriteFCom>, PD, EVEX,
8503 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
// Intrinsic (vector-operand) forms, selected from X86ucomi/X86comi nodes;
// codegen-only so they do not clash with the FR32X/FR64X variants above.
8505 let isCodeGenOnly = 1 in {
8506 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8507 sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
8508 EVEX_CD8<32, CD8VT1>;
8509 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8510 sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
8511 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8513 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8514 sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
8515 EVEX_CD8<32, CD8VT1>;
8516 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8517 sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
8518 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8522 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 14-bit-precision reciprocal / reciprocal-sqrt approximations.
// $src1 supplies the pass-through upper elements; $src2 is the operand
// actually approximated.
8523 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8524 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8525 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
8526 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8527 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8528 "$src2, $src1", "$src1, $src2",
8529 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8530 EVEX_4V, Sched<[sched]>;
// Memory form folds a scalar load as the second source.
8531 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8532 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8533 "$src2, $src1", "$src1, $src2",
8534 (OpNode (_.VT _.RC:$src1),
8535 _.ScalarIntMemCPat:$src2)>, EVEX_4V,
8536 Sched<[sched.Folded, ReadAfterLd]>;
// Scalar instantiations: ss/sd flavors of RCP14 and RSQRT14.
8540 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8541 f32x_info>, EVEX_CD8<32, CD8VT1>,
8543 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8544 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8546 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8547 SchedWriteFRsqrt.Scl, f32x_info>,
8548 EVEX_CD8<32, CD8VT1>, T8PD;
8549 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8550 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8551 EVEX_CD8<64, CD8VT1>, T8PD;
8553 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 14-bit-precision approximations: register, full-vector load, and
// broadcast-load (EVEX.B) forms.
8554 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8555 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8556 let ExeDomain = _.ExeDomain in {
8557 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8558 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8559 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8561 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8562 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8564 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8565 Sched<[sched.Folded, ReadAfterLd]>;
// Broadcast form: a single scalar element is loaded and splatted.
8566 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8567 (ins _.ScalarMemOp:$src), OpcodeStr,
8568 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8570 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8571 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
// Instantiate ps/pd at 512-bit always, and at 128/256-bit under AVX512VL.
8575 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8576 X86SchedWriteWidths sched> {
8577 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8578 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8579 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8580 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8582 // Define only if AVX512VL feature is present.
8583 let Predicates = [HasVLX] in {
8584 defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8585 OpNode, sched.XMM, v4f32x_info>,
8586 EVEX_V128, EVEX_CD8<32, CD8VF>;
8587 defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8588 OpNode, sched.YMM, v8f32x_info>,
8589 EVEX_V256, EVEX_CD8<32, CD8VF>;
8590 defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8591 OpNode, sched.XMM, v2f64x_info>,
8592 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8593 defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8594 OpNode, sched.YMM, v4f64x_info>,
8595 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8599 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8600 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8602 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision approximations (AVX-512ER).  The node takes an
// extra i32 rounding operand: FROUND_CURRENT means "use MXCSR",
// FROUND_NO_EXC is the {sae} form (EVEX.B).
8603 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8604 SDNode OpNode, X86FoldableSchedWrite sched> {
8605 let ExeDomain = _.ExeDomain in {
8606 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8607 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8608 "$src2, $src1", "$src1, $src2",
8609 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8610 (i32 FROUND_CURRENT))>,
8613 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8614 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8615 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8616 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8617 (i32 FROUND_NO_EXC))>, EVEX_B,
8620 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8621 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8622 "$src2, $src1", "$src1, $src2",
8623 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
8624 (i32 FROUND_CURRENT))>,
8625 Sched<[sched.Folded, ReadAfterLd]>;
// Pair up the ss/sd variants under one mnemonic prefix.
8629 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8630 X86FoldableSchedWrite sched> {
8631 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>,
8632 EVEX_CD8<32, CD8VT1>;
8633 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>,
8634 EVEX_CD8<64, CD8VT1>, VEX_W;
// RCP28/RSQRT28 require the ER feature; VGETEXP is base AVX512 and reuses
// the same scalar template.
8637 let Predicates = [HasERI] in {
8638 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
8640 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
8641 SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8644 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
8645 SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8646 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed 28-bit-precision approximations: register, load, and broadcast
// (EVEX.B) forms, all with the MXCSR rounding operand (FROUND_CURRENT).
8648 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8649 SDNode OpNode, X86FoldableSchedWrite sched> {
8650 let ExeDomain = _.ExeDomain in {
8651 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8652 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8653 (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>,
8656 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8657 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8659 (bitconvert (_.LdFrag addr:$src))),
8660 (i32 FROUND_CURRENT))>,
8661 Sched<[sched.Folded, ReadAfterLd]>;
8663 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8664 (ins _.ScalarMemOp:$src), OpcodeStr,
8665 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8667 (X86VBroadcast (_.ScalarLdFrag addr:$src))),
8668 (i32 FROUND_CURRENT))>, EVEX_B,
8669 Sched<[sched.Folded, ReadAfterLd]>;
// {sae} register variant (FROUND_NO_EXC), selected by EVEX.B.
8672 multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8673 SDNode OpNode, X86FoldableSchedWrite sched> {
8674 let ExeDomain = _.ExeDomain in
8675 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8676 (ins _.RC:$src), OpcodeStr,
8677 "{sae}, $src", "$src, {sae}",
8678 (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>,
8679 EVEX_B, Sched<[sched]>;
// 512-bit-only combined ps/pd instantiation (normal + {sae} forms).
8682 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8683 X86SchedWriteWidths sched> {
8684 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8685 avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8686 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8687 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8688 avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8689 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// 128/256-bit (VLX-gated) packed unary-op forms reusing the fp28 template
// but without the {sae} variant.
8692 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8693 SDNode OpNode, X86SchedWriteWidths sched> {
8694 // Define only if AVX512VL feature is present.
8695 let Predicates = [HasVLX] in {
8696 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
8697 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8698 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
8699 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8700 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
8701 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8702 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
8703 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
// Packed ER instructions (512-bit only); VGETEXP additionally gets the
// VLX-gated 128/256-bit forms.
8707 let Predicates = [HasERI] in {
8708 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
8709 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
8710 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
8712 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
8713 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
8714 SchedWriteFRnd>, EVEX;
// Packed square root with an explicit static-rounding operand ($rc),
// encoded via EVEX.B + EVEX.RC.
8716 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8717 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8718 let ExeDomain = _.ExeDomain in
8719 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8720 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8721 (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>,
8722 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed square root (MXCSR rounding) selected from the generic fsqrt
// node: register, full load, and broadcast-load forms.
8725 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8726 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8727 let ExeDomain = _.ExeDomain in {
8728 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8729 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8730 (_.VT (fsqrt _.RC:$src))>, EVEX,
8732 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8733 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8735 (bitconvert (_.LdFrag addr:$src))))>, EVEX,
8736 Sched<[sched.Folded, ReadAfterLd]>;
8737 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8738 (ins _.ScalarMemOp:$src), OpcodeStr,
8739 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8741 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8742 EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
// Instantiate packed sqrt at all vector widths: 512-bit unconditionally,
// 128/256-bit only with AVX512VL.
8746 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8747 X86SchedWriteSizes sched> {
8748 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8749 sched.PS.ZMM, v16f32_info>,
8750 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8751 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8752 sched.PD.ZMM, v8f64_info>,
8753 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8754 // Define only if AVX512VL feature is present.
8755 let Predicates = [HasVLX] in {
8756 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8757 sched.PS.XMM, v4f32x_info>,
8758 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8759 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8760 sched.PS.YMM, v8f32x_info>,
8761 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8762 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8763 sched.PD.XMM, v2f64x_info>,
8764 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8765 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8766 sched.PD.YMM, v4f64x_info>,
8767 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Static-rounding forms only exist at 512-bit.
8771 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8772 X86SchedWriteSizes sched> {
8773 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8774 sched.PS.ZMM, v16f32_info>,
8775 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8776 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8777 sched.PD.ZMM, v8f64_info>,
8778 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Scalar square root.  Intrinsic forms (r_Int/m_Int/rb_Int) keep the
// pass-through upper elements in $src1; codegen-only FRC-register forms plus
// the fsqrt Pats below handle plain scalar codegen.  Name is the
// instruction-name prefix used by those Pats (e.g. "VSQRTSS").
8781 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8782 X86VectorVTInfo _, string Name> {
8783 let ExeDomain = _.ExeDomain in {
8784 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8785 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8786 "$src2, $src1", "$src1, $src2",
8787 (X86fsqrtRnds (_.VT _.RC:$src1),
8789 (i32 FROUND_CURRENT))>,
8791 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8792 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8793 "$src2, $src1", "$src1, $src2",
8794 (X86fsqrtRnds (_.VT _.RC:$src1),
8795 _.ScalarIntMemCPat:$src2,
8796 (i32 FROUND_CURRENT))>,
8797 Sched<[sched.Folded, ReadAfterLd]>;
// Static-rounding variant with an explicit $rc operand (EVEX.B + EVEX.RC).
8798 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8799 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8800 "$rc, $src2, $src1", "$src1, $src2, $rc",
8801 (X86fsqrtRnds (_.VT _.RC:$src1),
8804 EVEX_B, EVEX_RC, Sched<[sched]>;
// Pattern-less FRC forms used by the fsqrt Pats below; codegen-only so
// they do not compete with the intrinsic forms for assembly.
8806 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
8807 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8808 (ins _.FRC:$src1, _.FRC:$src2),
8809 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8812 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8813 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
8814 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8815 Sched<[sched.Folded, ReadAfterLd]>;
8819 let Predicates = [HasAVX512] in {
8820 def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
8821 (!cast<Instruction>(Name#Zr)
8822 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
// Only fold the scalar load when optimizing for size — NOTE(review): the
// rationale is not visible in this chunk; presumably matches the SSE/AVX
// scalar-sqrt handling. Confirm before relying on it.
8825 let Predicates = [HasAVX512, OptForSize] in {
8826 def : Pat<(_.EltVT (fsqrt (load addr:$src))),
8827 (!cast<Instruction>(Name#Zm)
8828 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
// ss/sd instantiations and the combined VSQRT definitions.
8832 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
8833 X86SchedWriteSizes sched> {
8834 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
8835 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
8836 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
8837 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
8840 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
8841 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
8843 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
// Scalar VRNDSCALE.  $src3 is the immediate rounding-control byte; the Pats
// below map the generic rounding nodes onto specific immediates
// (floor=0x9, ceil=0xA, trunc=0xB, rint=0x4, nearbyint=0xC).
8845 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
8846 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8847 let ExeDomain = _.ExeDomain in {
8848 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8849 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8850 "$src3, $src2, $src1", "$src1, $src2, $src3",
8851 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
// {sae} form (FROUND_NO_EXC), selected by EVEX.B.
8855 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8856 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8857 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
8858 (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8859 (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B,
8862 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8863 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
8865 "$src3, $src2, $src1", "$src1, $src2, $src3",
8866 (_.VT (X86RndScales _.RC:$src1,
8867 _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
8868 Sched<[sched.Folded, ReadAfterLd]>;
// Pattern-less FRC-register forms used by the rounding Pats below.
8870 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
8871 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8872 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
8873 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8874 []>, Sched<[sched]>;
8877 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8878 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8879 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8880 []>, Sched<[sched.Folded, ReadAfterLd]>;
// Map generic rounding ops to the rndscale immediate encodings.
8884 let Predicates = [HasAVX512] in {
8885 def : Pat<(ffloor _.FRC:$src),
8886 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8887 _.FRC:$src, (i32 0x9)))>;
8888 def : Pat<(fceil _.FRC:$src),
8889 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8890 _.FRC:$src, (i32 0xa)))>;
8891 def : Pat<(ftrunc _.FRC:$src),
8892 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8893 _.FRC:$src, (i32 0xb)))>;
8894 def : Pat<(frint _.FRC:$src),
8895 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8896 _.FRC:$src, (i32 0x4)))>;
8897 def : Pat<(fnearbyint _.FRC:$src),
8898 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8899 _.FRC:$src, (i32 0xc)))>;
// Load-folding variants: only when optimizing for size.
8902 let Predicates = [HasAVX512, OptForSize] in {
8903 def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
8904 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8905 addr:$src, (i32 0x9)))>;
8906 def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
8907 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8908 addr:$src, (i32 0xa)))>;
8909 def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
8910 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8911 addr:$src, (i32 0xb)))>;
8912 def : Pat<(frint (_.ScalarLdFrag addr:$src)),
8913 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8914 addr:$src, (i32 0x4)))>;
8915 def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
8916 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8917 addr:$src, (i32 0xc)))>;
// ss/sd instantiations.
8921 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
8922 SchedWriteFRnd.Scl, f32x_info>,
8923 AVX512AIi8Base, EVEX_4V,
8924 EVEX_CD8<32, CD8VT1>;
8926 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
8927 SchedWriteFRnd.Scl, f64x_info>,
8928 VEX_W, AVX512AIi8Base, EVEX_4V,
8929 EVEX_CD8<64, CD8VT1>;
// Pattern-only multiclass: match a masked (select) scalar unary op and
// select the corresponding masked intrinsic instruction.  The first Pat is
// the merge-masked form (false lane comes from $dst), the second the
// zero-masked form.
8931 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
8932 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
8933 dag OutMask, Predicate BasePredicate> {
8934 let Predicates = [BasePredicate] in {
8935 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
8936 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
8937 (extractelt _.VT:$dst, (iPTR 0))))),
8938 (!cast<Instruction>("V"#OpcPrefix#r_Intk)
8939 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
8941 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
8942 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
8944 (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
8945 OutMask, _.VT:$src2, _.VT:$src1)>;
// Masked scalar sqrt: the GPR mask is truncated and copied into VK1WM.
8949 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
8950 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
8951 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8952 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
8953 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
8954 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
// Same idea for ops carrying an immediate (rndscale): ImmV is the fixed
// rounding-control byte passed to the selected instruction.
8956 multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
8957 X86VectorVTInfo _, PatLeaf ZeroFP,
8958 bits<8> ImmV, Predicate BasePredicate> {
8959 let Predicates = [BasePredicate] in {
8960 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
8961 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
8962 (extractelt _.VT:$dst, (iPTR 0))))),
8963 (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
8964 _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
8966 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
8967 (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
8968 (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
8969 VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
// Masked floor/ceil via rndscale immediates 0x01/0x02.
8973 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
8974 v4f32x_info, fp32imm0, 0x01, HasAVX512>;
8975 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
8976 v4f32x_info, fp32imm0, 0x02, HasAVX512>;
8977 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
8978 v2f64x_info, fp64imm0, 0x01, HasAVX512>;
8979 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
8980 v2f64x_info, fp64imm0, 0x02, HasAVX512>;
8983 //-------------------------------------------------
8984 // Integer truncate and extend operations
8985 //-------------------------------------------------
// Common VPMOV* truncate template: reg-reg form selected from OpNode, plus
// pattern-less store / masked-store forms (handled by the lowering Pats in
// avx512_trunc_mr_lowering below).
8987 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
8988 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
8989 X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
8990 let ExeDomain = DestInfo.ExeDomain in
8991 defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
8992 (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
8993 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
8994 EVEX, T8XS, Sched<[sched]>;
8996 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
8997 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
8998 (ins x86memop:$dst, SrcInfo.RC:$src),
8999 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9000 EVEX, Sched<[sched.Folded]>;
9002 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9003 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9004 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9005 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9006 }//mayStore = 1, hasSideEffects = 0
// Store-lowering Pats: select the (masked) truncating-store frags onto the
// mr/mrk instructions defined by avx512_trunc_common.  Name is the defm
// prefix of those instructions.
9009 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9010 X86VectorVTInfo DestInfo,
9011 PatFrag truncFrag, PatFrag mtruncFrag,
9014 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9015 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
9016 addr:$dst, SrcInfo.RC:$src)>;
9018 def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
9019 (SrcInfo.VT SrcInfo.RC:$src)),
9020 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
9021 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// Instantiate a truncate at all three widths.  Separate OpNode128/256/512
// are needed because narrow sources may use an "in-vector" node variant
// (result occupies only part of the XMM destination).
9024 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9025 SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched,
9026 AVX512VLVectorVTInfo VTSrcInfo,
9027 X86VectorVTInfo DestInfoZ128,
9028 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9029 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9030 X86MemOperand x86memopZ, PatFrag truncFrag,
9031 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9033 let Predicates = [HasVLX, prd] in {
9034 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, sched,
9035 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9036 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9037 truncFrag, mtruncFrag, NAME>, EVEX_V128;
9039 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, sched,
9040 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9041 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9042 truncFrag, mtruncFrag, NAME>, EVEX_V256;
9044 let Predicates = [prd] in
9045 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, sched,
9046 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9047 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9048 truncFrag, mtruncFrag, NAME>, EVEX_V512;
// Per-element-size wrappers.  The memory operand width equals the number of
// truncated bits actually stored (e.g. qb at 128-bit: 2 x i8 = i16mem);
// InVecNode is used at widths where the result underfills the destination.
9051 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9052 X86FoldableSchedWrite sched, PatFrag StoreNode,
9053 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
9054 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched,
9055 avx512vl_i64_info, v16i8x_info, v16i8x_info,
9056 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9057 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9060 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9061 X86FoldableSchedWrite sched, PatFrag StoreNode,
9062 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
9063 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
9064 avx512vl_i64_info, v8i16x_info, v8i16x_info,
9065 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9066 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9069 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9070 X86FoldableSchedWrite sched, PatFrag StoreNode,
9071 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
9072 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
9073 avx512vl_i64_info, v4i32x_info, v4i32x_info,
9074 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9075 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9078 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9079 X86FoldableSchedWrite sched, PatFrag StoreNode,
9080 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
9081 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
9082 avx512vl_i32_info, v16i8x_info, v16i8x_info,
9083 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9084 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9087 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9088 X86FoldableSchedWrite sched, PatFrag StoreNode,
9089 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
9090 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
9091 avx512vl_i32_info, v8i16x_info, v8i16x_info,
9092 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9093 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// Word->byte truncates require BWI.
9096 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9097 X86FoldableSchedWrite sched, PatFrag StoreNode,
9098 PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
9099 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9100 sched, avx512vl_i16_info, v16i8x_info, v16i8x_info,
9101 v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9102 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// VPMOV* instantiations.  Plain forms truncate (discard high bits); the
// "S" forms saturate signed, the "US" forms saturate unsigned.
9105 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, WriteShuffle256,
9106 truncstorevi8, masked_truncstorevi8, X86vtrunc>;
9107 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, WriteShuffle256,
9108 truncstore_s_vi8, masked_truncstore_s_vi8>;
9109 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256,
9110 truncstore_us_vi8, masked_truncstore_us_vi8>;
9112 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, WriteShuffle256,
9113 truncstorevi16, masked_truncstorevi16, X86vtrunc>;
9114 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, WriteShuffle256,
9115 truncstore_s_vi16, masked_truncstore_s_vi16>;
9116 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256,
9117 truncstore_us_vi16, masked_truncstore_us_vi16>;
9119 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, WriteShuffle256,
9120 truncstorevi32, masked_truncstorevi32, X86vtrunc>;
9121 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, WriteShuffle256,
9122 truncstore_s_vi32, masked_truncstore_s_vi32>;
9123 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256,
9124 truncstore_us_vi32, masked_truncstore_us_vi32>;
9126 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256,
9127 truncstorevi8, masked_truncstorevi8, X86vtrunc>;
9128 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, WriteShuffle256,
9129 truncstore_s_vi8, masked_truncstore_s_vi8>;
9130 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, WriteShuffle256,
9131 truncstore_us_vi8, masked_truncstore_us_vi8>;
9133 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256,
9134 truncstorevi16, masked_truncstorevi16, X86vtrunc>;
9135 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, WriteShuffle256,
9136 truncstore_s_vi16, masked_truncstore_s_vi16>;
9137 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, WriteShuffle256,
9138 truncstore_us_vi16, masked_truncstore_us_vi16>;
9140 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256,
9141 truncstorevi8, masked_truncstorevi8, X86vtrunc>;
9142 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, WriteShuffle256,
9143 truncstore_s_vi8, masked_truncstore_s_vi8>;
9144 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, WriteShuffle256,
9145 truncstore_us_vi8, masked_truncstore_us_vi8>;
// Without VLX only the 512-bit VPMOV instructions exist, so widen the
// 256-bit source into a ZMM register, truncate there, and extract the
// low XMM of the result.
9147 let Predicates = [HasAVX512, NoVLX] in {
9148 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9149 (v8i16 (EXTRACT_SUBREG
9150 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9151 VR256X:$src, sub_ymm)))), sub_xmm))>;
9152 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9153 (v4i32 (EXTRACT_SUBREG
9154 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9155 VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same widening trick for word->byte, which needs BWI.
9158 let Predicates = [HasBWI, NoVLX] in {
9159 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9160 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9161 VR256X:$src, sub_ymm))), sub_xmm))>;
9164 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9165 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9166 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9167 let ExeDomain = DestInfo.ExeDomain in {
9168 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9169 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9170 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9171 EVEX, Sched<[sched]>;
9173 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9174 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9175 (DestInfo.VT (LdFrag addr:$src))>,
9176 EVEX, Sched<[sched.Folded]>;
// Byte->Word extension at 128/256/512-bit widths (BWI required; 128/256 also
// need VLX). The 128-bit form uses InVecNode (extension of only the low
// half of the source vector) instead of OpNode.
9180 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9181 SDNode OpNode, SDNode InVecNode, string ExtTy,
9182 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9183 let Predicates = [HasVLX, HasBWI] in {
9184 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9185 v16i8x_info, i64mem, LdFrag, InVecNode>,
9186 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9188 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9189 v16i8x_info, i128mem, LdFrag, OpNode>,
9190 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9192 let Predicates = [HasBWI] in {
9193 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9194 v32i8x_info, i256mem, LdFrag, OpNode>,
9195 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// Byte->Dword extension; memory operand shrinks with the width ratio
// (CD8VQ tuple: quarter of the destination element count is loaded).
9199 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9200 SDNode OpNode, SDNode InVecNode, string ExtTy,
9201 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9202 let Predicates = [HasVLX, HasAVX512] in {
9203 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9204 v16i8x_info, i32mem, LdFrag, InVecNode>,
9205 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9207 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9208 v16i8x_info, i64mem, LdFrag, OpNode>,
9209 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9211 let Predicates = [HasAVX512] in {
9212 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9213 v16i8x_info, i128mem, LdFrag, OpNode>,
9214 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// Byte->Qword extension (CD8VO tuple: eighth of the destination element
// count is loaded — down to a 16-bit memory operand for the 128-bit form).
9218 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9219 SDNode OpNode, SDNode InVecNode, string ExtTy,
9220 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9221 let Predicates = [HasVLX, HasAVX512] in {
9222 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9223 v16i8x_info, i16mem, LdFrag, InVecNode>,
9224 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9226 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9227 v16i8x_info, i32mem, LdFrag, OpNode>,
9228 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9230 let Predicates = [HasAVX512] in {
9231 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9232 v16i8x_info, i64mem, LdFrag, OpNode>,
9233 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
// Word->Dword extension; source element is 16-bit, so the default LdFrag is
// the vi16 extending load and CD8 element size is 16.
9237 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9238 SDNode OpNode, SDNode InVecNode, string ExtTy,
9239 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9240 let Predicates = [HasVLX, HasAVX512] in {
9241 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9242 v8i16x_info, i64mem, LdFrag, InVecNode>,
9243 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9245 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9246 v8i16x_info, i128mem, LdFrag, OpNode>,
9247 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9249 let Predicates = [HasAVX512] in {
9250 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9251 v16i16x_info, i256mem, LdFrag, OpNode>,
9252 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// Word->Qword extension (quarter-width memory tuple, 16-bit elements).
9256 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9257 SDNode OpNode, SDNode InVecNode, string ExtTy,
9258 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9259 let Predicates = [HasVLX, HasAVX512] in {
9260 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9261 v8i16x_info, i32mem, LdFrag, InVecNode>,
9262 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9264 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9265 v8i16x_info, i64mem, LdFrag, OpNode>,
9266 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9268 let Predicates = [HasAVX512] in {
9269 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9270 v8i16x_info, i128mem, LdFrag, OpNode>,
9271 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// Dword->Qword extension. Note: unlike the byte/word variants these forms
// carry no VEX_WIG, matching the 32-bit element encoding.
9275 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9276 SDNode OpNode, SDNode InVecNode, string ExtTy,
9277 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9279 let Predicates = [HasVLX, HasAVX512] in {
9280 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9281 v4i32x_info, i64mem, LdFrag, InVecNode>,
9282 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9284 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9285 v4i32x_info, i128mem, LdFrag, OpNode>,
9286 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9288 let Predicates = [HasAVX512] in {
9289 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9290 v8i32x_info, i256mem, LdFrag, OpNode>,
9291 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
// Instantiate all zero-extend (0x30-0x35) and sign-extend (0x20-0x25)
// VPMOV?X variants; "z"/"s" selects the zext/sext extending-load fragment.
9295 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>;
9296 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>;
9297 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>;
9298 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>;
9299 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>;
9300 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>;
9302 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>;
9303 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>;
9304 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>;
9305 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>;
9306 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>;
9307 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>;
// Extra load-folding patterns for VPMOVSX/VPMOVZX: fold scalar_to_vector
// loads, vzmovl/vzload (zero-extended partial loads) and bitcast-of-full-
// vector loads into the corresponding memory-form instruction. 128-bit
// destinations match InVecOp (low-subvector extension); 256/512-bit
// destinations match ExtOp. Predicates mirror the instruction definitions
// (BWI for the byte->word forms, VLX for the 128/256-bit forms).
9310 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
9313 let Predicates = [HasVLX, HasBWI] in {
9314 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9315 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9316 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9317 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9318 def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
9319 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9320 def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
9321 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9322 def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
9323 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
// 128-bit destinations for the remaining element-width combinations only
// need VLX (no BWI).
9325 let Predicates = [HasVLX] in {
9326 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9327 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9328 def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
9329 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9330 def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
9331 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9332 def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
9333 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9335 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9336 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9337 def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
9338 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9339 def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
9340 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9341 def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
9342 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9344 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9345 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9346 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9347 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9348 def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
9349 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9350 def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
9351 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9352 def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
9353 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9355 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9356 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9357 def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
9358 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9359 def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
9360 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9361 def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
9362 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9364 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9365 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9366 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9367 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9368 def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
9369 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9370 def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
9371 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9372 def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
9373 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
// 256-bit destinations use the full-width ExtOp.
9376 let Predicates = [HasVLX, HasBWI] in {
9377 def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
9378 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9379 def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
9380 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9381 def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
9382 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9384 let Predicates = [HasVLX] in {
9385 def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9386 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9387 def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
9388 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9389 def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
9390 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9391 def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
9392 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9394 def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9395 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9396 def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
9397 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9398 def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
9399 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9400 def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
9401 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9403 def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
9404 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9405 def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
9406 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9407 def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
9408 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9410 def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9411 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9412 def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
9413 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9414 def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
9415 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9416 def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
9417 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9419 def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
9420 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9421 def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
9422 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9423 def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
9424 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
// 512-bit destinations — no VLX needed, BWI only for byte->word.
9427 let Predicates = [HasBWI] in {
9428 def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
9429 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
9431 let Predicates = [HasAVX512] in {
9432 def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
9433 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9435 def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9436 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9437 def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
9438 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9440 def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
9441 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9443 def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
9444 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9446 def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
9447 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
// Instantiate the load-folding patterns for both sign and zero extension.
9451 defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>;
9452 defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>;
9454 //===----------------------------------------------------------------------===//
9455 // GATHER - SCATTER Operations
9457 // FIXME: Improve scheduling of gather/scatter instructions.
// One masked gather instruction. The destination is tied to $src1 (merge
// semantics) and marked @earlyclobber; the mask register is both read and
// written back ($mask_wb) since hardware clears mask bits as elements
// complete. The pattern also produces the updated mask value.
9458 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9459 X86MemOperand memop, PatFrag GatherNode,
9460 RegisterClass MaskRC = _.KRCWM> {
9461 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9462 ExeDomain = _.ExeDomain in
9463 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9464 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9465 !strconcat(OpcodeStr#_.Suffix,
9466 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9467 [(set _.RC:$dst, MaskRC:$mask_wb,
9468 (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
9469 vectoraddr:$src2))]>, EVEX, EVEX_K,
9470 EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
// Gathers with 64-bit data elements (Q integer / PD fp): dword-indexed (D)
// and qword-indexed (Q) forms at each vector width; 128/256-bit need VLX.
9473 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
9474 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9475 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
9476 vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
9477 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
9478 vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
9479 let Predicates = [HasVLX] in {
9480 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9481 vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
9482 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
9483 vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
9484 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9485 vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
9486 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9487 vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
// Gathers with 32-bit data elements (D integer / PS fp). Qword-indexed
// forms produce half as many elements as indices, so they pair a narrower
// data info with the wider index memop (e.g. info256 data + 512-bit
// indices); the Z128 Q form uses a VK2WM mask for its two elements.
9491 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
9492 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9493 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
9494 mgatherv16i32>, EVEX_V512;
9495 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
9496 mgatherv8i64>, EVEX_V512;
9497 let Predicates = [HasVLX] in {
9498 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9499 vy256xmem, mgatherv8i32>, EVEX_V256;
9500 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9501 vy128xmem, mgatherv4i64>, EVEX_V256;
9502 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9503 vx128xmem, mgatherv4i32>, EVEX_V128;
9504 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9505 vx64xmem, mgatherv2i64, VK2WM>,
// FP gathers (VGATHERDPS/DPD/QPS/QPD) and integer gathers (VPGATHERDD/DQ/
// QD/QQ), built from the width-specific multiclasses above.
9511 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
9512 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
9514 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
9515 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// One masked scatter instruction (store counterpart of avx512_gather): the
// mask is read and written back as elements are stored; the only register
// output is the updated mask.
9517 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9518 X86MemOperand memop, PatFrag ScatterNode,
9519 RegisterClass MaskRC = _.KRCWM> {
9521 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
9523 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
9524 (ins memop:$dst, MaskRC:$mask, _.RC:$src),
9525 !strconcat(OpcodeStr#_.Suffix,
9526 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
9527 [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
9528 MaskRC:$mask, vectoraddr:$dst))]>,
9529 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9530 Sched<[WriteStore]>;
// Scatters with 64-bit data elements; mirrors avx512_gather_q_pd.
9533 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
9534 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9535 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
9536 vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
9537 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
9538 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
9539 let Predicates = [HasVLX] in {
9540 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9541 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
9542 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
9543 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
9544 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9545 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
9546 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9547 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
// Scatters with 32-bit data elements; mirrors avx512_gather_d_ps, including
// the narrower data info for qword-indexed forms and the VK2WM mask on the
// Z128 Q form.
9551 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
9552 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9553 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
9554 mscatterv16i32>, EVEX_V512;
9555 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
9556 mscatterv8i64>, EVEX_V512;
9557 let Predicates = [HasVLX] in {
9558 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9559 vy256xmem, mscatterv8i32>, EVEX_V256;
9560 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9561 vy128xmem, mscatterv4i64>, EVEX_V256;
9562 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9563 vx128xmem, mscatterv4i32>, EVEX_V128;
9564 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9565 vx64xmem, mscatterv2i64, VK2WM>,
// FP scatters (VSCATTER*P[SD]) and integer scatters (VPSCATTER*).
9570 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
9571 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
9573 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
9574 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// Gather/scatter prefetch hint (AVX-512 PF). No selection pattern — these
// are emitted via intrinsics only; hasSideEffects keeps them from being
// deleted or reordered as dead loads.
9577 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9578 RegisterClass KRC, X86MemOperand memop> {
9579 let Predicates = [HasPFI], hasSideEffects = 1 in
9580 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9581 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9582 EVEX, EVEX_K, Sched<[WriteLoad]>;
// All 16 prefetch variants: {gather,scatter} x {hint 0,1} x {dword,qword
// index} x {ps,pd data}. The hint level is encoded in the ModRM reg field
// (MRM1m/MRM2m for gather, MRM5m/MRM6m for scatter).
9585 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
9586 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9588 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
9589 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9591 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
9592 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9594 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
9595 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9597 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
9598 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9600 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
9601 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9603 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
9604 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9606 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
9607 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9609 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
9610 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9612 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
9613 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9615 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
9616 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9618 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
9619 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9621 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
9622 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9624 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9625 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9627 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9628 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9630 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9631 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// VPMOVM2*: sign-extend a mask register into a vector (all-ones/all-zeros
// per lane), matched as (sext mask).
9633 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
9634 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
9635 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
9636 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
9637 EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
// Instantiate mask->vector conversion at all three widths; 128/256 need
// VLX on top of the feature predicate (BWI or DQI).
9640 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
9641 string OpcodeStr, Predicate prd> {
9642 let Predicates = [prd] in
9643 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
9645 let Predicates = [prd, HasVLX] in {
9646 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
9647 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
// B/W forms need BWI, D/Q forms need DQI; W and Q add VEX_W.
9651 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
9652 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
9653 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
9654 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
// VPMOV*2M: extract the sign bit of each element into a mask register,
// matched as a signed compare of zero > src (i.e. src is negative).
9656 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
9657 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
9658 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
9659 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
9660 EVEX, Sched<[WriteMove]>;
9663 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widens the narrow source into a ZMM (upper lanes IMPLICIT_DEF), runs the
// 512-bit instruction, and copies the result into the narrow mask class.
// NOTE(review): part of the parameter list is not visible in this chunk.
9664 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
9668 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
9669 (_.KVT (COPY_TO_REGCLASS
9670 (!cast<Instruction>(Name#"Zrr")
9671 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
9672 _.RC:$src, _.SubRegIdx)),
// All widths of vector->mask conversion: native instructions when the
// width's predicate holds, plus NoVLX fallbacks through the 512-bit form.
9676 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
9677 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9678 let Predicates = [prd] in
9679 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
9682 let Predicates = [prd, HasVLX] in {
9683 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
9685 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
9688 let Predicates = [prd, NoVLX] in {
9689 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
9690 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
// Sign-bit extraction instructions; feature gating mirrors VPMOVM2*.
9694 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
9695 avx512vl_i8_info, HasBWI>;
9696 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
9697 avx512vl_i16_info, HasBWI>, VEX_W;
9698 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
9699 avx512vl_i32_info, HasDQI>;
9700 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
9701 avx512vl_i64_info, HasDQI>, VEX_W;
9703 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
9704 // is available, but BWI is not. We can't handle this in lowering because
9705 // a target independent DAG combine likes to combine sext and trunc.
// Expands to mask->v16i32 (VPMOVM2D) followed by a truncating down-convert.
9706 let Predicates = [HasDQI, NoBWI] in {
9707 def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
9708 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9709 def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
9710 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9713 //===----------------------------------------------------------------------===//
9714 // AVX-512 - COMPRESS and EXPAND
// Register form is maskable and matched via X86compress; the store forms
// (mr / masked mrk) carry no patterns here — masked compressing stores are
// selected by the lowering patterns in compress_by_vec_width_lowering.
9717 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9718 string OpcodeStr, X86FoldableSchedWrite sched> {
9719 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9720 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9721 (_.VT (X86compress _.RC:$src1))>, AVX5128IBase,
9724 let mayStore = 1, hasSideEffects = 0 in
9725 def mr : AVX5128I<opc, MRMDestMem, (outs),
9726 (ins _.MemOp:$dst, _.RC:$src),
9727 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9728 []>, EVEX_CD8<_.EltSize, CD8VT1>,
9729 Sched<[sched.Folded]>;
9731 def mrk : AVX5128I<opc, MRMDestMem, (outs),
9732 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
9733 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9735 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9736 Sched<[sched.Folded]>;
// Select a masked compressing store through the mrk instruction form.
9739 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9740 def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
9742 (!cast<Instruction>(Name#_.ZSuffix##mrk)
9743 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
// Compress at all three vector widths, pairing each instruction with its
// compressing-store lowering patterns.
9746 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
9747 X86FoldableSchedWrite sched,
9748 AVX512VLVectorVTInfo VTInfo,
9749 Predicate Pred = HasAVX512> {
9750 let Predicates = [Pred] in
9751 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
9752 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
9754 let Predicates = [Pred, HasVLX] in {
9755 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
9756 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
9757 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
9758 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
9762 // FIXME: Is there a better scheduler class for VPCOMPRESS?
9763 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
9764 avx512vl_i32_info>, EVEX, NotMemoryFoldable;
9765 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
9766 avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
9767 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
9768 avx512vl_f32_info>, EVEX, NotMemoryFoldable;
9769 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
9770 avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
// Expand: register and load forms, both maskable, matched via X86expand.
9773 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
9774 string OpcodeStr, X86FoldableSchedWrite sched> {
9775 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9776 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9777 (_.VT (X86expand _.RC:$src1))>, AVX5128IBase,
9780 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9781 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
9782 (_.VT (X86expand (_.VT (bitconvert
9783 (_.LdFrag addr:$src1)))))>,
9784 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
9785 Sched<[sched.Folded, ReadAfterLd]>;
// Select masked expanding loads: undef or zero passthru maps to the
// zero-masking load form (rmkz); a register passthru maps to the
// merge-masking form (rmk).
9788 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9790 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
9791 (!cast<Instruction>(Name#_.ZSuffix##rmkz)
9792 _.KRCWM:$mask, addr:$src)>;
9794 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
9795 (!cast<Instruction>(Name#_.ZSuffix##rmkz)
9796 _.KRCWM:$mask, addr:$src)>;
9798 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
9799 (_.VT _.RC:$src0))),
9800 (!cast<Instruction>(Name#_.ZSuffix##rmk)
9801 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
// Expand at all three vector widths, with expanding-load lowering patterns.
9804 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
9805 X86FoldableSchedWrite sched,
9806 AVX512VLVectorVTInfo VTInfo,
9807 Predicate Pred = HasAVX512> {
9808 let Predicates = [Pred] in
9809 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
9810 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
9812 let Predicates = [Pred, HasVLX] in {
9813 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
9814 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
9815 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
9816 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
9820 // FIXME: Is there a better scheduler class for VPEXPAND?
9821 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
9822 avx512vl_i32_info>, EVEX;
9823 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
9824 avx512vl_i64_info>, EVEX, VEX_W;
9825 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
9826 avx512vl_f32_info>, EVEX;
9827 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
9828 avx512vl_f64_info>, EVEX, VEX_W;
9830 //handle instruction reg_vec1 = op(reg_vec,imm)
9832 // op(broadcast(eltVt),imm)
9833 //all instruction created with FROUND_CURRENT
// Unary FP op with an 8-bit immediate: register (rri), full-vector memory
// (rmi) and element-broadcast memory (rmbi) forms, all maskable.
9834 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9835 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9836 let ExeDomain = _.ExeDomain in {
9837 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9838 (ins _.RC:$src1, i32u8imm:$src2),
9839 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
9840 (OpNode (_.VT _.RC:$src1),
9841 (i32 imm:$src2))>, Sched<[sched]>;
9842 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9843 (ins _.MemOp:$src1, i32u8imm:$src2),
9844 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
9845 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
9847 Sched<[sched.Folded, ReadAfterLd]>;
9848 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9849 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
9850 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
9851 "${src1}"##_.BroadcastStr##", $src2",
9852 (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
9853 (i32 imm:$src2))>, EVEX_B,
9854 Sched<[sched.Folded, ReadAfterLd]>;
9858 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// SAE ({sae}, suppress-all-exceptions) variant: register-only, with the
// rounding-control operand pinned to FROUND_NO_EXC.
9859 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
9860 SDNode OpNode, X86FoldableSchedWrite sched,
9861 X86VectorVTInfo _> {
9862 let ExeDomain = _.ExeDomain in
9863 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9864 (ins _.RC:$src1, i32u8imm:$src2),
9865 OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
9866 "$src1, {sae}, $src2",
9867 (OpNode (_.VT _.RC:$src1),
9869 (i32 FROUND_NO_EXC))>,
9870 EVEX_B, Sched<[sched]>;
// Combine the plain and SAE unary-imm forms: 512-bit gets both (SAE uses
// the separate OpNodeRnd node); 128/256-bit get only the plain form and
// additionally require VLX.
9873 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
9874 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
9875 SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
9876 let Predicates = [prd] in {
9877 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
9879 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
9880 sched.ZMM, _.info512>, EVEX_V512;
9882 let Predicates = [prd, HasVLX] in {
9883 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
9884 _.info128>, EVEX_V128;
9885 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
9886 _.info256>, EVEX_V256;
9890 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
9891 // op(reg_vec2,mem_vec,imm)
9892 // op(reg_vec2,broadcast(eltVt),imm)
9893 //all instruction created with FROUND_CURRENT
// Binary packed op with an i32 8-bit immediate: reg/reg (rri), reg/mem (rmi)
// and reg/broadcast-from-scalar (rmbi, EVEX_B) forms, all maskable.
9894 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9895 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9896 let ExeDomain = _.ExeDomain in {
9897 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9898 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
9899 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9900 (OpNode (_.VT _.RC:$src1),
9904 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9905 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
9906 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9907 (OpNode (_.VT _.RC:$src1),
9908 (_.VT (bitconvert (_.LdFrag addr:$src2))),
9910 Sched<[sched.Folded, ReadAfterLd]>;
9911 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9912 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9913 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
9914 "$src1, ${src2}"##_.BroadcastStr##", $src3",
9915 (OpNode (_.VT _.RC:$src1),
9916 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
9917 (i32 imm:$src3))>, EVEX_B,
9918 Sched<[sched.Folded, ReadAfterLd]>;
9922 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
9923 // op(reg_vec2,mem_vec,imm)
// Three-operand imm8 op where source and destination may have DIFFERENT
// vector types (DestInfo vs SrcInfo) — e.g. vdbpsadbw: i8 sources, i16 dest.
9924 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
9925 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
9926 X86VectorVTInfo SrcInfo>{
9927 let ExeDomain = DestInfo.ExeDomain in {
9928 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9929 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
9930 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9931 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
9932 (SrcInfo.VT SrcInfo.RC:$src2),
9935 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9936 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
9937 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9938 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
9939 (SrcInfo.VT (bitconvert
9940 (SrcInfo.LdFrag addr:$src2))),
9942 Sched<[sched.Folded, ReadAfterLd]>;
9946 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
9947 // op(reg_vec2,mem_vec,imm)
9948 // op(reg_vec2,broadcast(eltVt),imm)
// Same-type specialization of avx512_3Op_rm_imm8 (Dest == Src == _), which
// additionally gets the element-broadcast memory form (rmbi, EVEX_B).
9949 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
9950 X86FoldableSchedWrite sched, X86VectorVTInfo _>:
9951 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
9953 let ExeDomain = _.ExeDomain in
9954 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9955 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
9956 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
9957 "$src1, ${src2}"##_.BroadcastStr##", $src3",
9958 (OpNode (_.VT _.RC:$src1),
9959 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
9960 (i8 imm:$src3))>, EVEX_B,
9961 Sched<[sched.Folded, ReadAfterLd]>;
9964 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
9965 // op(reg_vec2,mem_scalar,imm)
// Scalar (SS/SD) counterpart of avx512_fp_packed_imm: the memory operand is a
// single element widened via scalar_to_vector; uses the scalar maskable base.
9966 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9967 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9968 let ExeDomain = _.ExeDomain in {
9969 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9970 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
9971 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9972 (OpNode (_.VT _.RC:$src1),
9976 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9977 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9978 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9979 (OpNode (_.VT _.RC:$src1),
9980 (_.VT (scalar_to_vector
9981 (_.ScalarLdFrag addr:$src2))),
9983 Sched<[sched.Folded, ReadAfterLd]>;
9987 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// {sae} form of the binary packed-imm op: register-only "rrib" variant with
// the rounding operand fixed to FROUND_NO_EXC (EVEX.b set via EVEX_B).
9988 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
9989 SDNode OpNode, X86FoldableSchedWrite sched,
9990 X86VectorVTInfo _> {
9991 let ExeDomain = _.ExeDomain in
9992 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9993 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
9994 OpcodeStr, "$src3, {sae}, $src2, $src1",
9995 "$src1, $src2, {sae}, $src3",
9996 (OpNode (_.VT _.RC:$src1),
9999 (i32 FROUND_NO_EXC))>,
10000 EVEX_B, Sched<[sched]>;
10003 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar {sae} variant. The explicit NAME# prefix on the defm produces the
// same final record names as the sibling packed multiclasses.
10004 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10005 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10006 let ExeDomain = _.ExeDomain in
10007 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10008 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10009 OpcodeStr, "$src3, {sae}, $src2, $src1",
10010 "$src1, $src2, {sae}, $src3",
10011 (OpNode (_.VT _.RC:$src1),
10014 (i32 FROUND_NO_EXC))>,
10015 EVEX_B, Sched<[sched]>;
// Instantiates binary packed-imm ops at 512/256/128 bits; the ZMM form also
// gets the {sae} variant. 128/256-bit forms additionally require HasVLX.
10018 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10019 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10020 SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
10021 let Predicates = [prd] in {
10022 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10023 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
10027 let Predicates = [prd, HasVLX] in {
10028 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10030 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Width-expansion for the mixed-type 3-op imm8 form (Dest/Src VT infos may
// differ). Default predicate is HasBWI since its users are byte/word ops.
10035 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10036 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10037 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10038 let Predicates = [Pred] in {
10039 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10040 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10042 let Predicates = [Pred, HasVLX] in {
10043 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10044 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10045 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10046 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
// Width-expansion for the same-type 3-op imm8 form (includes the broadcast
// memory variant via avx512_3Op_imm8). Default predicate is HasAVX512.
10050 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10051 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10052 Predicate Pred = HasAVX512> {
10053 let Predicates = [Pred] in {
10054 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10057 let Predicates = [Pred, HasVLX] in {
10058 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10060 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Scalar imm ops: combines the plain and {sae} variants under one predicate.
// Scalars always operate on XMM registers, hence sched.XMM for both.
10065 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10066 X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10067 SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
10068 let Predicates = [prd] in {
10069 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10070 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
// Emits both the PS (f32, CD8 scale 32) and PD (f64, CD8 scale 64, VEX.W)
// flavors of a unary packed-imm op, each with its own opcode.
10074 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10075 bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10076 SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
10077 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10078 opcPs, OpNode, OpNodeRnd, sched, prd>,
10079 EVEX_CD8<32, CD8VF>;
10080 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10081 opcPd, OpNode, OpNodeRnd, sched, prd>,
10082 EVEX_CD8<64, CD8VF>, VEX_W;
// Unary packed-imm instruction instantiations. VREDUCE/VGETMANT share one
// opcode for PS and PD; VRNDSCALE uses 0x08 (PS) / 0x09 (PD).
// VREDUCE requires DQI; VRNDSCALE and VGETMANT only require AVX512F.
10085 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10086 X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
10087 AVX512AIi8Base, EVEX;
10088 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10089 X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
10090 AVX512AIi8Base, EVEX;
10091 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10092 X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
10093 AVX512AIi8Base, EVEX;
// Binary packed-imm (VRANGE) and scalar imm (VRANGE/VREDUCE/VGETMANT SS/SD)
// instantiations. VRANGE/VREDUCE require DQI; VGETMANT only AVX512F.
// Scalar forms are VEX_LIG (vector-length agnostic) with tuple type T1.
10095 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10096 0x50, X86VRange, X86VRangeRnd,
10097 SchedWriteFAdd, HasDQI>,
10098 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10099 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10100 0x50, X86VRange, X86VRangeRnd,
10101 SchedWriteFAdd, HasDQI>,
10102 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10104 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10105 f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
10106 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10107 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10108 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
10109 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10111 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10112 0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
10113 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10114 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10115 0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
10116 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10118 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10119 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
10120 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10121 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10122 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
10123 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// Select generic FP rounding nodes to VRNDSCALE (512-bit) with fixed imm8:
// 0x9 = floor (toward -inf), 0xA = ceil (toward +inf), 0xB = trunc (toward
// zero) — all with the precision (inexact) exception suppressed; 0xC =
// nearbyint (current mode, inexact suppressed); 0x4 = rint (current mode).
// Masked "k"/"kz" (merge/zero) variants are matched via vselect patterns.
10125 let Predicates = [HasAVX512] in {
10126 def : Pat<(v16f32 (ffloor VR512:$src)),
10127 (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
10128 def : Pat<(v16f32 (vselect VK16WM:$mask, (ffloor VR512:$src), VR512:$dst)),
10129 (VRNDSCALEPSZrrik VR512:$dst, VK16WM:$mask, VR512:$src, (i32 0x9))>;
10130 def : Pat<(v16f32 (vselect VK16WM:$mask, (ffloor VR512:$src), v16f32_info.ImmAllZerosV)),
10131 (VRNDSCALEPSZrrikz VK16WM:$mask, VR512:$src, (i32 0x9))>;
10132 def : Pat<(v16f32 (fnearbyint VR512:$src)),
10133 (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
10134 def : Pat<(v16f32 (fceil VR512:$src)),
10135 (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
10136 def : Pat<(v16f32 (vselect VK16WM:$mask, (fceil VR512:$src), VR512:$dst)),
10137 (VRNDSCALEPSZrrik VR512:$dst, VK16WM:$mask, VR512:$src, (i32 0xA))>;
10138 def : Pat<(v16f32 (vselect VK16WM:$mask, (fceil VR512:$src), v16f32_info.ImmAllZerosV)),
10139 (VRNDSCALEPSZrrikz VK16WM:$mask, VR512:$src, (i32 0xA))>;
10140 def : Pat<(v16f32 (frint VR512:$src)),
10141 (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
10142 def : Pat<(v16f32 (ftrunc VR512:$src)),
10143 (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
// Load-folded variants (no masked load-folded patterns here).
10145 def : Pat<(v16f32 (ffloor (loadv16f32 addr:$src))),
10146 (VRNDSCALEPSZrmi addr:$src, (i32 0x9))>;
10147 def : Pat<(v16f32 (fnearbyint (loadv16f32 addr:$src))),
10148 (VRNDSCALEPSZrmi addr:$src, (i32 0xC))>;
10149 def : Pat<(v16f32 (fceil (loadv16f32 addr:$src))),
10150 (VRNDSCALEPSZrmi addr:$src, (i32 0xA))>;
10151 def : Pat<(v16f32 (frint (loadv16f32 addr:$src))),
10152 (VRNDSCALEPSZrmi addr:$src, (i32 0x4))>;
10153 def : Pat<(v16f32 (ftrunc (loadv16f32 addr:$src))),
10154 (VRNDSCALEPSZrmi addr:$src, (i32 0xB))>;
// Same mapping for v8f64 via VRNDSCALEPD.
10156 def : Pat<(v8f64 (ffloor VR512:$src)),
10157 (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
10158 def : Pat<(v8f64 (vselect VK8WM:$mask, (ffloor VR512:$src), VR512:$dst)),
10159 (VRNDSCALEPDZrrik VR512:$dst, VK8WM:$mask, VR512:$src, (i32 0x9))>;
10160 def : Pat<(v8f64 (vselect VK8WM:$mask, (ffloor VR512:$src), v8f64_info.ImmAllZerosV)),
10161 (VRNDSCALEPDZrrikz VK8WM:$mask, VR512:$src, (i32 0x9))>;
10162 def : Pat<(v8f64 (fnearbyint VR512:$src)),
10163 (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
10164 def : Pat<(v8f64 (fceil VR512:$src)),
10165 (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
10166 def : Pat<(v8f64 (vselect VK8WM:$mask, (fceil VR512:$src), VR512:$dst)),
10167 (VRNDSCALEPDZrrik VR512:$dst, VK8WM:$mask, VR512:$src, (i32 0xA))>;
10168 def : Pat<(v8f64 (vselect VK8WM:$mask, (fceil VR512:$src), v8f64_info.ImmAllZerosV)),
10169 (VRNDSCALEPDZrrikz VK8WM:$mask, VR512:$src, (i32 0xA))>;
10170 def : Pat<(v8f64 (frint VR512:$src)),
10171 (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
10172 def : Pat<(v8f64 (ftrunc VR512:$src)),
10173 (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
10175 def : Pat<(v8f64 (ffloor (loadv8f64 addr:$src))),
10176 (VRNDSCALEPDZrmi addr:$src, (i32 0x9))>;
10177 def : Pat<(v8f64 (fnearbyint (loadv8f64 addr:$src))),
10178 (VRNDSCALEPDZrmi addr:$src, (i32 0xC))>;
10179 def : Pat<(v8f64 (fceil (loadv8f64 addr:$src))),
10180 (VRNDSCALEPDZrmi addr:$src, (i32 0xA))>;
10181 def : Pat<(v8f64 (frint (loadv8f64 addr:$src))),
10182 (VRNDSCALEPDZrmi addr:$src, (i32 0x4))>;
10183 def : Pat<(v8f64 (ftrunc (loadv8f64 addr:$src))),
10184 (VRNDSCALEPDZrmi addr:$src, (i32 0xB))>;
// VLX (128/256-bit) versions of the VRNDSCALE rounding-node patterns.
// Same imm8 meanings as the 512-bit block: 0x9 floor, 0xA ceil, 0xB trunc,
// 0xC nearbyint (inexact suppressed), 0x4 rint (current mode).
10187 let Predicates = [HasVLX] in {
10188 def : Pat<(v4f32 (ffloor VR128X:$src)),
10189 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>;
10190 def : Pat<(v4f32 (vselect VK4WM:$mask, (ffloor VR128X:$src), VR128X:$dst)),
10191 (VRNDSCALEPSZ128rrik VR128X:$dst, VK4WM:$mask, VR128X:$src, (i32 0x9))>;
10192 def : Pat<(v4f32 (vselect VK4WM:$mask, (ffloor VR128X:$src), v4f32x_info.ImmAllZerosV)),
10193 (VRNDSCALEPSZ128rrikz VK4WM:$mask, VR128X:$src, (i32 0x9))>;
10194 def : Pat<(v4f32 (fnearbyint VR128X:$src)),
10195 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>;
10196 def : Pat<(v4f32 (fceil VR128X:$src)),
10197 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>;
10198 def : Pat<(v4f32 (vselect VK4WM:$mask, (fceil VR128X:$src), VR128X:$dst)),
10199 (VRNDSCALEPSZ128rrik VR128X:$dst, VK4WM:$mask, VR128X:$src, (i32 0xA))>;
10200 def : Pat<(v4f32 (vselect VK4WM:$mask, (fceil VR128X:$src), v4f32x_info.ImmAllZerosV)),
10201 (VRNDSCALEPSZ128rrikz VK4WM:$mask, VR128X:$src, (i32 0xA))>;
10202 def : Pat<(v4f32 (frint VR128X:$src)),
10203 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>;
10204 def : Pat<(v4f32 (ftrunc VR128X:$src)),
10205 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xB))>;
10207 def : Pat<(v4f32 (ffloor (loadv4f32 addr:$src))),
10208 (VRNDSCALEPSZ128rmi addr:$src, (i32 0x9))>;
10209 def : Pat<(v4f32 (fnearbyint (loadv4f32 addr:$src))),
10210 (VRNDSCALEPSZ128rmi addr:$src, (i32 0xC))>;
10211 def : Pat<(v4f32 (fceil (loadv4f32 addr:$src))),
10212 (VRNDSCALEPSZ128rmi addr:$src, (i32 0xA))>;
10213 def : Pat<(v4f32 (frint (loadv4f32 addr:$src))),
10214 (VRNDSCALEPSZ128rmi addr:$src, (i32 0x4))>;
10215 def : Pat<(v4f32 (ftrunc (loadv4f32 addr:$src))),
10216 (VRNDSCALEPSZ128rmi addr:$src, (i32 0xB))>;
10218 def : Pat<(v2f64 (ffloor VR128X:$src)),
10219 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
10220 def : Pat<(v2f64 (vselect VK2WM:$mask, (ffloor VR128X:$src), VR128X:$dst)),
10221 (VRNDSCALEPDZ128rrik VR128X:$dst, VK2WM:$mask, VR128X:$src, (i32 0x9))>;
10222 def : Pat<(v2f64 (vselect VK2WM:$mask, (ffloor VR128X:$src), v2f64x_info.ImmAllZerosV)),
10223 (VRNDSCALEPDZ128rrikz VK2WM:$mask, VR128X:$src, (i32 0x9))>;
10224 def : Pat<(v2f64 (fnearbyint VR128X:$src)),
10225 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
10226 def : Pat<(v2f64 (fceil VR128X:$src)),
10227 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>;
10228 def : Pat<(v2f64 (vselect VK2WM:$mask, (fceil VR128X:$src), VR128X:$dst)),
10229 (VRNDSCALEPDZ128rrik VR128X:$dst, VK2WM:$mask, VR128X:$src, (i32 0xA))>;
10230 def : Pat<(v2f64 (vselect VK2WM:$mask, (fceil VR128X:$src), v2f64x_info.ImmAllZerosV)),
10231 (VRNDSCALEPDZ128rrikz VK2WM:$mask, VR128X:$src, (i32 0xA))>;
10232 def : Pat<(v2f64 (frint VR128X:$src)),
10233 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>;
10234 def : Pat<(v2f64 (ftrunc VR128X:$src)),
10235 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xB))>;
10237 def : Pat<(v2f64 (ffloor (loadv2f64 addr:$src))),
10238 (VRNDSCALEPDZ128rmi addr:$src, (i32 0x9))>;
10239 def : Pat<(v2f64 (fnearbyint (loadv2f64 addr:$src))),
10240 (VRNDSCALEPDZ128rmi addr:$src, (i32 0xC))>;
10241 def : Pat<(v2f64 (fceil (loadv2f64 addr:$src))),
10242 (VRNDSCALEPDZ128rmi addr:$src, (i32 0xA))>;
10243 def : Pat<(v2f64 (frint (loadv2f64 addr:$src))),
10244 (VRNDSCALEPDZ128rmi addr:$src, (i32 0x4))>;
10245 def : Pat<(v2f64 (ftrunc (loadv2f64 addr:$src))),
10246 (VRNDSCALEPDZ128rmi addr:$src, (i32 0xB))>;
10248 def : Pat<(v8f32 (ffloor VR256X:$src)),
10249 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
10250 def : Pat<(v8f32 (vselect VK8WM:$mask, (ffloor VR256X:$src), VR256X:$dst)),
10251 (VRNDSCALEPSZ256rrik VR256X:$dst, VK8WM:$mask, VR256X:$src, (i32 0x9))>;
10252 def : Pat<(v8f32 (vselect VK8WM:$mask, (ffloor VR256X:$src), v8f32x_info.ImmAllZerosV)),
10253 (VRNDSCALEPSZ256rrikz VK8WM:$mask, VR256X:$src, (i32 0x9))>;
10254 def : Pat<(v8f32 (fnearbyint VR256X:$src)),
10255 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
10256 def : Pat<(v8f32 (fceil VR256X:$src)),
10257 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>;
10258 def : Pat<(v8f32 (vselect VK8WM:$mask, (fceil VR256X:$src), VR256X:$dst)),
10259 (VRNDSCALEPSZ256rrik VR256X:$dst, VK8WM:$mask, VR256X:$src, (i32 0xA))>;
10260 def : Pat<(v8f32 (vselect VK8WM:$mask, (fceil VR256X:$src), v8f32x_info.ImmAllZerosV)),
10261 (VRNDSCALEPSZ256rrikz VK8WM:$mask, VR256X:$src, (i32 0xA))>;
10262 def : Pat<(v8f32 (frint VR256X:$src)),
10263 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>;
10264 def : Pat<(v8f32 (ftrunc VR256X:$src)),
10265 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xB))>;
10267 def : Pat<(v8f32 (ffloor (loadv8f32 addr:$src))),
10268 (VRNDSCALEPSZ256rmi addr:$src, (i32 0x9))>;
10269 def : Pat<(v8f32 (fnearbyint (loadv8f32 addr:$src))),
10270 (VRNDSCALEPSZ256rmi addr:$src, (i32 0xC))>;
10271 def : Pat<(v8f32 (fceil (loadv8f32 addr:$src))),
10272 (VRNDSCALEPSZ256rmi addr:$src, (i32 0xA))>;
10273 def : Pat<(v8f32 (frint (loadv8f32 addr:$src))),
10274 (VRNDSCALEPSZ256rmi addr:$src, (i32 0x4))>;
10275 def : Pat<(v8f32 (ftrunc (loadv8f32 addr:$src))),
10276 (VRNDSCALEPSZ256rmi addr:$src, (i32 0xB))>;
10278 def : Pat<(v4f64 (ffloor VR256X:$src)),
10279 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
10280 def : Pat<(v4f64 (vselect VK4WM:$mask, (ffloor VR256X:$src), VR256X:$dst)),
10281 (VRNDSCALEPDZ256rrik VR256X:$dst, VK4WM:$mask, VR256X:$src, (i32 0x9))>;
10282 def : Pat<(v4f64 (vselect VK4WM:$mask, (ffloor VR256X:$src), v4f64x_info.ImmAllZerosV)),
10283 (VRNDSCALEPDZ256rrikz VK4WM:$mask, VR256X:$src, (i32 0x9))>;
10284 def : Pat<(v4f64 (fnearbyint VR256X:$src)),
10285 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
10286 def : Pat<(v4f64 (fceil VR256X:$src)),
10287 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>;
10288 def : Pat<(v4f64 (vselect VK4WM:$mask, (fceil VR256X:$src), VR256X:$dst)),
10289 (VRNDSCALEPDZ256rrik VR256X:$dst, VK4WM:$mask, VR256X:$src, (i32 0xA))>;
10290 def : Pat<(v4f64 (vselect VK4WM:$mask, (fceil VR256X:$src), v4f64x_info.ImmAllZerosV)),
10291 (VRNDSCALEPDZ256rrikz VK4WM:$mask, VR256X:$src, (i32 0xA))>;
10292 def : Pat<(v4f64 (frint VR256X:$src)),
10293 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>;
10294 def : Pat<(v4f64 (ftrunc VR256X:$src)),
10295 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>;
10297 def : Pat<(v4f64 (ffloor (loadv4f64 addr:$src))),
10298 (VRNDSCALEPDZ256rmi addr:$src, (i32 0x9))>;
10299 def : Pat<(v4f64 (fnearbyint (loadv4f64 addr:$src))),
10300 (VRNDSCALEPDZ256rmi addr:$src, (i32 0xC))>;
10301 def : Pat<(v4f64 (fceil (loadv4f64 addr:$src))),
10302 (VRNDSCALEPDZ256rmi addr:$src, (i32 0xA))>;
10303 def : Pat<(v4f64 (frint (loadv4f64 addr:$src))),
10304 (VRNDSCALEPDZ256rmi addr:$src, (i32 0x4))>;
10305 def : Pat<(v4f64 (ftrunc (loadv4f64 addr:$src))),
10306 (VRNDSCALEPDZ256rmi addr:$src, (i32 0xB))>;
// VSHUFF/VSHUFI 32x4/64x2: shuffle of 128-bit lanes. The shuffle node is
// matched at the CastInfo type and bitcast to the instruction's type (the
// bitconvert wrappers are partially elided in this chunk — verify upstream).
// EVEX2VEXOvrd names the VEX (VPERM2F128/VPERM2I128) instruction to compress
// to; the broadcast form has no VEX equivalent and takes no override.
10309 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10310 X86FoldableSchedWrite sched,
10312 X86VectorVTInfo CastInfo,
10313 string EVEX2VEXOvrd> {
10314 let ExeDomain = _.ExeDomain in {
10315 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10316 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10317 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10319 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10320 (i8 imm:$src3)))))>,
10321 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
10322 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10323 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10324 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10327 (CastInfo.VT (X86Shuf128 _.RC:$src1,
10328 (bitconvert (_.LdFrag addr:$src2)),
10329 (i8 imm:$src3)))))>,
10330 Sched<[sched.Folded, ReadAfterLd]>,
10331 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
10332 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10333 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10334 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10335 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10339 (X86Shuf128 _.RC:$src1,
10340 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
10341 (i8 imm:$src3)))))>, EVEX_B,
10342 Sched<[sched.Folded, ReadAfterLd]>;
// 512- and 256-bit instantiations of the 128-bit-lane shuffle. There is no
// 128-bit form (a single lane has nothing to shuffle). The Z form passes an
// empty EVEX2VEX override string; only Z256 can compress to VEX.
10346 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10347 AVX512VLVectorVTInfo _,
10348 AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10349 string EVEX2VEXOvrd>{
10350 let Predicates = [HasAVX512] in
10351 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10352 _.info512, CastInfo.info512, "">, EVEX_V512;
10354 let Predicates = [HasAVX512, HasVLX] in
10355 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10356 _.info256, CastInfo.info256,
10357 EVEX2VEXOvrd>, EVEX_V256;
// VSHUFF32X4/F64X2 (opcode 0x23) and VSHUFI32X4/I64X2 (0x43). The 32-bit
// variants are matched through the 64-bit cast type; 256-bit forms can be
// compressed to VPERM2F128/VPERM2I128.
10360 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10361 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10362 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10363 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10364 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10365 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10366 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10367 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// Fallback: splat a 128-bit subvector to 512 bits with VSHUFF/VSHUFI when
// the dedicated broadcast-from-memory patterns don't apply (the load has
// other users). The source is inserted into an undef ZMM and shuffled with
// itself; the immediate operands are elided in this chunk (presumably 0 —
// select lane 0 everywhere; verify against upstream).
10369 let Predicates = [HasAVX512] in {
10370 // Provide fallback in case the load node that is used in the broadcast
10371 // patterns above is used by additional users, which prevents the pattern
10373 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
10374 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10375 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10377 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
10378 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10379 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10382 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
10383 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10384 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10386 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
10387 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10388 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10391 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
10392 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10393 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10396 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
10397 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10398 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// VALIGND/Q: concatenate two sources and extract an element-aligned window
// selected by imm8. rri/rmi compress to VPALIGNR VEX encodings via
// EVEX2VEXOverride; the broadcast form (rmbi) has no VEX counterpart.
10402 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10403 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10404 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
10405 // instantiation of this class.
10406 let ExeDomain = _.ExeDomain in {
10407 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10408 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10409 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10410 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
10411 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10412 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10413 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10414 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10415 (_.VT (X86VAlign _.RC:$src1,
10416 (bitconvert (_.LdFrag addr:$src2)),
10418 Sched<[sched.Folded, ReadAfterLd]>,
10419 EVEX2VEXOverride<"VPALIGNRrmi">;
10421 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10422 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10423 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10424 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10425 (X86VAlign _.RC:$src1,
10426 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10427 (i8 imm:$src3))>, EVEX_B,
10428 Sched<[sched.Folded, ReadAfterLd]>;
// VALIGN at all widths (opcode 0x03). The 256-bit form cannot be compressed
// to VPALIGNR, so its inherited EVEX2VEXOverride is reset to unset (?).
10432 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10433 AVX512VLVectorVTInfo _> {
10434 let Predicates = [HasAVX512] in {
10435 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10436 AVX512AIi8Base, EVEX_4V, EVEX_V512;
10438 let Predicates = [HasAVX512, HasVLX] in {
10439 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10440 AVX512AIi8Base, EVEX_4V, EVEX_V128;
10441 // We can't really override the 256-bit version so change it back to unset.
10442 let EVEX2VEXOverride = ? in
10443 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10444 AVX512AIi8Base, EVEX_4V, EVEX_V256;
// VALIGND (dword elements), VALIGNQ (qword elements, VEX.W — trailing
// modifiers elided in this chunk), and byte-granular VPALIGNR (BWI via the
// common 3-op multiclass's default predicate).
10448 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10449 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10450 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10451 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10454 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10455 SchedWriteShuffle, avx512vl_i8_info,
10456 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10458 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
// The shift count is in elements, so converting to a narrower element type
// scales the immediate: qword->dword x2, qword->byte x8, dword->byte x4.
10460 def ValignqImm32XForm : SDNodeXForm<imm, [{
10461 return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10463 def ValignqImm8XForm : SDNodeXForm<imm, [{
10464 return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10466 def ValigndImm8XForm : SDNodeXForm<imm, [{
10467 return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
// Match a masked (merge "k" / zeroing "kz") VAlign performed at the From
// element type onto the To-typed instruction, rescaling the immediate with
// ImmXForm. Covers register-register and load-folded forms. The bitconvert
// wrappers around the vselect operands are elided in this chunk.
10470 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10471 X86VectorVTInfo From, X86VectorVTInfo To,
10472 SDNodeXForm ImmXForm> {
10473 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10475 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10478 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10479 To.RC:$src1, To.RC:$src2,
10480 (ImmXForm imm:$src3))>;
10482 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10484 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10487 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10488 To.RC:$src1, To.RC:$src2,
10489 (ImmXForm imm:$src3))>;
10491 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10493 (From.VT (OpNode From.RC:$src1,
10494 (bitconvert (To.LdFrag addr:$src2)),
10497 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10498 To.RC:$src1, addr:$src2,
10499 (ImmXForm imm:$src3))>;
10501 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10503 (From.VT (OpNode From.RC:$src1,
10504 (bitconvert (To.LdFrag addr:$src2)),
10507 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10508 To.RC:$src1, addr:$src2,
10509 (ImmXForm imm:$src3))>;
// Extends avx512_vpalign_mask_lowering with broadcast-memory ("rmbi") forms:
// unmasked, merge-masked and zero-masked variants folding a To-typed
// element broadcast. Used where only the narrower-element instruction has a
// broadcast encoding (e.g. lowering valignq through valignd).
10512 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10513 X86VectorVTInfo From,
10514 X86VectorVTInfo To,
10515 SDNodeXForm ImmXForm> :
10516 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
10517 def : Pat<(From.VT (OpNode From.RC:$src1,
10518 (bitconvert (To.VT (X86VBroadcast
10519 (To.ScalarLdFrag addr:$src2)))),
10521 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10522 (ImmXForm imm:$src3))>;
10524 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10526 (From.VT (OpNode From.RC:$src1,
10528 (To.VT (X86VBroadcast
10529 (To.ScalarLdFrag addr:$src2)))),
10532 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10533 To.RC:$src1, addr:$src2,
10534 (ImmXForm imm:$src3))>;
10536 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10538 (From.VT (OpNode From.RC:$src1,
10540 (To.VT (X86VBroadcast
10541 (To.ScalarLdFrag addr:$src2)))),
10544 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10545 To.RC:$src1, addr:$src2,
10546 (ImmXForm imm:$src3))>;
10549 let Predicates = [HasAVX512] in {
10550 // For 512-bit we lower to the widest element type we can. So we only need
10551 // to handle converting valignq to valignd.
10552 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10553 v16i32_info, ValignqImm32XForm>;
10556 let Predicates = [HasVLX] in {
10557 // For 128-bit we lower to the widest element type we can. So we only need
10558 // to handle converting valignq to valignd.
10559 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10560 v4i32x_info, ValignqImm32XForm>;
10561 // For 256-bit we lower to the widest element type we can. So we only need
10562 // to handle converting valignq to valignd.
10563 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10564 v8i32x_info, ValignqImm32XForm>;
// With BWI, a masked 128-bit VALIGND/Q can be expressed as byte-granular
// VPALIGNR (no broadcast form, hence the non-_mb multiclass).
10567 let Predicates = [HasVLX, HasBWI] in {
10568 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
10569 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10570 v16i8x_info, ValignqImm8XForm>;
10571 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10572 v16i8x_info, ValigndImm8XForm>;
// VDBPSADBW: i8 sources, i16 destination — hence the mixed-type 3-op
// multiclass. Has no VEX equivalent (NotEVEX2VEXConvertible).
10575 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10576 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10577 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
// Unary op (e.g. vpabs): maskable register (rr) and full-vector memory (rm)
// forms, CD8 compressed-displacement scaled by the element size.
10579 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10580 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10581 let ExeDomain = _.ExeDomain in {
10582 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10583 (ins _.RC:$src1), OpcodeStr,
10585 (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
10588 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10589 (ins _.MemOp:$src1), OpcodeStr,
10591 (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
10592 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10593 Sched<[sched.Folded]>;
// Adds the element-broadcast memory form (rmb, EVEX_B) on top of
// avx512_unary_rm. Only 32/64-bit-element ops get this variant.
10597 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10598 X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10599 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10600 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10601 (ins _.ScalarMemOp:$src1), OpcodeStr,
10602 "${src1}"##_.BroadcastStr,
10603 "${src1}"##_.BroadcastStr,
10604 (_.VT (OpNode (X86VBroadcast
10605 (_.ScalarLdFrag addr:$src1))))>,
10606 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10607 Sched<[sched.Folded]>;
// Width-expansion of the unary op (no broadcast form); 128/256-bit forms
// additionally require HasVLX.
10610 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10611 X86SchedWriteWidths sched,
10612 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10613 let Predicates = [prd] in
10614 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10617 let Predicates = [prd, HasVLX] in {
10618 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10620 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// VL-expansion of avx512_unary_rmb (unary op including the broadcast
// memory form) across 512/256/128-bit widths; VLX gates the narrow forms.
10625 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10626 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10628 let Predicates = [prd] in
10629 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10632 let Predicates = [prd, HasVLX] in {
10633 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10635 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// D/Q element-size pair: the "q" variant gets VEX_W (REX.W=1) and i64
// elements, the "d" variant i32 elements. Both support broadcast forms.
10640 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10641 SDNode OpNode, X86SchedWriteWidths sched,
10643 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10644 avx512vl_i64_info, prd>, VEX_W;
10645 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10646 avx512vl_i32_info, prd>;
// B/W element-size pair: byte and word variants; no broadcast forms
// (uses avx512_unary_rm_vl, not the rmb version — B/W ops cannot broadcast).
10649 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10650 SDNode OpNode, X86SchedWriteWidths sched,
10652 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10653 avx512vl_i16_info, prd>, VEX_WIG;
10654 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10655 avx512vl_i8_info, prd>, VEX_WIG;
// Combines the D/Q and B/W expansions to cover all four element sizes
// with one opcode per size.
10658 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10659 bits<8> opc_d, bits<8> opc_q,
10660 string OpcodeStr, SDNode OpNode,
10661 X86SchedWriteWidths sched> {
10662 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10664 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
// VPABS{B,W,D,Q}: packed absolute value for all element sizes.
10668 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10671 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the operand into a ZMM register, run the 512-bit instruction,
// then extract the low 128/256 bits.
10672 let Predicates = [HasAVX512, NoVLX] in {
10673 def : Pat<(v4i64 (abs VR256X:$src)),
10676 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10678 def : Pat<(v2i64 (abs VR128X:$src)),
10681 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10685 // Use 512bit version to implement 128/256 bit.
// Generic NoVLX fallback for unary ops: insert the narrow source into an
// undef 512-bit register, execute the Z-form instruction, extract the
// original-width subregister back out.
10686 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10687 AVX512VLVectorVTInfo _, Predicate prd> {
10688 let Predicates = [prd, NoVLX] in {
10689 def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
10691 (!cast<Instruction>(InstrStr # "Zrr")
10692 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10693 _.info256.RC:$src1,
10694 _.info256.SubRegIdx)),
10695 _.info256.SubRegIdx)>;
10697 def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
10699 (!cast<Instruction>(InstrStr # "Zrr")
10700 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10701 _.info128.RC:$src1,
10702 _.info128.SubRegIdx)),
10703 _.info128.SubRegIdx)>;
// AVX-512 CDI: leading-zero count and conflict detection (d/q only).
10707 defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
10708 SchedWriteVecIMul, HasCDI>;
10710 // FIXME: Is there a better scheduler class for VPCONFLICT?
10711 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
10712 SchedWriteVecALU, HasCDI>;
10714 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
10715 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
10716 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10718 //===---------------------------------------------------------------------===//
10719 // Counts number of ones - VPOPCNTD and VPOPCNTQ
10720 //===---------------------------------------------------------------------===//
10722 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
10723 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10724 SchedWriteVecALU, HasVPOPCNTDQ>;
// NoVLX fallback: widen to 512 bits and use the Z-form (see
// avx512_unary_lowering above).
10726 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10727 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10729 //===---------------------------------------------------------------------===//
10730 // Replicate Single FP - MOVSHDUP and MOVSLDUP
10731 //===---------------------------------------------------------------------===//
// f32-only unary shuffle (duplicate odd/even lanes); XS prefix selects the
// F3-prefixed encoding.
10733 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10734 X86SchedWriteWidths sched> {
10735 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10736 avx512vl_f32_info, HasAVX512>, XS;
// 0x16 duplicates the high (odd) f32 of each pair, 0x12 the low (even) one.
10739 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10740 SchedWriteFShuffle>;
10741 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10742 SchedWriteFShuffle>;
10744 //===----------------------------------------------------------------------===//
10745 // AVX-512 - MOVDDUP
10746 //===----------------------------------------------------------------------===//
// 128-bit MOVDDUP is special: its memory form loads only one f64 and splats
// it (scalar_to_vector + OpNode), hence ScalarMemOp and half-vector tuple
// (CD8VH) instead of a full-width load.
10748 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
10749 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10750 let ExeDomain = _.ExeDomain in {
10751 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10752 (ins _.RC:$src), OpcodeStr, "$src", "$src",
10753 (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
10755 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10756 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10757 (_.VT (OpNode (_.VT (scalar_to_vector
10758 (_.ScalarLdFrag addr:$src)))))>,
10759 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10760 Sched<[sched.Folded]>;
// NOTE(review): the OpNode parameter is not used below — Z/Z256 hardwire
// X86Movddup and Z128 uses X86VBroadcast (the 128-bit memory form is a
// broadcast of a single f64).
10764 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
10765 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
10766 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
10767 VTInfo.info512>, EVEX_V512;
10769 let Predicates = [HasAVX512, HasVLX] in {
10770 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
10771 VTInfo.info256>, EVEX_V256;
10772 defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
10773 VTInfo.info128>, EVEX_V128;
// Front-end wrapper: f64 element info, XD (F2) prefix, REX.W.
10777 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10778 X86SchedWriteWidths sched> {
10779 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10780 avx512vl_f64_info>, XD, VEX_W;
10783 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
// Select VMOVDDUPZ128 for v2f64 broadcasts: from a loaded f64, from an FP
// register, or from a full v2f64 load (only element 0 matters). The
// vselect patterns below additionally cover merge- (rrk/rmk) and
// zero-masked (rrkz/rmkz) broadcasts.
10785 let Predicates = [HasVLX] in {
10786 def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
10787 (VMOVDDUPZ128rm addr:$src)>;
10788 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10789 (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
10790 def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
10791 (VMOVDDUPZ128rm addr:$src)>;
10793 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10794 (v2f64 VR128X:$src0)),
10795 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10796 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
10797 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10798 (bitconvert (v4i32 immAllZerosV))),
10799 (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
10801 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
10802 (v2f64 VR128X:$src0)),
10803 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10804 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
10805 (bitconvert (v4i32 immAllZerosV))),
10806 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10808 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
10809 (v2f64 VR128X:$src0)),
10810 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10811 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
10812 (bitconvert (v4i32 immAllZerosV))),
10813 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10816 //===----------------------------------------------------------------------===//
10817 // AVX-512 - Unpack Instructions
10818 //===----------------------------------------------------------------------===//
// FP unpacks cover ps/pd at all widths; integer unpacks: B/W forms need
// BWI, D/Q forms only need base AVX512.
10820 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
10821 SchedWriteFShuffleSizes>;
10822 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
10823 SchedWriteFShuffleSizes>;
10825 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10826 SchedWriteShuffle, HasBWI>;
10827 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10828 SchedWriteShuffle, HasBWI>;
10829 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10830 SchedWriteShuffle, HasBWI>;
10831 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10832 SchedWriteShuffle, HasBWI>;
10834 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10835 SchedWriteShuffle, HasAVX512>;
10836 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10837 SchedWriteShuffle, HasAVX512>;
10838 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10839 SchedWriteShuffle, HasAVX512>;
10840 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10841 SchedWriteShuffle, HasAVX512>;
10843 //===----------------------------------------------------------------------===//
10844 // AVX-512 - Extract & Insert Integer Instructions
10845 //===----------------------------------------------------------------------===//
// Extract-to-memory form for byte/word extracts: the extracted element is
// truncated to EltVT and stored; single-element tuple (CD8VT1).
10847 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10848 X86VectorVTInfo _> {
10849 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10850 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10851 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10852 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10854 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
// VPEXTRB: byte extract to GPR (zero-extended into GR32/GR64) plus the
// shared extract-to-memory form; requires BWI.
10857 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10858 let Predicates = [HasBWI] in {
10859 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10860 (ins _.RC:$src1, u8imm:$src2),
10861 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10862 [(set GR32orGR64:$dst,
10863 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
10864 EVEX, TAPD, Sched<[WriteVecExtract]>;
10866 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
// VPEXTRW: word extract to GPR. The legacy 0xC5 encoding carries the
// pattern; the 0x15 "rr_REV" form exists only so the disassembler can
// round-trip the alternate (MRMDestReg) encoding.
10870 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10871 let Predicates = [HasBWI] in {
10872 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10873 (ins _.RC:$src1, u8imm:$src2),
10874 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10875 [(set GR32orGR64:$dst,
10876 (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
10877 EVEX, PD, Sched<[WriteVecExtract]>;
10879 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10880 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10881 (ins _.RC:$src1, u8imm:$src2),
10882 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10883 EVEX, TAPD, FoldGenData<NAME#rr>,
10884 Sched<[WriteVecExtract]>;
10886 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
// VPEXTRD/VPEXTRQ: dword/qword extract to a GPR or to memory; matches the
// generic extractelt node directly (no truncation needed). Requires DQI.
10890 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10891 RegisterClass GRC> {
10892 let Predicates = [HasDQI] in {
10893 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10894 (ins _.RC:$src1, u8imm:$src2),
10895 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10897 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10898 EVEX, TAPD, Sched<[WriteVecExtract]>;
10900 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10901 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10902 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10903 [(store (extractelt (_.VT _.RC:$src1),
10904 imm:$src2),addr:$dst)]>,
10905 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
10906 Sched<[WriteVecExtractSt]>;
// Instantiate the extract instructions (128-bit sources only; element
// extraction is defined on XMM).
10910 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
10911 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
10912 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10913 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
// Insert-from-memory form shared by all insert-element instructions:
// loads one scalar via LdFrag and inserts it at the immediate index.
10915 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10916 X86VectorVTInfo _, PatFrag LdFrag> {
10917 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
10918 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10919 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10921 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
10922 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
// VPINSRB/VPINSRW: insert a byte/word taken from a GPR; BWI-gated, plus
// the shared memory form.
10925 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10926 X86VectorVTInfo _, PatFrag LdFrag> {
10927 let Predicates = [HasBWI] in {
10928 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10929 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
10930 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10932 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
10933 Sched<[WriteVecInsert]>;
10935 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
// VPINSRD/VPINSRQ: insert a dword/qword from a GPR using the generic
// insertelt node; DQI-gated.
10939 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
10940 X86VectorVTInfo _, RegisterClass GRC> {
10941 let Predicates = [HasDQI] in {
10942 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10943 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
10944 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10946 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
10947 EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
10949 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
10950 _.ScalarLdFrag>, TAPD;
// VPINSRD and VPINSRQ intentionally share opcode 0x22 — they are
// distinguished by VEX/EVEX.W (VEX_W on the Q form), matching the SDM.
10954 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
10955 extloadi8>, TAPD, VEX_WIG;
10956 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
10957 extloadi16>, PD, VEX_WIG;
10958 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
10959 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
10961 //===----------------------------------------------------------------------===//
10962 // VSHUFPS - VSHUFPD Operations
10963 //===----------------------------------------------------------------------===//
// FP shuffles with an 8-bit control immediate; VTInfo_I carries the
// matching integer info (same element size) for the template.
10965 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
10966 AVX512VLVectorVTInfo VTInfo_FP>{
10967 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
10968 SchedWriteFShuffle>,
10969 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
10970 AVX512AIi8Base, EVEX_4V;
10973 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
10974 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
10976 //===----------------------------------------------------------------------===//
10977 // AVX-512 - Byte shift Left/Right
10978 //===----------------------------------------------------------------------===//
// Whole-register byte shifts (PSLLDQ/PSRLDQ style): shift amount is an
// immediate; the opcode's sub-encoding is chosen by the MRMr/MRMm Format
// arguments. These forms have no masked variants.
10980 // FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
10981 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
10982 Format MRMm, string OpcodeStr,
10983 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10984 def rr : AVX512<opc, MRMr,
10985 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
10986 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10987 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
10989 def rm : AVX512<opc, MRMm,
10990 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
10991 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10992 [(set _.RC:$dst,(_.VT (OpNode
10993 (_.VT (bitconvert (_.LdFrag addr:$src1))),
10994 (i8 imm:$src2))))]>,
10995 Sched<[sched.Folded, ReadAfterLd]>;
// VL expansion of the byte shifts over v64i8/v32i8/v16i8; 256/128-bit
// forms additionally require VLX.
10998 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
10999 Format MRMm, string OpcodeStr,
11000 X86SchedWriteWidths sched, Predicate prd>{
11001 let Predicates = [prd] in
11002 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11003 sched.ZMM, v64i8_info>, EVEX_V512;
11004 let Predicates = [prd, HasVLX] in {
11005 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11006 sched.YMM, v32i8x_info>, EVEX_V256;
11007 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11008 sched.XMM, v16i8x_info>, EVEX_V128;
// Both use opcode 0x73; reg /7 vs /3 (MRM7*/MRM3*) selects shift-left
// vs shift-right, as in the legacy SSE encodings.
11011 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11012 SchedWriteShuffle, HasBWI>,
11013 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11014 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11015 SchedWriteShuffle, HasBWI>,
11016 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
// PSADBW: sum of absolute byte differences. Note the destination and
// source use different VT infos (i64 result elements from i8 inputs),
// hence the separate _dst/_src parameters.
11018 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11019 string OpcodeStr, X86FoldableSchedWrite sched,
11020 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11021 def rr : AVX512BI<opc, MRMSrcReg,
11022 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11023 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11024 [(set _dst.RC:$dst,(_dst.VT
11025 (OpNode (_src.VT _src.RC:$src1),
11026 (_src.VT _src.RC:$src2))))]>,
11028 def rm : AVX512BI<opc, MRMSrcMem,
11029 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11030 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11031 [(set _dst.RC:$dst,(_dst.VT
11032 (OpNode (_src.VT _src.RC:$src1),
11033 (_src.VT (bitconvert
11034 (_src.LdFrag addr:$src2))))))]>,
11035 Sched<[sched.Folded, ReadAfterLd]>;
// VL expansion: i64 destinations paired with i8 sources at each width;
// narrow forms gated on VLX.
11038 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11039 string OpcodeStr, X86SchedWriteWidths sched,
11041 let Predicates = [prd] in
11042 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11043 v8i64_info, v64i8_info>, EVEX_V512;
11044 let Predicates = [prd, HasVLX] in {
11045 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11046 v4i64x_info, v32i8x_info>, EVEX_V256;
11047 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11048 v2i64x_info, v16i8x_info>, EVEX_V128;
// VPSADBW requires BWI (byte-element operation).
11052 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11053 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11055 // Transforms to swizzle an immediate to enable better matching when
11056 // memory operand isn't in the right place.
// The ternlog immediate is a truth table indexed by bit (op0<<2)|(op1<<1)|op2,
// so permuting operands permutes the immediate's bits. Operand indices in
// the comments below are 0-based; the xform names use the 1-based "132"
// style convention.
11057 def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
11058 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11059 uint8_t Imm = N->getZExtValue();
11060 // Swap bits 1/4 and 3/6.
11061 uint8_t NewImm = Imm & 0xa5;
11062 if (Imm & 0x02) NewImm |= 0x10;
11063 if (Imm & 0x10) NewImm |= 0x02;
11064 if (Imm & 0x08) NewImm |= 0x40;
11065 if (Imm & 0x40) NewImm |= 0x08;
11066 return getI8Imm(NewImm, SDLoc(N));
11068 def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
// Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
// (The bit swap 2<->4 / 3<->5 exchanges the two high index bits, i.e.
// the first two operands under the 0-based numbering used by the
// sibling xforms above/below.)
11070 uint8_t Imm = N->getZExtValue();
11071 // Swap bits 2/4 and 3/5.
11072 uint8_t NewImm = Imm & 0xc3;
11073 if (Imm & 0x04) NewImm |= 0x10;
11074 if (Imm & 0x10) NewImm |= 0x04;
11075 if (Imm & 0x08) NewImm |= 0x20;
11076 if (Imm & 0x20) NewImm |= 0x08;
11077 return getI8Imm(NewImm, SDLoc(N));
11079 def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
11080 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11081 uint8_t Imm = N->getZExtValue();
// Swapping the low two index bits exchanges truth-table entries where
// op1 != op2.
11082 // Swap bits 1/2 and 5/6.
11083 uint8_t NewImm = Imm & 0x99;
11084 if (Imm & 0x02) NewImm |= 0x04;
11085 if (Imm & 0x04) NewImm |= 0x02;
11086 if (Imm & 0x20) NewImm |= 0x40;
11087 if (Imm & 0x40) NewImm |= 0x20;
11088 return getI8Imm(NewImm, SDLoc(N));
11090 def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
// Convert a VPTERNLOG immediate by moving operand 0 to the end
// (rotate operands left: index bits (a,b,c) -> (b,c,a)).
11092 uint8_t Imm = N->getZExtValue();
11093 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11094 uint8_t NewImm = Imm & 0x81;
11095 if (Imm & 0x02) NewImm |= 0x04;
11096 if (Imm & 0x04) NewImm |= 0x10;
11097 if (Imm & 0x08) NewImm |= 0x40;
11098 if (Imm & 0x10) NewImm |= 0x02;
11099 if (Imm & 0x20) NewImm |= 0x08;
11100 if (Imm & 0x40) NewImm |= 0x20;
11101 return getI8Imm(NewImm, SDLoc(N));
11103 def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
11104 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
// (Rotate operands right: index bits (a,b,c) -> (c,a,b); inverse of the
// 231 transform above.)
11105 uint8_t Imm = N->getZExtValue();
11106 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11107 uint8_t NewImm = Imm & 0x81;
11108 if (Imm & 0x02) NewImm |= 0x10;
11109 if (Imm & 0x04) NewImm |= 0x02;
11110 if (Imm & 0x08) NewImm |= 0x20;
11111 if (Imm & 0x10) NewImm |= 0x04;
11112 if (Imm & 0x20) NewImm |= 0x40;
11113 if (Imm & 0x40) NewImm |= 0x08;
11114 return getI8Imm(NewImm, SDLoc(N));
// VPTERNLOG: 3-source bitwise ternary logic selected by an 8-bit truth
// table. $src1 is tied to $dst. Besides the rri/rmi/rmbi forms, this
// multiclass adds many explicit patterns that re-match the operation when
// the tied/memory/broadcast operand appears in a different position,
// swizzling the immediate with the VPTERNLOG*_imm8 transforms above so
// the instruction still computes the same function.
11117 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11118 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11120 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11121 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11122 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11123 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11124 (OpNode (_.VT _.RC:$src1),
11127 (i8 imm:$src4)), 1, 1>,
11128 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11129 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11130 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11131 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11132 (OpNode (_.VT _.RC:$src1),
11134 (_.VT (bitconvert (_.LdFrag addr:$src3))),
11135 (i8 imm:$src4)), 1, 0>,
11136 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11137 Sched<[sched.Folded, ReadAfterLd]>;
11138 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11139 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11140 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11141 "$src2, ${src3}"##_.BroadcastStr##", $src4",
11142 (OpNode (_.VT _.RC:$src1),
11144 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
11145 (i8 imm:$src4)), 1, 0>, EVEX_B,
11146 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11147 Sched<[sched.Folded, ReadAfterLd]>;
11148 }// Constraints = "$src1 = $dst"
11150 // Additional patterns for matching passthru operand in other positions.
11151 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11152 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11154 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11155 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11156 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11157 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
11159 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11160 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11162 // Additional patterns for matching loads in other positions.
11163 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11164 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
11165 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11166 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11167 def : Pat<(_.VT (OpNode _.RC:$src1,
11168 (bitconvert (_.LdFrag addr:$src3)),
11169 _.RC:$src2, (i8 imm:$src4))),
11170 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11171 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11173 // Additional patterns for matching zero masking with loads in other
11175 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11176 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11177 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11179 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11180 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11181 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11182 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11183 _.RC:$src2, (i8 imm:$src4)),
11185 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11186 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11188 // Additional patterns for matching masked loads with different
11190 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11191 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11192 _.RC:$src2, (i8 imm:$src4)),
11194 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11195 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11196 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11197 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11198 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11200 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11201 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11202 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11203 (OpNode _.RC:$src2, _.RC:$src1,
11204 (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
11206 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11207 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11208 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11209 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11210 _.RC:$src1, (i8 imm:$src4)),
11212 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11213 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
11214 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11215 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11216 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
11218 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11219 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
11221 // Additional patterns for matching broadcasts in other positions.
11222 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11223 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
11224 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11225 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11226 def : Pat<(_.VT (OpNode _.RC:$src1,
11227 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11228 _.RC:$src2, (i8 imm:$src4))),
11229 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11230 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11232 // Additional patterns for matching zero masking with broadcasts in other
11234 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11235 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11236 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11238 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11239 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11240 (VPTERNLOG321_imm8 imm:$src4))>;
11241 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11242 (OpNode _.RC:$src1,
11243 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11244 _.RC:$src2, (i8 imm:$src4)),
11246 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11247 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11248 (VPTERNLOG132_imm8 imm:$src4))>;
11250 // Additional patterns for matching masked broadcasts with different
11252 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11253 (OpNode _.RC:$src1,
11254 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11255 _.RC:$src2, (i8 imm:$src4)),
11257 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11258 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11259 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11260 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11261 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11263 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11264 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11265 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11266 (OpNode _.RC:$src2, _.RC:$src1,
11267 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11268 (i8 imm:$src4)), _.RC:$src1)),
11269 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11270 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11271 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11272 (OpNode _.RC:$src2,
11273 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11274 _.RC:$src1, (i8 imm:$src4)),
11276 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11277 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
11278 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11279 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11280 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
11282 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11283 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
// VL expansion for VPTERNLOG; opcode 0x25 at every width, narrow forms
// gated on VLX. NAME is threaded through so the extra patterns in
// avx512_ternlog can !cast the generated instruction names.
11286 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11287 AVX512VLVectorVTInfo _> {
11288 let Predicates = [HasAVX512] in
11289 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11290 _.info512, NAME>, EVEX_V512;
11291 let Predicates = [HasAVX512, HasVLX] in {
11292 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11293 _.info128, NAME>, EVEX_V128;
11294 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11295 _.info256, NAME>, EVEX_V256;
// D/Q variants differ only in element size (masking granularity) and
// REX.W on the Q form.
11299 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11300 avx512vl_i32_info>;
11301 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11302 avx512vl_i64_info>, VEX_W;
11304 // Patterns to implement vnot using vpternlog instead of creating all ones
11305 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11306 // so that the result is only dependent on src0. But we use the same source
11307 // for all operands to prevent a false dependency.
11308 // TODO: We should maybe have a more generalized algorithm for folding to
11310 let Predicates = [HasAVX512] in {
11311 def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
11312 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
// Without VLX the 128/256-bit NOT is done by widening into ZMM, running
// the 512-bit VPTERNLOGQ, and extracting the low subregister.
11315 let Predicates = [HasAVX512, NoVLX] in {
11316 def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
11319 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11320 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11321 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11322 (i8 15)), sub_xmm)>;
11323 def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
11326 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11327 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11328 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11329 (i8 15)), sub_ymm)>;
11332 let Predicates = [HasVLX] in {
11333 def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
11334 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11335 def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
11336 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11339 //===----------------------------------------------------------------------===//
11340 // AVX-512 - FixupImm
11341 //===----------------------------------------------------------------------===//
// VFIXUPIMM: fix up special FP values using a lookup table in $src3
// (TblVT is the integer table type). $src1 is tied to $dst; rri/rmi/rmbi
// forms use the current rounding mode (FROUND_CURRENT).
11343 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
11344 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11345 X86VectorVTInfo TblVT>{
11346 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11347 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11348 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11349 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11350 (OpNode (_.VT _.RC:$src1),
11352 (TblVT.VT _.RC:$src3),
11354 (i32 FROUND_CURRENT))>, Sched<[sched]>;
11355 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11356 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11357 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11358 (OpNode (_.VT _.RC:$src1),
11360 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11362 (i32 FROUND_CURRENT))>,
11363 Sched<[sched.Folded, ReadAfterLd]>;
11364 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11365 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11366 OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11367 "$src2, ${src3}"##_.BroadcastStr##", $src4",
11368 (OpNode (_.VT _.RC:$src1),
11370 (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
11372 (i32 FROUND_CURRENT))>,
11373 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
11374 } // Constraints = "$src1 = $dst"
// SAE (suppress-all-exceptions) register-register form; EVEX.b selects
// {sae} and the node gets FROUND_NO_EXC.
11377 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11378 SDNode OpNode, X86FoldableSchedWrite sched,
11379 X86VectorVTInfo _, X86VectorVTInfo TblVT>{
11380 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11381 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11382 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11383 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11384 "$src2, $src3, {sae}, $src4",
11385 (OpNode (_.VT _.RC:$src1),
11387 (TblVT.VT _.RC:$src3),
11389 (i32 FROUND_NO_EXC))>,
11390 EVEX_B, Sched<[sched]>;
// VFIXUPIMMSS/SD (scalar): fix up the special FP value of the low element
// of $src2 using the integer table element in $src3 and the imm8 control.
// Emits register (rri), SAE register (rrib) and scalar-memory (rmi) forms.
// $src1 is tied to $dst (upper elements pass through).
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      // FROUND_CURRENT: current MXCSR rounding mode.
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>, Sched<[sched]>;
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      // {sae}: suppress all FP exceptions (FROUND_NO_EXC).
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_NO_EXC))>,
                      // This is a register-register form (MRMSrcReg): use the
                      // plain scheduling class, not the memory-folded one, to
                      // match the packed SAE variant above.
                      EVEX_B, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT (scalar_to_vector
                                          (_src3VT.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>,
                      Sched<[sched.Folded, ReadAfterLd]>;
  }
}
// Instantiate packed FixupImm over all vector widths: 512-bit (plus the
// SAE form) under HasAVX512, 128/256-bit under HasAVX512+HasVLX.
11430 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11431 AVX512VLVectorVTInfo _Vec,
11432 AVX512VLVectorVTInfo _Tbl> {
11433 let Predicates = [HasAVX512] in
11434 defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
11435 _Vec.info512, _Tbl.info512>,
11436 avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
11437 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11438 EVEX_4V, EVEX_V512;
11439 let Predicates = [HasAVX512, HasVLX] in {
11440 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
11441 _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11442 EVEX_4V, EVEX_V128;
11443 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
11444 _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11445 EVEX_4V, EVEX_V256;
// Concrete FixupImm instructions: scalar SS/SD (opcode 0x55) and packed
// PS/PD (0x54). CD8 tuple and VEX_W select the 32- vs 64-bit element forms.
11449 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
11450 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11451 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11452 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
11453 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11454 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11455 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11456 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11457 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11458 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11460 // Patterns used to select SSE scalar fp arithmetic instructions from
11463 // (1) a scalar fp operation followed by a blend
11465 // The effect is that the backend no longer emits unnecessary vector
11466 // insert instructions immediately after SSE scalar fp instructions
11467 // like addss or mulss.
11469 // For example, given the following code:
11470 // __m128 foo(__m128 A, __m128 B) {
11475 // Previously we generated:
11476 // addss %xmm0, %xmm1
11477 // movss %xmm1, %xmm0
11479 // We now generate:
11480 // addss %xmm1, %xmm0
11482 // (2) a vector packed single/double fp operation followed by a vector insert
11484 // The effect is that the backend converts the packed fp instruction
11485 // followed by a vector insert into a single SSE scalar fp instruction.
11487 // For example, given the following code:
11488 // __m128 foo(__m128 A, __m128 B) {
11489 // __m128 C = A + B;
11490 // return (__m128) {C[0], A[1], A[2], A[3]};
11493 // Previously we generated:
11494 // addps %xmm0, %xmm1
11495 // movss %xmm1, %xmm0
11497 // We now generate:
11498 // addss %xmm1, %xmm0
11500 // TODO: Some canonicalization in lowering would simplify the number of
11501 // patterns we have to try to match.
// Selection patterns mapping (movss/movsd blend of a scalar FP op) onto the
// AVX-512 *_Int instruction forms, including the masked (Intk) and
// zero-masked (Intkz) variants driven by X86selects.
11502 multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
11503 X86VectorVTInfo _, PatLeaf ZeroFP> {
11504 let Predicates = [HasAVX512] in {
11505 // extracted scalar math op with insert via movss
11506 def : Pat<(_.VT (MoveNode (_.VT VR128X:$dst), (_.VT (scalar_to_vector
11507 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11509 (!cast<I>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
11510 (COPY_TO_REGCLASS _.FRC:$src, VR128X))>;
11512 // vector math op with insert via movss
11513 def : Pat<(_.VT (MoveNode (_.VT VR128X:$dst),
11514 (Op (_.VT VR128X:$dst), (_.VT VR128X:$src)))),
11515 (!cast<I>("V"#OpcPrefix#Zrr_Int) _.VT:$dst, _.VT:$src)>;
11517 // extracted masked scalar math op with insert via movss
11518 def : Pat<(MoveNode (_.VT VR128X:$src1),
11520 (X86selects VK1WM:$mask,
11521 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
// Merge-masking form: $src0 supplies the passthrough value.
11524 (!cast<I>("V"#OpcPrefix#Zrr_Intk) (COPY_TO_REGCLASS _.FRC:$src0, VR128X),
11525 VK1WM:$mask, _.VT:$src1,
11526 (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>;
11528 // extracted masked scalar math op with insert via movss
11529 def : Pat<(MoveNode (_.VT VR128X:$src1),
11531 (X86selects VK1WM:$mask,
11532 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
// Zero-masking form: masked-off element becomes ZeroFP.
11533 _.FRC:$src2), (_.EltVT ZeroFP)))),
11534 (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
11535 VK1WM:$mask, _.VT:$src1,
11536 (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>;
// Instantiate the scalar-math patterns for add/sub/mul/div, in both
// single-precision (movss/v4f32) and double-precision (movsd/v2f64) forms.
11540 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11541 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11542 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11543 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11545 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11546 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11547 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11548 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11551 //===----------------------------------------------------------------------===//
11552 // AES instructions
11553 //===----------------------------------------------------------------------===//
// VAES: EVEX-encoded AES rounds at 128/256-bit (HasVLX+HasVAES) and
// 512-bit (HasAVX512+HasVAES), reusing the legacy AESI_binop_rm_int
// multiclass with the width-specific intrinsic looked up by name.
11555 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
11556 let Predicates = [HasVLX, HasVAES] in {
11557 defm Z128 : AESI_binop_rm_int<Op, OpStr,
11558 !cast<Intrinsic>(IntPrefix),
11559 loadv2i64, 0, VR128X, i128mem>,
11560 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
11561 defm Z256 : AESI_binop_rm_int<Op, OpStr,
11562 !cast<Intrinsic>(IntPrefix##"_256"),
11563 loadv4i64, 0, VR256X, i256mem>,
11564 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
11566 let Predicates = [HasAVX512, HasVAES] in
11567 defm Z : AESI_binop_rm_int<Op, OpStr,
11568 !cast<Intrinsic>(IntPrefix##"_512"),
11569 loadv8i64, 0, VR512, i512mem>,
11570 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
// The four AES round instructions (enc/enclast/dec/declast), 0xDC-0xDF.
11573 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
11574 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
11575 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
11576 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11578 //===----------------------------------------------------------------------===//
11579 // PCLMUL instructions - Carry less multiplication
11580 //===----------------------------------------------------------------------===//
// EVEX-encoded carry-less multiply: 512-bit under AVX512+VPCLMULQDQ,
// 128/256-bit under VLX+VPCLMULQDQ, plus the pclmul{lqlq,...}dq asm aliases.
11582 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
11583 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
11584 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
11586 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
11587 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
11588 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
11590 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
11591 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
11592 EVEX_CD8<64, CD8VF>, VEX_WIG;
// Immediate-encoding aliases (e.g. vpclmullqlqdq) for each width.
11596 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
11597 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
11598 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11600 //===----------------------------------------------------------------------===//
11602 //===----------------------------------------------------------------------===//
// VBMI2 variable funnel-shift (vpshldv/vpshrdv) register and full-width
// memory forms; $src1 is tied to $dst.
11604 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
11605 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11606 let Constraints = "$src1 = $dst",
11607 ExeDomain = VTI.ExeDomain in {
11608 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11609 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11610 "$src3, $src2", "$src2, $src3",
11611 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
11612 AVX512FMA3Base, Sched<[sched]>;
11613 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11614 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11615 "$src3, $src2", "$src2, $src3",
11616 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11617 (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
11619 Sched<[sched.Folded, ReadAfterLd]>;
// Adds the broadcast (EVEX_B) memory form on top of VBMI2_shift_var_rm;
// used for the dword/qword element sizes that support embedded broadcast.
11623 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11624 X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
11625 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
11626 let Constraints = "$src1 = $dst",
11627 ExeDomain = VTI.ExeDomain in
11628 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11629 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
11630 "${src3}"##VTI.BroadcastStr##", $src2",
11631 "$src2, ${src3}"##VTI.BroadcastStr,
11632 (OpNode VTI.RC:$src1, VTI.RC:$src2,
11633 (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
11634 AVX512FMA3Base, EVEX_B,
11635 Sched<[sched.Folded, ReadAfterLd]>;
// Width driver for the non-broadcast variable shifts (word element size):
// 512-bit under HasVBMI2, 128/256-bit under HasVBMI2+HasVLX.
11638 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
11639 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11640 let Predicates = [HasVBMI2] in
11641 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11643 let Predicates = [HasVBMI2, HasVLX] in {
11644 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11646 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
// Width driver for the broadcast-capable variable shifts (dword/qword
// element sizes); same predicate split as the rm_common version above.
11651 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
11652 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11653 let Predicates = [HasVBMI2] in
11654 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11656 let Predicates = [HasVBMI2, HasVLX] in {
11657 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11659 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
// Element-size driver: W uses one opcode and no broadcast; D and Q share
// the second opcode (distinguished by VEX_W) and support broadcast.
11663 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
11664 SDNode OpNode, X86SchedWriteWidths sched> {
11665 defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
11666 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
11667 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
11668 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11669 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
11670 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// Immediate funnel-shift forms (vpshld/vpshrd): W via the rm_imm8 helper,
// D/Q via the common imm8 helper with broadcast support.
11673 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
11674 SDNode OpNode, X86SchedWriteWidths sched> {
11675 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
11676 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
11677 VEX_W, EVEX_CD8<16, CD8VF>;
11678 defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
11679 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11680 defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
11681 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// VBMI2 instruction instantiations: variable and immediate funnel shifts,
// plus the byte/word compress and expand forms.
11685 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
11686 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
11687 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
11688 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
11691 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
11692 avx512vl_i8_info, HasVBMI2>, EVEX,
11694 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
11695 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
11698 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
11699 avx512vl_i8_info, HasVBMI2>, EVEX;
11700 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
11701 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11703 //===----------------------------------------------------------------------===//
11705 //===----------------------------------------------------------------------===//
// VNNI dot-product accumulate (vpdpbusd family): register, full-width
// memory, and dword-broadcast forms; accumulator $src1 is tied to $dst.
11707 let Constraints = "$src1 = $dst" in
11708 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11709 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11710 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11711 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11712 "$src3, $src2", "$src2, $src3",
11713 (VTI.VT (OpNode VTI.RC:$src1,
11714 VTI.RC:$src2, VTI.RC:$src3))>,
11715 EVEX_4V, T8PD, Sched<[sched]>;
11716 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11717 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11718 "$src3, $src2", "$src2, $src3",
11719 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11720 (VTI.VT (bitconvert
11721 (VTI.LdFrag addr:$src3)))))>,
11722 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
11723 Sched<[sched.Folded, ReadAfterLd]>;
// Broadcast form: a single dword from memory splatted into $src3.
11724 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11725 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
11726 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
11727 "$src2, ${src3}"##VTI.BroadcastStr,
11728 (OpNode VTI.RC:$src1, VTI.RC:$src2,
11729 (VTI.VT (X86VBroadcast
11730 (VTI.ScalarLdFrag addr:$src3))))>,
11731 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
11732 T8PD, Sched<[sched.Folded, ReadAfterLd]>;
// Width driver for VNNI: 512-bit under HasVNNI, 128/256-bit under
// HasVNNI+HasVLX, all over i32 element vectors.
11735 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
11736 X86SchedWriteWidths sched> {
11737 let Predicates = [HasVNNI] in
11738 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512;
11739 let Predicates = [HasVNNI, HasVLX] in {
11740 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256;
11741 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128;
// VNNI instruction instantiations (byte*byte and word*word dot products,
// with and without saturation).
11745 // FIXME: Is there a better scheduler class for VPDP?
11746 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>;
11747 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>;
11748 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>;
11749 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>;
11751 //===----------------------------------------------------------------------===//
11753 //===----------------------------------------------------------------------===//
11755 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// BITALG byte/word population count, plus the unary lowering patterns that
// let the 512-bit forms cover narrower vectors when VLX is unavailable.
11756 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
11757 avx512vl_i8_info, HasBITALG>;
11758 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
11759 avx512vl_i16_info, HasBITALG>, VEX_W;
11761 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
11762 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
// VPSHUFBITQMB: bit-gather producing a mask register result (KRC:$dst);
// register and memory source forms.
11764 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11765 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
11766 (ins VTI.RC:$src1, VTI.RC:$src2),
11768 "$src2, $src1", "$src1, $src2",
11769 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11770 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
11772 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
11773 (ins VTI.RC:$src1, VTI.MemOp:$src2),
11775 "$src2, $src1", "$src1, $src2",
11776 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11777 (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
11778 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
11779 Sched<[sched.Folded, ReadAfterLd]>;
// Width driver and instantiation for VPSHUFBITQMB (BITALG feature).
11782 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11783 let Predicates = [HasBITALG] in
11784 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
11785 let Predicates = [HasBITALG, HasVLX] in {
11786 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
11787 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
11791 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
11792 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
11794 //===----------------------------------------------------------------------===//
11796 //===----------------------------------------------------------------------===//
// GFNI byte multiply in GF(2^8): commutable binop over i8 vectors; the
// EVEX widths are gated on HasGFNI + HasBWI (+ HasVLX for 128/256-bit).
11798 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
11799 X86SchedWriteWidths sched> {
11800 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
11801 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
11803 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
11804 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
11806 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
11811 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
11813 EVEX_CD8<8, CD8VF>, T8PD;
// GFNI affine transform: inherits reg/mem imm8 forms from
// avx512_3Op_rm_imm8, and adds the qword-broadcast (EVEX_B) form whose
// memory operand is viewed through BcstVTI (an i64 vector type).
11815 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
11816 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
11817 X86VectorVTInfo BcstVTI>
11818 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
11819 let ExeDomain = VTI.ExeDomain in
11820 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11821 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
11822 OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
11823 "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
11824 (OpNode (VTI.VT VTI.RC:$src1),
11825 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
11826 (i8 imm:$src3))>, EVEX_B,
11827 Sched<[sched.Folded, ReadAfterLd]>;
// Width driver for the affine forms, plus the two instruction
// instantiations (inverse-affine 0xCF, affine 0xCE); VEX_W selects the
// qword matrix operand interpretation.
11830 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
11831 X86SchedWriteWidths sched> {
11832 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
11833 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
11834 v64i8_info, v8i64_info>, EVEX_V512;
11835 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
11836 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
11837 v32i8x_info, v4i64x_info>, EVEX_V256;
11838 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
11839 v16i8x_info, v2i64x_info>, EVEX_V128;
11843 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
11844 X86GF2P8affineinvqb, SchedWriteVecIMul>,
11845 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
11846 defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
11847 X86GF2P8affineqb, SchedWriteVecIMul>,
11848 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
11851 //===----------------------------------------------------------------------===//
11853 //===----------------------------------------------------------------------===//
// AVX512_4FMAPS: 4-iteration FMA forms. Asm-only definitions (no ISel
// patterns yet, hence hasSideEffects = 0 and the empty pattern lists);
// all forms read a 128-bit memory block of four consecutive operands.
11855 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
11856 Constraints = "$src1 = $dst" in {
11857 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
11858 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
11859 "v4fmaddps", "$src3, $src2", "$src2, $src3",
11860 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
11861 Sched<[SchedWriteFMA.ZMM.Folded]>;
11863 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
11864 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
11865 "v4fnmaddps", "$src3, $src2", "$src2, $src3",
11866 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
11867 Sched<[SchedWriteFMA.ZMM.Folded]>;
11869 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
11870 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
11871 "v4fmaddss", "$src3, $src2", "$src2, $src3",
11872 []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
11873 Sched<[SchedWriteFMA.Scl.Folded]>;
11875 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
11876 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
11877 "v4fnmaddss", "$src3, $src2", "$src2, $src3",
11878 []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
11879 Sched<[SchedWriteFMA.Scl.Folded]>;
11882 //===----------------------------------------------------------------------===//
11884 //===----------------------------------------------------------------------===//
11886 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
11887 Constraints = "$src1 = $dst" in {
11888 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
11889 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
11890 "vp4dpwssd", "$src3, $src2", "$src2, $src3",
11891 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
11892 Sched<[SchedWriteFMA.ZMM.Folded]>;
11894 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
11895 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
11896 "vp4dpwssds", "$src3, $src2", "$src2, $src3",
11897 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
11898 Sched<[SchedWriteFMA.ZMM.Folded]>;