OSDN Git Service

[X86] Rename VFPCLASSSS and VFPCLASSSD internal instruction names to include a Z...
[android-x86/external-llvm.git] / lib / Target / X86 / X86InstrAVX512.td
1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file describes the X86 AVX512 instruction set, defining the
11 // instructions, and properties of the instructions which are needed for code
12 // generation, machine code emission, and analysis.
13 //
14 //===----------------------------------------------------------------------===//
15
16 // Group template arguments that can be derived from the vector type (EltNum x
17 // EltVT).  These are things like the register class for the writemask, etc.
18 // The idea is to pass one of these as the template argument rather than the
19 // individual arguments.
20 // The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  // The full vector register class, e.g. VR512 for v16i32.
  RegisterClass RC = rc;
  // Element value type, e.g. i32 for v16i32.
  ValueType EltVT = eltvt;
  // Number of vector elements; 1 for scalar types.
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64 (the smallest 128-bit vector that
  // holds the element type).
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  // Element type rendered as a string, e.g. "f32".
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand for a single element, e.g. f32mem.
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
  //       due to load promotion during legalization
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if (!eq (TypeVariantName, "i"),
                                       !if (!eq (Size, 128), "v2i64",
                                       !if (!eq (Size, 256), "v4i64",
                                       !if (!eq (Size, 512), "v8i64",
                                            VTName))), VTName));

  // Aligned-load analogue of LdFrag, same i64 promotion for integer VTs.
  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
                                         !if (!eq (TypeVariantName, "i"),
                                               !if (!eq (Size, 128), "v2i64",
                                               !if (!eq (Size, 256), "v4i64",
                                               !if (!eq (Size, 512), "v8i64",
                                                   VTName))), VTName));

  // Load of a single element, e.g. loadf32.
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // Complex pattern used by scalar FP intrinsics to match a scalar load;
  // only defined for f32/f64 elements.
  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                          !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                          !cast<ComplexPattern>("sse_load_f64"),
                                    ?));

  // The corresponding float type, e.g. v16f32 for v16i32
  // Note: For EltSize < 32, FloatVT is illegal and TableGen
  //       fails to compile, so we choose FloatVT = VT
  ValueType FloatVT = !cast<ValueType>(
                        !if (!eq (!srl(EltSize,5),0),
                             VTName,
                             !if (!eq(TypeVariantName, "i"),
                                  "v" # NumElts # "f" # EltSize,
                                  VTName)));

  // The corresponding integer type, e.g. v16i32 for v16f32; same
  // EltSize < 32 fallback as FloatVT above.
  ValueType IntVT = !cast<ValueType>(
                        !if (!eq (!srl(EltSize,5),0),
                             VTName,
                             !if (!eq(TypeVariantName, "f"),
                                  "v" # NumElts # "i" # EltSize,
                                  VTName)));
  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Subregister index of this vector inside a wider register; only
  // defined for 128/256-bit types.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain: packed-single/packed-double for FP, integer otherwise.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  // Scalar FP register class matching the element type.
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // A vector type of the same width with element type i64. This is used to
  // create patterns for logic ops.
  ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");

  // A vector type of the same width with element type i32.  This is used to
  // create the canonical constant zero node ImmAllZerosV.
  ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
  dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));

  // Instruction-name suffix encoding the vector width (Z128/Z256/Z).
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
138
// 512-bit vector types (RC = VR512).
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// 256-bit vector types; "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit vector types (RC = VR128X).
def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
167
// Groups the 512/256/128-bit X86VectorVTInfo instances of one element type,
// so AVX512VL multiclasses can expand an instruction over all three widths.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512; // 512-bit (ZMM) variant.
  X86VectorVTInfo info256 = i256; // 256-bit (YMM) variant.
  X86VectorVTInfo info128 = i128; // 128-bit (XMM) variant.
}
174
// One width-group per element type, built from the defs above.
def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;
187
// Minimal VT-info analogue for operations on mask registers themselves:
// just the mask register class, its write-mask class, and the vNi1 type.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;     // Mask register class, e.g. VK16.
  RegisterClass KRCWM = _krcwm; // Write-mask register class, e.g. VK16WM.
  ValueType KVT = _vt;          // Mask value type, e.g. v16i1.
}
194
// One instance per supported mask width (1 through 64 bits).
def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
202
// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
// It emits three instructions: NAME (unmasked), NAME#k (merge-masking,
// EVEX_K) and NAME#kz (zero-masking, EVEX_KZ).
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0> {
  // Unmasked form: plain "$dst" destination.
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Merge-masking form: "$dst {${mask}}".
  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero-masking form: "$dst {${mask}} {z}".
  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}
244
245
// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three (set ...) patterns for AVX512_maskable_custom from the
// RHS dags: unmasked RHS, caller-supplied MaskingRHS, and a derived
// zero-masking pattern selecting between RHS and the all-zeros vector.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable>;
265
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking (RHS) and masking (MaskRHS).
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect> :
   // Masked ins prepend the tied pass-through operand ($src0) and the
   // write-mask; the zero-masking form needs no pass-through.
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable>;
286
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect> :
   // Same RHS for masked and unmasked; merge-masking selects between RHS
   // and the tied pass-through $src0.
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable>;
302
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
// Same as AVX512_maskable but selects with X86selects (scalar mask select)
// instead of vselect.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, IsCommutable, 0, X86selects>;
312
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
   // With MaskOnly set, the unmasked pattern is suppressed (null_frag).
   // The MaskingConstraint is empty: $src1 is already tied via the ins dags.
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;
333
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect, "", IsCommutable>;
351
// Scalar flavor of AVX512_maskable_3src: selects with X86selects instead
// of vselect.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects, MaskOnly>;
362
// Assembler-only variant of AVX512_maskable: provides the caller's Pattern
// for the unmasked form but no masking/zero-masking ISel patterns.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;
373
// Assembler-only variant of AVX512_maskable_3src: $src1 serves as the tied
// pass-through operand and no masking ISel patterns are provided.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;
385
// Instruction with mask that puts result in mask register,
// like "compare" and "vptest".
// Emits only two forms (NAME and NAME#k) since zero-masking does not apply
// to mask-register destinations.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
    // Unmasked form.
    let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    // Masked form ("$dst {${mask}}").
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
}
407
// Wraps AVX512_maskable_custom_cmp, building the (set KRC:$dst, ...)
// patterns from the supplied RHS dags.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
419
// Standard compare-style masking: the masked result is the unmasked result
// ANDed with the incoming mask.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS), IsCommutable>;
428
// Assembler-only compare variant: no ISel patterns for either form.
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm> :
   AVX512_maskable_custom_cmp<O, F, Outs,
                             Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
                             AttSrcAsm, IntelSrcAsm, [], []>;
435
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// Unlike AVX512_maskable, a separate MaskedRHS dag is used for both the
// merge- and zero-masking patterns.
multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskedRHS,
                           bit IsCommutable = 0, SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, MaskedRHS,
                                        _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable>;
455
456
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
// 512-bit all-zeros pseudo.
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
// 512-bit all-ones pseudo.
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
469
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
// 32-bit-element variant: selects -1/0 per v16i1 mask lane.
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
// 64-bit-element variant: the constants are bitcast from v16i32.
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (bc_v8i64 (v16i32 immAllOnesV)),
                                           (bc_v8i64 (v16i32 immAllZerosV))))]>;
}
485
// 128/256-bit all-zeros pseudos, analogous to AVX512_512_SET0 above.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
493
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  // Scalar single-precision zero (FR32X).
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  // Scalar double-precision zero (FR64X).
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fpimm0)]>;
}
503
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Emits register (rr) and memory (rm) forms of a VINSERT-style subvector
// insert, with the mnemonic derived from the source vector's element type
// and element count (e.g. "vinsertf32x4").
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register-register form.
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    // Register-memory form: $src2 is loaded via From's load fragment; the
    // compressed-displacement encoding follows the source tuple form.
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }
}
542
// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
549
// Emits Pat<>s that lower vinsert_insert dags to the named instruction's
// rr/rm forms under predicates p, converting the captured insert index
// with the INSERT_get_vinsert_imm SDNodeXForm.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    // Register source -> rr form.
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Loaded source -> rm form (folds the load).
    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (bitconvert (From.LdFrag addr:$src2))),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
569
// Instantiates the full VINSERT matrix for one float/int pairing:
//   32x4 into 256/512 (AVX512VL / AVX512F) and 64x4 into 512 (AVX512F),
// plus the DQI-only 64x2 and 32x8 flavors, which are defined with a
// null_frag unmasked operator so they are selected only when masking makes
// them profitable.  Opcode128/Opcode256 are the 128-bit- and 256-bit-source
// insert opcodes respectively.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  // v4x32 into v8x32 (xmm into ymm) — requires VLX.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  // v4x32 into v16x32 (xmm into zmm).
  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  // v4x64 into v8x64 (ymm into zmm).
  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}
613
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
// 0x18/0x1a are the 128-/256-bit-source float insert opcodes;
// 0x38/0x3a are the corresponding integer insert opcodes.
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
617
// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
650
651
// Patterns that match a masked insert_subvector when a bitconvert sits
// between the vselect (mask merge/zero) and the insert.  The insert is done
// in the From/To types while the mask and select operate in the Cast type;
// all four combinations of {merge, zero} masking x {register, memory}
// source are covered.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge masking, register source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge masking, folded-load source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero masking, register source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero masking, folded-load source.
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
705
// Masked-insert-with-bitcast instantiations: xmm into ymm.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
// Integer-element inserts must select the integer (VINSERTI) flavor, not
// VINSERTF: the Cast type is integer, so using the float-domain instruction
// would incur an int/float domain crossing.  This also matches the
// VINSERTI64x2Z defms below and the mirrored VEXTRACTI64x2Z256 patterns in
// the extract section.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
731
// Masked-insert-with-bitcast instantiations: xmm into zmm.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

// Masked-insert-with-bitcast instantiations: ymm into zmm.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
783
// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
// Register form: EVEX-encoded VINSERTPS.
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: loads a scalar f32 and inserts it via scalar_to_vector.
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
}
800
801 //===----------------------------------------------------------------------===//
802 // AVX-512 VECTOR EXTRACT
803 //---
804
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Defines the register ("rr", maskable), store ("mr") and masked-store
// ("mrk", no ISel pattern) forms of one VEXTRACT flavor.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form; AVX512_maskable_split supplies the k-masked variants,
    // matching vextract_extract unmasked and vextract_for_mask when masked.
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Store form: extract directly to memory.
    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    // Masked store form: assembler/disassembler only (empty pattern).
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}
841
// Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper mirroring vinsert_for_size; the split version exists
// so masking-only instructions can pass null_frag for the unmasked operator.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
848
// Codegen pattern for the alternative types
// Lowers an extract-subvector PatFrag on alternative element types onto an
// already-defined VEXTRACT instruction, for the register ("rr") and
// extract-to-memory ("mr") forms; no new instructions are defined here.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     // Register destination; the captured subvector index becomes the
     // instruction immediate via EXTRACT_get_vextract_imm.
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     // Extract folded into a store.
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
864
// Instantiates the full VEXTRACT matrix for one float/int pairing:
//   32x4 and 64x4 from 512 (AVX512F), 32x4 from 256 (AVX512VL),
// plus the DQI-only 64x2 and 32x8 flavors, which are defined with a
// null_frag unmasked operator so they are selected only when masking makes
// them profitable.  Opcode128/Opcode256 extract 128-bit and 256-bit
// subvectors respectively.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    // v4x32 from v16x32 (xmm from zmm).
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    // v4x64 from v8x64 (ymm from zmm).
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  // v4x32 from v8x32 (xmm from ymm) — requires VLX.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
909
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// 0x19/0x1b are the 128-/256-bit float extract opcodes;
// 0x39/0x3b are the corresponding integer extract opcodes.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
913
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
947
948
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Lowering is done in two steps: take the low ymm via EXTRACT_SUBREG, then
// extract its upper half (index 1) with the VEX-encoded 256-bit extract.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
977
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Same two-step lowering as the NoVLX patterns above, but using the Z256
// extract instructions that are available with VLX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
1006
1007
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.  The extract is done in the From/To types while the
// mask and select operate in the Cast type; merge- and zero-masking of the
// register form are covered.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge masking.  The vselect passthru is of the Cast type, so bind $src0
  // with Cast.RC (the output already used Cast.RC; the input previously said
  // To.RC, which only worked because the two classes coincide).
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero masking.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1035
// Masked-extract-with-bitcast instantiations: xmm from ymm.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// Masked-extract-with-bitcast instantiations: xmm from zmm.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

// Masked-extract-with-bitcast instantiations: ymm from zmm.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1113
// vextractps - extract 32 bits from XMM
// Register form: element $src2 of the v4f32 source (viewed as v4i32) goes to
// a 32-bit GPR.
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

// Memory form: same extract, stored directly to a 32-bit memory location.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                          addr:$dst)]>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1127
1128 //===---------------------------------------------------------------------===//
1129 // AVX-512 BROADCAST
1130 //---
// Broadcast with a scalar argument: selection patterns mapping an
// X86VBroadcast of a scalar FP register (SrcInfo.FRC) onto the register form
// of the broadcast instruction named Name#ZSuffix, for the unmasked,
// merge-masked and zero-masked cases.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                            string Name,
                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
             (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
  // Merge-masked broadcast: $src0 supplies the pass-through lanes.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
  // Zero-masked broadcast: masked-off lanes are zeroed.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
1150
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts, where the broadcast produces a DestInfo
// vector that is observed (and write-masked) as a MaskInfo vector of the same
// total width via a bitconvert.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  // Register-source form.
  defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
                   T8PD, EVEX, Sched<[SchedRR]>;
  // Memory-source form: broadcasts a single scalar element from memory.
  let mayLoad = 1 in
  defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (UnmaskedOp
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (X86VBroadcast
                                   (SrcInfo.ScalarLdFrag addr:$src)))))>,
                   T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                   Sched<[SchedRM]>;
  }

  // Fold a scalar load wrapped in scalar_to_vector into the memory forms.
  // All three patterns build the instruction name from MaskInfo.ZSuffix --
  // the instructions above are defined with MaskInfo, so its suffix is the
  // authoritative one (DestInfo's matches in current instantiations, but
  // using it here was inconsistent).
  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}
1214
// Helper class to force mask and broadcast result to same type: instantiates
// the split multiclass with MaskInfo == DestInfo.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;
1222
// FP double-precision broadcast: 512-bit with AVX512, 256-bit with VLX.
// There is no 128-bit vbroadcastsd variant, hence no Z128 here.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
  }
}
1241
// FP single-precision broadcast: 512-bit with AVX512; VLX adds the 256- and
// 128-bit forms (unlike the sd variant, a 128-bit vbroadcastss exists).
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info128, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                         _.info128>,
                 EVEX_V128;
  }
}
// vbroadcastss/sd instantiations. VEX_W1X on the sd form: EVEX uses W=1,
// while the EVEX-to-VEX compression target uses W=0.
defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                       avx512vl_f32_info>;
defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                       avx512vl_f64_info>, VEX_W1X;
1269
// Integer broadcast from a GPR source (vpbroadcastd/q with a register
// operand). OpNode provides the DAG pattern for the masked variants.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins SrcRC:$src),
                         "vpbroadcast"##_.Suffix, "$src", "$src",
                         (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                         Sched<[SchedRR]>;
}
1280
// Byte/word GPR broadcast. The instruction itself takes a GR32 operand (there
// is no GR8/GR16 encoding), so it is declared with empty patterns and the
// GR8/GR16 source is widened with INSERT_SUBREG in the selection patterns
// below.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                        (outs _.RC:$dst), (ins GR32:$src),
                        !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                        !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                        "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                        "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked: widen the sub-register source to GR32 before broadcasting.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Merge-masked.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
1304
// VL wrapper for the byte/word GPR broadcast: 512-bit under prd, 256/128-bit
// additionally require VLX. The Name#Z... concatenation must mirror the defm
// suffixes so the !cast lookups in the inner multiclass resolve.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
              OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}
1318
// VL wrapper for the dword/qword GPR broadcast: 512-bit under prd, 256/128-bit
// additionally require VLX.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}
1332
// GPR-source vpbroadcast instantiations. B/W forms need BWI; D and Q share
// opcode 0x7C and are distinguished by the EVEX.W bit (VEX_W on the Q form).
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1342
// Provide aliases for broadcast from the same register class that
// automatically does the extract: a broadcast of a wider vector source is
// lowered to the xmm-source instruction on the source's low 128 bits.
multiclass avx512_int_broadcast_rm_lowering<string Name,
                                            X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
                (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
}
1352
// VL wrapper for integer broadcast with xmm/memory source, plus the
// same-register-class lowering aliases for ymm/zmm sources.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128>,
               avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256>,
                                  EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512>;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256>,
                                 EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128>,
                                 EVEX_V128;
  }
}
1373
// xmm/memory-source vpbroadcast instantiations. B/W need BWI. VEX_W1X on the
// Q form: EVEX uses W=1, the EVEX-to-VEX compressed encoding uses W=0.
defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512>, VEX_W1X;
1382
1383 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1384                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1385   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1386                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1387                            (_Dst.VT (X86SubVBroadcast
1388                              (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
1389                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1390                            AVX5128IBase, EVEX;
1391 }
1392
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns (null_frag) so that we only use the DQ instructions
// when masking is requested; unmasked cases can then fall back to the
// non-DQ equivalents.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
1407
let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}
1413
let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here. Both the
  // plain i32 load and the zero-extending i16 load feed the same broadcast.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
1435
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

// 512-bit destination, 128-bit (x4) or 256-bit (x4 of i64/f64) source tuple.
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1452
let Predicates = [HasAVX512] in {
// Bitcast-typed 256-bit subvector broadcasts are funneled through the
// i64x4/f64x4 instructions (lane granularity is irrelevant when unmasked).
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection. A
// register-source subvector broadcast becomes an insert of the source into
// the upper half of itself.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

// 128-bit subvector broadcasts of i64/f64 vectors handled via the 32x4
// instructions, which are available without DQI.
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1528
let Predicates = [HasVLX] in {
// 256-bit destination forms of the 32x4 subvector broadcasts.
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

// Bitcast-typed 128-bit subvector broadcasts reuse the 32x4 instructions.
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v8f32 (v8i32 immAllZerosV))),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;


// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection. A
// register-source subvector broadcast becomes an insert of the source into
// the upper half of itself.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}
1586
let Predicates = [HasVLX, HasDQI] in {
// NOTE(review): despite the "Z128" in these record names, both defms are
// 256-bit (EVEX_V256) instructions broadcasting a 128-bit source -- the
// suffix appears to be historical; confirm before relying on the names.
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v4f64 (v8i32 immAllZerosV))),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v4i64 (v8i32 immAllZerosV))),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
1613
let Predicates = [HasDQI] in {
// DQI-only 512-bit subvector broadcasts (64x2 and 32x8 tuples). Defined with
// avx512_subvec_broadcast_rm_dq, so only masked forms have patterns.
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1663
// 32x2 broadcast (DQI): 512- and 256-bit destinations. Uses the split
// multiclass so the mask type (_Dst) can differ from the broadcast node type
// (_Src); unmasked patterns are disabled via null_frag.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, null_frag>,
                                          EVEX_V256;
}
1677
// Integer variant: inherits the 512/256-bit forms from
// avx512_common_broadcast_32x2 and adds the 128-bit (XMM) form, which exists
// only for the integer broadcast (DQI+VLX). Note the 128-bit form uses the
// narrower WriteShuffle/WriteShuffleXLd scheduling classes.
1678 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1679                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1680   avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1681
1682   let Predicates = [HasDQI, HasVLX] in
1683     defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1684                                           WriteShuffleXLd, _Dst.info128,
1685                                           _Src.info128, _Src.info128, null_frag>,
1686                                           EVEX_V128;
1687 }
1688
// VBROADCAST{I,F}32X2: broadcast a 64-bit (2 x i32/f32) element. The _Src info
// is the i64/f64 VT family because the broadcast granule is 64 bits wide, while
// the destination lanes are 32-bit elements (_Dst).
1689 defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1690                                           avx512vl_i32_info, avx512vl_i64_info>;
1691 defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1692                                           avx512vl_f32_info, avx512vl_f64_info>;
1693
// Patterns selecting VBROADCASTSS/SD when the broadcast source is already in a
// wide (YMM/ZMM) register: the scalar to splat lives in element 0, so we
// extract the low XMM subregister and broadcast from there. The VLX-guarded
// pair handles 256-bit destinations; the unguarded ZMM patterns need only
// base AVX512F.
1694 let Predicates = [HasVLX] in {
1695 def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
1696           (VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1697 def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
1698           (VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1699 }
1700
1701 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
1702           (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
1703 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
1704           (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1705
1706 def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
1707           (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
1708 def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
1709           (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1710
1711 //===----------------------------------------------------------------------===//
1712 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1713 //---
// One register-only VPBROADCASTM* instruction: expand a mask register (KRC)
// into a vector where each element reflects the corresponding mask bit,
// matched via the X86VBroadcastm node. There is no memory form.
1714 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1715                                   X86VectorVTInfo _, RegisterClass KRC> {
1716   def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1717                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1718                   [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1719                   EVEX, Sched<[WriteShuffle]>;
1720 }
1721
// Vector-length expansion for the mask-to-vector broadcasts: ZMM form under
// AVX512CD, YMM/XMM forms additionally under VLX. The same mask register
// class KRC feeds all three widths.
1722 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1723                                  AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1724   let Predicates = [HasCDI] in
1725     defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1726   let Predicates = [HasCDI, HasVLX] in {
1727     defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1728     defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1729   }
1730 }
1731
// vpbroadcastmw2d: 16-bit mask -> v*i32 lanes; vpbroadcastmb2q: 8-bit mask ->
// v*i64 lanes (VEX_W selects the 64-bit element encoding).
1732 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1733                                                avx512vl_i32_info, VK16>;
1734 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1735                                                avx512vl_i64_info, VK8>, VEX_W;
1736
1737 //===----------------------------------------------------------------------===//
1738 // -- VPERMI2 - 3 source operands form --
// VPERMI2* register and full-vector memory forms. The index operand is tied
// to the destination ($src1 = $dst) and may have a different VT than the data
// (IdxVT vs _), hence the _cast maskable helper. The X86VPermt2 node is used
// with the index as the middle operand to model the "i" (indices overwritten)
// variant.
1739 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1740                          X86FoldableSchedWrite sched,
1741                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1742 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1743     hasSideEffects = 0 in {
1744   defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1745           (ins _.RC:$src2, _.RC:$src3),
1746           OpcodeStr, "$src3, $src2", "$src2, $src3",
1747           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1748           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1749
1750   let mayLoad = 1 in
1751   defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1752             (ins _.RC:$src2, _.MemOp:$src3),
1753             OpcodeStr, "$src3, $src2", "$src2, $src3",
1754             (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1755                    (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
1756             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
1757   }
1758 }
1759
// Broadcast-memory (EVEX.b) form of VPERMI2*: the $src3 data operand is a
// scalar load splatted via X86VBroadcast, printed with the {1toN} broadcast
// suffix from _.BroadcastStr.
1760 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1761                             X86FoldableSchedWrite sched,
1762                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1763   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1764       hasSideEffects = 0, mayLoad = 1 in
1765   defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1766               (ins _.RC:$src2, _.ScalarMemOp:$src3),
1767               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1768               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1769               (_.VT (X86VPermt2 _.RC:$src2,
1770                IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1771               AVX5128IBase, EVEX_4V, EVEX_B,
1772               Sched<[sched.Folded, ReadAfterLd]>;
1773 }
1774
// All-widths expansion for the 32/64-bit-element VPERMI2 forms (which also
// get the broadcast-memory variant). ZMM is unconditional (AVX512F); the
// YMM/XMM forms require VLX.
1775 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1776                                X86FoldableSchedWrite sched,
1777                                AVX512VLVectorVTInfo VTInfo,
1778                                AVX512VLVectorVTInfo ShuffleMask> {
1779   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1780                            ShuffleMask.info512>,
1781             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1782                              ShuffleMask.info512>, EVEX_V512;
1783   let Predicates = [HasVLX] in {
1784   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1785                                ShuffleMask.info128>,
1786                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1787                                   ShuffleMask.info128>, EVEX_V128;
1788   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1789                                ShuffleMask.info256>,
1790                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1791                                   ShuffleMask.info256>, EVEX_V256;
1792   }
1793 }
1794
// All-widths expansion for the byte/word VPERMI2 forms. No broadcast-memory
// variant: EVEX embedded broadcast does not exist for 8/16-bit elements. The
// feature predicate (HasBWI or HasVBMI) is passed in by the instantiation.
1795 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1796                                   X86FoldableSchedWrite sched,
1797                                   AVX512VLVectorVTInfo VTInfo,
1798                                   AVX512VLVectorVTInfo Idx,
1799                                   Predicate Prd> {
1800   let Predicates = [Prd] in
1801   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1802                            Idx.info512>, EVEX_V512;
1803   let Predicates = [Prd, HasVLX] in {
1804   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1805                                Idx.info128>, EVEX_V128;
1806   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1807                                Idx.info256>,  EVEX_V256;
1808   }
1809 }
1810
// VPERMI2 instantiations. The fp forms (PS/PD) still use integer index VTs
// (avx512vl_i32/i64_info), which is why avx512_perm_i takes a separate IdxVT.
1811 defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1812                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1813 defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1814                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1815 defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1816                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1817                   VEX_W, EVEX_CD8<16, CD8VF>;
1818 defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1819                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1820                   EVEX_CD8<8, CD8VF>;
1821 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1822                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1823 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1824                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1825
1826 // Extra patterns to deal with extra bitcasts due to passthru and index being
1827 // different types on the fp versions.
// Each pattern matches a masked VPERMT2 whose index/passthru register was
// bitcast from CastVT (typically vXi64 — see the TODO below) and selects the
// corresponding masked VPERMI2PS* instruction (rrk/rmk/rmbk for reg, full
// memory, and broadcast memory forms respectively).
1828 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1829                                   X86VectorVTInfo IdxVT,
1830                                   X86VectorVTInfo CastVT> {
1831   def : Pat<(_.VT (vselect _.KRCWM:$mask,
1832                              (X86VPermt2 (_.VT _.RC:$src2),
1833                                          (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1834                              (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1835             (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1836                                                 _.RC:$src2, _.RC:$src3)>;
1837   def : Pat<(_.VT (vselect _.KRCWM:$mask,
1838                              (X86VPermt2 _.RC:$src2,
1839                                          (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1840                                          (_.LdFrag addr:$src3)),
1841                              (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1842             (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1843                                                 _.RC:$src2, addr:$src3)>;
1844   def : Pat<(_.VT (vselect _.KRCWM:$mask,
1845                              (X86VPermt2 _.RC:$src2,
1846                                          (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1847                                          (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
1848                              (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1849             (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1850                                                  _.RC:$src2, addr:$src3)>;
1851 }
1852
1853 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Instantiate the bitcast-folding patterns above for VPERMI2PS at all three
// vector widths, with vXi64 as the cast-source type.
1854 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1855 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1856 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1857
1858 // VPERMT2
// VPERMT2* register and full-vector memory forms. Unlike VPERMI2, here the
// tied operand ($src1 = $dst) is a data table and the index register is
// $src2, so no VT cast helper is needed.
1859 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1860                          X86FoldableSchedWrite sched,
1861                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1862 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1863   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1864           (ins IdxVT.RC:$src2, _.RC:$src3),
1865           OpcodeStr, "$src3, $src2", "$src2, $src3",
1866           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1867           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1868
1869   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1870             (ins IdxVT.RC:$src2, _.MemOp:$src3),
1871             OpcodeStr, "$src3, $src2", "$src2, $src3",
1872             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1873                    (bitconvert (_.LdFrag addr:$src3)))), 1>,
1874             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
1875   }
1876 }
// Broadcast-memory (EVEX.b) form of VPERMT2*, mirroring avx512_perm_i_mb but
// with the VPERMT2 operand order (tied data first, index second).
1877 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1878                             X86FoldableSchedWrite sched,
1879                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1880   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1881   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1882               (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1883               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1884               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1885               (_.VT (X86VPermt2 _.RC:$src1,
1886                IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1887               AVX5128IBase, EVEX_4V, EVEX_B,
1888               Sched<[sched.Folded, ReadAfterLd]>;
1889 }
1890
// All-widths expansion for 32/64-bit-element VPERMT2 (with broadcast forms);
// structure parallels avx512_perm_i_sizes.
1891 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1892                                X86FoldableSchedWrite sched,
1893                                AVX512VLVectorVTInfo VTInfo,
1894                                AVX512VLVectorVTInfo ShuffleMask> {
1895   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1896                               ShuffleMask.info512>,
1897             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1898                               ShuffleMask.info512>, EVEX_V512;
1899   let Predicates = [HasVLX] in {
1900   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1901                               ShuffleMask.info128>,
1902                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1903                               ShuffleMask.info128>, EVEX_V128;
1904   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1905                               ShuffleMask.info256>,
1906                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1907                               ShuffleMask.info256>, EVEX_V256;
1908   }
1909 }
1910
// All-widths expansion for byte/word VPERMT2; no broadcast forms (embedded
// broadcast is unavailable for 8/16-bit elements). Predicate supplied by the
// instantiation (HasBWI or HasVBMI).
1911 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1912                                   X86FoldableSchedWrite sched,
1913                                   AVX512VLVectorVTInfo VTInfo,
1914                                   AVX512VLVectorVTInfo Idx, Predicate Prd> {
1915   let Predicates = [Prd] in
1916   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1917                            Idx.info512>, EVEX_V512;
1918   let Predicates = [Prd, HasVLX] in {
1919   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1920                                Idx.info128>, EVEX_V128;
1921   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1922                                Idx.info256>, EVEX_V256;
1923   }
1924 }
1925
// VPERMT2 instantiations; as with VPERMI2, the fp forms use integer index VTs.
1926 defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1927                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1928 defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1929                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1930 defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1931                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1932                   VEX_W, EVEX_CD8<16, CD8VF>;
1933 defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1934                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1935                   EVEX_CD8<8, CD8VF>;
1936 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1937                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1938 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1939                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1940
1941 //===----------------------------------------------------------------------===//
1942 // AVX-512 - BLEND using mask
1943 //
1944
// VBLENDM*/VPBLENDM* register and full-vector memory forms, in all masking
// variants (unmasked, {k} merge-masked, {k}{z} zero-masked). All patterns are
// empty ([]): these instructions are selected by hand elsewhere, so these
// defs exist for encoding/assembly only (hence hasSideEffects = 0).
1945 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1946                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1947   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1948   def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1949              (ins _.RC:$src1, _.RC:$src2),
1950              !strconcat(OpcodeStr,
1951              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1952              EVEX_4V, Sched<[sched]>;
1953   def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1954              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1955              !strconcat(OpcodeStr,
1956              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1957              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1958   def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1959              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1960              !strconcat(OpcodeStr,
1961              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1962              []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1963   let mayLoad = 1 in {
1964   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1965              (ins _.RC:$src1, _.MemOp:$src2),
1966              !strconcat(OpcodeStr,
1967              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1968              []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1969              Sched<[sched.Folded, ReadAfterLd]>;
1970   def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1971              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1972              !strconcat(OpcodeStr,
1973              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1974              []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1975              Sched<[sched.Folded, ReadAfterLd]>;
1976   def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1977              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1978              !strconcat(OpcodeStr,
1979              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1980              []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1981              Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
1982   }
1983   }
1984 }
// Broadcast-memory (EVEX.b) blend forms, again pattern-less (assembly and
// encoding only). Only instantiated for 32/64-bit element blends.
1985 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1986                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1987   let mayLoad = 1, hasSideEffects = 0 in {
1988   def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1989       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1990        !strconcat(OpcodeStr,
1991             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1992             "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1993       EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1994       Sched<[sched.Folded, ReadAfterLd]>;
1995
1996   def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1997       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1998        !strconcat(OpcodeStr,
1999             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2000             "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2001       EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2002       Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
2003
2004   def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2005       (ins _.RC:$src1, _.ScalarMemOp:$src2),
2006        !strconcat(OpcodeStr,
2007             "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2008             "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2009       EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2010       Sched<[sched.Folded, ReadAfterLd]>;
2011   }
2012 }
2013
// All-widths expansion for 32/64-bit-element blends: every width gets both
// the regular and broadcast-memory forms; YMM/XMM need VLX.
2014 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2015                         AVX512VLVectorVTInfo VTInfo> {
2016   defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2017            WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2018                                  EVEX_V512;
2019
2020   let Predicates = [HasVLX] in {
2021     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2022                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2023                                       EVEX_V256;
2024     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2025                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2026                                       EVEX_V128;
2027   }
2028 }
2029
// All-widths expansion for byte/word blends: BWI-gated and without broadcast
// forms (embedded broadcast is unavailable for 8/16-bit elements).
2030 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2031                         AVX512VLVectorVTInfo VTInfo> {
2032   let Predicates = [HasBWI] in
2033     defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2034                                EVEX_V512;
2035
2036   let Predicates = [HasBWI, HasVLX] in {
2037     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2038                                   EVEX_V256;
2039     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2040                                   EVEX_V128;
2041   }
2042 }
2043
// Blend instantiations: fp (PS/PD) and dword/qword use the dq expansion with
// broadcast forms; byte/word use the BWI-gated bw expansion.
2044 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2045                               avx512vl_f32_info>;
2046 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2047                               avx512vl_f64_info>, VEX_W;
2048 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2049                               avx512vl_i32_info>;
2050 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2051                               avx512vl_i64_info>, VEX_W;
2052 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2053                               avx512vl_i8_info>;
2054 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2055                               avx512vl_i16_info>, VEX_W;
2056
2057 //===----------------------------------------------------------------------===//
2058 // Compare Instructions
2059 //===----------------------------------------------------------------------===//
2060
2061 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2062
// AVX-512 scalar compares (VCMPSS/VCMPSD) producing a mask register.
// Variants defined here:
//   rr_Int/rm_Int  - intrinsic forms with the condition code folded into the
//                    mnemonic ("vcmpeqss" etc., via AVXCC and "vcmp${cc}").
//   rrb_Int        - {sae} (suppress-all-exceptions) form using OpNodeRnd.
//   *_alt          - assembler-only aliases taking an explicit u8 immediate
//                    condition code instead of a pseudo-mnemonic.
//   rr/rm          - isCodeGenOnly forms operating on scalar FRC registers,
//                    used when the compare comes from scalar IR rather than
//                    an intrinsic.
2063 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
2064                              X86FoldableSchedWrite sched> {
2065   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2066                       (outs _.KRC:$dst),
2067                       (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2068                       "vcmp${cc}"#_.Suffix,
2069                       "$src2, $src1", "$src1, $src2",
2070                       (OpNode (_.VT _.RC:$src1),
2071                               (_.VT _.RC:$src2),
2072                               imm:$cc)>, EVEX_4V, Sched<[sched]>;
2073   let mayLoad = 1 in
2074   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2075                     (outs _.KRC:$dst),
2076                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
2077                     "vcmp${cc}"#_.Suffix,
2078                     "$src2, $src1", "$src1, $src2",
2079                     (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2080                         imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2081                     Sched<[sched.Folded, ReadAfterLd]>;
2082
2083   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2084                      (outs _.KRC:$dst),
2085                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2086                      "vcmp${cc}"#_.Suffix,
2087                      "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2088                      (OpNodeRnd (_.VT _.RC:$src1),
2089                                 (_.VT _.RC:$src2),
2090                                 imm:$cc,
2091                                 (i32 FROUND_NO_EXC))>,
2092                      EVEX_4V, EVEX_B, Sched<[sched]>;
2093   // Accept explicit immediate argument form instead of comparison code.
2094   let isAsmParserOnly = 1, hasSideEffects = 0 in {
2095     defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2096                         (outs VK1:$dst),
2097                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2098                         "vcmp"#_.Suffix,
2099                         "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V,
2100                         Sched<[sched]>, NotMemoryFoldable;
2101   let mayLoad = 1 in
2102     defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2103                         (outs _.KRC:$dst),
2104                         (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2105                         "vcmp"#_.Suffix,
2106                         "$cc, $src2, $src1", "$src1, $src2, $cc">,
2107                         EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2108                         Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
2109
2110     defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2111                        (outs _.KRC:$dst),
2112                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2113                        "vcmp"#_.Suffix,
2114                        "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
2115                        EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable;
2116   }// let isAsmParserOnly = 1, hasSideEffects = 0
2117
2118   let isCodeGenOnly = 1 in {
2119     let isCommutable = 1 in
2120     def rr : AVX512Ii8<0xC2, MRMSrcReg,
2121                 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
2122                 !strconcat("vcmp${cc}", _.Suffix,
2123                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2124                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2125                                           _.FRC:$src2,
2126                                           imm:$cc))]>,
2127                 EVEX_4V, Sched<[sched]>;
2128     def rm : AVX512Ii8<0xC2, MRMSrcMem,
2129               (outs _.KRC:$dst),
2130               (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2131               !strconcat("vcmp${cc}", _.Suffix,
2132                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2133               [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2134                                         (_.ScalarLdFrag addr:$src2),
2135                                         imm:$cc))]>,
2136               EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2137               Sched<[sched.Folded, ReadAfterLd]>;
2138   }
2139 }
2140
// Instantiate the scalar compares: XS prefix selects the SS encoding, XD+W
// the SD encoding.
2141 let Predicates = [HasAVX512] in {
2142   let ExeDomain = SSEPackedSingle in
2143   defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
2144                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2145   let ExeDomain = SSEPackedDouble in
2146   defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
2147                                    SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2148 }
2149
// Packed integer compares to mask (VPCMPEQ*/VPCMPGT*): rr/rm plus their
// merge-masked rrk/rmk forms. Masked patterns match the mask AND explicitly
// because the destination is a mask register, not a vector.
2150 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2151                               X86FoldableSchedWrite sched, X86VectorVTInfo _,
2152                               bit IsCommutable> {
2153   let isCommutable = IsCommutable in
2154   def rr : AVX512BI<opc, MRMSrcReg,
2155              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2156              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2157              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
2158              EVEX_4V, Sched<[sched]>;
2159   def rm : AVX512BI<opc, MRMSrcMem,
2160              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2161              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2162              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2163                                        (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
2164              EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
2165   let isCommutable = IsCommutable in
2166   def rrk : AVX512BI<opc, MRMSrcReg,
2167               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2168               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2169                           "$dst {${mask}}, $src1, $src2}"),
2170               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2171                                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
2172               EVEX_4V, EVEX_K, Sched<[sched]>;
2173   def rmk : AVX512BI<opc, MRMSrcMem,
2174               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2175               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2176                           "$dst {${mask}}, $src1, $src2}"),
2177               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2178                                    (OpNode (_.VT _.RC:$src1),
2179                                        (_.VT (bitconvert
2180                                               (_.LdFrag addr:$src2))))))]>,
2181               EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
2182 }
2183
// Adds the broadcast-memory (EVEX.b) rmb/rmbk forms on top of
// avx512_icmp_packed; only instantiated for 32/64-bit element compares.
2184 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2185                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
2186                                   bit IsCommutable> :
2187            avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> {
2188   def rmb : AVX512BI<opc, MRMSrcMem,
2189               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2190               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2191                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2192               [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2193                               (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
2194               EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2195   def rmbk : AVX512BI<opc, MRMSrcMem,
2196                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2197                                        _.ScalarMemOp:$src2),
2198                !strconcat(OpcodeStr,
2199                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2200                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2201                [(set _.KRC:$dst, (and _.KRCWM:$mask,
2202                                       (OpNode (_.VT _.RC:$src1),
2203                                         (X86VBroadcast
2204                                           (_.ScalarLdFrag addr:$src2)))))]>,
2205                EVEX_4V, EVEX_K, EVEX_B,
2206                Sched<[sched.Folded, ReadAfterLd]>;
2207 }
2208
// All-widths expansion (no broadcast forms) for the packed integer compares;
// base predicate comes from the caller (HasBWI for b/w, HasAVX512 for d/q).
2209 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2210                                  X86SchedWriteWidths sched,
2211                                  AVX512VLVectorVTInfo VTInfo, Predicate prd,
2212                                  bit IsCommutable = 0> {
2213   let Predicates = [prd] in
2214   defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM,
2215                               VTInfo.info512, IsCommutable>, EVEX_V512;
2216
2217   let Predicates = [prd, HasVLX] in {
2218     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM,
2219                                    VTInfo.info256, IsCommutable>, EVEX_V256;
2220     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM,
2221                                    VTInfo.info128, IsCommutable>, EVEX_V128;
2222   }
2223 }
2224
// All-widths expansion including the broadcast-memory forms (32/64-bit
// element compares only).
2225 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2226                                      PatFrag OpNode, X86SchedWriteWidths sched,
2227                                      AVX512VLVectorVTInfo VTInfo,
2228                                      Predicate prd, bit IsCommutable = 0> {
2229   let Predicates = [prd] in
2230   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
2231                                   VTInfo.info512, IsCommutable>, EVEX_V512;
2232
2233   let Predicates = [prd, HasVLX] in {
2234     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM,
2235                                        VTInfo.info256, IsCommutable>, EVEX_V256;
2236     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM,
2237                                        VTInfo.info128, IsCommutable>, EVEX_V128;
2238   }
2239 }
2240
2241 // This fragment treats X86cmpm as commutable to help match loads in both
2242 // operands for PCMPEQ.
2243 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// Equality compare-to-mask; built on the commutable SETCC node above so a
// load can be folded into either operand.
2244 def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
2245                            (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
// Signed greater-than compare-to-mask; plain (non-commutable) setcc.
2246 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2247                          (setcc node:$src1, node:$src2, SETGT)>;
2248
2249 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2250 // increase the pattern complexity the way an immediate would.
2251 let AddedComplexity = 2 in {
2252 // FIXME: Is there a better scheduler class for VPCMP?
// Byte/word compares use the plain _vl multiclass (no embedded broadcast for
// 8/16-bit elements); dword/qword compares use _rmb_vl for broadcast forms.
2253 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
2254                       SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2255                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2256
2257 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
2258                       SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2259                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2260
2261 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
2262                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2263                 EVEX_CD8<32, CD8VF>;
2264
2265 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
2266                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2267                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2268
// Greater-than compares are not commutable (X86pcmpgtm), so IsCommutable
// defaults to 0 here.
2269 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
2270                       SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2271                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2272
2273 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
2274                       SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2275                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2276
2277 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
2278                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2279                 EVEX_CD8<32, CD8VF>;
2280
2281 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
2282                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2283                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2284 }
2285
// VPCMP/VPCMPU with an explicit condition-code operand: reg-reg (rri),
// reg-mem (rmi), and their zero-masked variants (rrik/rmik).  'Frag' matches
// the setcc and 'Frag.OperandTransform' encodes the condition into the
// immediate; 'CommFrag' is the operand-swapped version used to fold a load
// appearing in the first operand.
2286 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2287                           PatFrag CommFrag, X86FoldableSchedWrite sched,
2288                           X86VectorVTInfo _, string Name> {
2289   let isCommutable = 1 in
2290   def rri : AVX512AIi8<opc, MRMSrcReg,
2291              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
2292              !strconcat("vpcmp${cc}", Suffix,
2293                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2294              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2295                                                 (_.VT _.RC:$src2),
2296                                                 cond)))]>,
2297              EVEX_4V, Sched<[sched]>;
// Register-memory form; load is folded into the second operand.
2298   def rmi : AVX512AIi8<opc, MRMSrcMem,
2299              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
2300              !strconcat("vpcmp${cc}", Suffix,
2301                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2302              [(set _.KRC:$dst, (_.KVT
2303                                 (Frag:$cc
2304                                  (_.VT _.RC:$src1),
2305                                  (_.VT (bitconvert (_.LdFrag addr:$src2))),
2306                                  cond)))]>,
2307              EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
// Masked variants: the result mask is ANDed with the writemask $mask.
2308   let isCommutable = 1 in
2309   def rrik : AVX512AIi8<opc, MRMSrcReg,
2310               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2311                                       AVX512ICC:$cc),
2312               !strconcat("vpcmp${cc}", Suffix,
2313                          "\t{$src2, $src1, $dst {${mask}}|",
2314                          "$dst {${mask}}, $src1, $src2}"),
2315               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2316                                      (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2317                                                       (_.VT _.RC:$src2),
2318                                                       cond))))]>,
2319               EVEX_4V, EVEX_K, Sched<[sched]>;
2320   def rmik : AVX512AIi8<opc, MRMSrcMem,
2321               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2322                                     AVX512ICC:$cc),
2323               !strconcat("vpcmp${cc}", Suffix,
2324                          "\t{$src2, $src1, $dst {${mask}}|",
2325                          "$dst {${mask}}, $src1, $src2}"),
2326               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2327                                      (_.KVT
2328                                       (Frag:$cc
2329                                        (_.VT _.RC:$src1),
2330                                        (_.VT (bitconvert
2331                                               (_.LdFrag addr:$src2))),
2332                                        cond))))]>,
2333               EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
2334
2335   // Accept explicit immediate argument form instead of comparison code.
2336   let isAsmParserOnly = 1, hasSideEffects = 0 in {
2337     def rri_alt : AVX512AIi8<opc, MRMSrcReg,
2338                (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2339                !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2340                           "$dst, $src1, $src2, $cc}"), []>,
2341                EVEX_4V, Sched<[sched]>, NotMemoryFoldable;
2342     let mayLoad = 1 in
2343     def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
2344                (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2345                !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2346                           "$dst, $src1, $src2, $cc}"), []>,
2347                EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
2348     def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
2349                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2350                                        u8imm:$cc),
2351                !strconcat("vpcmp", Suffix,
2352                           "\t{$cc, $src2, $src1, $dst {${mask}}|",
2353                           "$dst {${mask}}, $src1, $src2, $cc}"), []>,
2354                EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable;
2355     let mayLoad = 1 in
2356     def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
2357                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2358                                        u8imm:$cc),
2359                !strconcat("vpcmp", Suffix,
2360                           "\t{$cc, $src2, $src1, $dst {${mask}}|",
2361                           "$dst {${mask}}, $src1, $src2, $cc}"), []>,
2362                EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>,
2363                NotMemoryFoldable;
2364   }
2365
// Select the memory form when the load is in the *first* operand: CommFrag
// swaps the operands and OperandTransform rewrites the condition immediate
// to match.
2366   def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
2367                                  (_.VT _.RC:$src1), cond)),
2368             (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2369              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2370
2371   def : Pat<(and _.KRCWM:$mask,
2372                  (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
2373                                       (_.VT _.RC:$src1), cond))),
2374             (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2375              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2376              (CommFrag.OperandTransform $cc))>;
2377 }
2378
// Extends avx512_icmp_cc with embedded-broadcast memory forms (rmib/rmibk):
// a single scalar element is loaded and broadcast to the full vector before
// the compare.  Only meaningful for 32/64-bit element types.
2379 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2380                               PatFrag CommFrag, X86FoldableSchedWrite sched,
2381                               X86VectorVTInfo _, string Name> :
2382            avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
2383   def rmib : AVX512AIi8<opc, MRMSrcMem,
2384              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2385                                      AVX512ICC:$cc),
2386              !strconcat("vpcmp${cc}", Suffix,
2387                         "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2388                         "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2389              [(set _.KRC:$dst, (_.KVT (Frag:$cc
2390                                        (_.VT _.RC:$src1),
2391                                        (X86VBroadcast
2392                                         (_.ScalarLdFrag addr:$src2)),
2393                                        cond)))]>,
2394              EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
// Masked broadcast form: result ANDed with the writemask.
2395   def rmibk : AVX512AIi8<opc, MRMSrcMem,
2396               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2397                                        _.ScalarMemOp:$src2, AVX512ICC:$cc),
2398               !strconcat("vpcmp${cc}", Suffix,
2399                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2400                        "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2401               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2402                                      (_.KVT (Frag:$cc
2403                                              (_.VT _.RC:$src1),
2404                                              (X86VBroadcast
2405                                               (_.ScalarLdFrag addr:$src2)),
2406                                              cond))))]>,
2407               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2408
2409   // Accept explicit immediate argument form instead of comparison code.
2410   let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
2411     def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
2412                (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2413                                        u8imm:$cc),
2414                !strconcat("vpcmp", Suffix,
2415                    "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2416                    "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
2417                EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
2418                NotMemoryFoldable;
2419     def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
2420                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2421                                        _.ScalarMemOp:$src2, u8imm:$cc),
2422                !strconcat("vpcmp", Suffix,
2423                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2424                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
2425                EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
2426                NotMemoryFoldable;
2427   }
2428
// Fold a broadcast load appearing in the first operand by swapping operands
// (CommFrag) and rewriting the condition immediate (OperandTransform).
2429   def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2430                     (_.VT _.RC:$src1), cond)),
2431             (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2432              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2433
2434   def : Pat<(and _.KRCWM:$mask,
2435                  (_.KVT (CommFrag:$cc (X86VBroadcast
2436                                        (_.ScalarLdFrag addr:$src2)),
2437                                       (_.VT _.RC:$src1), cond))),
2438             (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2439              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2440              (CommFrag.OperandTransform $cc))>;
2441 }
2442
// Instantiate avx512_icmp_cc at ZMM under 'prd', plus YMM/XMM when HasVLX is
// also available.  NAME is forwarded so the commuted-load patterns can
// !cast back to the instructions defined here.
2443 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2444                              PatFrag CommFrag, X86SchedWriteWidths sched,
2445                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2446   let Predicates = [prd] in
2447   defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
2448                           VTInfo.info512, NAME>, EVEX_V512;
2449
2450   let Predicates = [prd, HasVLX] in {
2451     defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
2452                                VTInfo.info256, NAME>, EVEX_V256;
2453     defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
2454                                VTInfo.info128, NAME>, EVEX_V128;
2455   }
2456 }
2457
// Instantiate avx512_icmp_cc_rmb (compare-with-CC including embedded
// broadcast forms) at ZMM under 'prd', plus YMM/XMM when HasVLX is also
// available.  NAME is forwarded for the !cast-based commuted-load patterns.
2458 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2459                                  PatFrag CommFrag, X86SchedWriteWidths sched,
2460                                  AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2461   let Predicates = [prd] in
2462   defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
2463                               VTInfo.info512, NAME>, EVEX_V512;
2464
2465   let Predicates = [prd, HasVLX] in {
2466     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
2467                                    VTInfo.info256, NAME>, EVEX_V256;
2468     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
2469                                    VTInfo.info128, NAME>, EVEX_V128;
2470   }
2471 }
2472
// Transform a setcc's CondCode operand into the VPCMP immediate that encodes
// the same comparison.
2473 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2474   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2475   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2476   return getI8Imm(SSECC, SDLoc(N));
2477 }]>;
2478
2479 // Swapped operand version of the above.
// Produces the VPCMP immediate for the comparison with its operands swapped,
// via X86::getSwappedVPCMPImm.  Used by the commuted load-folding patterns.
2480 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2481   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2482   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2483   SSECC = X86::getSwappedVPCMPImm(SSECC);
2484   return getI8Imm(SSECC, SDLoc(N));
2485 }]>;
2486
// Matches a setcc with a signed condition code; the immediate is produced by
// X86pcmpm_imm.  Selects the signed VPCMP instructions.
2487 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2488                        (setcc node:$src1, node:$src2, node:$cc), [{
2489   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2490   return !ISD::isUnsignedIntSetCC(CC);
2491 }], X86pcmpm_imm>;
2492
2493 // Same as above, but commutes immediate. Use for load folding.
2494 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2495                                (setcc node:$src1, node:$src2, node:$cc), [{
2496   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2497   return !ISD::isUnsignedIntSetCC(CC);
2498 }], X86pcmpm_imm_commute>;
2499
// Unsigned counterpart of X86pcmpm; selects the VPCMPU instructions.
2500 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2501                         (setcc node:$src1, node:$src2, node:$cc), [{
2502   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2503   return ISD::isUnsignedIntSetCC(CC);
2504 }], X86pcmpm_imm>;
2505
2506 // Same as above, but commutes immediate. Use for load folding.
2507 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2508                                 (setcc node:$src1, node:$src2, node:$cc), [{
2509   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2510   return ISD::isUnsignedIntSetCC(CC);
2511 }], X86pcmpm_imm_commute>;
2512
2513 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Signed (VPCMP) and unsigned (VPCMPU) condition-code compares.  Byte/word
// forms need BWI and have no broadcast (_vl); dword/qword forms use _rmb_vl
// for the embedded-broadcast memory operands.
2514 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
2515                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2516                                 EVEX_CD8<8, CD8VF>;
2517 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
2518                                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2519                                  EVEX_CD8<8, CD8VF>;
2520
2521 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
2522                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2523                                 VEX_W, EVEX_CD8<16, CD8VF>;
2524 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
2525                                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2526                                  VEX_W, EVEX_CD8<16, CD8VF>;
2527
2528 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
2529                                     SchedWriteVecALU, avx512vl_i32_info,
2530                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2531 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
2532                                      SchedWriteVecALU, avx512vl_i32_info,
2533                                      HasAVX512>, EVEX_CD8<32, CD8VF>;
2534
2535 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
2536                                     SchedWriteVecALU, avx512vl_i64_info,
2537                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2538 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
2539                                      SchedWriteVecALU, avx512vl_i64_info,
2540                                      HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2541
// Packed FP compare (VCMPPS/VCMPPD): reg-reg, reg-mem, and reg-broadcast
// forms, each via AVX512_maskable_cmp (which also generates the masked
// variants).  The trailing Pat defs handle loads in the first operand.
2542 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2543                               string Name> {
2544   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2545                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
2546                    "vcmp${cc}"#_.Suffix,
2547                    "$src2, $src1", "$src1, $src2",
2548                    (X86cmpm (_.VT _.RC:$src1),
2549                          (_.VT _.RC:$src2),
2550                            imm:$cc), 1>,
2551                    Sched<[sched]>;
2552
2553   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2554                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
2555                 "vcmp${cc}"#_.Suffix,
2556                 "$src2, $src1", "$src1, $src2",
2557                 (X86cmpm (_.VT _.RC:$src1),
2558                         (_.VT (bitconvert (_.LdFrag addr:$src2))),
2559                         imm:$cc)>,
2560                 Sched<[sched.Folded, ReadAfterLd]>;
2561
// Embedded-broadcast form: one scalar loaded and splatted to the vector.
2562   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2563                 (outs _.KRC:$dst),
2564                 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2565                 "vcmp${cc}"#_.Suffix,
2566                 "${src2}"##_.BroadcastStr##", $src1",
2567                 "$src1, ${src2}"##_.BroadcastStr,
2568                 (X86cmpm (_.VT _.RC:$src1),
2569                         (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2570                         imm:$cc)>,
2571                 EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2572   // Accept explicit immediate argument form instead of comparison code.
2573   let isAsmParserOnly = 1, hasSideEffects = 0 in {
2574     defm  rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2575                          (outs _.KRC:$dst),
2576                          (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2577                          "vcmp"#_.Suffix,
2578                          "$cc, $src2, $src1", "$src1, $src2, $cc">,
2579                          Sched<[sched]>, NotMemoryFoldable;
2580
2581     let mayLoad = 1 in {
2582       defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2583                              (outs _.KRC:$dst),
2584                              (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2585                              "vcmp"#_.Suffix,
2586                              "$cc, $src2, $src1", "$src1, $src2, $cc">,
2587                              Sched<[sched.Folded, ReadAfterLd]>,
2588                              NotMemoryFoldable;
2589
2590       defm  rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2591                          (outs _.KRC:$dst),
2592                          (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2593                          "vcmp"#_.Suffix,
2594                          "$cc, ${src2}"##_.BroadcastStr##", $src1",
2595                          "$src1, ${src2}"##_.BroadcastStr##", $cc">,
2596                          EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
2597                          NotMemoryFoldable;
2598     }
2599   }
2600
2601   // Patterns for selecting with loads in other operand.
// CommutableCMPCC restricts this to condition codes that are symmetric under
// operand swap, so no immediate rewrite is needed.
2602   def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2603                      CommutableCMPCC:$cc),
2604             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2605                                                       imm:$cc)>;
2606
2607   def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
2608                                          (_.VT _.RC:$src1),
2609                                          CommutableCMPCC:$cc)),
2610             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2611                                                        _.RC:$src1, addr:$src2,
2612                                                        imm:$cc)>;
2613
2614   def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2615                      (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2616             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2617                                                        imm:$cc)>;
2618
2619   def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
2620                                           (_.ScalarLdFrag addr:$src2)),
2621                                          (_.VT _.RC:$src1),
2622                                          CommutableCMPCC:$cc)),
2623             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2624                                                         _.RC:$src1, addr:$src2,
2625                                                         imm:$cc)>;
2626 }
2627
// SAE ("suppress all exceptions") register-register compare variant,
// selected from X86cmpmRnd with FROUND_NO_EXC.  EVEX_B here encodes the
// {sae} modifier rather than a broadcast.
2628 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2629   // comparison code form (VCMP[EQ/LT/LE/...])
2630   defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2631                      (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2632                      "vcmp${cc}"#_.Suffix,
2633                      "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2634                      (X86cmpmRnd (_.VT _.RC:$src1),
2635                                     (_.VT _.RC:$src2),
2636                                     imm:$cc,
2637                                 (i32 FROUND_NO_EXC))>,
2638                      EVEX_B, Sched<[sched]>;
2639
2640   // Accept explicit immediate argument form instead of comparison code.
2640   let isAsmParserOnly = 1, hasSideEffects = 0 in {
2641     defm  rrib_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2642                          (outs _.KRC:$dst),
2643                          (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2644                          "vcmp"#_.Suffix,
2645                          "$cc, {sae}, $src2, $src1",
2646                          "$src1, $src2, {sae}, $cc">,
2647                          EVEX_B, Sched<[sched]>, NotMemoryFoldable;
2648    }
2649 }
2650
// Top-level packed FP compare: ZMM (with the additional SAE form) under
// HasAVX512; XMM/YMM forms additionally require HasVLX.
2651 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2652   let Predicates = [HasAVX512] in {
2653     defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2654                 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2656   }
2657   let Predicates = [HasAVX512, HasVLX] in {
2658     defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2659     defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2660   }
2661 }
2662
// Packed double/single FP compares; element size selected via EVEX_CD8 and
// VEX_W for the 64-bit form.
2663 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2664                           AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2665 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2666                           AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2667
2668 // Patterns to select fp compares with load as first operand.
// CommutableCMPCC limits these to condition codes unchanged by swapping the
// operands, so the memory operand can simply become $src2 of the rm form.
2669 let Predicates = [HasAVX512] in {
2670   def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2671                             CommutableCMPCC:$cc)),
2672             (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2673
2674   def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2675                             CommutableCMPCC:$cc)),
2676             (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2677 }
2678
2679 // ----------------------------------------------------------------
2680 // FPClass
2681 // Handle the scalar fpclass instruction:  mask = op(reg_scalar, imm)
2682 //                                         mask = op(mem_scalar, imm)
2683 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2684                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2685                                  Predicate prd> {
2686   let Predicates = [prd], ExeDomain = _.ExeDomain in {
2687       def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2688                       (ins _.RC:$src1, i32u8imm:$src2),
2689                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2690                       [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2691                               (i32 imm:$src2)))]>,
2692                       Sched<[sched]>;
// Masked register form: result mask ANDed with the writemask.
2693       def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2694                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2695                       OpcodeStr##_.Suffix#
2696                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2697                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2698                                       (OpNode (_.VT _.RC:$src1),
2699                                       (i32 imm:$src2))))]>,
2700                       EVEX_K, Sched<[sched]>;
// Memory forms: the scalar source comes from an intrinsic-style memory
// operand (ScalarIntMemCPat).
2701     def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2702                     (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2703                     OpcodeStr##_.Suffix##
2704                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2705                     [(set _.KRC:$dst,
2706                           (OpNode _.ScalarIntMemCPat:$src1,
2707                                   (i32 imm:$src2)))]>,
2708                     Sched<[sched.Folded, ReadAfterLd]>;
2709     def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2710                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2711                     OpcodeStr##_.Suffix##
2712                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2713                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
2714                         (OpNode _.ScalarIntMemCPat:$src1,
2715                             (i32 imm:$src2))))]>,
2716                     EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
2717   }
2718 }
2719
2720 // Handle the vector fpclass instruction: mask = fpclass(reg_vec, imm)
2721 //                                        mask = fpclass(mem_vec, imm)
2722 //                                        mask = fpclass(broadcast(eltVt), imm)
// 'mem' and 'broadcast' are mnemonic suffix strings appended for the memory
// and broadcast forms (e.g. the {x}/{y}/{z} size markers).
2723 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2724                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2725                                  string mem, string broadcast>{
2726   let ExeDomain = _.ExeDomain in {
2727   def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2728                       (ins _.RC:$src1, i32u8imm:$src2),
2729                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2730                       [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2731                                        (i32 imm:$src2)))]>,
2732                       Sched<[sched]>;
// Masked register form.
2733   def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2734                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2735                       OpcodeStr##_.Suffix#
2736                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2737                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2738                                        (OpNode (_.VT _.RC:$src1),
2739                                        (i32 imm:$src2))))]>,
2740                       EVEX_K, Sched<[sched]>;
// Full-vector memory forms.
2741   def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2742                     (ins _.MemOp:$src1, i32u8imm:$src2),
2743                     OpcodeStr##_.Suffix##mem#
2744                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2745                     [(set _.KRC:$dst,(OpNode
2746                                      (_.VT (bitconvert (_.LdFrag addr:$src1))),
2747                                      (i32 imm:$src2)))]>,
2748                     Sched<[sched.Folded, ReadAfterLd]>;
2749   def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2750                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2751                     OpcodeStr##_.Suffix##mem#
2752                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2753                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
2754                                   (_.VT (bitconvert (_.LdFrag addr:$src1))),
2755                                   (i32 imm:$src2))))]>,
2756                     EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
// Embedded-broadcast memory forms (EVEX_B).
2757   def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2758                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2759                     OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2760                                       _.BroadcastStr##", $dst|$dst, ${src1}"
2761                                                   ##_.BroadcastStr##", $src2}",
2762                     [(set _.KRC:$dst,(OpNode
2763                                      (_.VT (X86VBroadcast
2764                                            (_.ScalarLdFrag addr:$src1))),
2765                                      (i32 imm:$src2)))]>,
2766                     EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2767   def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2768                     (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2769                     OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2770                           _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2771                                                    _.BroadcastStr##", $src2}",
2772                     [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
2773                                      (_.VT (X86VBroadcast
2774                                            (_.ScalarLdFrag addr:$src1))),
2775                                      (i32 imm:$src2))))]>,
2776                     EVEX_B, EVEX_K,  Sched<[sched.Folded, ReadAfterLd]>;
2777   }
2778 }
2779
// Instantiate avx512_vector_fpclass at all widths.  The "{z}"/"{y}"/"{x}"
// strings are the memory-size mnemonic markers for the ZMM/YMM/XMM forms.
2780 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2781                                      bits<8> opc, SDNode OpNode,
2782                                      X86SchedWriteWidths sched, Predicate prd,
2783                                      string broadcast>{
2784   let Predicates = [prd] in {
2785     defm Z    : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
2786                                       _.info512, "{z}", broadcast>, EVEX_V512;
2787   }
2788   let Predicates = [prd, HasVLX] in {
2789     defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
2790                                       _.info128, "{x}", broadcast>, EVEX_V128;
2791     defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
2792                                       _.info256, "{y}", broadcast>, EVEX_V256;
2793   }
2794 }
2795
// Combined packed (PS/PD) and scalar (SS/SD) fpclass instantiation. The
// scalar defms carry an explicit Z in the record name (SSZ/SDZ) because
// avx512_scalar_fpclass does not append one itself.
multiclass avx512_fp_fpclass_all<string mnem, bits<8> vecOpc,
                                 bits<8> sclOpc, SDNode vecNode,
                                 SDNode sclNode, X86SchedWriteWidths schedW,
                                 Predicate pred> {
  defm PS : avx512_vector_fpclass_all<mnem, avx512vl_f32_info, vecOpc,
                                      vecNode, schedW, pred, "{l}">,
                                      EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<mnem, avx512vl_f64_info, vecOpc,
                                      vecNode, schedW, pred, "{q}">,
                                      EVEX_CD8<64, CD8VF>, VEX_W;
  defm SSZ : avx512_scalar_fpclass<sclOpc, mnem, sclNode, schedW.Scl,
                                   f32x_info, pred>, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<sclOpc, mnem, sclNode, schedW.Scl,
                                   f64x_info, pred>, EVEX_CD8<64, CD8VT1>,
                                   VEX_W;
}
2813
// VFPCLASS[PS|PD|SS|SD]: classify packed/scalar FP values into a mask
// register. Requires AVX512DQ; vector opcode 0x66, scalar opcode 0x67.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
                                      X86Vfpclasss, SchedWriteFCmp, HasDQI>,
                                      AVX512AIi8Base, EVEX;
2817
2818 //-----------------------------------------------------------------
2819 // Mask register copy, including
2820 // - copy between mask registers
2821 // - load/store mask registers
2822 // - copy from GPR to mask register and vice versa
2823 //
// KMOV forms moving between mask registers and between a mask register
// and memory. The kk form has no pattern; copies are inserted directly.
multiclass avx512_mask_mov<bits<8> opcKK, bits<8> opcKM, bits<8> opcMK,
                           string mnem, RegisterClass krc,
                           ValueType maskVT, X86MemOperand memOp> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opcKK, MRMSrcReg, (outs krc:$dst), (ins krc:$src),
             mnem#"\t{$src, $dst|$dst, $src}", []>,
             Sched<[WriteMove]>;
  // Load a mask register from memory.
  def km : I<opcKM, MRMSrcMem, (outs krc:$dst), (ins memOp:$src),
             mnem#"\t{$src, $dst|$dst, $src}",
             [(set krc:$dst, (maskVT (load addr:$src)))]>,
             Sched<[WriteLoad]>;
  // Store a mask register to memory.
  def mk : I<opcMK, MRMDestMem, (outs), (ins memOp:$dst, krc:$src),
             mnem#"\t{$src, $dst|$dst, $src}",
             [(store krc:$src, addr:$dst)]>,
             Sched<[WriteStore]>;
}
2840
// KMOV forms moving between a mask register and a GPR. No patterns here:
// GPR<->mask transfers are selected via explicit COPY_TO_REGCLASS below.
multiclass avx512_mask_mov_gpr<bits<8> opcKR, bits<8> opcRK,
                               string mnem,
                               RegisterClass krc, RegisterClass grc> {
  let hasSideEffects = 0 in {
    def kr : I<opcKR, MRMSrcReg, (outs krc:$dst), (ins grc:$src),
               mnem#"\t{$src, $dst|$dst, $src}", []>,
               Sched<[WriteMove]>;
    def rk : I<opcRK, MRMSrcReg, (outs grc:$dst), (ins krc:$src),
               mnem#"\t{$src, $dst|$dst, $src}", []>,
               Sched<[WriteMove]>;
  }
}
2853
// KMOVB (8-bit masks) requires AVX512DQ; the GPR forms use 32-bit GPRs.
let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;
2858
// KMOVW is the baseline AVX-512 mask move (16-bit masks).
let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;
2863
// KMOVD/KMOVQ (32/64-bit masks) require AVX512BW. The k<->k/mem and GPR
// forms take different prefixes (PD/PS vs XD), so each mnemonic is split
// into two defm instantiations sharing the same name prefix.
let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
2874
// GR from/to mask register
// i8/i16 bitconverts go through a 32-bit GPR: the sub-register is
// inserted into / extracted from an i32 so a plain regclass copy works.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

// zext of a mask->GPR move uses KMOV[WB]rk (mask-to-GR32, which zeroes the
// upper bits per the ISA); anyext can use a plain register copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;

// KMOVBrk exists only with DQI.
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;

// 32/64-bit masks are the same width as the GPR, so bitconverts are pure
// regclass copies.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2904
// Load/store kreg
// Sub-byte masks use the byte-wide KMOVB through VK8: the load/store moves
// a full byte, and only the low bits of the result are meaningful for
// VK1/VK2/VK4.
let Predicates = [HasDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}
2917
// Without a byte KMOV load (no DQI requirement here), an i8 load feeding a
// v8i1 bitconvert goes through a zero-extending GPR load instead.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
}
2922
let Predicates = [HasAVX512] in {
  // Lower scalar_to_vector of a GPR into a mask register as a plain
  // regclass copy; GR8 is first widened to an i32 via INSERT_SUBREG.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;

  // Inserting a single GPR-derived bit into an all-zeros vector: mask the
  // GPR down to bit 0 with AND, then move it into a K register.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
             (KMOVWkr (AND32ri8
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                       (i32 1))), VK16)>;
}
2947
2948 // Mask unary operation
2949 // - KNOT
// One reg-to-reg mask unary instruction (e.g. KNOT), guarded by a predicate.
multiclass avx512_mask_unop<bits<8> opcode, string mnem,
                            RegisterClass krc, SDPatternOperator node,
                            X86FoldableSchedWrite schedRW, Predicate pred> {
  let Predicates = [pred] in
    def rr : I<opcode, MRMSrcReg, (outs krc:$dst), (ins krc:$src),
               mnem#"\t{$src, $dst|$dst, $src}",
               [(set krc:$dst, (node krc:$src))]>,
               Sched<[schedRW]>;
}
2959
// Instantiate the B/W/D/Q variants of a mask unary op: B needs DQI, W is
// baseline AVX-512, and D/Q need BWI.
multiclass avx512_mask_unop_all<bits<8> opcode, string mnem,
                                SDPatternOperator node,
                                X86FoldableSchedWrite schedRW> {
  defm B : avx512_mask_unop<opcode, mnem#"b", VK8, node,
                            schedRW, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opcode, mnem#"w", VK16, node,
                            schedRW, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opcode, mnem#"d", VK32, node,
                            schedRW, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opcode, mnem#"q", VK64, node,
                            schedRW, HasBWI>, VEX, PS, VEX_W;
}
2972
// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KNOT: bitwise NOT of a mask register.
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2975
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Sub-byte masks are always widened to 16 bits and inverted with KNOTW;
// only the low bits of the narrowed result are meaningful.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2985
2986 // Mask binary operation
2987 // - KAND, KANDN, KOR, KXNOR, KXOR
// One reg-reg-reg mask binary instruction (KAND/KOR/KXOR/...), optionally
// marked commutable for the two-address/scheduling machinery.
multiclass avx512_mask_binop<bits<8> opcode, string mnem,
                             RegisterClass krc, SDPatternOperator node,
                             X86FoldableSchedWrite schedRW, Predicate pred,
                             bit Commutable> {
  let Predicates = [pred], isCommutable = Commutable in
    def rr : I<opcode, MRMSrcReg, (outs krc:$dst), (ins krc:$src1, krc:$src2),
               mnem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set krc:$dst, (node krc:$src1, krc:$src2))]>,
               Sched<[schedRW]>;
}
2999
// Instantiate the B/W/D/Q variants of a mask binary op. The W form's
// predicate is overridable because KADD's W form needs DQI rather than
// plain AVX512.
multiclass avx512_mask_binop_all<bits<8> opcode, string mnem,
                                 SDPatternOperator node,
                                 X86FoldableSchedWrite schedRW, bit Commutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opcode, mnem#"b", VK8, node,
                             schedRW, HasDQI, Commutable>,
                             VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opcode, mnem#"w", VK16, node,
                             schedRW, prdW, Commutable>,
                             VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opcode, mnem#"d", VK32, node,
                             schedRW, HasBWI, Commutable>,
                             VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opcode, mnem#"q", VK64, node,
                             schedRW, HasBWI, Commutable>,
                             VEX_4V, VEX_L, VEX_W, PS;
}
3013
// Composite fragments for AND-NOT and XNOR; the plain forms use scalar
// 'not' (for i1), the v-prefixed forms use 'vnot' for vector masks.
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3019
// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KANDN is the only non-commutable op here; KADD's W form requires DQI
// (passed via the overridable prdW parameter).
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3027
// Lower mask-vector binary ops on types narrower than the natively
// supported 16-bit mask: widen both operands to VK16, run the W form of
// the instruction, then narrow the result back to the source's regclass.
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  // The result is narrowed back to the regclass matching its type. (These
  // previously said VK1 - a copy-paste slip; harmless since VK1/VK2/VK4
  // cover the same K registers, but VK2/VK4 are type-consistent.)
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
3052
// Hook the widening patterns up for each mask logic instruction: scalar
// i1 matches the 'not'-based fragments, vector masks the 'vnot'-based ones.
defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
3058
// Mask unpacking
// KUNPCK concatenates two narrow masks into one twice-as-wide mask. Note
// the operand swap in the pattern result: concat_vectors' first (low)
// operand $src1 is passed as the instruction's second operand.
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
               (ins KRC:$src1, KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4V, VEX_L, Sched<[sched]>;

    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
              (!cast<Instruction>(NAME##rr)
                        (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
                        (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
  }
}
3076
// KUNPCKBW is baseline AVX-512; the wider WD/DQ forms require AVX512BW.
defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W;
3080
3081 // Mask bit testing
// Mask test instruction (KORTEST/KTEST): combines two mask registers and
// writes only EFLAGS; there is no register destination.
multiclass avx512_mask_testop<bits<8> opcode, string mnem, RegisterClass krc,
                              SDNode node, X86FoldableSchedWrite schedRW,
                              Predicate pred> {
  let Predicates = [pred], Defs = [EFLAGS] in
    def rr : I<opcode, MRMSrcReg, (outs), (ins krc:$src1, krc:$src2),
               mnem#"\t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (node krc:$src1, krc:$src2))]>,
               Sched<[schedRW]>;
}
3091
// Instantiate B/W/D/Q mask test ops: B needs DQI, D/Q need BWI, and the W
// form's predicate is overridable (KTEST's W form requires DQI).
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
                                                                VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
                                                                VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
                                                                VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
                                                                VEX, PD, VEX_W;
}
3104
// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KTEST requires DQI even for its W form (prdW = HasDQI below).
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3108
3109 // Mask shift
// Mask shift-by-immediate instruction (KSHIFTL/KSHIFTR).
multiclass avx512_mask_shiftop<bits<8> opcode, string mnem, RegisterClass krc,
                               SDNode node, X86FoldableSchedWrite schedRW> {
  let Predicates = [HasAVX512] in
    def ri : Ii8<opcode, MRMSrcReg, (outs krc:$dst), (ins krc:$src, u8imm:$imm),
                 mnem#"\t{$imm, $src, $dst|$dst, $src, $imm}",
                 [(set krc:$dst, (node krc:$src, (i8 imm:$imm)))]>,
                 Sched<[schedRW]>;
}
3119
// Instantiate the W/B/Q/D shift variants: opc1 encodes the W and B forms,
// opc2 the Q and D forms. B is gated on DQI and D/Q on BWI.
// NOTE(review): avx512_mask_shiftop wraps its def in its own
// 'let Predicates = [HasAVX512]'; confirm the outer lets here actually
// take effect (TableGen gives the innermost let precedence), or pass the
// predicate down as a parameter instead.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TAPD;
  let Predicates = [HasBWI] in {
  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                               sched>, VEX, TAPD;
  }
}
3134
// KSHIFTL/KSHIFTR: shift a mask register left/right by an 8-bit immediate.
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3137
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// ("axv512" in the name is a historical typo, kept because the name is
// referenced below.) Both narrow operands are widened into the low part of
// a 512-bit register (upper bits undefined via IMPLICIT_DEF - safe because
// the extra result bits are discarded by the narrowing COPY_TO_REGCLASS).
multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
                                              X86VectorVTInfo Narrow,
                                              X86VectorVTInfo Wide> {
  def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2))),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrr")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
           Narrow.KRC)>;

  // Masked variant: the narrow mask is widened so the masked (Zrrk) form
  // of the 512-bit compare can be used.
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Frag (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT Narrow.RC:$src2)))),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrrk")
            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
           Narrow.KRC)>;
}
3160
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Same widening trick as above, but for the immediate-condition-code
// integer compares; Frag.OperandTransform converts the matched condition
// code into the instruction immediate.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr##Zrri)
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (Frag.OperandTransform $cc)), Narrow.KRC)>;

// Masked variant using the Zrrik form with a widened mask operand.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                                 (Narrow.VT Narrow.RC:$src2),
                                                 cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           (Frag.OperandTransform $cc)), Narrow.KRC)>;
}
3184
// Same as above, but for fp types which don't use PatFrags.
// The condition code is already a plain immediate here, so it is passed
// through unchanged.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
// Unmasked: widen, run the 512-bit compare, narrow the result mask.
def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr##Zrri)
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            imm:$cc), Narrow.KRC)>;

// Masked variant using the Zrrik form.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (OpNode (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT Narrow.RC:$src2), imm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           imm:$cc), Narrow.KRC)>;
}
3206
// Without VLX, 128/256-bit dword/qword and FP compares are lowered via the
// widening multiclasses above.
let Predicates = [HasAVX512, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
  }

  // Immediate-cc integer compares and FP compares for the same type pairs.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
}
3241
// Same widening lowering for byte/word compares, which need the BWI
// 512-bit VPCMP{B,W} forms.
let Predicates = [HasBWI, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
  }

  // Immediate-cc byte/word compares.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>;
}
3271
// Mask setting all 0s or 1s
// Pseudo-instruction materializing an all-zeros/all-ones mask constant;
// marked rematerializable and as cheap as a move.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
        SchedRW = [WriteZero] in
      def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                     [(set KRC:$dst, (VT Val))]>;
}
3280
// Instantiate W/D/Q set-ops (v16i1/v32i1/v64i1). There is no B variant:
// 8-bit and narrower masks reuse the W form via patterns.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
3286
// KSET0 = all-zeros mask pseudo, KSET1 = all-ones mask pseudo.
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3289
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Narrower all-zeros/all-ones constants are materialized with the 16-bit
// KSET0W/KSET1W and re-tagged with the narrow register class.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
3301
3302 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Lower index-0 subvector extract/insert between two mask types to a
// plain regclass copy: both live in the same K registers, so only the
// type/regclass tag changes.
multiclass operation_subvector_mask_lowering<RegisterClass narrowRC,
                                             ValueType narrowVT,
                                             RegisterClass wideRC,
                                             ValueType wideVT> {
  def : Pat<(narrowVT (extract_subvector (wideVT wideRC:$src), (iPTR 0))),
            (narrowVT (COPY_TO_REGCLASS wideRC:$src, narrowRC))>;

  def : Pat<(wideVT (insert_subvector undef, narrowRC:$src, (iPTR 0))),
            (wideVT (COPY_TO_REGCLASS narrowRC:$src, wideRC))>;
}
// Instantiate the index-0 subvector copies for every (narrow, wide) pair
// of mask types.
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3337
3338 //===----------------------------------------------------------------------===//
3339 // AVX-512 - Aligned and unaligned load and store
3340 //
3341
// avx512_load - one vector-width worth of EVEX load/move-register forms:
//   rr/rrk/rrkz  : register-to-register moves (plain, merge-masked, zero-masked)
//   rm/rmk/rmkz  : memory loads (plain, merge-masked, zero-masked)
// plus patterns selecting the masked-load SDNode (mload) to rmk/rmkz.
//
//   Name         - instruction base name used to !cast the masked-load patterns.
//   ld_frag      - unmasked load fragment (aligned or unaligned).
//   mload        - masked-load PatFrag matched by the trailing patterns.
//   EVEX2VEXOvrd - VEX instruction name for the EVEX->VEX compression pass.
//   NoRMPattern  - suppress the pattern on the plain rm form (used when another
//                  instruction already covers that load type).
//   SelectOprr   - select node for the register forms; callers pass null_frag
//                  to disable those patterns.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Zero-masked register move: elements with a clear mask bit become 0.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask,  _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                           (_.VT _.RC:$src),
                                           _.ImmAllZerosV)))], _.ExeDomain>,
                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (bitconvert (ld_frag addr:$src))))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  // Merge-masked forms tie $src0 to $dst: unselected elements keep $src0.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                          (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                     [(set _.RC:$dst, (_.VT
                         (vselect _.KRCWM:$mask,
                          (_.VT (bitconvert (ld_frag addr:$src1))),
                           (_.VT _.RC:$src0))))], _.ExeDomain>,
                     EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.KRCWM:$mask, _.MemOp:$src),
                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                "${dst} {${mask}} {z}, $src}",
                  [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                    (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  // Masked-load SDNode lowerings: undef/zero pass-thru -> rmkz, otherwise rmk.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
3408
// avx512_alignedload_vl - instantiate avx512_load for all three vector lengths
// with the ALIGNED load fragments. The 512-bit form only needs prd; the
// 128/256-bit forms additionally require VLX. No EVEX2VEX override is passed
// for the 512-bit form since there is no VEX equivalent.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned512,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned256,
                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned128,
                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}
3427
// avx512_load_vl - instantiate avx512_load for all three vector lengths with
// the UNALIGNED load fragments. SelectOprr is forwarded so callers can pass
// null_frag to disable the register-form select patterns.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load_unaligned, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                         masked_load_unaligned, Sched.YMM, EVEX2VEXOvrd#"Y",
                         NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                         masked_load_unaligned, Sched.XMM, EVEX2VEXOvrd,
                         NoRMPattern, SelectOprr>, EVEX_V128;
  }
}
3447
// avx512_store - one vector-width worth of EVEX store forms:
//   rr_REV/rrk_REV/rrkz_REV - MRMDestReg re-encodings of the load-form moves,
//     disassembly-only (isCodeGenOnly + ForceDisassemble); FoldGenData links
//     each to the canonical load-form instruction.
//   mr/mrk  - memory stores (plain and merge-masked).
// plus a masked-store pattern and ".s" aliases that force the reversed
// encodings in assembly.
//
//   BaseName    - name used to !cast the mrk pattern and the .s aliases.
//   st_frag     - unmasked store fragment (aligned or plain store).
//   mstore      - masked-store PatFrag matched below.
//   NoMRPattern - suppress the pattern on the plain mr form.
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
                         [], _.ExeDomain>, EVEX,
                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>,  EVEX, EVEX_K,
                         FoldGenData<BaseName#_.ZSuffix#rrk>,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
                          Sched<[Sched.RR]>;
  }

  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  // Masked store: no pattern here; selected via the mstore Pat below.
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
               NotMemoryFoldable;

  def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
                                                        _.KRCWM:$mask, _.RC:$src)>;

  // ".s" mnemonic suffix selects the store-style (reversed) encoding.
  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}
3502
// avx512_store_vl - instantiate avx512_store for all three vector lengths
// using the plain (unaligned) store fragments.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store_unaligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store_unaligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store_unaligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
3520
// avx512_alignedstore_vl - instantiate avx512_store for all three vector
// lengths using the ALIGNED store fragments.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned512, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned256, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned128, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
3539
// Concrete AVX-512 full-vector move instructions. FP moves (VMOVAPS/PD,
// VMOVUPS/UPD) require HasAVX512; integer moves with 8/16-bit element masking
// (VMOVDQU8/16) require HasBWI. The NoRMPattern/NoMRPattern bit (the trailing
// 1) is set where an equivalently-typed instruction already provides the
// plain load/store pattern, and null_frag disables the register-select
// patterns for instructions whose element type has a preferred alternative.
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
                               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3603
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

// Store counterparts of the spill pseudos above.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}
3629
// Lower a select whose TRUE operand is the zero vector by inverting the mask
// (KNOT) and using a zero-masked move, so the live value lands in the
// "selected" position.
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                              VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3649
// mask_move_lowering - lower a masked select on a Narrow (128/256-bit) vector
// by widening both operands into a Wide (512-bit) register, performing the
// masked move there, and extracting the low subvector back out. Used when the
// narrow masked-move instruction is unavailable (no VLX / no BWI-with-VLX).
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
 // Merge form: unselected lanes come from $src0.
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.RC:$src0)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrk")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;

 // Zeroing form: unselected lanes become zero.
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrkz")
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;
}
3671
// Patterns for handling masked selects of 128/256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

// Byte/word-element masked moves need BWI; widen to 512 bits when VLX is
// missing.
let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}
3693
// Plain (unmasked) stores of integer vectors of any element width can use the
// qword-element move instructions, so v16i32/v32i16/v64i8 stores all map to
// VMOVDQA64/VMOVDQU64. This avoids needing BWI for the 8/16-bit types.
let Predicates = [HasAVX512] in {
  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}

let Predicates = [HasVLX] in {
  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
3739
// masked_move_for_extract - fold (select mask, (extract_subvector src, 0),
// passthru) into a single masked register move on the low subregister:
//   From - the wide source vector type being extracted from.
//   To   - the narrow extracted vector type.
//   Cast - the type the select operates on (bitcast of To).
multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
  // Merge form: unselected lanes come from $src0.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask,
                      (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;

  // Zeroing form: unselected lanes become zero.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask,
                      (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
}
3760
3761
// Instantiations of masked_move_for_extract for every (source width,
// extracted width, select type) combination. Requires VLX since the emitted
// instructions operate on 128/256-bit registers.
let Predicates = [HasVLX] in {
// A masked extract from the first 128-bits of a 256-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info,  v2i64x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info,  v4i32x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info,  v16i8x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info,  v2i64x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info,  v4i32x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info,  v16i8x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v4f64x_info,  v2f64x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v8f32x_info,  v4f32x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v4f64x_info,  v2f64x_info, v4f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v8f32x_info,  v4f32x_info, v4f32x_info>;

// A masked extract from the first 128-bits of a 512-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info,  v2i64x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info,  v16i8x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info,  v2i64x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info,  v16i8x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v8f64_info,  v2f64x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v16f32_info, v4f32x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v8f64_info,  v2f64x_info, v4f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v16f32_info, v4f32x_info, v4f32x_info>;

// A masked extract from the first 256-bits of a 512-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info,  v4i64x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info,  v32i8x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info,  v4i64x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info,  v32i8x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ256",   v8f64_info,  v4f64x_info,  v4f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ256",   v16f32_info, v8f32x_info,  v4f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ256",   v8f64_info,  v4f64x_info,  v8f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ256",   v16f32_info, v8f32x_info,  v8f32x_info>;
}
3808
// Move Int Doubleword to Packed Double Int
//
// EVEX-encoded GPR <-> XMM / scalar-FP moves (vmovd/vmovq, opcodes 0x6E/0x7E).
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                        EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                        [(set VR128X:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))]>,
                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Disassembly-only memory form; loads are normally matched by VMOVQI2PQIZrm.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Scalar-register (FR64X) variants used only by codegen for i64<->f64
// bitconverts.
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>,
                         EVEX, VEX_W, Sched<[WriteVecStore]>,
                         EVEX_CD8<64, CD8VT1>;
}
} // ExeDomain = SSEPackedInt
3852
// Move Int Doubleword to Single Scalar
//
// Codegen-only i32 -> f32 bitconvert moves into a scalar FP register.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;

def VMOVDI2SSZrm  : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3866
// Move doubleword from xmm register to r/m32
//
// Extracts element 0 of a v4i32 to a GPR or directly to memory.
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))]>,
                       EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (extractelt (v4i32 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
3882
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
// Register form: extract element 0 of a v2i64 into a 64-bit GPR.
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))]>,
                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                      Requires<[HasAVX512]>;

// Memory form of the 0x7E encoding. Kept only so the disassembler can decode
// it (isCodeGenOnly/ForceDisassemble); it carries no ISel pattern.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                      EVEX, VEX_W, Sched<[WriteVecStore]>,
                      Requires<[HasAVX512, In64BitMode]>;

// Store the low 64 bits of an XMM register to memory (0xD6 encoding).
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

// Register-register form of 0xD6: an alternative encoding of the xmm-to-xmm
// vmovq, kept for the disassembler and the "vmovq.s" assembler alias below.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt
3913
// ".s" suffix lets assembly writers request the store-form (MRMDestReg)
// encoding of the register-register vmovq.
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3916
// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
// Bitcast an f32 held in an FR32X register into a 32-bit GPR.
def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                      (ins FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
// Store an f32 reinterpreted as an i32 directly to memory.
def VMOVSS2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                      (ins i32mem:$dst, FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3931
// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
// Load a 64-bit integer into the low qword of an XMM register, zeroing the
// upper qword (scalar_to_vector). Use EVEX_CD8<64, CD8VT1> -- one 8-byte
// element -- for the compressed-displacement factor, matching the sibling
// VMOVPQI2QIZmr store above. This encodes the identical disp8 scale (N = 8)
// as the previous EVEX_CD8<8, CD8VT8> (1 byte x 8 elements), but states the
// operand's true shape directly.
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt
3942
// Allow "vmovd" but print "vmovq". These accept the 64-bit GPR forms under
// the vmovd mnemonic for assembler compatibility; the printed form stays
// vmovq (the alias priority is 0).
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3948
//===----------------------------------------------------------------------===//
// AVX-512  MOVSS, MOVSD
//===----------------------------------------------------------------------===//

// EVEX scalar move (VMOVSS/VMOVSD) with the full set of forms:
//   rr/rrk/rrkz  - register moves of the low element (unmasked, merge-masked,
//                  zero-masked),
//   rm/rmk/rmkz  - scalar loads (the masked forms carry no ISel patterns),
//   mr/mrk       - scalar stores (masked store has no pattern either).
// The masked load/store forms are selected by the
// avx512_{load,store}_scalar_lowering* Pat multiclasses further down.
multiclass avx512_move_scalar<string asm, SDNode OpNode,
                              X86VectorVTInfo _> {
  // Unmasked register-register move of the low scalar element.
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  // Zero-masked: result element is zeroed where the mask bit is clear.
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masked: $src0 (tied to $dst) supplies the value preserved where the
  // mask bit is clear.
  let Constraints = "$src0 = $dst"  in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
             "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  // Plain scalar load into an FR register; rematerializable / load-foldable.
  let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // Masked loads: no patterns here; matched by explicit Pats below.
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|",
               "$dst {${mask}}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
               "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  // Plain scalar store.
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  // Masked store: no pattern; matched by explicit Pats below.
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
              NotMemoryFoldable;
}
4006
// Instantiate the scalar moves for single and double precision.
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4012
4013
// Lower a masked scalar select (X86selects) feeding a scalar move onto the
// merge-masked (rrk) / zero-masked (rrkz) register forms of InstrStr.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

// movss/movsd(src0, select(mask, src1, src2)) -> merge-masked move with src2
// providing the pass-through element.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;

// movss/movsd(src0, select(mask, src1, 0)) -> zero-masked move.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
}
4038
// Lower a masked store of a 128-bit vector that was widened (via
// insert_subvector into undef) to 512 bits onto the masked scalar store
// InstrStr#mrk. Mask is the dag matching the mask operand; the mask value in
// MaskRC is copied into a VK1WM writemask register.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
4051
// Same as avx512_store_scalar_lowering, but the mask lives in a sub-register
// class (GR8/GR16); it is widened to i32 with INSERT_SUBREG before the copy
// into the VK1WM writemask register.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
4066
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked store directly. Codegen will widen 128-bit masked store to 512
// bits on AVX512F only targets.
// Mask512 is the widened mask shape seen on AVX512F-only targets; Mask128 is
// the native shape seen when AVX512VL is available.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store addr:$dst, Mask512,
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

// AVX512VL pattern.
def : Pat<(masked_store addr:$dst, Mask128, (_.info128.VT _.info128.RC:$src)),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
4091
// Lower masked loads whose result is extracted back down from a widened
// 512-bit type onto the masked scalar load forms:
//   zero pass-through  -> InstrStr#rmkz,
//   vzmovl pass-through -> InstrStr#rmk (merge with existing register).
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}
4115
// Same as avx512_load_scalar_lowering, but the mask arrives in a
// sub-register class and is widened to i32 with INSERT_SUBREG before being
// copied into the VK1WM writemask register.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}
4141
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked load directly. Codegen will widen 128-bit masked load to 512
// bits on AVX512F only targets.
// Mask512/Mask128 are the AVX512F-widened and native AVX512VL mask shapes.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask512, dag Mask128,
                                              RegisterClass MaskRC,
                                              SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512VL patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}
4183
// Instantiate the masked scalar move/store/load lowerings for VMOVSS/VMOVSD,
// once per width in which the incoming GPR mask can appear (GR32/GR16/GR8).
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

// Masked scalar stores.
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

// Masked scalar stores, clang-style widened-mask forms (see the multiclass
// comment): a 512-bit-widened mask for AVX512F and a native 128-bit mask for
// AVX512VL.
defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

// Masked scalar loads, mirroring the store instantiations above.
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;
4250
// Scalar f32/f64 selects lower to merge-masked VMOVSS/VMOVSD register moves.
// The GR8-mask variants first widen the mask byte to i32 (INSERT_SUBREG into
// IMPLICIT_DEF) and copy it into a VK1WM writemask register; the VK1WM
// variants use the mask register directly.
def : Pat<(f32 (X86selects (scalar_to_vector GR8:$mask),
                           (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
                          GR8:$mask, sub_8bit)), VK1WM),
            (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
            FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;

def : Pat<(f64 (X86selects (scalar_to_vector GR8:$mask),
                           (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS
            (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
                          GR8:$mask, sub_8bit)), VK1WM),
            (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
            FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
4278
// Reversed-operand (MRMDestReg, opcode 0x11) encodings of the masked and
// unmasked VMOVSS/VMOVSD register moves. Disassembler-only
// (isCodeGenOnly/ForceDisassemble); FoldGenData links each to its canonical
// 0x10 twin, and the ".s" InstAliases below let assembly request them.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, $src1, $src2}",
                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                         FoldGenData<"VMOVSSZrrkz">,
                         Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                           FoldGenData<"VMOVSDZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                                          VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                         "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}
4331
// ".s" suffix selects the reversed-operand (_REV, opcode 0x11) encodings of
// the vmovss/vmovsd register moves, including the masked forms.
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
4352
// Selection patterns built on VMOVSS/VMOVSD: vzmovl (move low element, zero
// the rest), scalar loads that implicitly zero the upper lanes, extract+store,
// and the Movss/Movsd/Movlpd/Movlps shuffles.
let Predicates = [HasAVX512] in {
  let AddedComplexity = 15 in {
  // vzmovl of a 128-bit value: blend the low element over an all-zeros vector.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
            (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
                       (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
  }

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;

  let AddedComplexity = 20 in {
  // MOVSSrm already zeros the high parts of the register, so a plain scalar
  // load covers vzmovl/vzload. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

  // MOVSDrm likewise zeros the high parts of the register.
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  }
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;

  // Move low f64 and clear high bits.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
                       (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
                       (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
                       (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
                       (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;

  // Extract and store.
  def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;

  // Shuffle with VMOVSS
  def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
            (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;

  def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
            (VMOVSSZrr VR128X:$src1,
                       (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;

  // Shuffle with VMOVSD
  def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;

  // Movlpd/Movlps replace the low element, which VMOVSD also does.
  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
}
4485
// AVX-512 form of "vmovq xmm, xmm": copies the low 64 bits of the source and
// zeroes the upper bits of the destination (modeled as X86vzmovl).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                (ins VR128X:$src),
                                "vmovq\t{$src, $dst|$dst, $src}",
                                [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                   (v2i64 VR128X:$src))))]>,
                                EVEX, VEX_W;
}
4495
// Patterns selecting the EVEX-encoded GPR<->XMM move instructions for
// zero-extending scalar-to-vector moves and zero-extending vector loads.
let Predicates = [HasAVX512] in {
  let AddedComplexity = 15 in {
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIZrr GR32:$src)>;

    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
              (VMOV64toPQIZrr GR64:$src)>;

    // A 128-bit vmovq implicitly zeroes the upper lanes, so a wider
    // zero-extended result is just the 128-bit move inserted into an
    // undef/zero register via SUBREG_TO_REG.
    def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                                 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;

    def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
                                 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
  }
  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  let AddedComplexity = 20 in {
    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v4i32 (X86vzload addr:$src)),
              (VMOVDI2PDIZrm addr:$src)>;
    def : Pat<(v8i32 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
              (VMOVQI2PQIZrm addr:$src)>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
              (VMOVZPQILo2PQIZrr VR128X:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVQI2PQIZrm addr:$src)>;
    def : Pat<(v4i64 (X86vzload addr:$src)),
              (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
  }

  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                                (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
4550
4551 //===----------------------------------------------------------------------===//
4552 // AVX-512 - Non-temporals
4553 //===----------------------------------------------------------------------===//
4554
// Non-temporal aligned 512-bit vector load (streaming load hint).
// No pattern here; selection is done via the Pat<> records below.
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4559
// 256/128-bit non-temporal loads require the VL extension.
let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                       (ins i256mem:$src),
                       "vmovntdqa\t{$src, $dst|$dst, $src}",
                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                      (ins i128mem:$src),
                      "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
4573
// One non-temporal store (memory-destination) instruction for a single vector
// width. AddedComplexity = 400 ensures these beat ordinary store patterns when
// the nontemporal hint is present on the store node.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
4583
// Instantiates avx512_movnt for the 512-bit width (AVX512F) and for the
// 256/128-bit widths (which additionally require VLX).
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}
4595
// Non-temporal stores: integer (vmovntdq) and FP (vmovntpd/vmovntps).
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;
4602
// Map the remaining 512-bit element types onto the single integer NT
// store/load instruction (only one encoding exists per width).
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
4618
// Same element-type mapping for the 256/128-bit NT instructions (VLX only).
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
4648
4649 //===----------------------------------------------------------------------===//
4650 // AVX-512 - Integer arithmetic
4651 //
// Masked integer binop: register-register (rr) and register-memory (rm)
// forms. AVX512_maskable generates the unmasked, merge-masked ({k}) and
// zero-masked ({k}{z}) variants from one description.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                                (bitconvert (_.LdFrag addr:$src2))))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, ReadAfterLd]>;
}
4670
// Extends avx512_binop_rm with the EVEX.b broadcast-from-memory form
// ({1toN} scalar broadcast of the second operand).
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src2))))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, ReadAfterLd]>;
}
4685
// Instantiates avx512_binop_rm at all three vector lengths; 256/128-bit
// variants additionally require VLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}
4701
// Like avx512_binop_rm_vl but also generates the broadcast (rmb) forms.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}
4717
// i64-element binop (broadcast supported; REX.W + CD8 scale 64).
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}
4725
// i32-element binop (broadcast supported; CD8 scale 32).
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
4732
// i16-element binop. No broadcast form exists for sub-dword elements, so this
// uses avx512_binop_rm_vl (rr/rm only). VEX_WIG: W bit is ignored.
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}
4740
// i8-element binop; like the i16 variant, no broadcast form.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}
4748
// Generates both the "d" (i32) and "q" (i64) flavors, appending the element
// suffix to the mnemonic.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                   IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                   IsCommutable>;
}
4758
// Generates both the "b" (i8) and "w" (i16) flavors, appending the element
// suffix to the mnemonic.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                   IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                   IsCommutable>;
}
4768
// All four element sizes. d/q only need AVX512F; b/w need BWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}
4779
// Binop whose source and destination element types differ (e.g. pmuludq:
// i32 sources, i64 destination). _Brdct describes the broadcast element type,
// which can differ from the source vector type.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (bitconvert (_Src.LdFrag addr:$src2))))>,
                        AVX512BIBase, EVEX_4V,
                        Sched<[sched.Folded, ReadAfterLd]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Brdct.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (X86VBroadcast
                                          (_Brdct.ScalarLdFrag addr:$src2))))))>,
                    AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
4812
// Integer add/sub (all element sizes), saturating add/sub, multiplies and
// averages. Commutativity flag enables operand swapping in isel/folding.
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
// vpmullq is DQI-only and has no VEX equivalent to shrink to.
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;
4844
// Multi-width instantiation of avx512_binop_rm2; the broadcast type is fixed
// to qword vectors (v8i64/v4i64/v2i64) for each width.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}
4866
// VBMI vpmultishiftqb: byte sources/destination with qword broadcast.
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                avx512vl_i8_info, avx512vl_i8_info,
                                X86multishift, HasVBMI, 0>, T8PD;
4870
// Broadcast-memory form for the pack instructions (source element broadcast
// via EVEX.b); destination element type differs from the source.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Src.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Src.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Src.VT (X86VBroadcast
                                          (_Src.ScalarLdFrag addr:$src2))))))>,
                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
4885
// Register-register and register-memory forms for pack/pmadd-style ops where
// source and destination element types differ.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (bitconvert (_Src.LdFrag addr:$src2))))>,
                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                         Sched<[sched.Folded, ReadAfterLd]>;
}
4906
// i32 -> i16 packs (vpackssdw/vpackusdw) at all widths; dword elements allow
// the broadcast (rmb) form. Requires BWI (word-masked results).
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
// i16 -> i8 packs (vpacksswb/vpackuswb); no broadcast form exists for word
// elements, so only rr/rm are generated.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                            SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}
4941
// Multi-width instantiation for vpmaddubsw/vpmaddwd (widening multiply-add).
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}
4958
// Pack and widening multiply-add instructions. Note: vpackusdw lives in the
// 0F38 opcode map, hence AVX5128IBase rather than AVX512BIBase.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4968
// Signed/unsigned min/max for all element sizes. The qword variants are new
// in AVX-512 (no VEX counterpart), hence NotEVEX2VEXConvertible.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
5008
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen operands to ZMM via INSERT_SUBREG (upper lanes undef), run the
// 512-bit multiply, then extract the original width.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
5025
// NOTE: a second, byte-identical copy of the "PMULLQ: Use 512bit version to
// implement 128/256 bit in case NoVLX" pattern block previously appeared here
// (copy/paste duplication of the block directly above). The redundant copy
// has been removed; the patterns above already cover both cases.
5042
// Lower 128/256-bit integer min/max through the 512-bit instruction 'Instr'
// when VLX is unavailable: widen both operands to ZMM (upper elements
// undefined), run the 512-bit op, then extract the original subregister.
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
5058
// Instantiate the NoVLX widening lowerings for the four 64-bit element
// min/max instructions.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}
5065
5066 //===----------------------------------------------------------------------===//
5067 // AVX-512  Logical Instructions
5068 //===----------------------------------------------------------------------===//
5069
// Register-register and register-memory forms of a packed bitwise logic op.
// OpNodeMsk is the OpNode to use when element size is important. OpNode will
// be set to null_frag for 32-bit elements (the i64-typed pattern covers
// selection; the masked pattern still needs the element-size-correct node).
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDNode OpNodeMsk, X86FoldableSchedWrite sched,
                           X86VectorVTInfo _, bit IsCommutable = 0> {
  let hasSideEffects = 0 in
  defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                     (bitconvert (_.VT _.RC:$src2)))),
                    (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                                          _.RC:$src2)))),
                    IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  // Memory form: $src2 comes from a full-vector load.
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                   (bitconvert (_.LdFrag addr:$src2)))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                     (bitconvert (_.LdFrag addr:$src2))))))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, ReadAfterLd]>;
}
5098
// Adds the broadcast-from-memory (rmb) form on top of avx512_logic_rm.
// OpNodeMsk is the OpNode to use where element size is important, so it is
// used for all of the broadcast patterns (broadcast element width must match
// the instruction's element width).
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _,
                            bit IsCommutable = 0> :
           avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _,
                           IsCommutable> {
  defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.i64VT (OpNodeMsk _.RC:$src1,
                                   (bitconvert
                                    (_.VT (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2)))))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                     (bitconvert
                                      (_.VT (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2))))))))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, ReadAfterLd]>;
}
5122
// Instantiates avx512_logic_rmb at all three vector lengths: Z (512-bit,
// AVX512F) unconditionally, Z256/Z128 only when VLX is available.
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode OpNodeMsk, X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo VTInfo,
                               bit IsCommutable = 0> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
                                 VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
                                 VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
5139
// Emits both the dword (D) and qword (Q) element-size variants of a logic op.
// Only the Q variant selects the plain OpNode pattern; the D variant passes
// null_frag so the unmasked i64-typed pattern is not duplicated, while still
// providing OpNode as OpNodeMsk for the masked/broadcast patterns.
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 bit IsCommutable = 0> {
  defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
                               avx512vl_i64_info, IsCommutable>,
                               VEX_W, EVEX_CD8<64, CD8VF>;
  defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched,
                               avx512vl_i32_info, IsCommutable>,
                               EVEX_CD8<32, CD8VF>;
}
5150
// The four packed bitwise instructions. AND/OR/XOR are commutable; ANDN
// (X86andnp) is not, since it complements only its first operand.
defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, 1>;
defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, 1>;
defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic>;
5159
5160 //===----------------------------------------------------------------------===//
5161 // AVX-512  FP arithmetic
5162 //===----------------------------------------------------------------------===//
5163
// Scalar FP binary op: intrinsic (_Int, VecNode on full 128-bit vectors with
// current rounding mode) and codegen-only (plain FRC register) forms.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2,
                                          (i32 FROUND_CURRENT)))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2,
                                        (i32 FROUND_CURRENT)))>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  // Codegen-only forms selected for plain scalar IR (fadd float, ...); these
  // never appear in assembly output as distinct instructions.
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
  }
}
5199
// Scalar FP binary op with an explicit static rounding-mode operand ($rc),
// encoded via EVEX.b + EVEX.RC (register form only).
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 imm:$rc)), IsCommutable>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Scalar FP binary op with a suppress-all-exceptions ({sae}) variant instead
// of a rounding-mode one: intrinsic reg/mem forms, codegen-only FRC forms,
// plus the EVEX.b-encoded {sae} register form using SaeNode.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2))>,
                         Sched<[sched.Folded, ReadAfterLd]>;

  // Codegen-only forms selected for plain scalar IR on FRC registers.
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }

  // {sae} form: exceptions suppressed, encoded with EVEX.b in register form.
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC))>, EVEX_B,
                            Sched<[sched]>;
  }
}
5252
// SS (f32) and SD (f64) scalar instantiations of a binary op that supports
// static rounding. SSZ uses XS prefix; SDZ uses XD + VEX.W.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, X86SchedWriteSizes sched,
                                bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
                              sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
                              sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
5267
// SS/SD scalar instantiations of a binary op that supports {sae} rather than
// static rounding (min/max).
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                              VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                              VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic. ADD/MUL are commutable; SUB/DIV are not. MIN/MAX are
// not marked commutable here because x86 min/max semantics are
// order-sensitive for NaN/signed-zero operands.
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
                               SchedWriteFCmpSizes, 0>;
5290
5291 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5292 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Codegen-only commutable scalar min/max forms (X86fminc/X86fmaxc), used when
// fast-math makes operand order irrelevant. (The multiclass name keeps its
// historical "comutable" spelling; renaming would touch all users.)
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
}
// Commutable (fast-math) scalar min/max instantiations; they reuse the vmin/
// vmax mnemonics and opcodes, differing only in the selected SDNode.
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;
5327
// Packed FP binary op: reg-reg (rr), reg-mem (rm) and broadcast (rmb) forms
// at a single vector width described by _.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable>,
                  EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
    // Broadcast form: a single scalar from memory replicated to all elements
    // (EVEX.b set).
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2))))>,
                     EVEX_4V, EVEX_B,
                     Sched<[sched.Folded, ReadAfterLd]>;
    }
  }
}
5354
// Packed FP binary op with an explicit static rounding-mode operand ($rc),
// register form only (EVEX.b + EVEX.RC).
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
5365
// Packed FP binary op with a {sae} (suppress-all-exceptions) register form
// (EVEX.b without EVEX.RC).
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeRnd,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
                  EVEX_4V, EVEX_B, Sched<[sched]>;
}
5376
// Instantiates a packed FP binary op for PS/PD at 512-bit (under 'prd') and,
// when VLX is also available, at 128/256-bit.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                              EVEX_CD8<64, CD8VF>;
  }

    // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   sched.PD.XMM, IsCommutable>, EVEX_V128, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}
5405
// Static-rounding variants exist only at 512-bit vector length (PS/PD ZMM).
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5415
// {sae} variants exist only at 512-bit vector length (PS/PD ZMM).
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5425
// Packed FP arithmetic: each op gets the length-generic forms plus (where the
// instruction supports it) a 512-bit rounding or {sae} form. The VMINC/VMAXC
// records are codegen-only commutable variants; the VAND*/VOR*/VXOR* records
// pass null_frag (selection is handled by the lowering patterns below) and
// require AVX512DQ.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
}
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
5458
// Patterns catch floating point selects with bitcasted integer logic ops.
// Each pattern maps a (vselect mask, <logic op>, <fallback>) or broadcast
// form onto the corresponding masked/zero-masked/broadcast flavor of the
// integer logic instruction named by InstrStr.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
                                      X86VectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;
  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1,
                                         (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
  // Register-broadcast logical operations.
  def : Pat<(_.i64VT (OpNode _.RC:$src1,
                      (bitconvert (_.VT (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2)))))),
            (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (_.i64VT (OpNode _.RC:$src1,
                              (bitconvert (_.VT
                                           (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (_.i64VT (OpNode _.RC:$src1,
                              (bitconvert (_.VT
                                           (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}
}
5511
// Instantiate the FP-logical lowering patterns for every width/element-size
// combination of the D (i32) and Q (i64) instruction flavors.
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
  defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
}
5520
// Masked FP selects over bitwise logic fall back to the VP{AND,OR,XOR,ANDN}
// instructions defined earlier.
defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
5525
let Predicates = [HasVLX,HasDQI] in {
  // Use packed logical operations for scalar ops: move the FR32X/FR64X value
  // into a VR128X register, run the 128-bit DQ logic instruction, and copy
  // the result back to the scalar register class.
  def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS (VANDPDZ128rr
                               (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                               (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
  def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS (VORPDZ128rr
                               (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                               (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
  def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS (VXORPDZ128rr
                               (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                               (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
  def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS (VANDNPDZ128rr
                               (COPY_TO_REGCLASS FR64X:$src1, VR128X),
                               (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;

  def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS (VANDPSZ128rr
                               (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                               (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
  def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS (VORPSZ128rr
                               (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                               (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
  def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS (VXORPSZ128rr
                               (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                               (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
  def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS (VANDNPSZ128rr
                               (COPY_TO_REGCLASS FR32X:$src1, VR128X),
                               (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
}
5562
// Packed SCALEF (and similar ops whose node carries a rounding-mode operand):
// rr, rm and broadcast forms, all using FROUND_CURRENT.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
                  EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))),
                                              (i32 FROUND_CURRENT))>,
                   EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
5586
// Scalar forms of a SCALEF-style FP operation: reg-reg and
// intrinsic-style scalar-memory. Rounding mode is FROUND_CURRENT here;
// the explicit-rounding variant is added separately (avx512_fp_scalar_round).
5587 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5588                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5589   let ExeDomain = _.ExeDomain in {
5590   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5591                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5592                   "$src2, $src1", "$src1, $src2",
5593                   (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
5594                   Sched<[sched]>;
5595   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5596                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5597                   "$src2, $src1", "$src1, $src2",
5598                   (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
5599                           (i32 FROUND_CURRENT))>,
5600                   Sched<[sched.Folded, ReadAfterLd]>;
5601   }
5602 }
5603
// Instantiates the full SCALEF family: 512-bit packed PS/PD (plus the
// explicit embedded-rounding forms via avx512_fp_round_packed), scalar
// SS/SD (plus scalar rounding forms), and 128/256-bit packed under VLX.
// Note the packed and scalar forms use different opcodes (opc/opcScaler).
5604 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5605                                 SDNode OpNode, SDNode OpNodeScal,
5606                                 X86SchedWriteWidths sched> {
5607   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
5608              avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
5609                               EVEX_V512, EVEX_CD8<32, CD8VF>;
5610   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
5611              avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
5612                               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5613   defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>,
5614              avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>,
5615                            EVEX_4V,EVEX_CD8<32, CD8VT1>;
5616   defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>,
5617              avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>,
5618                            EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
5619
5620   // Define only if AVX512VL feature is present.
5621   let Predicates = [HasVLX] in {
5622     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>,
5623                                    EVEX_V128, EVEX_CD8<32, CD8VF>;
5624     defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>,
5625                                    EVEX_V256, EVEX_CD8<32, CD8VF>;
5626     defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>,
5627                                    EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5628     defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>,
5629                                    EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
5630   }
5631 }
// VSCALEFPS/PD (opcode 0x2C) and VSCALEFSS/SD (opcode 0x2D), T8PD encoded.
// Marked NotEVEX2VEXConvertible: there is no VEX equivalent to shrink to.
5632 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs,
5633                                     SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5634
5635 //===----------------------------------------------------------------------===//
5636 // AVX-512  VPTESTM instructions
5637 //===----------------------------------------------------------------------===//
5638
// Mask-producing test: OpNode (a setcc PatFrag, EQ or NE) is applied to
// (src1 & src2) compared against all-zeros, writing a k-register.
// `Name` is the defm prefix used to !cast the instructions for the extra
// same-source patterns at the bottom.
5639 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5640                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
5641                          string Name> {
5642   let ExeDomain = _.ExeDomain in {
// The AND of two registers is commutable, so the rr form is too.
5643   let isCommutable = 1 in
5644   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5645                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5646                       "$src2, $src1", "$src1, $src2",
5647                    (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
5648                            _.ImmAllZerosV)>,
5649                    EVEX_4V, Sched<[sched]>;
5650   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5651                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5652                        "$src2, $src1", "$src1, $src2",
5653                    (OpNode (bitconvert
5654                             (_.i64VT (and _.RC:$src1,
5655                                           (bitconvert (_.LdFrag addr:$src2))))),
5656                            _.ImmAllZerosV)>,
5657                    EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5658                    Sched<[sched.Folded, ReadAfterLd]>;
5659   }
5660
5661   // Patterns for compare with 0 that just use the same source twice
5662   // (x & x == x, so "test x against zero" maps to the rr form with
5663   // both operands set to $src); plus the zero-masked variant.
5662   def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
5663             (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr")
5664                                       _.RC:$src, _.RC:$src))>;
5665
5666   def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
5667             (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk")
5668                                       _.KRC:$mask, _.RC:$src, _.RC:$src))>;
5669 }
5670
// Broadcast-memory form of the test-to-mask operation: the scalar at
// $src2 is splatted before the AND-with-$src1 / compare-against-zero.
5671 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5672                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5673   let ExeDomain = _.ExeDomain in
5674   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5675                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5676                     "${src2}"##_.BroadcastStr##", $src1",
5677                     "$src1, ${src2}"##_.BroadcastStr,
5678                     (OpNode (and _.RC:$src1,
5679                                        (X86VBroadcast
5680                                         (_.ScalarLdFrag addr:$src2))),
5681                             _.ImmAllZerosV)>,
5682                     EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5683                     Sched<[sched.Folded, ReadAfterLd]>;
5684 }
5685
5686 // Use 512bit version to implement 128/256 bit in case NoVLX.
// NoVLX fallback: widen 128/256-bit operands into the 512-bit register
// class with INSERT_SUBREG, run the Z-sized instruction, then narrow the
// result mask back with COPY_TO_REGCLASS. Covers plain, masked,
// same-source, and masked same-source patterns.
5687 multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
5688                                   X86VectorVTInfo _, string Name> {
// Two-operand test, unmasked.
5689   def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
5690                            _.ImmAllZerosV)),
5691             (_.KVT (COPY_TO_REGCLASS
5692                      (!cast<Instruction>(Name # "Zrr")
5693                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5694                                       _.RC:$src1, _.SubRegIdx),
5695                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5696                                       _.RC:$src2, _.SubRegIdx)),
5697                    _.KRC))>;
5698
// Two-operand test, merged with an existing mask; the mask must also be
// widened to the Z-sized k-register class first.
5699   def : Pat<(_.KVT (and _.KRC:$mask,
5700                         (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
5701                                 _.ImmAllZerosV))),
5702             (COPY_TO_REGCLASS
5703              (!cast<Instruction>(Name # "Zrrk")
5704               (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
5705               (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5706                              _.RC:$src1, _.SubRegIdx),
5707               (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5708                              _.RC:$src2, _.SubRegIdx)),
5709              _.KRC)>;
5710
// Compare-with-zero using the same source twice, unmasked.
5711   def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
5712             (_.KVT (COPY_TO_REGCLASS
5713                      (!cast<Instruction>(Name # "Zrr")
5714                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5715                                       _.RC:$src, _.SubRegIdx),
5716                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5717                                       _.RC:$src, _.SubRegIdx)),
5718                    _.KRC))>;
5719
// Compare-with-zero, masked.
5720   def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
5721             (COPY_TO_REGCLASS
5722              (!cast<Instruction>(Name # "Zrrk")
5723               (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
5724               (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5725                              _.RC:$src, _.SubRegIdx),
5726               (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5727                              _.RC:$src, _.SubRegIdx)),
5728              _.KRC)>;
5729 }
5730
// D/Q test-to-mask across vector widths: 512-bit always (HasAVX512),
// 128/256-bit natively under VLX, and 512-bit-widened "_Alt" lowering
// patterns when VLX is absent.
5731 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5732                                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5733   let Predicates  = [HasAVX512] in
5734   defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>,
5735            avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5736
5737   let Predicates = [HasAVX512, HasVLX] in {
5738   defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>,
5739               avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5740   defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>,
5741               avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5742   }
5743   let Predicates = [HasAVX512, NoVLX] in {
5744   defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, NAME>;
5745   defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, NAME>;
5746   }
5747 }
5748
// Instantiates the dword ("d") and qword ("q", VEX.W=1) element sizes of
// the test-to-mask family.
5749 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5750                             X86SchedWriteWidths sched> {
5751   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
5752                                  avx512vl_i32_info>;
5753   defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
5754                                  avx512vl_i64_info>, VEX_W;
5755 }
5756
// Word/byte test-to-mask forms, gated on BWI. No broadcast (_mb) forms:
// w/b element sizes have no embedded-broadcast encoding. When VLX is
// absent, 128/256-bit variants are lowered via the 512-bit instruction.
5757 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5758                             PatFrag OpNode, X86SchedWriteWidths sched> {
5759   let Predicates = [HasBWI] in {
5760   defm WZ:    avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
5761                             v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5762   defm BZ:    avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
5763                             v64i8_info, NAME#"B">, EVEX_V512;
5764   }
5765   let Predicates = [HasVLX, HasBWI] in {
5766
5767   defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
5768                             v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5769   defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
5770                             v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5771   defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
5772                             v32i8x_info, NAME#"B">, EVEX_V256;
5773   defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
5774                             v16i8x_info, NAME#"B">, EVEX_V128;
5775   }
5776
// NOTE: predicate is [HasAVX512, NoVLX] (not HasBWI) — the lowering only
// references the Z-sized instructions, which themselves require BWI.
5777   let Predicates = [HasAVX512, NoVLX] in {
5778   defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
5779   defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
5780   defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
5781   defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">;
5782   }
5783 }
5784
5785 // These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
5786 // as commutable here because we already canonicalized all zeros vectors to the
5787 // RHS during lowering.
// Non-commutable setcc wrappers: zero vectors were already canonicalized
// to the RHS during lowering, so only that operand order needs matching.
5788 def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
5789                          (setcc node:$src1, node:$src2, SETEQ)>;
5790 def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
5791                          (setcc node:$src1, node:$src2, SETNE)>;
5792
// Convenience wrapper combining the w/b (BWI) and d/q element-size
// instantiations; they use distinct opcodes (opc_wb vs opc_dq).
5793 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5794                                    PatFrag OpNode, X86SchedWriteWidths sched> :
5795   avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
5796   avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;
5797
// VPTESTM sets a mask bit when (src1 & src2) != 0 (SETNE vs zero);
// VPTESTNM when the AND is zero (SETEQ). Same opcodes; the encodings are
// distinguished by the mandatory prefix (T8PD vs T8XS).
5798 defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
5799                                          SchedWriteVecLogic>, T8PD;
5800 defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
5801                                          SchedWriteVecLogic>, T8XS;
5802
5803 //===----------------------------------------------------------------------===//
5804 // AVX-512  Shift instructions
5805 //===----------------------------------------------------------------------===//
5806
// Shift/rotate by 8-bit immediate: register ("ri") and full-vector
// memory ("mi") forms. The immediate selects the Format (ModRM reg field
// encodes the sub-opcode, hence ImmFormR/ImmFormM parameters).
5807 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5808                             string OpcodeStr, SDNode OpNode,
5809                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5810   let ExeDomain = _.ExeDomain in {
5811   defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5812                    (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5813                       "$src2, $src1", "$src1, $src2",
5814                    (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
5815                    Sched<[sched]>;
5816   defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5817                    (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5818                        "$src2, $src1", "$src1, $src2",
5819                    (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
5820                           (i8 imm:$src2)))>,
5821                    Sched<[sched.Folded]>;
5822   }
5823 }
5824
// Broadcast-memory + immediate form ("mbi"): scalar at $src1 is splatted
// before the shift by imm (EVEX.b set).
5825 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5826                              string OpcodeStr, SDNode OpNode,
5827                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5828   let ExeDomain = _.ExeDomain in
5829   defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5830                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5831       "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5832      (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
5833      EVEX_B, Sched<[sched.Folded]>;
5834 }
5835
// Shift by a vector count: reg-reg and reg-mem forms. The count operand
// ($src2) is always a 128-bit value regardless of the destination width.
5836 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5837                             X86FoldableSchedWrite sched, ValueType SrcVT,
5838                             PatFrag bc_frag, X86VectorVTInfo _> {
5839    // src2 is always 128-bit
5840   let ExeDomain = _.ExeDomain in {
5841   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5842                    (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5843                       "$src2, $src1", "$src1, $src2",
5844                    (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5845                    AVX512BIBase, EVEX_4V, Sched<[sched]>;
// Memory count is loaded as v2i64 and bitconverted via bc_frag.
5846   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5847                    (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5848                        "$src2, $src1", "$src1, $src2",
5849                    (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>,
5850                    AVX512BIBase,
5851                    EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
5852   }
5853 }
5854
// Vector-count shifts across widths: 512-bit under `prd`, 128/256-bit
// additionally under VLX. CD8 tuple differs per width (VQ/VH/VF) because
// the memory operand is always 128-bit.
5855 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5856                               X86SchedWriteWidths sched, ValueType SrcVT,
5857                               PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo,
5858                               Predicate prd> {
5859   let Predicates = [prd] in
5860   defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5861                                bc_frag, VTInfo.info512>, EVEX_V512,
5862                                EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5863   let Predicates = [prd, HasVLX] in {
5864   defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5865                                bc_frag, VTInfo.info256>, EVEX_V256,
5866                                EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5867   defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5868                                bc_frag, VTInfo.info128>, EVEX_V128,
5869                                EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5870   }
5871 }
5872
// D/Q/W element sizes of the vector-count shifts. Only the Q form can be
// flagged notEVEX2VEXConvertible (used for VPSRAQ, which has no VEX form).
5873 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5874                               string OpcodeStr, SDNode OpNode,
5875                               X86SchedWriteWidths sched,
5876                               bit NotEVEX2VEXConvertibleQ = 0> {
5877   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5878                               bc_v4i32, avx512vl_i32_info, HasAVX512>;
5879   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5880   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5881                               bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
// NOTE(review): the W form passes bc_v2i64 for its v8i16 count load —
// confirm this bitconvert fragment is the intended one for the w element
// size (the D/Q forms use the matching bc_v4i32/bc_v2i64).
5882   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5883                               bc_v2i64, avx512vl_i16_info, HasBWI>;
5884 }
5885
// Immediate shifts across widths: reg/mem + broadcast-mem forms at
// 512-bit, and 128/256-bit under VLX.
5886 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5887                                   string OpcodeStr, SDNode OpNode,
5888                                   X86SchedWriteWidths sched,
5889                                   AVX512VLVectorVTInfo VTInfo> {
5890   let Predicates = [HasAVX512] in
5891   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5892                               sched.ZMM, VTInfo.info512>,
5893              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5894                                VTInfo.info512>, EVEX_V512;
5895   let Predicates = [HasAVX512, HasVLX] in {
5896   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5897                               sched.YMM, VTInfo.info256>,
5898              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5899                                VTInfo.info256>, EVEX_V256;
5900   defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5901                               sched.XMM, VTInfo.info128>,
5902              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5903                                VTInfo.info128>, EVEX_V128;
5904   }
5905 }
5906
// Word-element immediate shifts (BWI): no broadcast form (no embedded
// broadcast for 16-bit elements). VEX_WIG: VEX.W is ignored.
5907 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5908                               string OpcodeStr, SDNode OpNode,
5909                               X86SchedWriteWidths sched> {
5910   let Predicates = [HasBWI] in
5911   defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5912                                sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5913   let Predicates = [HasVLX, HasBWI] in {
5914   defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5915                                sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5916   defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5917                                sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
5918   }
5919 }
5920
// D/Q element sizes of the immediate shifts; Q can opt out of the
// EVEX->VEX compression pass (used for VPSRAQ-by-imm).
5921 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5922                                Format ImmFormR, Format ImmFormM,
5923                                string OpcodeStr, SDNode OpNode,
5924                                X86SchedWriteWidths sched,
5925                                bit NotEVEX2VEXConvertibleQ = 0> {
5926   defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5927                                  sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5928   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5929   defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5930                                  sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5931 }
5932
// Immediate shifts/rotates. The Format pair (MRM2r/MRM6r/MRM4r/...)
// selects the ModRM.reg sub-opcode within the shared 0x71/0x72/0x73
// opcode group; the W forms use the separate 0x71 opcode.
5933 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5934                                  SchedWriteVecShiftImm>,
5935              avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5936                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5937
5938 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5939                                  SchedWriteVecShiftImm>,
5940              avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5941                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5942
// VPSRA passes NotEVEX2VEXConvertibleQ=1: VPSRAQ-by-imm has no VEX form.
5943 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5944                                  SchedWriteVecShiftImm, 1>,
5945              avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5946                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5947
5948 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5949                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5950 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5951                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5952
// Shifts by a 128-bit vector count (xmm operand), d/q/w element sizes.
5953 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5954                                 SchedWriteVecShift>;
5955 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5956                                 SchedWriteVecShift, 1>;
5957 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5958                                 SchedWriteVecShift>;
5959
5960 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
5961 let Predicates = [HasAVX512, NoVLX] in {
// v4i64 variable arithmetic shift: widen src1 into a zmm, use the
// 512-bit VPSRAQ, extract the ymm result. The xmm count is used as-is.
5962   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5963             (EXTRACT_SUBREG (v8i64
5964               (VPSRAQZrr
5965                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5966                  VR128X:$src2)), sub_ymm)>;
5967
// Same for the v2i64 (xmm) case.
5968   def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5969             (EXTRACT_SUBREG (v8i64
5970               (VPSRAQZrr
5971                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5972                  VR128X:$src2)), sub_xmm)>;
5973
// Immediate arithmetic shifts via the 512-bit VPSRAQ-by-imm.
5974   def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
5975             (EXTRACT_SUBREG (v8i64
5976               (VPSRAQZri
5977                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5978                  imm:$src2)), sub_ymm)>;
5979
5980   def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
5981             (EXTRACT_SUBREG (v8i64
5982               (VPSRAQZri
5983                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5984                  imm:$src2)), sub_xmm)>;
5985 }
5986
5987 //===-------------------------------------------------------------------===//
5988 // Variable Bit Shifts
5989 //===-------------------------------------------------------------------===//
5990
// Per-element variable shift (count vector is the same width as the
// data, unlike avx512_shift_rrm's fixed 128-bit count): rr and rm forms.
5991 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5992                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5993   let ExeDomain = _.ExeDomain in {
5994   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5995                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5996                       "$src2, $src1", "$src1, $src2",
5997                    (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5998                    AVX5128IBase, EVEX_4V, Sched<[sched]>;
5999   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6000                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6001                        "$src2, $src1", "$src1, $src2",
6002                    (_.VT (OpNode _.RC:$src1,
6003                    (_.VT (bitconvert (_.LdFrag addr:$src2)))))>,
6004                    AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6005                    Sched<[sched.Folded, ReadAfterLd]>;
6006   }
6007 }
6008
// Broadcast-memory form of the variable shift: the scalar count at
// $src2 is splatted across the vector (EVEX.b set).
6009 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6010                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6011   let ExeDomain = _.ExeDomain in
6012   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6013                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6014                     "${src2}"##_.BroadcastStr##", $src1",
6015                     "$src1, ${src2}"##_.BroadcastStr,
6016                     (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
6017                                                 (_.ScalarLdFrag addr:$src2)))))>,
6018                     AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6019                     Sched<[sched.Folded, ReadAfterLd]>;
6020 }
6021
// Variable shifts across widths: 512-bit always, 128/256-bit under VLX;
// each width gets both the plain and the broadcast-memory forms.
6022 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6023                                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6024   let Predicates  = [HasAVX512] in
6025   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6026            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6027
6028   let Predicates = [HasAVX512, HasVLX] in {
6029   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6030               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6031   defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6032               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6033   }
6034 }
6035
// Dword ("d") and qword ("q", VEX.W=1) element sizes of the variable
// shifts (w is handled separately by avx512_var_shift_w under BWI).
6036 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6037                                   SDNode OpNode, X86SchedWriteWidths sched> {
6038   defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6039                                  avx512vl_i32_info>;
6040   defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6041                                  avx512vl_i64_info>, VEX_W;
6042 }
6043
6044 // Use 512bit version to implement 128/256 bit in case NoVLX.
// NoVLX fallback for variable shifts: widen both 128/256-bit operands
// into zmm with INSERT_SUBREG, run the Z-sized instruction (looked up by
// name), and extract the original-width subregister of the result.
6045 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6046                                      SDNode OpNode, list<Predicate> p> {
6047   let Predicates = p in {
6048   def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6049                                   (_.info256.VT _.info256.RC:$src2))),
6050             (EXTRACT_SUBREG
6051                 (!cast<Instruction>(OpcodeStr#"Zrr")
6052                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6053                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6054              sub_ymm)>;
6055
6056   def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6057                                   (_.info128.VT _.info128.RC:$src2))),
6058             (EXTRACT_SUBREG
6059                 (!cast<Instruction>(OpcodeStr#"Zrr")
6060                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6061                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6062              sub_xmm)>;
6063   }
6064 }
// Word-element variable shifts (BWI). No broadcast form: 16-bit elements
// have no embedded-broadcast encoding.
6065 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6066                               SDNode OpNode, X86SchedWriteWidths sched> {
6067   let Predicates = [HasBWI] in
6068   defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6069               EVEX_V512, VEX_W;
6070   let Predicates = [HasVLX, HasBWI] in {
6071
6072   defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6073               EVEX_V256, VEX_W;
6074   defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6075               EVEX_V128, VEX_W;
6076   }
6077 }
6078
// Variable-count shifts: d/q element sizes via avx512_var_shift_types,
// w forms (BWI) via avx512_var_shift_w with their own 0x10-0x12 opcodes.
6079 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>,
6080               avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>;
6081
6082 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>,
6083               avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>;
6084
6085 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>,
6086               avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>;
6087
// Variable rotates (no w forms exist for these).
6088 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6089 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6090
// NoVLX: lower 128/256-bit VPSRAVQ and the BWI w-form shifts through the
// corresponding 512-bit instruction.
6091 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
6092 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
6093 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
6094 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
6095
6096 // Special handing for handling VPSRAV intrinsics.
// Select VPSRAV instructions for the X86vsrav node: plain, masked
// (vselect with $src0 merge) and zero-masked variants, for both register
// and full-vector memory operands.
6097 multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
6098                                          list<Predicate> p> {
6099   let Predicates = p in {
6100     def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
6101               (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
6102                _.RC:$src2)>;
6103     def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
6104               (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
6105                _.RC:$src1, addr:$src2)>;
// Merge-masked: result elements where the mask is clear come from $src0.
6106     def : Pat<(_.VT (vselect _.KRCWM:$mask,
6107                      (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
6108               (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
6109                _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
6110     def : Pat<(_.VT (vselect _.KRCWM:$mask,
6111                      (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
6112                      _.RC:$src0)),
6113               (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
6114                _.KRC:$mask, _.RC:$src1, addr:$src2)>;
// Zero-masked: masked-off elements are zeroed.
6115     def : Pat<(_.VT (vselect _.KRCWM:$mask,
6116                      (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
6117               (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
6118                _.RC:$src1, _.RC:$src2)>;
6119     def : Pat<(_.VT (vselect _.KRCWM:$mask,
6120                      (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
6121                      _.ImmAllZerosV)),
6122               (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
6123                _.RC:$src1, addr:$src2)>;
6124   }
6125 }
6126
// Extends avx512_var_shift_int_lowering with broadcast-memory (rmb)
// patterns — only usable for d/q element sizes, which support embedded
// broadcast. Adds plain, merge-masked and zero-masked broadcast forms.
6127 multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
6128                                          list<Predicate> p> :
6129            avx512_var_shift_int_lowering<InstrStr, _, p> {
6130   let Predicates = p in {
6131     def : Pat<(_.VT (X86vsrav _.RC:$src1,
6132                      (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
6133               (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
6134                _.RC:$src1, addr:$src2)>;
6135     def : Pat<(_.VT (vselect _.KRCWM:$mask,
6136                      (X86vsrav _.RC:$src1,
6137                       (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
6138                      _.RC:$src0)),
6139               (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
6140                _.KRC:$mask, _.RC:$src1, addr:$src2)>;
6141     def : Pat<(_.VT (vselect _.KRCWM:$mask,
6142                      (X86vsrav _.RC:$src1,
6143                       (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
6144                      _.ImmAllZerosV)),
6145               (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
6146                _.RC:$src1, addr:$src2)>;
6147   }
6148 }
6149
// Instantiations for VPSRAV. The W variants use the non-broadcast multiclass
// (no _mb forms are defined for word elements); D/Q get broadcast patterns.
defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
6159
// Use the 512-bit VPROLV/VPROLI forms to implement v2i64/v4i64 and
// v4i32/v8i32 rotate-left when VLX is unavailable (NoVLX): widen the
// operands into a ZMM register via INSERT_SUBREG on IMPLICIT_DEF, run the
// 512-bit instruction, and extract the original-width subvector.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-left, 64-bit elements (VPROLVQ).
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  // Variable rotate-left, 32-bit elements (VPROLVD).
  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Rotate-left by immediate, 64-bit elements (VPROLQ).
  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       imm:$src2)), sub_ymm)>;

  // Rotate-left by immediate, 32-bit elements (VPROLD).
  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        imm:$src2)), sub_ymm)>;
}
6210
// Use the 512-bit VPRORV/VPRORI forms to implement v2i64/v4i64 and
// v4i32/v8i32 rotate-right when VLX is unavailable (NoVLX); same widening
// scheme as the VPROL patterns above.
let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-right, 64-bit elements (VPRORVQ).
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                       sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                       sub_ymm)>;

  // Variable rotate-right, 32-bit elements (VPRORVD).
  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
                        sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
                        sub_ymm)>;

  // Rotate-right by immediate, 64-bit elements (VPRORQ).
  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                       imm:$src2)), sub_ymm)>;

  // Rotate-right by immediate, 32-bit elements (VPRORD).
  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                        imm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                        imm:$src2)), sub_ymm)>;
}
6261
6262 //===-------------------------------------------------------------------===//
6263 // 1-src variable permutation VPERMW/D/Q
6264 //===-------------------------------------------------------------------===//
6265
6266 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6267                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6268   let Predicates  = [HasAVX512] in
6269   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6270            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6271
6272   let Predicates = [HasAVX512, HasVLX] in
6273   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6274               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6275 }
6276
// Immediate-control permutes (VPERMQ/VPERMPD with an imm8 selector):
// 512-bit and 256-bit register/memory/broadcast forms; no Z128 form is
// defined.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                 string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}
6291
// Byte/word variable permutes (VPERMB/VPERMW): all three vector widths,
// gated on the given feature predicate; only register/memory forms are
// defined (no _mb embedded-broadcast variants).
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                              Predicate prd, SDNode OpNode,
                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
              EVEX_V512 ;
  // 128/256-bit forms additionally require VLX.
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              EVEX_V256 ;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
              EVEX_V128 ;
  }
}
6305
// Byte/word variable permutes: VPERMW needs BWI, VPERMB needs VBMI.
defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i8_info>;

// Dword/qword and FP variable permutes (512/256-bit only).
defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

// Immediate-control forms of VPERMQ/VPERMPD (same defm prefixes; the
// instruction name suffixes from the multiclasses keep them distinct).
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6326
6327 //===----------------------------------------------------------------------===//
6328 // AVX-512 - VPERMIL
6329 //===----------------------------------------------------------------------===//
6330
6331 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6332                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
6333                              X86VectorVTInfo Ctrl> {
6334   defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6335                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6336                   "$src2, $src1", "$src1, $src2",
6337                   (_.VT (OpNode _.RC:$src1,
6338                                (Ctrl.VT Ctrl.RC:$src2)))>,
6339                   T8PD, EVEX_4V, Sched<[sched]>;
6340   defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6341                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6342                   "$src2, $src1", "$src1, $src2",
6343                   (_.VT (OpNode
6344                            _.RC:$src1,
6345                            (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
6346                   T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6347                   Sched<[sched.Folded, ReadAfterLd]>;
6348   defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6349                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6350                    "${src2}"##_.BroadcastStr##", $src1",
6351                    "$src1, ${src2}"##_.BroadcastStr,
6352                    (_.VT (OpNode
6353                             _.RC:$src1,
6354                             (Ctrl.VT (X86VBroadcast
6355                                        (Ctrl.ScalarLdFrag addr:$src2)))))>,
6356                    T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6357                    Sched<[sched.Folded, ReadAfterLd]>;
6358 }
6359
// Instantiates the variable-control VPERMIL forms for all three vector
// widths; the 128/256-bit versions additionally require VLX.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}
6375
// Combines the variable-control (OpcVar) and immediate-control (OpcImm)
// VPERMIL forms under one defm prefix.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;
6391
6392 //===----------------------------------------------------------------------===//
6393 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6394 //===----------------------------------------------------------------------===//
6395
6396 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6397                              X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6398                              EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6399 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6400                                   X86PShufhw, SchedWriteShuffle>,
6401                                   EVEX, AVX512XSIi8Base;
6402 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6403                                   X86PShuflw, SchedWriteShuffle>,
6404                                   EVEX, AVX512XDIi8Base;
6405
6406 //===----------------------------------------------------------------------===//
6407 // AVX-512 - VPSHUFB
6408 //===----------------------------------------------------------------------===//
6409
6410 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6411                                X86SchedWriteWidths sched> {
6412   let Predicates = [HasBWI] in
6413   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6414                               EVEX_V512;
6415
6416   let Predicates = [HasVLX, HasBWI] in {
6417   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6418                               EVEX_V256;
6419   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6420                               EVEX_V128;
6421   }
6422 }
6423
6424 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6425                                   SchedWriteVarShuffle>, VEX_WIG;
6426
6427 //===----------------------------------------------------------------------===//
6428 // Move Low to High and High to Low packed FP Instructions
6429 //===----------------------------------------------------------------------===//
6430
6431 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6432           (ins VR128X:$src1, VR128X:$src2),
6433           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6434           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6435           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6436 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6437           (ins VR128X:$src1, VR128X:$src2),
6438           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6439           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6440           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6441
6442 //===----------------------------------------------------------------------===//
6443 // VMOVHPS/PD VMOVLPS Instructions
6444 // All patterns was taken from SSS implementation.
6445 //===----------------------------------------------------------------------===//
6446
6447 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
6448                                   X86VectorVTInfo _> {
6449   let ExeDomain = _.ExeDomain in
6450   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6451                   (ins _.RC:$src1, f64mem:$src2),
6452                   !strconcat(OpcodeStr,
6453                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6454                   [(set _.RC:$dst,
6455                      (OpNode _.RC:$src1,
6456                        (_.VT (bitconvert
6457                          (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6458                   Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V;
6459 }
6460
6461 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
6462                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6463 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6464                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6465 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
6466                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6467 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
6468                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6469
// Extra selection patterns mapping shuffle/unpack DAG forms with folded
// 64-bit loads onto the EVEX VMOVH/VMOVL load instructions.
let Predicates = [HasAVX512] in {
  // VMOVHPS patterns
  def : Pat<(X86Movlhps VR128X:$src1,
               (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
          (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128X:$src1,
               (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
          (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                    (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
           (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
  // VMOVLPS patterns
  def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
          (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
          (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Movsd VR128X:$src1,
                           (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
          (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
6492
// Store forms: write one 64-bit half of the source register to memory.
// VMOVH* extract the high half (modeled via X86Unpckh of the source with
// itself); VMOVL* store element 0 directly.
let SchedRW = [WriteFStore] in {
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhps\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt
                                     (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
                                                (bc_v2f64 (v4f32 VR128X:$src))),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt
                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlps\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt (v2f64 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW
6522
// Extra selection patterns mapping equivalent DAG forms onto the EVEX
// VMOVH/VMOVL store instructions.
let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                           (iPTR 0))), addr:$dst),
           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
  // VMOVLPS patterns
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
                   addr:$src1),
            (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
  // VMOVLPD patterns
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
                   addr:$src1),
            (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
}
6538 //===----------------------------------------------------------------------===//
6539 // FMA - Fused Multiply Operations
6540 //
6541
6542 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6543                                X86FoldableSchedWrite sched,
6544                                X86VectorVTInfo _, string Suff> {
6545   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6546   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6547           (ins _.RC:$src2, _.RC:$src3),
6548           OpcodeStr, "$src3, $src2", "$src2, $src3",
6549           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6550           AVX512FMA3Base, Sched<[sched]>;
6551
6552   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6553           (ins _.RC:$src2, _.MemOp:$src3),
6554           OpcodeStr, "$src3, $src2", "$src2, $src3",
6555           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6556           AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;
6557
6558   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6559             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6560             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6561             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6562             (OpNode _.RC:$src2,
6563              _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
6564              AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
6565   }
6566 }
6567
// 213-order FMA with an explicit static rounding-mode operand ($rc);
// register-only, EVEX.b selects the rounding form (EVEX_RC).
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6578
// Instantiates the 213-order packed FMA across vector widths; only the
// 512-bit form gets the rounding-mode variant.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6598
// Expands a 213-order FMA opcode into its PS (f32) and PD (f64) flavors.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}

defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6614
6615
// Packed FMA in 231 operand order (OpNode gets src2, src3, src1 — the
// accumulator $src1 is tied to $dst and placed last).
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  // reg-reg
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
          vselect, 1>, AVX512FMA3Base, Sched<[sched]>;

  // reg-mem: $src3 folded from memory.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // reg-broadcast: $src3 is an embedded-broadcast scalar load (EVEX.b).
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
         "$src2, ${src3}"##_.BroadcastStr,
         (_.VT (OpNode _.RC:$src2,
                      (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                      _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
         Sched<[sched.Folded, ReadAfterLd]>;
  }
}
6642
// 231-order FMA with an explicit static rounding-mode operand ($rc);
// register-only, EVEX.b selects the rounding form (EVEX_RC).
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
          1, 1, vselect, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6654
// Instantiates the 231-order packed FMA across vector widths; only the
// 512-bit form gets the rounding-mode variant.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6674
// Expands a 231-order FMA opcode into its PS (f32) and PD (f64) flavors.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}

defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6690
// Packed FMA in 132 operand order. The register pattern uses the natural
// (src1, src3, src2) order; the memory/broadcast patterns are deliberately
// written in 312 order (load operand first) so they differ syntactically
// from the 213/231 patterns — see the inline comments from the original
// authors below.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  // reg-reg
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
         "$src2, ${src3}"##_.BroadcastStr,
         (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                       _.RC:$src1, _.RC:$src2)), 1, 0>,
         AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
6720
// Packed FMA, 132 form, with an explicit static rounding-control operand
// ($rc). Only valid on 512-bit registers (EVEX.b selects embedded rounding).
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
          1, 1, vselect, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6732
// Instantiate the 132-form across vector widths: 512-bit (plus the
// rounding-control variant) under HasAVX512, 256/128-bit under VLX.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6752
// Instantiate a packed FMA 132-form opcode for both element types: PS (f32)
// and PD (f64) share one opcode byte; PD adds VEX_W for 64-bit elements.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}
6761
// Packed FMA, 132 form: dst = +/- (src1 * src3) +/- src2. One defm per
// add/sub/negate combination; each expands to PS/PD x Z/Z256/Z128 variants.
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
6768
6769 // Scalar FMA
// Common skeleton for one scalar FMA instruction form. The caller supplies
// the match patterns as dags: RHS_VEC_* drive the intrinsic (_Int) variants
// (register, memory, and embedded-rounding), while RHS_r/RHS_m drive the
// isCodeGenOnly scalar-FRC variants. MaskOnlyReg suppresses the plain
// register pattern (used by the 132 form, which only pattern-matches some
// variants).
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
                               dag RHS_r, dag RHS_m, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  // Intrinsic register-register form.
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", RHS_VEC_r, 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;

  // Intrinsic register-memory form.
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", RHS_VEC_m, 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;

  // Intrinsic form with explicit rounding control (EVEX.b).
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb, 1, 1>,
         AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  // CodeGen-only variants operating on scalar FP register classes (FRC),
  // used to select plain scalar fma DAG nodes.
  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                     !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
    def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                    !strconcat(OpcodeStr,
                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}
6803
// Instantiate the 213, 231 and 132 forms of one scalar FMA opcode family for
// a single element type (SUFF is "SS" or "SD"). The OpNodes1/OpNodes3
// variants encode which source carries the pass-through element for the
// intrinsic patterns; OpNodeRnds* are the embedded-rounding node variants.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
                            SDNode OpNodeRnds1, SDNode OpNodes3,
                            SDNode OpNodeRnds3, X86VectorVTInfo _,
                            string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
                (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
                (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2,
                         _.ScalarIntMemCPat:$src3)),
                (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
                         (i32 imm:$rc))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
                (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
                              _.RC:$src1)),
                (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
                                  (i32 imm:$rc))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                          _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  // The register and rounding variants pass null_frag (MaskOnlyReg = 1): the
  // 132 form is only pattern-matched for its memory variants here.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (null_frag),
                (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
                              _.RC:$src2)),
                (null_frag),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                 _.FRC:$src1, _.FRC:$src2))), 1>;
  }
}
6847
// Top-level scalar FMA instantiation: expands one opcode family into SS
// (f32, CD8VT1 scale 32) and SD (f64, CD8VT1 scale 64, VEX_W) variants.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
                        SDNode OpNodeRnds1, SDNode OpNodes3,
                        SDNode OpNodeRnds3> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
                                 f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
                                 f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}
6863
// Scalar FMA instruction families. Opcodes are the 213/231/132 forms in that
// argument order.
defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1,
                            X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1,
                            X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1,
                            X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1,
                            X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>;
6872
// Selection patterns mapping scalar FMA expressed as a movss/movsd-style
// element insert (Move of a scalar_to_vector result into $src1) onto the
// intrinsic (_Int) instruction variants, including masked (X86selects with a
// pass-through element) and zero-masked (pass-through is ZeroFP) forms.
multiclass avx512_scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
                                      SDNode Move, X86VectorVTInfo _,
                                      PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // Unmasked: element 0 of $src1 participates as the second FMA operand,
    // so this maps to the 213 form.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
               (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;

    // Merge-masked 213 form: on a false mask bit the original element 0 of
    // $src1 is preserved.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
               (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;

    // Merge-masked 231 form: element 0 of $src1 is the addend (third FMA
    // operand).
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
               (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;

    // Zero-masked 213 form: on a false mask bit element 0 becomes +0.0.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
               (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
  }
}
6918
// Instantiate the scalar FMA selection patterns for every fma node flavor,
// once for SS (f32 elements) and once for SD (f64 elements).
defm : avx512_scalar_fma_patterns<X86Fmadd, "VFMADD", "SS", X86Movss,
                                  v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, "VFMSUB", "SS", X86Movss,
                                  v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SS", X86Movss,
                                  v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SS", X86Movss,
                                  v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<X86Fmadd, "VFMADD", "SD", X86Movsd,
                                  v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, "VFMSUB", "SD", X86Movsd,
                                  v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SD", X86Movsd,
                                  v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd,
                                  v2f64x_info, fp64imm0>;
6936
6937 //===----------------------------------------------------------------------===//
6938 // AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6939 //===----------------------------------------------------------------------===//
// IFMA52 register/memory/broadcast variants for one vector width. $src1 is
// the accumulator and is tied to $dst.
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNodes have the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
         AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // Broadcast-from-scalar-memory form (EVEX.b set).
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
                    (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                    _.RC:$src1)>,
            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
} // Constraints = "$src1 = $dst"
6969
// Instantiate IFMA52 across vector widths: 512-bit under HasIFMA, 256/128-bit
// additionally gated on VLX.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
6983
// VPMADD52{L,H}UQ: 52-bit unsigned multiply producing low/high 52 bits of the
// product, added into the 64-bit accumulator lanes.
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
6990
6991 //===----------------------------------------------------------------------===//
6992 // AVX-512  Scalar convert from sign integer to float/double
6993 //===----------------------------------------------------------------------===//
6994
// Scalar signed/unsigned int -> float/double conversion, register and memory
// source. The rr/rm variants (FRC destination) have no patterns here (they
// are matched by Pats below); the isCodeGenOnly *_Int variants carry the
// intrinsic-style patterns with current-rounding semantics.
multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched,
                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
                    X86MemOperand x86memop, PatFrag ld_frag, string asm> {
  let hasSideEffects = 0 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, x86memop:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  } // hasSideEffects = 0
  let isCodeGenOnly = 1 in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, SrcRC:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 FROUND_CURRENT)))]>,
                 EVEX_4V, Sched<[sched]>;

    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, x86memop:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                 (ld_frag addr:$src2),
                                 (i32 FROUND_CURRENT)))]>,
                  EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  }//isCodeGenOnly = 1
}
7029
// Register variant of scalar int -> FP conversion with explicit static
// rounding control ($rc), selected via EVEX.b.
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm> {
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
              !strconcat(asm,
                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
              [(set DstVT.RC:$dst,
                    (OpNode (DstVT.VT DstVT.RC:$src1),
                             SrcRC:$src2,
                             (i32 imm:$rc)))]>,
              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
7043
// Combine the plain and rounding-control variants of scalar int -> FP
// conversion under one name; VEX_LIG ignores the vector-length bits.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag, string asm> {
  defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm>, VEX_LIG;
}
7052
// Instantiations of the scalar int -> FP converts (signed then unsigned,
// 32- and 64-bit sources, SS and SD destinations), assembler aliases for the
// ambiguous AT&T memory forms, and selection patterns mapping generic
// [su]int_to_fp nodes onto the codegen-only rr/rm variants. Note: the signed
// 32-bit unsized-memory aliases default to the 32-bit form, matching the
// assembler's historical behavior.
let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR32,
                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
                                 XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR64,
                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
                                 XD, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR64,
                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;

// Match generic sint_to_fp of a loaded integer; $src1 is only a false
// dependency here, so feed it an IMPLICIT_DEF.
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned variants. VCVTUSI2SDZ (32-bit source to f64) is exact in the
// default rounding mode, so it uses the non-rounding multiclass.
defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR32,
                                  v4f32x_info, i32mem, loadi32,
                                  "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR64,
                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info,
                                  i32mem, loadi32, "cvtusi2sd{l}">,
                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR64,
                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
7126
7127 //===----------------------------------------------------------------------===//
7128 // AVX-512  Scalar convert from float/double to integer
7129 //===----------------------------------------------------------------------===//
7130
// Scalar FP -> int conversion (intrinsic forms) with current-rounding,
// explicit-rounding (rrb) and memory variants, plus AT&T-syntax size-suffix
// aliases. CodeGenOnly defaults to 1 so the memory form does not round-trip
// through the disassembler; avx512_cvt_s_int_round_aliases overrides it.
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr,
                                  bit CodeGenOnly = 1> {
  let Predicates = [HasAVX512] in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
                EVEX, VEX_LIG, Sched<[sched]>;
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                 Sched<[sched]>;
    let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode
                      (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
                      (i32 FROUND_CURRENT)))]>,
                EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;

    // AT&T aliases that accept the explicit size suffix (e.g. "{l}"/"{q}").
    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
            (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
    def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
            (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  } // Predicates = [HasAVX512]
}
7160
// Variant that exposes the memory form to the assembler (CodeGenOnly = 0)
// and adds the corresponding AT&T size-suffixed memory alias. Used by the
// unsigned converts, which have no SSE counterpart to clash with.
multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
                                          X86VectorVTInfo DstVT, SDNode OpNode,
                                          X86FoldableSchedWrite sched, string asm,
                                          string aliasStr> :
  avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, sched, asm, aliasStr, 0> {
  let Predicates = [HasAVX512] in {
    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
            (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                                            SrcVT.IntScalarMemOp:$src), 0, "att">;
  } // Predicates = [HasAVX512]
}
7172
// Convert float/double to signed/unsigned int 32/64.
// Signed converts use opcode 0x2D (same as SSE); unsigned converts are the
// AVX-512-only 0x79 encoding and use the _aliases multiclass so their memory
// forms are assemblable.
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
                                   X86cvts2si, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
                                   X86cvts2si, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
                                   X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
                                   X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
                                   X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
                                   X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
                                   X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
                                   X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7198
// The SSE version of these instructions are disabled for AVX512.
// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
// Each intrinsic gets a register pattern and a folded-load pattern.
let Predicates = [HasAVX512] in {
  def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
            (VCVTSS2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
            (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
  def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
            (VCVTSS2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
            (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
            (VCVTSD2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
            (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
            (VCVTSD2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
            (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
} // HasAVX512
7219
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
// Each pattern folds a (X86Movss/X86Movsd dst, (scalar_to_vector (cvt ...)))
// sequence into the *_Int form of the conversion, which merges the converted
// scalar into the low element of $dst directly.

let Predicates = [HasAVX512] in {
// Signed i64/i32 -> f32.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

// Signed i64/i32 -> f64.
def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

// Unsigned i64/i32 -> f32 (vcvtusi2ss, AVX-512 only).
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

// Unsigned i64/i32 -> f64 (vcvtusi2sd, AVX-512 only).
def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
7303
// Convert float/double to signed/unsigned int 32/64 with truncation
//
// Template arguments:
//   _SrcRC      - type info for the scalar FP source (e.g. f32x_info).
//   _DstRC      - type info for the integer destination (e.g. i32x_info).
//   OpNode      - DAG node for the plain (non-intrinsic) conversion.
//   OpNodeRnd   - DAG node taking an explicit rounding-mode immediate.
//   aliasStr    - AT&T size suffix ("{l}"/"{q}") used by the InstAliases.
//   CodeGenOnly - when 1 (the default) the rm_Int form is hidden from the
//                 assembler/disassembler; the unsigned wrapper passes 0.
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                            string aliasStr, bit CodeGenOnly = 1>{
let Predicates = [HasAVX512] in {
  // Plain forms on the scalar FP register class; codegen-only, since the
  // _Int forms below carry the assembler-visible encodings.
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, Sched<[sched.Folded, ReadAfterLd]>;
  }

  // Intrinsic form using the current rounding mode.
  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
           [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                 (i32 FROUND_CURRENT)))]>,
           EVEX, VEX_LIG, Sched<[sched]>;
  // Intrinsic form with {sae} (suppress all exceptions).
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
            [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                  (i32 FROUND_NO_EXC)))]>,
                                  EVEX,VEX_LIG , EVEX_B, Sched<[sched]>;
  // Intrinsic memory form; assembler visibility gated by CodeGenOnly.
  let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
              (ins _SrcRC.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNodeRnd
                                     (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
                                     (i32 FROUND_CURRENT)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;

  // AT&T-syntax aliases carrying an explicit size suffix.
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
} //HasAVX512
}
7346
// Unsigned variant: instantiates avx512_cvt_s_all with CodeGenOnly = 0, so
// the rm_Int memory form is assembler-visible, and adds the matching
// AT&T-suffixed alias for it.
multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
                                     X86VectorVTInfo _SrcRC,
                                     X86VectorVTInfo _DstRC, SDNode OpNode,
                                     SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                                     string aliasStr> :
  avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, sched,
                   aliasStr, 0> {
let Predicates = [HasAVX512] in {
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
}
}
7360
// Truncating scalar FP -> signed int conversions (vcvttss2si/vcvttsd2si),
// 32- and 64-bit destinations.
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{l}">,
                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{q}">,
                        VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{l}">,
                        XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{q}">,
                        VEX_W, XD, EVEX_CD8<64, CD8VT1>;

// Truncating scalar FP -> unsigned int conversions (vcvttss2usi/vcvttsd2usi).
defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{l}">,
                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{q}">,
                        XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{l}">,
                        XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{q}">,
                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7386
// Map the legacy SSE truncating-conversion intrinsics onto the AVX-512
// instructions. The input side matches through the sse_load_f32/f64 complex
// patterns; the result side names the corresponding ssmem/sdmem operand.
let Predicates = [HasAVX512] in {
  def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
            (VCVTTSS2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
            (VCVTTSS2SIZrm_Int ssmem:$src)>;
  def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
            (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
            (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
            (VCVTTSD2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
            (VCVTTSD2SIZrm_Int sdmem:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
            (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
            (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
} // HasAVX512
7405
//===----------------------------------------------------------------------===//
// AVX-512  Convert from float to double and back
//===----------------------------------------------------------------------===//
7409
// Scalar FP <-> FP conversion (e.g. vcvtsd2ss / vcvtss2sd).
//   _    - type info for the destination, _Src - for the source.
//   OpNode takes (dst-passthrough, src, rounding-imm); the masked _Int forms
//   merge the result into the low element of $src1.
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  // Intrinsic register form, current rounding mode.
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2),
                                       (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  // Intrinsic memory form.
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.VT _Src.ScalarIntMemCPat:$src2),
                                  (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, ReadAfterLd]>;

  // Pattern-less forms on the scalar FP register class, used by isel via
  // explicit Pat<> entries elsewhere; codegen-only.
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
7441
// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                    X86FoldableSchedWrite sched> {
  // EVEX_B selects the {sae} broadcast/rounding encoding in register form.
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2),
                                         (i32 FROUND_NO_EXC)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}
7454
// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  // $rc is the static rounding-mode immediate (EVEX_RC encoding).
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
// vcvtsd2ss: scalar f64 -> f32, with static rounding-control variant.
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                  SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                                  X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                               OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}
7476
// vcvtss2sd: scalar f32 -> f64; widening is exact, so only a {sae} variant
// (no rounding control) is provided.
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
// Instantiate the scalar f64<->f32 conversions.
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
                                         X86froundRnd, WriteCvtSD2SS, f64x_info,
                                         f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
                                          X86fpextRnd, WriteCvtSS2SD, f32x_info,
                                          f64x_info>;
7492
// Select scalar fpextend/fpround through the codegen-only FRC forms; the
// unused pass-through operand is an IMPLICIT_DEF.
def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
// Fold the f32 load only when optimizing for size.
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512, OptForSize]>;

// When optimizing for speed, load with vmovss first and convert in a
// register (presumably to avoid the memory-form's merge dependency - the
// OptForSize/OptForSpeed split above encodes that trade-off).
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
          Requires<[HasAVX512, OptForSpeed]>;

def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
           Requires<[HasAVX512]>;

// Fold a movss/movsd-of-converted-low-element into the _Int forms, which
// merge into the destination's low element directly.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
7525
7526 //===----------------------------------------------------------------------===//
7527 // AVX-512  Vector convert from signed/unsigned integer to float/double
7528 //          and from float/double to signed/unsigned integer
7529 //===----------------------------------------------------------------------===//
7530
// Generic packed conversion: register, memory, and broadcast-memory forms.
//   _        - destination type info, _Src - source type info.
//   Broadcast - broadcast suffix string (defaults to the source's).
//   Alias    - extra mnemonic suffix for the memory form (e.g. "{x}"/"{y}"
//              where 128/256-bit memory forms would otherwise be ambiguous).
//   MemOp    - memory operand, overridable for partial-width sources.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp> {

  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
                         (_.VT (OpNode (_Src.VT
                             (bitconvert (_Src.LdFrag addr:$src)))))>,
                         EVEX, Sched<[sched.Folded]>;

  // EVEX_B form: broadcast a single scalar element from memory.
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                  (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
                            ))>, EVEX, EVEX_B,
                         Sched<[sched.Folded]>;
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeRnd,
                              X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
                               (i32 FROUND_NO_EXC)))>,
                        EVEX, EVEX_B, Sched<[sched]>;
}
7567
// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86VectorVTInfo _Src, SDNode OpNodeRnd,
                         X86FoldableSchedWrite sched> {
  // $rc is the static rounding-mode immediate (EVEX_RC encoding).
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
7578
// Extend Float to Double
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
                            fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // 128-bit form only reads the low 2 of 4 f32 elements, hence the
    // explicit "{1to2}" broadcast string and f64mem memory operand.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                               X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                               sched.YMM>, EVEX_V256;
  }
}
7595
// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // 128- and 256-bit forms share the v4f32 destination, so the memory
    // forms need "{x}"/"{y}" mnemonic suffixes to disambiguate.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;

    // Also accept the suffixed spelling for the unambiguous register forms
    // and for Intel-syntax memory forms.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
  }
}
7619
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                  PS, EVEX_CD8<32, CD8VH>;

// Fold an extending v8f32 load directly into vcvtps2pd.
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;

let Predicates = [HasVLX] in {
  let AddedComplexity = 15 in {
    // Drop a zero-extending move of the vcvtpd2ps result; the 128-bit
    // instruction's output already satisfies the X86vzmovl pattern here.
    def : Pat<(X86vzmovl (v2f64 (bitconvert
                                 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
              (VCVTPD2PSZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl (v2f64 (bitconvert
                                 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
              (VCVTPD2PSZ128rm addr:$src)>;
  }
  // Fold extending loads into the VLX-sized vcvtps2pd forms.
  def : Pat<(v2f64 (extloadv2f32 addr:$src)),
              (VCVTPS2PDZ128rm addr:$src)>;
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
              (VCVTPS2PDZ256rm addr:$src)>;
}
7642
// Convert Signed/Unsigned Doubleword to Double
// OpNode128 is a separate node for the 128-bit form, which widens 2 of the
// 4 source elements (note the "{1to2}" broadcast string and i64mem operand).
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7658
// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // 512-bit form additionally gets a static rounding-control variant.
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7675
// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // Truncating converts get a {sae} variant rather than rounding control.
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7692
// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // Non-truncating converts get a static rounding-control variant.
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7709
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;

    // Accept suffixed spellings for register forms and Intel-syntax memory forms.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
  }
}
7739
// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
                               sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;

    // Accept suffixed spellings for register forms and Intel-syntax memory forms.
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
  }
}
7769
// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // Quadword converts require AVX512DQ.
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7786
// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // Quadword converts require AVX512DQ; truncating forms use {sae}.
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7803
// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // Quadword converts require AVX512DQ.
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // No VEX equivalent exists for these, so keep them EVEX-encoded.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}
7820
// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // 512-bit form (AVX512DQ): v8f32 source widens to v8i64; includes a
  // rounding-control ({rc}) variant driven by OpNodeRnd.
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7839
// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  // 512-bit form (AVX512DQ): truncating variant, so the extra form is
  // {sae} (suppress-all-exceptions) rather than rounding control.
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7857
// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // OpNode128 is a separate SDNode for the 128-bit form, whose v2i64 source
  // only fills the low half of the v4f32 destination.
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
                               sched.XMM, "{1to2}", "{x}">, EVEX_V128,
                               NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;

    // Accept the suffixless register forms in AT&T syntax and the suffixed
    // memory forms in Intel syntax (aliases, not new encodings).
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
  }
}
7890
// Dword <-> float/double conversions (AVX512F, with VL forms).
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
                                X86cvttp2siRnd, SchedWriteCvtPS2DQ>,
                                XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPD2DQ>,
                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
                                  X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
                                 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PS, EVEX_CD8<64, CD8VF>;

// Quadword <-> float/double conversions (AVX512DQ, with VL forms).
defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
                            X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
                            X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
                            EVEX_CD8<64, CD8VF>;
7985
// Select the 512-bit truncating fp->int instructions for plain ISD
// fp_to_sint/fp_to_uint on v16f32 and v8f64 sources.
let Predicates = [HasAVX512] in  {
  def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))),
            (VCVTTPS2DQZrr VR512:$src)>;
  def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))),
            (VCVTTPS2DQZrm addr:$src)>;

  def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))),
            (VCVTTPS2UDQZrr VR512:$src)>;
  def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))),
            (VCVTTPS2UDQZrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))),
            (VCVTTPD2DQZrr VR512:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))),
            (VCVTTPD2DQZrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))),
            (VCVTTPD2UDQZrr VR512:$src)>;
  def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))),
            (VCVTTPD2UDQZrm addr:$src)>;
}
8007
// 128/256-bit truncating fp->int selection when AVX512VL is available.
let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))),
            (VCVTTPS2DQZ128rr VR128X:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2DQZ128rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))),
            (VCVTTPS2UDQZ128rr VR128X:$src)>;
  def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))),
            (VCVTTPS2UDQZ128rm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))),
            (VCVTTPS2DQZ256rr VR256X:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2DQZ256rm addr:$src)>;

  def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))),
            (VCVTTPS2UDQZ256rr VR256X:$src)>;
  def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))),
            (VCVTTPS2UDQZ256rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))),
            (VCVTTPD2DQZ256rr VR256X:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2DQZ256rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))),
            (VCVTTPD2UDQZ256rr VR256X:$src)>;
  def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
            (VCVTTPD2UDQZ256rm addr:$src)>;
}
8039
// 512-bit truncating fp->quadword selection (AVX512DQ).
let Predicates = [HasDQI] in {
  def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))),
            (VCVTTPS2QQZrr VR256X:$src)>;
  def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2QQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))),
            (VCVTTPS2UQQZrr VR256X:$src)>;
  def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))),
            (VCVTTPS2UQQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))),
            (VCVTTPD2QQZrr VR512:$src)>;
  def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))),
            (VCVTTPD2QQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))),
            (VCVTTPD2UQQZrr VR512:$src)>;
  def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))),
            (VCVTTPD2UQQZrm addr:$src)>;
}
8061
// 128/256-bit truncating fp->quadword selection (AVX512DQ + AVX512VL).
let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
            (VCVTTPS2QQZ256rr VR128X:$src)>;
  def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2QQZ256rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))),
            (VCVTTPS2UQQZ256rr VR128X:$src)>;
  def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))),
            (VCVTTPS2UQQZ256rm addr:$src)>;

  def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))),
            (VCVTTPD2QQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))),
            (VCVTTPD2QQZ128rm addr:$src)>;

  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
            (VCVTTPD2UQQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
            (VCVTTPD2UQQZ128rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
            (VCVTTPD2QQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2QQZ256rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
            (VCVTTPD2UQQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
            (VCVTTPD2UQQZ256rm addr:$src)>;
}
8093
// Without AVX512VL, unsigned conversions at 128/256 bits are emulated by
// widening to the 512-bit instruction (INSERT_SUBREG into IMPLICIT_DEF)
// and extracting the relevant subregister of the result.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR128X:$src1, sub_xmm)))), sub_xmm)>;
}
8130
// 128-bit pd->dq conversions only define the low half of the result; the
// instruction zeroes the upper half, so a following X86vzmovl is redundant
// and folds into the conversion itself (AddedComplexity favors these).
let Predicates = [HasAVX512, HasVLX] in {
  let AddedComplexity = 15 in {
    def : Pat<(X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
              (VCVTPD2DQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
              (VCVTPD2DQZ128rm addr:$src)>;
    def : Pat<(X86vzmovl (v2i64 (bitconvert
                                 (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
              (VCVTPD2UDQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
              (VCVTTPD2DQZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
              (VCVTTPD2DQZ128rm addr:$src)>;
    def : Pat<(X86vzmovl (v2i64 (bitconvert
                                 (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
              (VCVTTPD2UDQZ128rr VR128X:$src)>;
  }

  // Fold a scalar i64 load feeding the low two dword lanes of a dq->pd
  // conversion directly into the memory form.
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
}
8163
// Fold 512-bit fp rounding/extension of a loaded vector into the memory
// forms of VCVTPD2PS / VCVTPS2PD.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}
8170
// As above for qq->ps: the 128-bit conversion writes only the low half and
// zeroes the rest, so the trailing X86vzmovl folds away.
let Predicates = [HasDQI, HasVLX] in {
  let AddedComplexity = 15 in {
    def : Pat<(X86vzmovl (v2f64 (bitconvert
                                (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
              (VCVTQQ2PSZ128rr VR128X:$src)>;
    def : Pat<(X86vzmovl (v2f64 (bitconvert
                                (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
              (VCVTUQQ2PSZ128rr VR128X:$src)>;
  }
}
8181
// AVX512DQ without AVX512VL: emulate the 128/256-bit quadword conversions by
// widening to the 512-bit instruction and extracting the result subregister.
let Predicates = [HasDQI, NoVLX] in {
def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
8243
8244 //===----------------------------------------------------------------------===//
8245 // Half precision conversion instructions
8246 //===----------------------------------------------------------------------===//
8247
// Half->single conversion: masked register and memory forms.
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
                            T8PD, Sched<[sched]>;
  // Memory form: the i16 source vector is loaded via ld_frag and
  // bitconverted to the source VT before conversion.
  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT
                                          (bitconvert
                                           (ld_frag addr:$src))))>,
                            T8PD, Sched<[sched.Folded]>;
}
8262
// {sae} (suppress-all-exceptions) register form of vcvtph2ps; the rounding
// node is passed FROUND_NO_EXC.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psRnd (_src.VT _src.RC:$src),
                                             (i32 FROUND_NO_EXC))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}
8272
// 512-bit vcvtph2ps: plain + {sae} forms.
let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
                                    WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8278
// 128/256-bit vcvtph2ps forms plus load-folding patterns for the 128-bit
// variant, which reads only a scalar i64's worth of halves.
let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                       loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                       loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}
8296
// Single->half conversion with an i8 rounding-control immediate; register
// form plus store-only memory forms (unmasked and merge-masked).
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
  defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                   (X86cvtps2ph (_src.VT _src.RC:$src1),
                                (i32 imm:$src2)), 0, 0>,
                   AVX512AIi8Base, Sched<[RR]>;
  // Store forms have no ISel pattern; selection is done via the explicit
  // Pat<> entries for the instantiated widths below.
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
}
8316
// Assembler-only {sae} register form of vcvtps2ph (no selection pattern).
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                   (outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}
8326
// vcvtps2ph instantiations, followed by patterns that select the store
// forms (the 128-bit result occupies only 64 bits, hence the extractelt).
// NOTE(review): the inner `let Predicates = [HasVLX]` replaces (does not
// append to) the outer [HasAVX512] list for the 128/256-bit defms.
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }

  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}
8354
8355 // Patterns for matching conversions from float to half-float and vice versa.
// Scalar f32<->f16 conversion via the packed 128-bit instructions; the
// immediate 4 selects MXCSR-based rounding (see comment below).
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by it.
  // It's encoded as 0b100.
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (VCVTPS2PHZ128rr
              (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
              (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), FR32X)) >;

  // Round-trip f32 -> f16 -> f32 collapses into a convert/convert pair.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
              (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
}
8374
//  Unordered/Ordered scalar fp compare with SAE and set EFLAGS
// Assembler-only {sae} register form of the scalar compare-and-set-EFLAGS
// instructions (no selection pattern, no modeled side effects).
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                            string OpcodeStr, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}
8383
// {sae} variants of VUCOMISS/VUCOMISD/VCOMISS/VCOMISD; all write EFLAGS.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
8394
// EVEX-encoded scalar compares: FR32X/FR64X register forms (via the shared
// SSE1/2 multiclasses) and codegen-only VR128X intrinsic forms.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                  "ucomisd", WriteFCom>, PD, EVEX,
                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  // COMIS forms are defined without patterns (undef node, empty Pattern).
  let Pattern = []<dag> in {
    defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                   "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                   "comisd", WriteFCom>, PD, EVEX,
                                    VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                          sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                          EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                          sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                          sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                          EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                          sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
8426
8427 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
8427 /// Scalar 14-bit-precision approximation instructions. Emits a masked
8427 /// register-register form (rr) and a masked register-memory form (rm);
8427 /// both take $src1 (pass-through upper elements) and $src2 (operand).
8428 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8429                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8430   let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
8431   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8432                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8433                            "$src2, $src1", "$src1, $src2",
8434                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8435                            EVEX_4V, Sched<[sched]>;
8436   // Memory form folds the scalar load via ScalarIntMemCPat.
8436   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8437                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8438                          "$src2, $src1", "$src1, $src2",
8439                          (OpNode (_.VT _.RC:$src1),
8440                           _.ScalarIntMemCPat:$src2)>, EVEX_4V,
8441                           Sched<[sched.Folded, ReadAfterLd]>;
8442 }
8443 }
8444
8445 // Scalar VRCP14/VRSQRT14 instantiations (f32 and f64; VEX_W selects 64-bit).
8445 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8446                                f32x_info>, EVEX_CD8<32, CD8VT1>,
8447                                T8PD;
8448 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8449                                f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8450                                T8PD;
8451 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8452                                  SchedWriteFRsqrt.Scl, f32x_info>,
8453                                  EVEX_CD8<32, CD8VT1>, T8PD;
8454 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8455                                  SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8456                                  EVEX_CD8<64, CD8VT1>, T8PD;
8457
8458 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
8458 /// Packed 14-bit-precision approximations for one vector width. Emits
8458 /// register (r), full-memory (m), and broadcast-memory (mb) forms.
8459 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8460                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8461   let ExeDomain = _.ExeDomain in {
8462   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8463                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8464                          (_.FloatVT (OpNode _.RC:$src))>, EVEX, T8PD,
8465                          Sched<[sched]>;
8466   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8467                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8468                          (OpNode (_.FloatVT
8469                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8470                          Sched<[sched.Folded, ReadAfterLd]>;
8471   // EVEX.b broadcast form: operand is a single scalar splat from memory.
8471   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8472                           (ins _.ScalarMemOp:$src), OpcodeStr,
8473                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8474                           (OpNode (_.FloatVT
8475                             (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8476                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
8477   }
8478 }
8479
8480 // Instantiates the packed forms for all vector widths: ZMM unconditionally,
8480 // XMM/YMM only when AVX512VL is available.
8481 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8482                                 X86SchedWriteWidths sched> {
8483   defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8484                            v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8485   defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8486                            v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8487
8488   // Define only if AVX512VL feature is present.
8489   let Predicates = [HasVLX] in {
8490     defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8491                                 OpNode, sched.XMM, v4f32x_info>,
8492                                EVEX_V128, EVEX_CD8<32, CD8VF>;
8493     defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8494                                 OpNode, sched.YMM, v8f32x_info>,
8495                                EVEX_V256, EVEX_CD8<32, CD8VF>;
8496     defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8497                                 OpNode, sched.XMM, v2f64x_info>,
8498                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8499     defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8500                                 OpNode, sched.YMM, v4f64x_info>,
8501                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8502   }
8503 }
8504
8505 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8506 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8506
8507 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
8507 /// Scalar 28-bit-precision (ERI) and getexp-style ops. Each op node takes a
8507 /// trailing i32 rounding-control operand: FROUND_CURRENT for the normal
8507 /// forms, FROUND_NO_EXC for the {sae} (EVEX.b) form.
8508 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8509                          SDNode OpNode, X86FoldableSchedWrite sched> {
8510   let ExeDomain = _.ExeDomain in {
8511   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8512                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8513                            "$src2, $src1", "$src1, $src2",
8514                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8515                            (i32 FROUND_CURRENT))>,
8516                            Sched<[sched]>;
8517
8518   // Suppress-all-exceptions variant, selected by EVEX.b in register form.
8518   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8519                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8520                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8521                             (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8522                             (i32 FROUND_NO_EXC))>, EVEX_B,
8523                             Sched<[sched]>;
8524
8525   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8526                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8527                          "$src2, $src1", "$src1, $src2",
8528                          (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
8529                          (i32 FROUND_CURRENT))>,
8530                          Sched<[sched.Folded, ReadAfterLd]>;
8531   }
8532 }
8533
8534 // Wraps avx512_fp28_s to produce both the ss (f32) and sd (f64) variants.
8535 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8536                         X86FoldableSchedWrite sched> {
8537   defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>,
8538                EVEX_CD8<32, CD8VT1>;
8539   defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>,
8540                EVEX_CD8<64, CD8VT1>, VEX_W;
8541 }
8542
8543 // VRCP28/VRSQRT28 require the ERI feature; VGETEXP only needs AVX512F.
8544 let Predicates = [HasERI] in {
8545   defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
8546                               T8PD, EVEX_4V;
8547   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
8548                                SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8549 }
8550
8551 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
8552                               SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8551 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
8552
8553 // Packed 28-bit-precision (ERI) / getexp ops for one vector width.
8553 // Register, full-memory, and broadcast forms; all pass FROUND_CURRENT.
8553 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8554                          SDNode OpNode, X86FoldableSchedWrite sched> {
8555   let ExeDomain = _.ExeDomain in {
8556   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8557                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8558                          (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>,
8559                          Sched<[sched]>;
8560
8561   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8562                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8563                          (OpNode (_.FloatVT
8564                              (bitconvert (_.LdFrag addr:$src))),
8565                           (i32 FROUND_CURRENT))>,
8566                           Sched<[sched.Folded, ReadAfterLd]>;
8567
8568   // EVEX.b broadcast-from-memory form.
8568   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8569                          (ins _.ScalarMemOp:$src), OpcodeStr,
8570                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8571                          (OpNode (_.FloatVT
8572                                   (X86VBroadcast (_.ScalarLdFrag addr:$src))),
8573                                  (i32 FROUND_CURRENT))>, EVEX_B,
8574                          Sched<[sched.Folded, ReadAfterLd]>;
8575   }
8576 }
8577 // {sae} register form: EVEX.b in a register context means
8577 // suppress-all-exceptions, encoded here via FROUND_NO_EXC.
8577 multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8578                          SDNode OpNode, X86FoldableSchedWrite sched> {
8579   let ExeDomain = _.ExeDomain in
8580   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8581                         (ins _.RC:$src), OpcodeStr,
8582                         "{sae}, $src", "$src, {sae}",
8583                         (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>,
8584                         EVEX_B, Sched<[sched]>;
8585 }
8586
8587 // ZMM-only instantiation (ps + pd, normal + {sae} forms).
8587 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8588                        X86SchedWriteWidths sched> {
8589    defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8590               avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8591               T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8592    defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8593               avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8594               T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8595 }
8596
8597 // XMM/YMM instantiation, gated on AVX512VL (used below for VGETEXP).
8597 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8598                                   SDNode OpNode, X86SchedWriteWidths sched> {
8599   // Define only if AVX512VL feature is present.
8600   let Predicates = [HasVLX] in {
8601     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
8602                                      EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8603     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
8604                                      EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8605     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
8606                                      EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8607     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
8608                                      EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8609   }
8610 }
8611
8612 // Packed ERI instructions (ZMM only; the ERI extension has no VL forms here).
8612 let Predicates = [HasERI] in {
8613  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
8614  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
8615  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
8616 }
8617 // VGETEXP gets both the ZMM forms and the VL (XMM/YMM) forms.
8617 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
8618                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
8619                                           SchedWriteFRnd>, EVEX;
8620
8621 // Packed VSQRT with explicit static rounding control ($rc operand,
8621 // EVEX.b + EVEX_RC), matched via the X86fsqrtRnd node.
8621 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8622                                     X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8623   let ExeDomain = _.ExeDomain in
8624   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8625                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8626                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>,
8627                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8628 }
8629
8630 // Packed VSQRT (current rounding mode), matched from the generic fsqrt
8630 // node: register, full-memory, and broadcast-memory forms.
8630 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8631                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8632   let ExeDomain = _.ExeDomain in {
8633   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8634                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8635                          (_.FloatVT (fsqrt _.RC:$src))>, EVEX,
8636                          Sched<[sched]>;
8637   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8638                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8639                          (fsqrt (_.FloatVT
8640                            (bitconvert (_.LdFrag addr:$src))))>, EVEX,
8641                            Sched<[sched.Folded, ReadAfterLd]>;
8642   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8643                           (ins _.ScalarMemOp:$src), OpcodeStr,
8644                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8645                           (fsqrt (_.FloatVT
8646                             (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
8647                           EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
8648   }
8649 }
8650
8651 // Instantiates packed VSQRT for every width: ZMM unconditionally,
8651 // XMM/YMM under AVX512VL. PS/PD pick their own scheduling classes.
8651 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8652                                   X86SchedWriteSizes sched> {
8653   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8654                                 sched.PS.ZMM, v16f32_info>,
8655                                 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8656   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8657                                 sched.PD.ZMM, v8f64_info>,
8658                                 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8659   // Define only if AVX512VL feature is present.
8660   let Predicates = [HasVLX] in {
8661     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8662                                      sched.PS.XMM, v4f32x_info>,
8663                                      EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8664     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8665                                      sched.PS.YMM, v8f32x_info>,
8666                                      EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8667     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8668                                      sched.PD.XMM, v2f64x_info>,
8669                                      EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8670     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8671                                      sched.PD.YMM, v4f64x_info>,
8672                                      EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8673   }
8674 }
8675
8676 // Static-rounding packed VSQRT exists only at ZMM width.
8676 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8677                                         X86SchedWriteSizes sched> {
8678   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8679                                       sched.PS.ZMM, v16f32_info>,
8680                                       EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8681   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8682                                       sched.PD.ZMM, v8f64_info>,
8683                                       EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8684 }
8685
8686 // Scalar VSQRT (ss/sd). Defines the intrinsic (_Int) forms with masking
8686 // and a static-rounding (rb_Int) form, plus codegen-only FRC-register
8686 // forms that the plain-fsqrt patterns below select.
8687 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8688                               X86VectorVTInfo _, string Name> {
8689   let ExeDomain = _.ExeDomain in {
8690     defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8691                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8692                          "$src2, $src1", "$src1, $src2",
8693                          (X86fsqrtRnds (_.VT _.RC:$src1),
8694                                     (_.VT _.RC:$src2),
8695                                     (i32 FROUND_CURRENT))>,
8696                          Sched<[sched]>;
8697     defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8698                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8699                          "$src2, $src1", "$src1, $src2",
8700                          (X86fsqrtRnds (_.VT _.RC:$src1),
8701                                     _.ScalarIntMemCPat:$src2,
8702                                     (i32 FROUND_CURRENT))>,
8703                          Sched<[sched.Folded, ReadAfterLd]>;
8704     // Explicit rounding-control form ($rc), EVEX.b + EVEX_RC.
8704     defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8705                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8706                          "$rc, $src2, $src1", "$src1, $src2, $rc",
8707                          (X86fsqrtRnds (_.VT _.RC:$src1),
8708                                      (_.VT _.RC:$src2),
8709                                      (i32 imm:$rc))>,
8710                          EVEX_B, EVEX_RC, Sched<[sched]>;
8711
8712     // Pattern-less FRC forms; selected only through the Pats below.
8712     let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
8713       def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8714                 (ins _.FRC:$src1, _.FRC:$src2),
8715                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8716                 Sched<[sched]>;
8717       let mayLoad = 1 in
8718         def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8719                   (ins _.FRC:$src1, _.ScalarMemOp:$src2),
8720                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8721                   Sched<[sched.Folded, ReadAfterLd]>;
8722     }
8723   }
8724
8725   // Plain scalar fsqrt: pass-through operand is IMPLICIT_DEF.
8725   let Predicates = [HasAVX512] in {
8726     def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
8727               (!cast<Instruction>(Name#Zr)
8728                   (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
8729   }
8730
8731   // Folding the load is only done when optimizing for size.
8731   let Predicates = [HasAVX512, OptForSize] in {
8732     def : Pat<(_.EltVT (fsqrt (load addr:$src))),
8733               (!cast<Instruction>(Name#Zm)
8734                   (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
8735   }
8736 }
8737
8738 // ss (XS) and sd (XD, VEX_W) instantiations of the scalar sqrt.
8738 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
8739                                   X86SchedWriteSizes sched> {
8740   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
8741                         EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
8742   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
8743                         EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
8744 }
8745
8746 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
8747              avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
8748
8749 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
8749
8750 // Scalar VRNDSCALE (ss/sd). Intrinsic forms (r_Int/rb_Int/m_Int) carry an
8750 // i32u8imm rounding-mode immediate; codegen-only FRC forms back the
8750 // ffloor/fceil/ftrunc/frint/fnearbyint patterns below (immediates 0x9,
8750 // 0xa, 0xb, 0x4, 0xc respectively).
8751 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
8752                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8753   let ExeDomain = _.ExeDomain in {
8754   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8755                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8756                            "$src3, $src2, $src1", "$src1, $src2, $src3",
8757                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8758                            (i32 imm:$src3)))>,
8759                            Sched<[sched]>;
8760
8761   // {sae} form (EVEX.b): rounding-node variant with FROUND_NO_EXC.
8761   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8762                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8763                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
8764                          (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8765                          (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B,
8766                          Sched<[sched]>;
8767
8768   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8769                          (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
8770                          OpcodeStr,
8771                          "$src3, $src2, $src1", "$src1, $src2, $src3",
8772                          (_.VT (X86RndScales _.RC:$src1,
8773                                 _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
8774                          Sched<[sched.Folded, ReadAfterLd]>;
8775
8776   // Pattern-less FRC-register forms used only by the Pats below.
8776   let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
8777     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8778                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
8779                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8780                []>, Sched<[sched]>;
8781
8782     let mayLoad = 1 in
8783       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8784                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8785                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8786                  []>, Sched<[sched.Folded, ReadAfterLd]>;
8787   }
8788   }
8789
8790   // Map the generic rounding nodes onto VRNDSCALE immediates. Immediate
8790   // encodings: 0x9 = floor, 0xa = ceil, 0xb = trunc, 0x4 = rint (current
8790   // mode), 0xc = nearbyint (current mode, suppress exceptions).
8790   let Predicates = [HasAVX512] in {
8791     def : Pat<(ffloor _.FRC:$src),
8792               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8793                _.FRC:$src, (i32 0x9)))>;
8794     def : Pat<(fceil _.FRC:$src),
8795               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8796                _.FRC:$src, (i32 0xa)))>;
8797     def : Pat<(ftrunc _.FRC:$src),
8798               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8799                _.FRC:$src, (i32 0xb)))>;
8800     def : Pat<(frint _.FRC:$src),
8801               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8802                _.FRC:$src, (i32 0x4)))>;
8803     def : Pat<(fnearbyint _.FRC:$src),
8804               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8805                _.FRC:$src, (i32 0xc)))>;
8806   }
8807
8808   // Load-folding variants, only when optimizing for size.
8808   let Predicates = [HasAVX512, OptForSize] in {
8809     def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
8810               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8811                addr:$src, (i32 0x9)))>;
8812     def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
8813               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8814                addr:$src, (i32 0xa)))>;
8815     def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
8816               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8817                addr:$src, (i32 0xb)))>;
8818     def : Pat<(frint (_.ScalarLdFrag addr:$src)),
8819               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8820                addr:$src, (i32 0x4)))>;
8821     def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
8822               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8823                addr:$src, (i32 0xc)))>;
8824   }
8825 }
8826
8827 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
8828                                            SchedWriteFRnd.Scl, f32x_info>,
8829                                            AVX512AIi8Base, EVEX_4V,
8830                                            EVEX_CD8<32, CD8VT1>;
8831
8832 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
8833                                            SchedWriteFRnd.Scl, f64x_info>,
8834                                            VEX_W, AVX512AIi8Base, EVEX_4V,
8835                                            EVEX_CD8<64, CD8VT1>;
8835
8836 // Folds a select-on-mask of a scalar unary op into the masked (k) and
8836 // zero-masked (kz) intrinsic instructions, e.g. (movss x, (select m,
8836 // (op y), x)) -> V<op>r_Intk. The caller supplies the mask dag and the
8836 // COPY_TO_REGCLASS dag that converts it to VK1WM.
8837 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
8838                                 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
8839                                 dag OutMask, Predicate BasePredicate> {
8840   let Predicates = [BasePredicate] in {
8841     // Merge-masked: the false value is the existing destination element.
8841     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
8842                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
8843                (extractelt _.VT:$dst, (iPTR 0))))),
8844               (!cast<Instruction>("V"#OpcPrefix#r_Intk)
8845                _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
8846
8847     // Zero-masked: the false value is +0.0 (ZeroFP).
8847     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
8848                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
8849                ZeroFP))),
8850               (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
8851                OutMask, _.VT:$src2, _.VT:$src1)>;
8852   }
8853 }
8854
8855 // Masked scalar sqrt: GR32 mask truncated to i8, copied into VK1WM.
8854 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
8855                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
8856                             fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
8857 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
8858                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
8859                             fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
8860
8861 // Same idea as avx512_masked_scalar, but for immediate-carrying scalar
8861 // ops (rndscale): the matched instruction takes an extra i32 immediate
8861 // (ImmV), and the instruction names use the "Zr_Int" prefix scheme.
8862 multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
8863                                     dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
8864                                     bits<8> ImmV, dag OutMask,
8865                                     Predicate BasePredicate> {
8866   let Predicates = [BasePredicate] in {
8867     // Merge-masked variant.
8867     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
8868                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
8869                (extractelt _.VT:$dst, (iPTR 0))))),
8870               (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
8871                _.VT:$dst, OutMask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
8872
8873     // Zero-masked variant.
8873     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
8874                (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
8875               (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
8876                OutMask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
8877   }
8878 }
8879
8880 // Masked floor/ceil via VRNDSCALE (imm 0x01 = floor, 0x02 = ceil);
8880 // instantiated for both GR32 and GR8 mask sources.
8880 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
8881                                 (v1i1 (scalar_to_vector GR32:$mask)),
8882                                 v4f32x_info, fp32imm0, 0x01,
8883                                 (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8884 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
8885                                 (v1i1 (scalar_to_vector GR8:$mask)),
8886                                 v4f32x_info, fp32imm0, 0x01,
8887                                 (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8888 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
8889                                 (v1i1 (scalar_to_vector GR32:$mask)),
8890                                 v4f32x_info, fp32imm0, 0x02,
8891                                 (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8892 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
8893                                 (v1i1 (scalar_to_vector GR8:$mask)),
8894                                 v4f32x_info, fp32imm0, 0x02,
8895                                 (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8896 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
8897                                 (v1i1 (scalar_to_vector GR32:$mask)),
8898                                 v2f64x_info, fp64imm0, 0x01,
8899                                 (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8900 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
8901                                 (v1i1 (scalar_to_vector GR8:$mask)),
8902                                 v2f64x_info, fp64imm0, 0x01,
8903                                 (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8904 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
8905                                 (v1i1 (scalar_to_vector GR32:$mask)),
8906                                 v2f64x_info, fp64imm0, 0x02,
8907                                 (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8908 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
8909                                 (v1i1 (scalar_to_vector GR8:$mask)),
8910                                 v2f64x_info, fp64imm0, 0x02,
8911                                 (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8911
8912
8913 //-------------------------------------------------
8914 // Integer truncate and extend operations
8915 //-------------------------------------------------
8916
8917 // One truncate instruction at a fixed width: masked reg-reg form (rr),
8917 // plus store forms (mr, and mrk with a writemask) that have no patterns
8917 // here -- stores are matched by avx512_trunc_mr_lowering below.
8918 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
8919                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
8920                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
8921   let ExeDomain = DestInfo.ExeDomain in
8922   defm rr  : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
8923                       (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
8924                       (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
8925                       EVEX, T8XS, Sched<[sched]>;
8926
8927   // Store forms: mayStore with empty pattern lists (hasSideEffects = 0).
8926   let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
8927     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
8928                (ins x86memop:$dst, SrcInfo.RC:$src),
8929                OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
8930                EVEX, Sched<[sched.Folded]>;
8931
8932     // Masked store; NotMemoryFoldable keeps it out of the fold tables.
8932     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
8933                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
8934                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
8935                EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
8936   }//mayStore = 1, hasSideEffects = 0
8937 }
8938
8939 // Selection patterns for the truncating stores defined in
8939 // avx512_trunc_common: plain truncstore -> <Name><suffix>mr and masked
8939 // truncstore -> <Name><suffix>mrk.
8940 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
8941                                     X86VectorVTInfo DestInfo,
8942                                     PatFrag truncFrag, PatFrag mtruncFrag,
8943                                     string Name> {
8944
8945   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
8946             (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
8947                                     addr:$dst, SrcInfo.RC:$src)>;
8948
8949   def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
8950                                                (SrcInfo.VT SrcInfo.RC:$src)),
8951             (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
8952                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
8953 }
8953
8954 // Full truncate family across widths. A separate SDNode per width
8954 // (OpNode128/256/512) lets 128/256-bit sources use an "in-vector" node
8954 // where the result occupies only part of the destination register.
8954 // Z128/Z256 are gated on AVX512VL (plus the caller's predicate).
8955 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
8956                         SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched,
8957                         AVX512VLVectorVTInfo VTSrcInfo,
8958                         X86VectorVTInfo DestInfoZ128,
8959                         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
8960                         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
8961                         X86MemOperand x86memopZ, PatFrag truncFrag,
8962                         PatFrag mtruncFrag, Predicate prd = HasAVX512>{
8963
8964   let Predicates = [HasVLX, prd] in {
8965     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, sched,
8966                              VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
8967                 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
8968                              truncFrag, mtruncFrag, NAME>, EVEX_V128;
8969
8970     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, sched,
8971                              VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
8972                 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
8973                              truncFrag, mtruncFrag, NAME>, EVEX_V256;
8974   }
8975   let Predicates = [prd] in
8976     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, sched,
8977                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
8978                 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
8979                              truncFrag, mtruncFrag, NAME>, EVEX_V512;
8980 }
8980
// Truncation i64 -> i8 (quad-word to byte).  All three source widths produce
// a v16i8 destination register; the memory forms store only the live low
// bytes (2/4/8 bytes via i16mem/i32mem/i64mem).  Every length uses the
// in-vector node since results never fill a full 128-bit vector.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
8989
// Truncation i64 -> i16 (quad-word to word).  Destinations are v8i16; the
// 512-bit source yields a full 128-bit result so it uses the generic OpNode,
// while the narrower sources use the in-vector node.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
8998
// Truncation i64 -> i32 (quad-word to double-word).  128/256-bit sources give
// v4i32; the 512-bit source gives v8i32.  Only the 128-bit source (two
// elements) needs the in-vector node.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
9007
// Truncation i32 -> i8 (double-word to byte).  All destinations are v16i8;
// only the 512-bit source (16 elements) fills the vector and uses OpNode.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
9016
// Truncation i32 -> i16 (double-word to word).  128/256-bit sources give
// v8i16; the 512-bit source gives v16i16.  Only the 128-bit source uses the
// in-vector node.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
9025
// Truncation i16 -> i8 (word to byte).  Requires BWI (passed as the base
// predicate, so even the 512-bit form is gated on HasBWI).  128/256-bit
// sources give v16i8; the 512-bit source gives v32i8.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          sched, avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
9034
// Instantiate every truncating-move family.  Each triple is:
//   VPMOV*   - plain truncation (generic 'trunc' node, X86vtrunc in-vector)
//   VPMOVS*  - signed-saturating truncation (X86vtruncs)
//   VPMOVUS* - unsigned-saturating truncation (X86vtruncus)
// with matching (masked) truncating-store PatFrags.
defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw",   trunc, WriteShuffle256,
                                  truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16>;

defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd",   trunc, WriteShuffle256,
                                  truncstorevi32, masked_truncstorevi32, X86vtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi32, masked_truncstore_us_vi32>;

defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb",   X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256,
                                  truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw",   X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw",  X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16>;

defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb",   X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb",  X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;
9076
// Without VLX only the 512-bit truncates exist.  Select 256-bit truncations
// by widening the source into a zmm register (upper half undef via
// IMPLICIT_DEF), running the Z-form instruction, and extracting the low xmm.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
}
9087
// Same widening trick for the BWI-only word->byte truncate when VLX is
// unavailable.
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}
9093
// Common reg/reg and reg/mem (maskable) forms for the VPMOVSX/VPMOVZX
// sign/zero-extension instructions.  NOTE(review): despite the
// "WriteShuffle256" name these multiclasses build extension instructions;
// the name appears to come from the scheduling class they are instantiated
// with below.
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                  EVEX, Sched<[sched]>;

  // Memory form: the pattern is the extending load itself.
  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src))>,
                EVEX, Sched<[sched.Folded]>;
  }
}
9109
// Byte -> word extension (VPMOVSXBW/VPMOVZXBW) at the three vector lengths.
// ExtTy ("s"/"z") selects the matching extending-load PatFrag.  The 128-bit
// destination consumes only part of the source vector, so it uses the
// in-vector node; word forms require BWI.
multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                    v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                    v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9128
// Byte -> double-word extension (VPMOVSXBD/VPMOVZXBD) at the three lengths.
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v16i8x_info, i64mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i8x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9147
// Byte -> quad-word extension (VPMOVSXBQ/VPMOVZXBQ) at the three lengths.
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v16i8x_info, i32mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v16i8x_info, i64mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9166
// Word -> double-word extension (VPMOVSXWD/VPMOVZXWD) at the three lengths.
multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i16x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9185
// Word -> quad-word extension (VPMOVSXWQ/VPMOVZXWQ) at the three lengths.
multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v8i16x_info, i64mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9204
// Double-word -> quad-word extension (VPMOVSXDQ/VPMOVZXDQ).  Note: no
// VEX_WIG here, unlike the byte/word-source families above.
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v4i32x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i32x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
9224
// Instantiate the zero-extension (VPMOVZX*, X86vzext/zext_invec, "z" loads)
// and sign-extension (VPMOVSX*, X86vsext/sext_invec, "s" loads) families.
defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>;

defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>;
9238
9239
// Selects the memory forms of VPMOVSX/VPMOVZX for extend-of-load DAGs that
// did not fold naturally: scalar_to_vector loads, zero-extending moves
// (vzmovl/vzload) and bitcasted full-vector loads.  OpcPrefix is "VPMOVSX"
// or "VPMOVZX"; ExtOp is the full-width extend node and InVecOp the
// in-vector variant used when the 128-bit result only consumes part of the
// source vector.  Instruction names are assembled from the prefix, the
// element-size pair (BW/BD/.../DQ) and the Z/Z128/Z256 suffix.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  // b->q only needs two source bytes, loaded via a 16-bit extending load.
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;

  def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
9380
// Apply the load-folding patterns to both the sign- and zero-extend families.
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>;
9383
9384 //===----------------------------------------------------------------------===//
9385 // GATHER - SCATTER Operations
9386
9387 // FIXME: Improve scheduling of gather/scatter instructions.
// One masked gather instruction.  $src1 is the pass-through vector (tied to
// $dst) and the mask register is both read and written ($mask_wb: hardware
// clears mask bits as elements complete).  @earlyclobber prevents the
// destination from aliasing the index register inside $src2.  MaskRC
// defaults to the write-mask class of the data type but can be overridden
// (e.g. VK2WM for the 2-element QPS/QD forms below).
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            [(set _.RC:$dst, MaskRC:$mask_wb,
              (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
                     vectoraddr:$src2))]>, EVEX, EVEX_K,
             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
9402
// Gathers of 64-bit elements (PD / Q data) with dword (D) or qword (Q)
// indices, at all three vector lengths.  Record names are assembled as
// NAME + index-width letter + SUFF + Z-length suffix (e.g. VGATHERDPDZ256).
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                      vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}
9420
// Gathers of 32-bit elements (PS / D data).  With qword indices the result
// vector has half as many elements as the index vector, so the data info is
// one size class below the EVEX length (e.g. Q##Z uses _.info256), and the
// 128-bit Q form only has two active lanes (hence the explicit VK2WM mask).
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mgatherv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mgatherv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mgatherv2i64, VK2WM>,
                                          EVEX_V128;
}
}
9439
9440
// FP gathers (VGATHERDPD/QPD/DPS/QPS) and integer gathers (VPGATHERDQ/...).
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
9446
// One masked scatter instruction.  Mirrors avx512_gather: the mask is read
// and written back ($mask_wb), but here the vector operand is a store source
// and the memory operand carries the index vector.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                    MaskRC:$mask,  vectoraddr:$dst))]>,
            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}
9462
// Scatters of 64-bit elements with dword or qword indices; structure mirrors
// avx512_gather_q_pd above.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                      vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}
9480
// Scatters of 32-bit elements; as with avx512_gather_d_ps, the qword-index
// forms use data info one size class below the EVEX length, and the 128-bit
// Q form has only two active lanes (explicit VK2WM mask).
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mscatterv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mscatterv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mscatterv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mscatterv2i64, VK2WM>,
                                          EVEX_V128;
}
}
9499
// FP scatters (VSCATTERDPD/...) and integer scatters (VPSCATTERDQ/...).
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
9505
9506 // prefetch
9507 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9508                        RegisterClass KRC, X86MemOperand memop> {
9509   let Predicates = [HasPFI], hasSideEffects = 1 in
9510   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9511             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9512             EVEX, EVEX_K, Sched<[WriteLoad]>;
9513 }
9514
// Prefetch instantiations: opcode 0xC6 = dword-index forms, 0xC7 =
// qword-index forms; the MRM reg field selects hint 0/1 and gather (MRM1m/
// MRM2m) vs scatter (MRM5m/MRM6m).
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9553
9554 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9555                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9556
9557 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9558                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9559
9560 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9561                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9562
// Mask-to-vector move (vpmovm2*) for one vector width: selected from a sign
// extension of a mask register to the full vector type. The mnemonic suffix
// is taken from the VT info (e.g. "b"/"w"/"d"/"q").
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
}
9569
// Instantiate cvt_by_vec_width for all three vector widths of one element
// size. Note the first (unbraced) `let` scopes [prd] to the Z defm only; the
// 256/128-bit forms additionally require VLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}
9580
// vpmovm2{b,w} require BWI; vpmovm2{d,q} require DQI. W/Q forms carry VEX_W.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
9585
// Vector-to-mask move (vpmov*2m) for one width: selected from a signed
// compare "0 > src" (X86pcmpgtm with an all-zeros LHS), i.e. the mask bit is
// set for each element whose sign bit is set.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}
9592
// Use 512bit version to implement 128/256 bit in case NoVLX.
// Pattern-only lowering: widen the narrow source into an undef 512-bit
// register (INSERT_SUBREG over IMPLICIT_DEF), run the Z-form instruction, and
// copy the resulting mask back to the narrower mask register class.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
9605
// Instantiate vpmov*2m across widths: real 256/128-bit instructions under
// VLX, and pattern-only fallbacks through the 512-bit form under NoVLX.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                               EVEX_V128;
  }
  // NoVLX: no new instructions, just Pats that reuse NAME#"Zrr" above.
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}
9623
// vpmov{b,w}2m require BWI; vpmov{d,q}2m require DQI. W/Q forms carry VEX_W.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;
9632
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
// Route through v16i32: VPMOVM2D materializes the mask as dwords, then
// VPMOVDB/VPMOVDW truncate down to bytes/words.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}
9642
9643 //===----------------------------------------------------------------------===//
9644 // AVX-512 - COMPRESS and EXPAND
9645 //
9646
9647 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9648                                  string OpcodeStr, X86FoldableSchedWrite sched> {
9649   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9650               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9651               (_.VT (X86compress _.RC:$src1))>, AVX5128IBase,
9652               Sched<[sched]>;
9653
9654   let mayStore = 1, hasSideEffects = 0 in
9655   def mr : AVX5128I<opc, MRMDestMem, (outs),
9656               (ins _.MemOp:$dst, _.RC:$src),
9657               OpcodeStr # "\t{$src, $dst|$dst, $src}",
9658               []>, EVEX_CD8<_.EltSize, CD8VT1>,
9659               Sched<[sched.Folded]>;
9660
9661   def mrk : AVX5128I<opc, MRMDestMem, (outs),
9662               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
9663               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9664               []>,
9665               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9666               Sched<[sched.Folded]>;
9667 }
9668
// Map a masked compressing store onto the mrk form of the instruction named
// Name + ZSuffix (Z/Z256/Z128) for this width.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
                                               (_.VT _.RC:$src)),
            (!cast<Instruction>(Name#_.ZSuffix##mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}
9675
// Instantiate compress instructions + store patterns across the three vector
// widths; 256/128-bit forms additionally require VLX.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
9691
// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer forms at opcode 0x8B, FP forms at 0x8A; Q/PD carry VEX_W.
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
9701
// expand
// Expand for one vector width: maskable reg-reg and reg-mem forms selected
// from X86expand (the mem form loads, bitcasts, then expands).
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand _.RC:$src1))>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand (_.VT (bitconvert
                                      (_.LdFrag addr:$src1)))))>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, ReadAfterLd]>;
}
9717
// Map masked expanding loads onto the rmkz/rmk forms: undef and all-zeros
// passthru both select the zeroing (rmkz) form; a register passthru selects
// the merging (rmk) form.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                               (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
}
9733
// Instantiate expand instructions + load patterns across the three vector
// widths; 256/128-bit forms additionally require VLX.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
9749
// FIXME: Is there a better scheduler class for VPEXPAND?
// Integer forms at opcode 0x89, FP forms at 0x88; Q/PD carry VEX_W.
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;
9759
// Handle instructions of the form:  reg_vec1 = op(reg_vec, imm)
//                                              op(mem_vec, imm)
//                                              op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT (no explicit rounding
// operand in these forms).
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  // Register form.
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2))>, Sched<[sched]>;
  // Full-vector memory form.
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 imm:$src2))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  // Scalar-memory broadcast form (EVEX_B).
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                    "${src1}"##_.BroadcastStr##", $src2",
                    (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                            (i32 imm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
9787
// Handle instructions of the form:  reg_vec1 = op(reg_vec2, imm), {sae}
// Register-only form with suppress-all-exceptions; EVEX_B here encodes {sae}
// and the OpNode receives FROUND_NO_EXC as its rounding-mode operand.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
9802
// Instantiate the unary packed-imm forms across widths. Only the 512-bit
// version gets the {sae} variant (OpNodeRnd); VLX forms are plain only.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                           _.info256>, EVEX_V256;
  }
}
9819
// Handle instructions of the form:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                              op(reg_vec2, mem_vec, imm)
//                                              op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT (no explicit rounding
// operand in these forms).
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  // Register form.
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  // Full-vector memory form.
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 imm:$src3))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  // Scalar-memory broadcast form (EVEX_B).
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
9851
// Handle instructions of the form:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                              op(reg_vec2, mem_vec, imm)
// Three-operand + i8 immediate, with separate destination and source VT
// infos (destination may differ in type from the sources).
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 imm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                                (SrcInfo.LdFrag addr:$src2))),
                             (i8 imm:$src3)))>,
                Sched<[sched.Folded, ReadAfterLd]>;
  }
}
9875
// Handle instructions of the form:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                              op(reg_vec2, mem_vec, imm)
//                                              op(reg_vec2, broadcast(eltVt), imm)
// Extends avx512_3Op_rm_imm8 (single VT info for dest and src) with the
// scalar-memory broadcast form (EVEX_B).
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i8 imm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
9893
// Handle scalar instructions of the form:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                                     op(reg_vec2, mem_scalar, imm)
// Scalar maskable forms; the memory form loads a single element and wraps it
// with scalar_to_vector before the OpNode.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (scalar_to_vector
                                      (_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
9916
// Handle instructions of the form:  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Register-only packed form with suppress-all-exceptions: EVEX_B encodes
// {sae} and the OpNode receives FROUND_NO_EXC as its rounding-mode operand.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
9932
// Handle scalar instructions of the form:
//   reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Scalar counterpart of avx512_fp_sae_packed_imm.
// NOTE(review): this defm spells the name "NAME#rrib" while the packed
// version above uses plain "rrib" — verify the generated record names match
// expectations before changing either spelling.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
9947
// Instantiate the packed-imm forms across widths. Only the 512-bit version
// gets the {sae} variant (OpNodeRnd); VLX forms are plain only.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                  EVEX_V256;
  }
}
9964
// Instantiate the three-operand + i8-imm forms (separate dest/src VT infos)
// across widths; defaults to BWI, with VLX gating the narrower forms.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
9979
// Instantiate the three-operand + i8-imm forms (single VT info, includes the
// broadcast variant) across widths; VLX gates the narrower forms.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                                EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}
9994
// Scalar combination: plain + {sae} forms under one predicate. Scalars use
// the XMM scheduling class regardless of the widths bundle passed in.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
  }
}
10003
// Instantiate both element sizes of a unary packed-imm op: a PS (f32,
// CD8 scale 32) and a PD (f64, CD8 scale 64, VEX_W) family, each with its
// own opcode.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}
10014
// Unary packed-imm families. VREDUCE/VGETMANT use one opcode for both PS and
// PD; VRNDSCALE uses 0x08 (PS) / 0x09 (PD). VREDUCE requires DQI.
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
10024
// Packed VRANGE (two-source + imm, DQI) — PD form carries VEX_W.
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

// Scalar SD/SS variants (VEX_LIG, CD8VT1): VRANGE and VREDUCE need DQI,
// VGETMANT only AVX512F.
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10054
// Lower the generic FP rounding ISD nodes onto the 512-bit
// VRNDSCALEPS/VRNDSCALEPD instructions.  The immediates follow the
// VRNDSCALE imm8 encoding (see the Intel SDM): 0x9 = floor, 0xA = ceil,
// 0xB = trunc, 0x4 = current rounding mode (frint), 0xC = current mode
// with the precision exception suppressed (fnearbyint).  Merge- and
// zero-masked (vselect) forms are provided only for floor and ceil;
// the rmi patterns fold a full-vector load.
10055 let Predicates = [HasAVX512] in {
// v16f32, register source.
10056 def : Pat<(v16f32 (ffloor VR512:$src)),
10057           (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
10058 def : Pat<(v16f32 (vselect VK16WM:$mask, (ffloor VR512:$src), VR512:$dst)),
10059           (VRNDSCALEPSZrrik VR512:$dst, VK16WM:$mask, VR512:$src, (i32 0x9))>;
10060 def : Pat<(v16f32 (vselect VK16WM:$mask, (ffloor VR512:$src), v16f32_info.ImmAllZerosV)),
10061           (VRNDSCALEPSZrrikz VK16WM:$mask, VR512:$src, (i32 0x9))>;
10062 def : Pat<(v16f32 (fnearbyint VR512:$src)),
10063           (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
10064 def : Pat<(v16f32 (fceil VR512:$src)),
10065           (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
10066 def : Pat<(v16f32 (vselect VK16WM:$mask, (fceil VR512:$src), VR512:$dst)),
10067           (VRNDSCALEPSZrrik VR512:$dst, VK16WM:$mask, VR512:$src, (i32 0xA))>;
10068 def : Pat<(v16f32 (vselect VK16WM:$mask, (fceil VR512:$src), v16f32_info.ImmAllZerosV)),
10069           (VRNDSCALEPSZrrikz VK16WM:$mask, VR512:$src, (i32 0xA))>;
10070 def : Pat<(v16f32 (frint VR512:$src)),
10071           (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
10072 def : Pat<(v16f32 (ftrunc VR512:$src)),
10073           (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
10074
// v16f32, folded memory source (unmasked only).
10075 def : Pat<(v16f32 (ffloor (loadv16f32 addr:$src))),
10076           (VRNDSCALEPSZrmi addr:$src, (i32 0x9))>;
10077 def : Pat<(v16f32 (fnearbyint (loadv16f32 addr:$src))),
10078           (VRNDSCALEPSZrmi addr:$src, (i32 0xC))>;
10079 def : Pat<(v16f32 (fceil (loadv16f32 addr:$src))),
10080           (VRNDSCALEPSZrmi addr:$src, (i32 0xA))>;
10081 def : Pat<(v16f32 (frint (loadv16f32 addr:$src))),
10082           (VRNDSCALEPSZrmi addr:$src, (i32 0x4))>;
10083 def : Pat<(v16f32 (ftrunc (loadv16f32 addr:$src))),
10084           (VRNDSCALEPSZrmi addr:$src, (i32 0xB))>;
10085
// v8f64, register source.
10086 def : Pat<(v8f64 (ffloor VR512:$src)),
10087           (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
10088 def : Pat<(v8f64 (vselect VK8WM:$mask, (ffloor VR512:$src), VR512:$dst)),
10089           (VRNDSCALEPDZrrik VR512:$dst, VK8WM:$mask, VR512:$src, (i32 0x9))>;
10090 def : Pat<(v8f64 (vselect VK8WM:$mask, (ffloor VR512:$src), v8f64_info.ImmAllZerosV)),
10091           (VRNDSCALEPDZrrikz VK8WM:$mask, VR512:$src, (i32 0x9))>;
10092 def : Pat<(v8f64 (fnearbyint VR512:$src)),
10093           (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
10094 def : Pat<(v8f64 (fceil VR512:$src)),
10095           (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
10096 def : Pat<(v8f64 (vselect VK8WM:$mask, (fceil VR512:$src), VR512:$dst)),
10097           (VRNDSCALEPDZrrik VR512:$dst, VK8WM:$mask, VR512:$src, (i32 0xA))>;
10098 def : Pat<(v8f64 (vselect VK8WM:$mask, (fceil VR512:$src), v8f64_info.ImmAllZerosV)),
10099           (VRNDSCALEPDZrrikz VK8WM:$mask, VR512:$src, (i32 0xA))>;
10100 def : Pat<(v8f64 (frint VR512:$src)),
10101           (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
10102 def : Pat<(v8f64 (ftrunc VR512:$src)),
10103           (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
10104
// v8f64, folded memory source (unmasked only).
10105 def : Pat<(v8f64 (ffloor (loadv8f64 addr:$src))),
10106           (VRNDSCALEPDZrmi addr:$src, (i32 0x9))>;
10107 def : Pat<(v8f64 (fnearbyint (loadv8f64 addr:$src))),
10108           (VRNDSCALEPDZrmi addr:$src, (i32 0xC))>;
10109 def : Pat<(v8f64 (fceil (loadv8f64 addr:$src))),
10110           (VRNDSCALEPDZrmi addr:$src, (i32 0xA))>;
10111 def : Pat<(v8f64 (frint (loadv8f64 addr:$src))),
10112           (VRNDSCALEPDZrmi addr:$src, (i32 0x4))>;
10113 def : Pat<(v8f64 (ftrunc (loadv8f64 addr:$src))),
10114           (VRNDSCALEPDZrmi addr:$src, (i32 0xB))>;
10115 }
10116
// VLX versions of the rounding-node lowerings above, for the 128- and
// 256-bit vector types.  Same imm8 encodings as the 512-bit patterns:
// 0x9 floor, 0xA ceil, 0xB trunc, 0x4 frint, 0xC fnearbyint.
10117 let Predicates = [HasVLX] in {
// v4f32 (128-bit), register source.
10118 def : Pat<(v4f32 (ffloor VR128X:$src)),
10119           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>;
10120 def : Pat<(v4f32 (vselect VK4WM:$mask, (ffloor VR128X:$src), VR128X:$dst)),
10121           (VRNDSCALEPSZ128rrik VR128X:$dst, VK4WM:$mask, VR128X:$src, (i32 0x9))>;
10122 def : Pat<(v4f32 (vselect VK4WM:$mask, (ffloor VR128X:$src), v4f32x_info.ImmAllZerosV)),
10123           (VRNDSCALEPSZ128rrikz VK4WM:$mask, VR128X:$src, (i32 0x9))>;
10124 def : Pat<(v4f32 (fnearbyint VR128X:$src)),
10125           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>;
10126 def : Pat<(v4f32 (fceil VR128X:$src)),
10127           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>;
10128 def : Pat<(v4f32 (vselect VK4WM:$mask, (fceil VR128X:$src), VR128X:$dst)),
10129           (VRNDSCALEPSZ128rrik VR128X:$dst, VK4WM:$mask, VR128X:$src, (i32 0xA))>;
10130 def : Pat<(v4f32 (vselect VK4WM:$mask, (fceil VR128X:$src), v4f32x_info.ImmAllZerosV)),
10131           (VRNDSCALEPSZ128rrikz VK4WM:$mask, VR128X:$src, (i32 0xA))>;
10132 def : Pat<(v4f32 (frint VR128X:$src)),
10133           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>;
10134 def : Pat<(v4f32 (ftrunc VR128X:$src)),
10135           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xB))>;
10136
// v4f32, folded memory source.
10137 def : Pat<(v4f32 (ffloor (loadv4f32 addr:$src))),
10138           (VRNDSCALEPSZ128rmi addr:$src, (i32 0x9))>;
10139 def : Pat<(v4f32 (fnearbyint (loadv4f32 addr:$src))),
10140           (VRNDSCALEPSZ128rmi addr:$src, (i32 0xC))>;
10141 def : Pat<(v4f32 (fceil (loadv4f32 addr:$src))),
10142           (VRNDSCALEPSZ128rmi addr:$src, (i32 0xA))>;
10143 def : Pat<(v4f32 (frint (loadv4f32 addr:$src))),
10144           (VRNDSCALEPSZ128rmi addr:$src, (i32 0x4))>;
10145 def : Pat<(v4f32 (ftrunc (loadv4f32 addr:$src))),
10146           (VRNDSCALEPSZ128rmi addr:$src, (i32 0xB))>;
10147
// v2f64 (128-bit), register source.
10148 def : Pat<(v2f64 (ffloor VR128X:$src)),
10149           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
10150 def : Pat<(v2f64 (vselect VK2WM:$mask, (ffloor VR128X:$src), VR128X:$dst)),
10151           (VRNDSCALEPDZ128rrik VR128X:$dst, VK2WM:$mask, VR128X:$src, (i32 0x9))>;
10152 def : Pat<(v2f64 (vselect VK2WM:$mask, (ffloor VR128X:$src), v2f64x_info.ImmAllZerosV)),
10153           (VRNDSCALEPDZ128rrikz VK2WM:$mask, VR128X:$src, (i32 0x9))>;
10154 def : Pat<(v2f64 (fnearbyint VR128X:$src)),
10155           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
10156 def : Pat<(v2f64 (fceil VR128X:$src)),
10157           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>;
10158 def : Pat<(v2f64 (vselect VK2WM:$mask, (fceil VR128X:$src), VR128X:$dst)),
10159           (VRNDSCALEPDZ128rrik VR128X:$dst, VK2WM:$mask, VR128X:$src, (i32 0xA))>;
10160 def : Pat<(v2f64 (vselect VK2WM:$mask, (fceil VR128X:$src), v2f64x_info.ImmAllZerosV)),
10161           (VRNDSCALEPDZ128rrikz VK2WM:$mask, VR128X:$src, (i32 0xA))>;
10162 def : Pat<(v2f64 (frint VR128X:$src)),
10163           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>;
10164 def : Pat<(v2f64 (ftrunc VR128X:$src)),
10165           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xB))>;
10166
// v2f64, folded memory source.
10167 def : Pat<(v2f64 (ffloor (loadv2f64 addr:$src))),
10168           (VRNDSCALEPDZ128rmi addr:$src, (i32 0x9))>;
10169 def : Pat<(v2f64 (fnearbyint (loadv2f64 addr:$src))),
10170           (VRNDSCALEPDZ128rmi addr:$src, (i32 0xC))>;
10171 def : Pat<(v2f64 (fceil (loadv2f64 addr:$src))),
10172           (VRNDSCALEPDZ128rmi addr:$src, (i32 0xA))>;
10173 def : Pat<(v2f64 (frint (loadv2f64 addr:$src))),
10174           (VRNDSCALEPDZ128rmi addr:$src, (i32 0x4))>;
10175 def : Pat<(v2f64 (ftrunc (loadv2f64 addr:$src))),
10176           (VRNDSCALEPDZ128rmi addr:$src, (i32 0xB))>;
10177
// v8f32 (256-bit), register source.
10178 def : Pat<(v8f32 (ffloor VR256X:$src)),
10179           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
10180 def : Pat<(v8f32 (vselect VK8WM:$mask, (ffloor VR256X:$src), VR256X:$dst)),
10181           (VRNDSCALEPSZ256rrik VR256X:$dst, VK8WM:$mask, VR256X:$src, (i32 0x9))>;
10182 def : Pat<(v8f32 (vselect VK8WM:$mask, (ffloor VR256X:$src), v8f32x_info.ImmAllZerosV)),
10183           (VRNDSCALEPSZ256rrikz VK8WM:$mask, VR256X:$src, (i32 0x9))>;
10184 def : Pat<(v8f32 (fnearbyint VR256X:$src)),
10185           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
10186 def : Pat<(v8f32 (fceil VR256X:$src)),
10187           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>;
10188 def : Pat<(v8f32 (vselect VK8WM:$mask, (fceil VR256X:$src), VR256X:$dst)),
10189           (VRNDSCALEPSZ256rrik VR256X:$dst, VK8WM:$mask, VR256X:$src, (i32 0xA))>;
10190 def : Pat<(v8f32 (vselect VK8WM:$mask, (fceil VR256X:$src), v8f32x_info.ImmAllZerosV)),
10191           (VRNDSCALEPSZ256rrikz VK8WM:$mask, VR256X:$src, (i32 0xA))>;
10192 def : Pat<(v8f32 (frint VR256X:$src)),
10193           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>;
10194 def : Pat<(v8f32 (ftrunc VR256X:$src)),
10195           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xB))>;
10196
// v8f32, folded memory source.
10197 def : Pat<(v8f32 (ffloor (loadv8f32 addr:$src))),
10198           (VRNDSCALEPSZ256rmi addr:$src, (i32 0x9))>;
10199 def : Pat<(v8f32 (fnearbyint (loadv8f32 addr:$src))),
10200           (VRNDSCALEPSZ256rmi addr:$src, (i32 0xC))>;
10201 def : Pat<(v8f32 (fceil (loadv8f32 addr:$src))),
10202           (VRNDSCALEPSZ256rmi addr:$src, (i32 0xA))>;
10203 def : Pat<(v8f32 (frint (loadv8f32 addr:$src))),
10204           (VRNDSCALEPSZ256rmi addr:$src, (i32 0x4))>;
10205 def : Pat<(v8f32 (ftrunc (loadv8f32 addr:$src))),
10206           (VRNDSCALEPSZ256rmi addr:$src, (i32 0xB))>;
10207
// v4f64 (256-bit), register source.
10208 def : Pat<(v4f64 (ffloor VR256X:$src)),
10209           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
10210 def : Pat<(v4f64 (vselect VK4WM:$mask, (ffloor VR256X:$src), VR256X:$dst)),
10211           (VRNDSCALEPDZ256rrik VR256X:$dst, VK4WM:$mask, VR256X:$src, (i32 0x9))>;
10212 def : Pat<(v4f64 (vselect VK4WM:$mask, (ffloor VR256X:$src), v4f64x_info.ImmAllZerosV)),
10213           (VRNDSCALEPDZ256rrikz VK4WM:$mask, VR256X:$src, (i32 0x9))>;
10214 def : Pat<(v4f64 (fnearbyint VR256X:$src)),
10215           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
10216 def : Pat<(v4f64 (fceil VR256X:$src)),
10217           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>;
10218 def : Pat<(v4f64 (vselect VK4WM:$mask, (fceil VR256X:$src), VR256X:$dst)),
10219           (VRNDSCALEPDZ256rrik VR256X:$dst, VK4WM:$mask, VR256X:$src, (i32 0xA))>;
10220 def : Pat<(v4f64 (vselect VK4WM:$mask, (fceil VR256X:$src), v4f64x_info.ImmAllZerosV)),
10221           (VRNDSCALEPDZ256rrikz VK4WM:$mask, VR256X:$src, (i32 0xA))>;
10222 def : Pat<(v4f64 (frint VR256X:$src)),
10223           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>;
10224 def : Pat<(v4f64 (ftrunc VR256X:$src)),
10225           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>;
10226
// v4f64, folded memory source.
10227 def : Pat<(v4f64 (ffloor (loadv4f64 addr:$src))),
10228           (VRNDSCALEPDZ256rmi addr:$src, (i32 0x9))>;
10229 def : Pat<(v4f64 (fnearbyint (loadv4f64 addr:$src))),
10230           (VRNDSCALEPDZ256rmi addr:$src, (i32 0xC))>;
10231 def : Pat<(v4f64 (fceil (loadv4f64 addr:$src))),
10232           (VRNDSCALEPDZ256rmi addr:$src, (i32 0xA))>;
10233 def : Pat<(v4f64 (frint (loadv4f64 addr:$src))),
10234           (VRNDSCALEPDZ256rmi addr:$src, (i32 0x4))>;
10235 def : Pat<(v4f64 (ftrunc (loadv4f64 addr:$src))),
10236           (VRNDSCALEPDZ256rmi addr:$src, (i32 0xB))>;
10237 }
10238
// Common definitions for the 128-bit-lane shuffles (VSHUFF32X4 and
// friends): reg/reg (rri), reg/mem (rmi) and reg/broadcast-mem (rmbi)
// forms, each with the full AVX512_maskable masking variants.
// The X86Shuf128 node is matched at CastInfo's type and the result is
// bitconverted to the instruction's own type _.VT.  EVEX2VEXOvrd names
// the VEX instruction used by the EVEX->VEX compression pass; "rr"/"rm"
// is appended per form, and the broadcast form gets no override.
10239 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10240                                           X86FoldableSchedWrite sched,
10241                                           X86VectorVTInfo _,
10242                                           X86VectorVTInfo CastInfo,
10243                                           string EVEX2VEXOvrd> {
10244   let ExeDomain = _.ExeDomain in {
10245   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10246                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10247                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10248                   (_.VT (bitconvert
10249                          (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10250                                                   (i8 imm:$src3)))))>,
10251                   Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
10252   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10253                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10254                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10255                 (_.VT
10256                  (bitconvert
10257                   (CastInfo.VT (X86Shuf128 _.RC:$src1,
10258                                            (bitconvert (_.LdFrag addr:$src2)),
10259                                            (i8 imm:$src3)))))>,
10260                 Sched<[sched.Folded, ReadAfterLd]>,
10261                 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Broadcast form: second operand is a scalar load splatted to a vector.
10262   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10263                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10264                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10265                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
10266                     (_.VT
10267                      (bitconvert
10268                       (CastInfo.VT
10269                        (X86Shuf128 _.RC:$src1,
10270                                    (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
10271                                    (i8 imm:$src3)))))>, EVEX_B,
10272                     Sched<[sched.Folded, ReadAfterLd]>;
10273   }
10274 }
10275
// Instantiate the lane shuffles at 512-bit (HasAVX512) and 256-bit
// (HasVLX) widths; there is no 128-bit form of these instructions.
// The 512-bit variant passes an empty EVEX2VEX override string since
// 512-bit EVEX instructions are never compressed to VEX.
10276 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10277                                    AVX512VLVectorVTInfo _,
10278                                    AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10279                                    string EVEX2VEXOvrd>{
10280   let Predicates = [HasAVX512] in
10281   defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10282                                           _.info512, CastInfo.info512, "">, EVEX_V512;
10283
10284   let Predicates = [HasAVX512, HasVLX] in
10285   defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10286                                              _.info256, CastInfo.info256,
10287                                              EVEX2VEXOvrd>, EVEX_V256;
10288 }
10289
// VSHUFF32X4/VSHUFF64X2 (opcode 0x23) and VSHUFI32X4/VSHUFI64X2 (0x43):
// shuffle 128-bit lanes of float/int vectors.  The 32x4 forms are matched
// via the 64-bit cast type (CastInfo) used by X86Shuf128; the X2 forms
// carry VEX.W.  The EVEX2VEX override targets VPERM2F128/VPERM2I128.
10290 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10291       avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10292 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10293       avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10294 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10295       avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10296 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10297       avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10298
10299 let Predicates = [HasAVX512] in {
10300 // Provide fallback in case the load node that is used in the broadcast
10301 // patterns above is used by additional users, which prevents the pattern
10302 // selection.
// A lane shuffle with immediate 0 replicates the low 128-bit lane into
// every destination lane, implementing X86SubVBroadcast from a register.
// The source xmm is first inserted into an undef 512-bit register.
10303 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
10304           (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10305                           (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10306                           0)>;
10307 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
10308           (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10309                           (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10310                           0)>;
10311
10312 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
10313           (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10314                           (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10315                           0)>;
10316 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
10317           (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10318                           (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10319                           0)>;
10320
// i16/i8 vectors reuse VSHUFI32X4 -- a whole-lane shuffle is independent
// of the element type, and there is no 16/8-bit lane-shuffle instruction.
10321 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
10322           (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10323                           (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10324                           0)>;
10325
10326 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
10327           (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10328                           (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10329                           0)>;
10330 }
10331
// VALIGND/VALIGNQ body: concatenate $src2:$src1 and shift right by
// $src3 elements (X86VAlign).  Provides reg/reg, reg/mem and
// reg/broadcast-mem forms with full masking.  The EVEX2VEX overrides
// map the 128-bit forms onto VPALIGNR when legal.
10332 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10333                          X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10334   // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
10335   // instantiation of this class.
10336   let ExeDomain = _.ExeDomain in {
10337   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10338                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10339                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10340                   (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
10341                   Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10342   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10343                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10344                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10345                 (_.VT (X86VAlign _.RC:$src1,
10346                                  (bitconvert (_.LdFrag addr:$src2)),
10347                                  (i8 imm:$src3)))>,
10348                 Sched<[sched.Folded, ReadAfterLd]>,
10349                 EVEX2VEXOverride<"VPALIGNRrmi">;
10350
// Broadcast form (EVEX.b): second operand is a splatted scalar load.
10351   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10352                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10353                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10354                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
10355                    (X86VAlign _.RC:$src1,
10356                               (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10357                               (i8 imm:$src3))>, EVEX_B,
10358                    Sched<[sched.Folded, ReadAfterLd]>;
10359   }
10360 }
10361
// Instantiates VALIGND/VALIGNQ at 512/128/256 bits (opcode 0x03).
// The 128/256-bit forms require VLX.
10362 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10363                                 AVX512VLVectorVTInfo _> {
10364   let Predicates = [HasAVX512] in {
10365     defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10366                                 AVX512AIi8Base, EVEX_4V, EVEX_V512;
10367   }
10368   let Predicates = [HasAVX512, HasVLX] in {
10369     defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10370                                 AVX512AIi8Base, EVEX_4V, EVEX_V128;
10371     // We can't really override the 256-bit version so change it back to unset.
10372     let EVEX2VEXOverride = ? in
10373     defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10374                                 AVX512AIi8Base, EVEX_4V, EVEX_V256;
10375   }
10376 }
10377
// VALIGND (dword elements) and VALIGNQ (qword elements, VEX.W), plus the
// byte-granular VPALIGNR (opcode 0x0F) built from the generic 3-operand
// imm8 multiclass over i8 vectors.
10378 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10379                                    avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10380 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10381                                    avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10382                                    VEX_W;
10383
10384 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10385                                          SchedWriteShuffle, avx512vl_i8_info,
10386                                          avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10387
10388 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
10389 // into vpalignr.
// The multipliers rescale the shift count to the finer element size:
// valignq counts 64-bit elements, so *2 yields the equivalent 32-bit
// element count and *8 the byte count; valignd counts 32-bit elements,
// so *4 yields the byte count.
10390 def ValignqImm32XForm : SDNodeXForm<imm, [{
10391   return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10392 }]>;
10393 def ValignqImm8XForm : SDNodeXForm<imm, [{
10394   return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10395 }]>;
10396 def ValigndImm8XForm : SDNodeXForm<imm, [{
10397   return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
10398 }]>;
10399
// Rewrites a masked (vselect) use of an align node computed at one
// element type (From) as the masked instruction of another element type
// (To), rescaling the immediate with ImmXForm.  Covers merge-masked and
// zero-masked forms for both register and folded-load operands.
// NOTE(review): the output operands use To.RC while the pattern binds
// From.RC; the instantiations below appear to always pair VT infos with
// the same register class -- confirm before reusing with mixed classes.
10400 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10401                                         X86VectorVTInfo From, X86VectorVTInfo To,
10402                                         SDNodeXForm ImmXForm> {
10403   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10404                             (bitconvert
10405                              (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10406                                               imm:$src3))),
10407                             To.RC:$src0)),
10408             (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10409                                                   To.RC:$src1, To.RC:$src2,
10410                                                   (ImmXForm imm:$src3))>;
10411
10412   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10413                             (bitconvert
10414                              (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10415                                               imm:$src3))),
10416                             To.ImmAllZerosV)),
10417             (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10418                                                    To.RC:$src1, To.RC:$src2,
10419                                                    (ImmXForm imm:$src3))>;
10420
10421   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10422                             (bitconvert
10423                              (From.VT (OpNode From.RC:$src1,
10424                                       (bitconvert (To.LdFrag addr:$src2)),
10425                                       imm:$src3))),
10426                             To.RC:$src0)),
10427             (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10428                                                   To.RC:$src1, addr:$src2,
10429                                                   (ImmXForm imm:$src3))>;
10430
10431   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10432                             (bitconvert
10433                              (From.VT (OpNode From.RC:$src1,
10434                                       (bitconvert (To.LdFrag addr:$src2)),
10435                                       imm:$src3))),
10436                             To.ImmAllZerosV)),
10437             (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10438                                                    To.RC:$src1, addr:$src2,
10439                                                    (ImmXForm imm:$src3))>;
10440 }
10441
// Extends avx512_vpalign_mask_lowering with patterns whose memory
// operand is a broadcast (splatted scalar) load: an unmasked rmbi form
// plus merge- and zero-masked rmbik/rmbikz forms.
10442 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10443                                            X86VectorVTInfo From,
10444                                            X86VectorVTInfo To,
10445                                            SDNodeXForm ImmXForm> :
10446       avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
// NOTE(review): the result reuses $src1 as To.RC while the pattern binds
// it as From.RC -- relies on both VT infos sharing a register class.
10447   def : Pat<(From.VT (OpNode From.RC:$src1,
10448                              (bitconvert (To.VT (X86VBroadcast
10449                                                 (To.ScalarLdFrag addr:$src2)))),
10450                              imm:$src3)),
10451             (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10452                                                   (ImmXForm imm:$src3))>;
10453
10454   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10455                             (bitconvert
10456                              (From.VT (OpNode From.RC:$src1,
10457                                       (bitconvert
10458                                        (To.VT (X86VBroadcast
10459                                                (To.ScalarLdFrag addr:$src2)))),
10460                                       imm:$src3))),
10461                             To.RC:$src0)),
10462             (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10463                                                    To.RC:$src1, addr:$src2,
10464                                                    (ImmXForm imm:$src3))>;
10465
10466   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10467                             (bitconvert
10468                              (From.VT (OpNode From.RC:$src1,
10469                                       (bitconvert
10470                                        (To.VT (X86VBroadcast
10471                                                (To.ScalarLdFrag addr:$src2)))),
10472                                       imm:$src3))),
10473                             To.ImmAllZerosV)),
10474             (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10475                                                     To.RC:$src1, addr:$src2,
10476                                                     (ImmXForm imm:$src3))>;
10477 }
10478
// Instantiate the masked-align rewrites.  Lowering always picks the
// widest element type, so only the valignq -> valignd direction (and,
// with BWI, valignd/q -> vpalignr) needs handling.
10479 let Predicates = [HasAVX512] in {
10480   // For 512-bit we lower to the widest element type we can. So we only need
10481   // to handle converting valignq to valignd.
10482   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10483                                          v16i32_info, ValignqImm32XForm>;
10484 }
10485
10486 let Predicates = [HasVLX] in {
10487   // For 128-bit we lower to the widest element type we can. So we only need
10488   // to handle converting valignq to valignd.
10489   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10490                                          v4i32x_info, ValignqImm32XForm>;
10491   // For 256-bit we lower to the widest element type we can. So we only need
10492   // to handle converting valignq to valignd.
10493   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10494                                          v8i32x_info, ValignqImm32XForm>;
10495 }
10496
// VPALIGNR is byte-granular, so no broadcast (_mb) patterns here.
10497 let Predicates = [HasVLX, HasBWI] in {
10498   // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
10499   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10500                                       v16i8x_info, ValignqImm8XForm>;
10501   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10502                                       v16i8x_info, ValigndImm8XForm>;
10503 }
10504
// VDBPSADBW (opcode 0x42): double-block packed sum of absolute byte
// differences producing word results.  Marked NotEVEX2VEXConvertible --
// presumably because it has no VEX-encoded equivalent.
10505 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10506                 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10507                 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10508
// Generic one-operand (unary) op: reg/reg and reg/mem forms with full
// masking, for a single vector width _.
10509 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10510                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10511   let ExeDomain = _.ExeDomain in {
10512   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10513                     (ins _.RC:$src1), OpcodeStr,
10514                     "$src1", "$src1",
10515                     (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
10516                     Sched<[sched]>;
10517
10518   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10519                   (ins _.MemOp:$src1), OpcodeStr,
10520                   "$src1", "$src1",
10521                   (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
10522             EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10523             Sched<[sched.Folded]>;
10524   }
10525 }
10526
// Adds the broadcast-memory (EVEX.b) form on top of avx512_unary_rm.
10527 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10528                             X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10529            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10530   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10531                   (ins _.ScalarMemOp:$src1), OpcodeStr,
10532                   "${src1}"##_.BroadcastStr,
10533                   "${src1}"##_.BroadcastStr,
10534                   (_.VT (OpNode (X86VBroadcast
10535                                     (_.ScalarLdFrag addr:$src1))))>,
10536              EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10537              Sched<[sched.Folded]>;
10538 }
10539
// Instantiate a unary op at 512 bits under prd, and at 256/128 bits
// when VLX is additionally available.
10540 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10541                               X86SchedWriteWidths sched,
10542                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10543   let Predicates = [prd] in
10544     defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10545                              EVEX_V512;
10546
10547   let Predicates = [prd, HasVLX] in {
10548     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10549                               EVEX_V256;
10550     defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10551                               EVEX_V128;
10552   }
10553 }
10554
// Same as avx512_unary_rm_vl but with the broadcast-memory form included.
10555 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10556                                X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10557                                Predicate prd> {
10558   let Predicates = [prd] in
10559     defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10560                               EVEX_V512;
10561
10562   let Predicates = [prd, HasVLX] in {
10563     defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10564                                  EVEX_V256;
10565     defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10566                                  EVEX_V128;
10567   }
10568 }
10569
// D/Q element-size pair: qword form carries VEX.W and, like the dword
// form, supports broadcast memory operands.
10570 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10571                                  SDNode OpNode, X86SchedWriteWidths sched,
10572                                  Predicate prd> {
10573   defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10574                                avx512vl_i64_info, prd>, VEX_W;
10575   defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10576                                avx512vl_i32_info, prd>;
10577 }
10578
// B/W element-size pair: no broadcast forms (byte/word ops don't have
// them); both ignore VEX.W.
10579 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10580                                  SDNode OpNode, X86SchedWriteWidths sched,
10581                                  Predicate prd> {
10582   defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10583                               avx512vl_i16_info, prd>, VEX_WIG;
10584   defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10585                               avx512vl_i8_info, prd>, VEX_WIG;
10586 }
10587
// All four element sizes: D/Q gated on AVX512F, B/W gated on BWI.
10588 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10589                                   bits<8> opc_d, bits<8> opc_q,
10590                                   string OpcodeStr, SDNode OpNode,
10591                                   X86SchedWriteWidths sched> {
10592   defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10593                                     HasAVX512>,
10594               avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
10595                                     HasBWI>;
10596 }
10597
// VPABS: packed absolute value, opcodes 0x1C-0x1F for b/w/d/q.
10598 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10599                                     SchedWriteVecALU>;
10600
10601 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the source into an undef zmm, run the 512-bit VPABSQ, then
// extract the original subregister.  The upper elements are don't-care.
10602 let Predicates = [HasAVX512, NoVLX] in {
10603   def : Pat<(v4i64 (abs VR256X:$src)),
10604             (EXTRACT_SUBREG
10605                 (VPABSQZrr
10606                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10607              sub_ymm)>;
10608   def : Pat<(v2i64 (abs VR128X:$src)),
10609             (EXTRACT_SUBREG
10610                 (VPABSQZrr
10611                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10612              sub_xmm)>;
10613 }
10614
10615 // Use 512bit version to implement 128/256 bit.
// Generic version of the widening trick above: for any unary op whose
// VL (128/256-bit) encodings are unavailable (NoVLX), insert the source
// into an undef 512-bit register, run the "Zrr" instruction, and extract
// the original subvector.
10616 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10617                                  AVX512VLVectorVTInfo _, Predicate prd> {
10618   let Predicates = [prd, NoVLX] in {
10619     def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
10620               (EXTRACT_SUBREG
10621                 (!cast<Instruction>(InstrStr # "Zrr")
10622                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10623                                  _.info256.RC:$src1,
10624                                  _.info256.SubRegIdx)),
10625               _.info256.SubRegIdx)>;
10626
10627     def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
10628               (EXTRACT_SUBREG
10629                 (!cast<Instruction>(InstrStr # "Zrr")
10630                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10631                                  _.info128.RC:$src1,
10632                                  _.info128.SubRegIdx)),
10633               _.info128.SubRegIdx)>;
10634   }
10635 }
10636
// VPLZCNT — per-element count-leading-zeros (AVX512CD), d/q element sizes.
defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10647
10648 //===---------------------------------------------------------------------===//
10649 // Counts number of ones - VPOPCNTD and VPOPCNTQ
10650 //===---------------------------------------------------------------------===//
10651
10652 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
10653 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10654                                      SchedWriteVecALU, HasVPOPCNTDQ>;
10655
10656 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10657 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10658
10659 //===---------------------------------------------------------------------===//
10660 // Replicate Single FP - MOVSHDUP and MOVSLDUP
10661 //===---------------------------------------------------------------------===//
10662
10663 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10664                             X86SchedWriteWidths sched> {
10665   defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10666                                       avx512vl_f32_info, HasAVX512>, XS;
10667 }
10668
10669 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10670                                   SchedWriteFShuffle>;
10671 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10672                                   SchedWriteFShuffle>;
10673
10674 //===----------------------------------------------------------------------===//
10675 // AVX-512 - MOVDDUP
10676 //===----------------------------------------------------------------------===//
10677
10678 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
10679                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10680   let ExeDomain = _.ExeDomain in {
10681   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10682                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
10683                    (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
10684                    Sched<[sched]>;
10685   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10686                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10687                  (_.VT (OpNode (_.VT (scalar_to_vector
10688                                        (_.ScalarLdFrag addr:$src)))))>,
10689                  EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10690                  Sched<[sched.Folded]>;
10691   }
10692 }
10693
// Common VMOVDDUP body shared by all vector widths.  The 512/256-bit forms
// use the plain unary pattern; the 128-bit form goes through
// avx512_movddup_128 because a 128-bit MOVDDUP reads only the low scalar,
// which is matched as a broadcast (X86VBroadcast) of a scalar load.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  // Use the OpNode template parameter instead of hard-coding X86Movddup so
  // the multiclass honors its argument.  The sole instantiation passes
  // X86Movddup, so the generated records are unchanged.
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}
10706
// Outer VMOVDDUP shell: f64 element info, XD prefix, REX.W set.
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86SchedWriteWidths sched> {
  defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
                                        avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
10714
// With VLX, select VMOVDDUPZ128 for v2f64 broadcasts: the 128-bit movddup
// duplicates the low element, which is exactly a two-element broadcast.
// Covers register/scalar-load/vector-load sources plus merge-masked (rrk/
// rmk) and zero-masked (rrkz/rmkz) variants driven by vselect.
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
          (VMOVDDUPZ128rm addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;

def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                   (bitconvert (v4i32 immAllZerosV))),
          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
10745
10746 //===----------------------------------------------------------------------===//
10747 // AVX-512 - Unpack Instructions
10748 //===----------------------------------------------------------------------===//
10749
10750 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
10751                                  SchedWriteFShuffleSizes>;
10752 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
10753                                  SchedWriteFShuffleSizes>;
10754
10755 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10756                                        SchedWriteShuffle, HasBWI>;
10757 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10758                                        SchedWriteShuffle, HasBWI>;
10759 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10760                                        SchedWriteShuffle, HasBWI>;
10761 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10762                                        SchedWriteShuffle, HasBWI>;
10763
10764 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10765                                        SchedWriteShuffle, HasAVX512>;
10766 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10767                                        SchedWriteShuffle, HasAVX512>;
10768 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10769                                         SchedWriteShuffle, HasAVX512>;
10770 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10771                                         SchedWriteShuffle, HasAVX512>;
10772
10773 //===----------------------------------------------------------------------===//
10774 // AVX-512 - Extract & Insert Integer Instructions
10775 //===----------------------------------------------------------------------===//
10776
10777 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10778                                                             X86VectorVTInfo _> {
10779   def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10780               (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10781               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10782               [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10783                        addr:$dst)]>,
10784               EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
10785 }
10786
// VPEXTRB: byte extract to a GPR (register form, opcode 0x14) plus the
// shared store form.  Requires BWI.
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}
10799
// VPEXTRW: word extract to a GPR.  The primary register form uses opcode
// 0xC5/MRMSrcReg; rr_REV is the alternate 0x15/MRMDestReg encoding, kept
// only so the disassembler can decode it (isCodeGenOnly/ForceDisassemble),
// linked to the primary form via FoldGenData.  Requires BWI.
multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GR32orGR64:$dst,
                        (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
10819
// VPEXTRD/VPEXTRQ: dword/qword extract to a GPR or to memory (plain
// extractelt, no truncation needed).  Requires DQI.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                                            RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                  (ins _.RC:$src1, u8imm:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set GRC:$dst,
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                  EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2),addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                Sched<[WriteVecExtractSt]>;
  }
}
10839
// EVEX-encoded element extracts.  The qword form is selected by VEX_W.
defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10844
// Memory form of the element inserts: insert an element loaded via LdFrag
// into $src1 at the immediate-selected position.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
10854
// VPINSRB/VPINSRW: insert from a GPR (register form) or from memory.
// Requires BWI.
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                            X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
  }
}
10868
// VPINSRD/VPINSRQ: plain insertelt from a GPR or memory.  Requires DQI.
multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag>, TAPD;
  }
}
10883
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
// VPINSRD and VPINSRQ intentionally share opcode 0x22; the qword form is
// distinguished by VEX_W (REX.W).
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
10890
10891 //===----------------------------------------------------------------------===//
10892 // VSHUFPS - VSHUFPD Operations
10893 //===----------------------------------------------------------------------===//
10894
10895 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
10896                         AVX512VLVectorVTInfo VTInfo_FP>{
10897   defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
10898                                     SchedWriteFShuffle>,
10899                                     EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
10900                                     AVX512AIi8Base, EVEX_4V;
10901 }
10902
10903 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
10904 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
10905
10906 //===----------------------------------------------------------------------===//
10907 // AVX-512 - Byte shift Left/Right
10908 //===----------------------------------------------------------------------===//
10909
10910 // FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
10911 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
10912                                Format MRMm, string OpcodeStr,
10913                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10914   def rr : AVX512<opc, MRMr,
10915              (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
10916              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10917              [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
10918              Sched<[sched]>;
10919   def rm : AVX512<opc, MRMm,
10920            (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
10921            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10922            [(set _.RC:$dst,(_.VT (OpNode
10923                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
10924                                  (i8 imm:$src2))))]>,
10925            Sched<[sched.Folded, ReadAfterLd]>;
10926 }
10927
// Instantiate the byte shift for all three vector widths (Z unconditional
// under prd; Z256/Z128 additionally require VLX).
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
10947
// VPSADBW body.  Note the destination and source use different VT infos:
// i8 source vectors produce i64-element results.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                (OpNode (_src.VT _src.RC:$src1),
                                        (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _dst.RC:$dst,(_dst.VT
                              (OpNode (_src.VT _src.RC:$src1),
                              (_src.VT (bitconvert
                                        (_src.LdFrag addr:$src2))))))]>,
           Sched<[sched.Folded, ReadAfterLd]>;
}
10967
// Instantiate VPSADBW for all widths (i64 results from i8 sources).
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
10984
// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
// Truth-table bit i encodes the result for input bits (A,B,C) taken from
// (src1,src2,src3), with i = A*4 + B*2 + C.  Operand numbering below is
// zero-based (operand 0 = src1).
def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1
  // (zero-based, i.e. src1 and src2 — the bit swaps below exchange the
  // truth-table entries where only the src1/src2 input bits differ).
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2
  // (zero-based, i.e. src2 and src3).
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 0 to the end
  // (zero-based: src1 moves last, giving operand order src2,src3,src1).
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning
  // (zero-based: src3 moves first, giving operand order src3,src1,src2).
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
11046
// VPTERNLOG — three-input bitwise logic selected by an 8-bit truth-table
// immediate.  $src1 is tied to $dst, so the instruction can only encode a
// load/broadcast in the $src3 slot and masking merges into $src1.  The
// extra patterns below commute the operands into that shape, rewriting the
// immediate with the VPTERNLOG*_imm8 transforms defined above.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 imm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
                            (i8 imm:$src4)), 1, 0>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                            (i8 imm:$src4)), 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;

  // Additional patterns for matching loads in other positions.
  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (bitconvert (_.LdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;

  // Additional patterns for matching broadcasts in other positions.
  def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (OpNode _.RC:$src1,
                          (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                          _.RC:$src2, (i8 imm:$src4))),
            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 imm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    (i8 imm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}
11215
// Instantiates VPTERNLOG (opcode 0x25) at all three vector widths.
// The 512-bit form needs only AVX512F; the 128/256-bit forms also need VLX.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                               _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}
11228
// Dword and qword ternary-logic instructions; the Q form sets VEX.W.
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, VEX_W;
11233
11234 // Patterns to implement vnot using vpternlog instead of creating all ones
11235 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11236 // so that the result is only dependent on src0. But we use the same source
11237 // for all operands to prevent a false dependency.
11238 // TODO: We should maybe have a more generalized algorithm for folding to
11239 // vpternlog.
// 512-bit vnot: vpternlogq with imm 15 computes NOT of operand 0; all three
// sources are the same register to avoid a false dependency.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
11244
// Without VLX only the 512-bit VPTERNLOGQ exists, so widen the 128/256-bit
// value into a ZMM register, apply the 512-bit instruction, then extract the
// original-width subregister. Upper lanes are undefined (IMPLICIT_DEF), which
// is fine since only the low subregister is used.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}
11261
// With VLX the narrow VPTERNLOGQ forms exist, so use them directly.
let Predicates = [HasVLX] in {
  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
11268
11269 //===----------------------------------------------------------------------===//
11270 // AVX-512 - FixupImm
11271 //===----------------------------------------------------------------------===//
11272
// Packed VFIXUPIMM: dst = OpNode(dst/src1, src2, integer-vector src3, imm)
// with the default (current) rounding environment. $src1 is tied to $dst.
// rri = reg/reg, rmi = reg/mem, rmbi = reg/broadcast-mem (EVEX_B).
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                         OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (_.IntVT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>,
                      Sched<[sched.Folded, ReadAfterLd]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>,
                    EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  } // Constraints = "$src1 = $dst"
}
11305
// Packed VFIXUPIMM with {sae} (suppress-all-exceptions): reg/reg only,
// FROUND_NO_EXC, encoded with EVEX_B. Only instantiated at 512 bits.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (_.IntVT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
  }
}
11322
// Scalar VFIXUPIMM (SS/SD). _src3VT is the integer vector type of the third
// source (v4i32 for SS, v2i64 for SD). rri = reg with current rounding,
// rrib = reg with {sae}, rmi = memory form.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>, Sched<[sched]>;
    // NOTE(review): this reg-reg {sae} form uses the folded (memory) sched
    // class even though it has no memory operand — looks copy-pasted from the
    // rmi form; confirm intended scheduling.
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                     (OpNode (_.VT _.RC:$src1),
                             (_.VT _.RC:$src2),
                             (_src3VT.VT (scalar_to_vector
                                       (_src3VT.ScalarLdFrag addr:$src3))),
                             (i32 imm:$src4),
                             (i32 FROUND_CURRENT))>,
                     Sched<[sched.Folded, ReadAfterLd]>;
  }
}
11358
// Instantiates packed VFIXUPIMM at all widths; only the 512-bit form gets the
// additional {sae} variant. Narrow forms require VLX.
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                       _Vec.info512>,
                avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
                            _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
                            _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
11373
// Scalar (SS/SD, opcode 0x55) and packed (PS/PD, opcode 0x54) VFIXUPIMM.
// The 64-bit-element variants set VEX.W and use 64-bit CD8 tuple scaling.
defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info>,
                         EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info>,
                         EVEX_CD8<64, CD8VF>, VEX_W;
11384
11385 // Patterns used to select SSE scalar fp arithmetic instructions from
11386 // either:
11387 //
11388 // (1) a scalar fp operation followed by a blend
11389 //
11390 // The effect is that the backend no longer emits unnecessary vector
11391 // insert instructions immediately after SSE scalar fp instructions
11392 // like addss or mulss.
11393 //
11394 // For example, given the following code:
11395 //   __m128 foo(__m128 A, __m128 B) {
11396 //     A[0] += B[0];
11397 //     return A;
11398 //   }
11399 //
11400 // Previously we generated:
11401 //   addss %xmm0, %xmm1
11402 //   movss %xmm1, %xmm0
11403 //
11404 // We now generate:
11405 //   addss %xmm1, %xmm0
11406 //
11407 // (2) a vector packed single/double fp operation followed by a vector insert
11408 //
11409 // The effect is that the backend converts the packed fp instruction
11410 // followed by a vector insert into a single SSE scalar fp instruction.
11411 //
11412 // For example, given the following code:
11413 //   __m128 foo(__m128 A, __m128 B) {
11414 //     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
11416 //   }
11417 //
11418 // Previously we generated:
11419 //   addps %xmm0, %xmm1
11420 //   movss %xmm1, %xmm0
11421 //
11422 // We now generate:
11423 //   addss %xmm1, %xmm0
11424
11425 // TODO: Some canonicalization in lowering would simplify the number of
11426 // patterns we have to try to match.
// Patterns selecting the AVX-512 intrinsic-form scalar math instructions
// (V*Zrr_Int and their masked/zero-masked variants) for movss/movsd blends of
// a scalar op, including X86selects-masked forms.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                           X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(_.VT (MoveNode (_.VT VR128X:$dst), (_.VT (scalar_to_vector
          (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
          _.FRC:$src))))),
      (!cast<I>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
          (COPY_TO_REGCLASS _.FRC:$src, VR128X))>;

    // vector math op with insert via movss
    def : Pat<(_.VT (MoveNode (_.VT VR128X:$dst),
          (Op (_.VT VR128X:$dst), (_.VT VR128X:$src)))),
      (!cast<I>("V"#OpcPrefix#Zrr_Int) _.VT:$dst, _.VT:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
      (!cast<I>("V"#OpcPrefix#Zrr_Intk) (COPY_TO_REGCLASS _.FRC:$src0, VR128X),
          VK1WM:$mask, _.VT:$src1,
          (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>;
    
    // extracted zero-masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
      (!cast<I>("V"#OpcPrefix#Zrr_Intkz) 
          VK1WM:$mask, _.VT:$src1,
          (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>;
  }
}
11464
// Instantiate the scalar-math selection patterns for the four basic FP ops,
// in both single (movss/fp32imm0) and double (movsd/fp64imm0) precision.
defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11474
11475
11476 //===----------------------------------------------------------------------===//
11477 // AES instructions
11478 //===----------------------------------------------------------------------===//
11479
// EVEX-encoded VAES at all vector widths, reusing the legacy AESI_binop_rm_int
// multiclass. IntPrefix names the 128-bit intrinsic; "_256"/"_512" suffixes
// select the wider ones. 128/256-bit forms need VLX+VAES; 512-bit needs
// AVX512F+VAES.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
    }
    let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}
11497
// The four AES round instructions, opcodes 0xDC-0xDF.
defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11502
11503 //===----------------------------------------------------------------------===//
11504 // PCLMUL instructions - Carry less multiplication
11505 //===----------------------------------------------------------------------===//
11506
// EVEX-encoded VPCLMULQDQ at all widths (512-bit needs AVX512F+VPCLMULQDQ,
// narrow forms need VLX+VPCLMULQDQ), plus the immediate-mnemonic aliases
// (e.g. vpclmulhqhqdq) for each width.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11524
11525 //===----------------------------------------------------------------------===//
11526 // VBMI2
11527 //===----------------------------------------------------------------------===//
11528
// VBMI2 variable funnel-shift, reg/reg and reg/mem forms.
// dst = OpNode(dst/src1, src2, src3); $src1 is tied to $dst.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in {
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
                AVX512FMA3Base,
                Sched<[sched.Folded, ReadAfterLd]>;
  }
}
11547
// Extends VBMI2_shift_var_rm with a broadcast-memory (EVEX_B) form; only used
// for 32/64-bit element widths (word forms have no embedded broadcast).
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"##VTI.BroadcastStr##", $src2",
              "$src2, ${src3}"##VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
              AVX512FMA3Base, EVEX_B,
              Sched<[sched.Folded, ReadAfterLd]>;
}
11562
// Instantiates the non-broadcast variable-shift forms at all widths.
// 512-bit needs VBMI2; 128/256-bit also need VLX.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                   EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                   EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                   EVEX_V128;
  }
}
11575
// Instantiates the broadcast-capable variable-shift forms at all widths.
// 512-bit needs VBMI2; 128/256-bit also need VLX.
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                    EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                    EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                    EVEX_V128;
  }
}
// Word/dword/qword variable funnel-shifts. W uses wOp with no broadcast form;
// D and Q share dqOp, differ by element width, and get broadcast forms.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
11597
// Immediate-count funnel-shifts, same opcode layout as the variable forms:
// wOp for the word forms, dqOp shared by the dword/qword forms.
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
11608
// Concat & Shift: funnel shifts (variable 0x70-0x73, immediate share opcodes).
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress: byte/word element compress (VBMI2); not memory-foldable.
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand: byte/word element expand (VBMI2).
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11627
11628 //===----------------------------------------------------------------------===//
11629 // VNNI
11630 //===----------------------------------------------------------------------===//
11631
// VNNI dot-product accumulate: dst = OpNode(dst/src1, src2, src3) with reg,
// mem, and dword-broadcast (EVEX_B) source forms. $src1 is tied to $dst.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3))>,
                                   EVEX_4V, T8PD, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (bitconvert
                                                     (VTI.LdFrag addr:$src3)))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[sched.Folded, ReadAfterLd]>;
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                   "$src2, ${src3}"##VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (X86VBroadcast
                                             (VTI.ScalarLdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, ReadAfterLd]>;
}
11659
// Instantiates a VNNI op at all i32-vector widths.
// 512-bit needs VNNI; 128/256-bit also need VLX.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128;
  }
}
11669
// FIXME: Is there a better scheduler class for VPDP?
// VNNI dot products: byte (BUSD/BUSDS) and word (WSSD/WSSDS) variants,
// with and without saturation, opcodes 0x50-0x53.
defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>;
defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>;
defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>;
defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>;
11675
11676 //===----------------------------------------------------------------------===//
11677 // Bit Algorithms
11678 //===----------------------------------------------------------------------===//
11679
// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// BITALG byte/word population count (0x54), plus lowering helpers that map
// generic ctpop nodes onto these instructions.
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
11688
// VPSHUFBITQMB (0x8F): bit-shuffle producing a mask-register result (KRC),
// in reg/reg and reg/mem forms.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, ReadAfterLd]>;
}
11706
// Instantiates VPSHUFBITQMB at all widths.
// 512-bit needs BITALG; 128/256-bit also need VLX.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}
11715
// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
11718
11719 //===----------------------------------------------------------------------===//
11720 // GFNI
11721 //===----------------------------------------------------------------------===//
11722
// EVEX-encoded GF(2^8) multiply at all widths. All forms require GFNI+BWI;
// 512-bit additionally needs AVX512F, narrow forms need VLX. Commutable (the
// trailing 1 in avx512_binop_rm).
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                                EVEX_V128;
  }
}
11735
// GF(2^8) byte multiply, opcode 0xCF in the T8PD map.
defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;
11739
// Adds a qword-broadcast (EVEX_B) form on top of the reg/reg and reg/mem
// forms inherited from avx512_3Op_rm_imm8. BcstVTI supplies the i64-element
// type used for the broadcast, bitcast back to the byte-vector type.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3))>, EVEX_B,
                 Sched<[sched.Folded, ReadAfterLd]>;
}
11754
// Instantiates a GF(2^8) affine op at all widths, pairing each byte-vector
// type with the matching i64-vector broadcast type.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}
11767
// VGF2P8AFFINEINVQB - EVEX-encoded GF(2^8) affine-inverse transform (GFNI).
// Opcode 0xCF in the AVX512AIi8 (0F3A) map; VEX_W marks the qword-matrix
// operand, CD8 granularity is byte/full-vector.
defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
// VGF2P8AFFINEQB - EVEX-encoded GF(2^8) affine transform (GFNI).  Same
// encoding attributes as the inverse form above, but opcode 0xCE.
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                         X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
11774
11775
//===----------------------------------------------------------------------===//
// AVX5124FMAPS - AVX-512 4-iteration Fused Multiply-Add, Packed Single
//===----------------------------------------------------------------------===//
11779
// All four 4FMAPS instructions are memory-only (mayLoad = 1) and are defined
// with empty pattern lists (assembly/encoding only; selection presumably
// happens elsewhere -- no ISel patterns are attached here).  The accumulator
// $src1 is tied to $dst by the Constraints, and the memory operand is a
// 128-bit block (f128mem, CD8VQ quarter-vector tuple).
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
// Packed 4-iteration multiply-add, 512-bit: v4fmaddps zmm, zmm, m128.
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

// Packed 4-iteration negated multiply-add, 512-bit.
defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

// Scalar 4-iteration multiply-add (single precision), 128-bit encoding.
defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

// Scalar 4-iteration negated multiply-add (single precision).
defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}
11806
//===----------------------------------------------------------------------===//
// AVX5124VNNIW - AVX-512 4-iteration Vector Neural Network Instructions, Word
//===----------------------------------------------------------------------===//
11810
// Both 4VNNIW instructions are memory-only (mayLoad = 1) with empty pattern
// lists (assembly/encoding only).  $src1 is tied to $dst, and the memory
// operand is a 128-bit block (f128mem, CD8VQ quarter-vector tuple).
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
// 4-iteration dot product of signed words with dword accumulation.
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

// Saturating variant (opcode 0x53).
defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}
11825