//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))),
SSE_INTSHIFT_ITINS_P.rm>;
+ }
}
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
// src2 is always 128-bit
+ let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase,
EVEX_4V;
+ }
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
//===-------------------------------------------------------------------===//
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (bitconvert (_.LdFrag addr:$src2))))),
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
EVEX_CD8<_.EltSize, CD8VF>;
+ }
}
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
+let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
avx512vl_i32_info>;
+let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
avx512vl_i64_info>, VEX_W;
//===----------------------------------------------------------------------===//
// op(broadcast(eltVt),imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
+ X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2),
(i32 FROUND_CURRENT))>, EVEX_B;
+ }
}
//handle instruction reg_vec1 = op(reg_vec2,imm),{sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
// op(reg_vec2,broadcast(eltVt),imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
+ X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>, EVEX_B;
+ }
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
-
+ let ExeDomain = DestInfo.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
(i8 imm:$src3)))>;
+ }
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
X86VectorVTInfo _>:
avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _>{
+ let ExeDomain = _.ExeDomain in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
// op(reg_vec2,mem_scalar,imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
-
+ X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
[]>;
}
+ }
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
HasBWI>, EVEX_4V;
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
- let Constraints = "$src1 = $dst" in {
+ X86VectorVTInfo _>{
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
//===----------------------------------------------------------------------===//
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
- let Constraints = "$src1 = $dst" in {
+ X86VectorVTInfo _>{
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
}
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
- SDNode OpNode, X86VectorVTInfo _>{
-let Constraints = "$src1 = $dst" in {
+ SDNode OpNode, X86VectorVTInfo _>{
+let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, X86VectorVTInfo _src3VT> {
- let Constraints = "$src1 = $dst" , Predicates = [HasAVX512] in {
+ let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
+ ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
Requires<[HasAVX512]>;
class AVX512AIi8Base : TAPD {
- Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
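The .td hunks above all apply the same idiom: wrap each defm in `let ExeDomain = _.ExeDomain in` so the FP instructions inherit SSEPackedSingle/SSEPackedDouble from their X86VectorVTInfo instead of the SSEPackedInt default that AVX512AIi8Base used to force. That is what lets the execution-domain fix pass keep the neighbouring register copies and zero idioms in the FP domain (vmovdqa64 -> vmovaps/vmovapd, vpxor -> vxorps/vxorpd) in the test updates below. A schematic sketch of the idiom, with a hypothetical multiclass name and a simplified body rather than an exact excerpt from the patch:

multiclass example_fp_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86VectorVTInfo _> {
  // Propagate the execution domain of the vector type (SSEPackedSingle for
  // f32 infos, SSEPackedDouble for f64 infos) to every def in this block.
  let ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 imm:$src3), (i32 FROUND_CURRENT))>;
  }
}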
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm2, %zmm3
+; CHECK-NEXT: vmovapd %zmm2, %zmm3
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z}
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfixupimmpd $3, %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
+; CHECK-NEXT: vmovapd %zmm0, %zmm5
; CHECK-NEXT: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z}
; CHECK-NEXT: vfixupimmpd $2, {sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm1
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1}
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
+; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vmovaps %zmm0, %zmm5
; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1}
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT: vmovaps %zmm0, %zmm4
; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
+; CHECK-NEXT: vmovaps %zmm0, %zmm5
; CHECK-NEXT: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1}
; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm1
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} {z}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT: vmovaps %zmm0, %zmm4
; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT: vmovapd %zmm0, %zmm4
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT: vmovapd %zmm0, %zmm3
; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
-; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm5
+; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vmovapd %zmm0, %zmm5
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z}
; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8]
+; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd8]
; CHECK-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05]
; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4]
; CHECK-NEXT: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04]
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8]
+; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd8]
; CHECK-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05]
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03]
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8]
+; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd8]
; CHECK-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04]
; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4]
; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05]
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8]
+; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd8]
; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05]
; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm5 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xe8]
+; CHECK-NEXT: vmovapd %ymm0, %ymm5 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xe8]
; CHECK-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04]
; CHECK-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03]
; CHECK-NEXT: vaddpd %ymm5, %ymm3, %ymm1 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xcd]
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8]
+; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8]
; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05]
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm4 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xe0]
+; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xe0]
; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05]
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05]
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8]
+; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8]
; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05]
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm4 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xe0]
+; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xe0]
; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05]
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05]
; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8]
+; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8]
; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm4 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xe0]
+; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xe0]
; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05]
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05]
; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8]
+; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8]
; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05]
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm4 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xe0]
+; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xe0]
; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05]
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r15d
; AVX512VL-NEXT: orl %ebx, %r15d
-; AVX512VL-NEXT: vmovdqu64 (%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqu64 (%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %r15d
; AVX512VL-NEXT: orl %ebx, %r15d
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
+; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
+; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; AVX512VL-NEXT: movl %eax, {{[0-9]+}}(%rsp) # 4-byte Spill
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %r13w
; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bp
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %r14w
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %r15w
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r14d
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r15d
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %ebp
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %ebx
; AVX512VL-NEXT: orl %ebp, %ebx
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bp
; AVX512VL-NEXT: shll $16, %ebp
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %ebx
; AVX512VL-NEXT: orl %ebp, %ebx
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, %bp
; AVX512VL-NEXT: shll $16, %ebp
-; AVX512VL-NEXT: vmovdqa64 (%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r12d
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r13d
-; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %ebp
-; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r14d
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
+; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
+; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqu64 {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
+; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovapd {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r14d
-; AVX512VL-NEXT: vmovdqa64 {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
+; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT: callq __gnu_f2h_ieee
; AVX512VL-NEXT: movl %eax, %r15d
}
define <2 x i16> @cvt_2f64_to_2i16(<2 x double> %a0) nounwind {
-; AVX1-LABEL: cvt_2f64_to_2i16:
-; AVX1: # BB#0:
-; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: subq $16, %rsp
-; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT: callq __truncdfhf2
-; AVX1-NEXT: movw %ax, %bx
-; AVX1-NEXT: shll $16, %ebx
-; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2
-; AVX1-NEXT: movzwl %ax, %eax
-; AVX1-NEXT: orl %ebx, %eax
-; AVX1-NEXT: vmovd %eax, %xmm0
-; AVX1-NEXT: addq $16, %rsp
-; AVX1-NEXT: popq %rbx
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_2f64_to_2i16:
-; AVX2: # BB#0:
-; AVX2-NEXT: pushq %rbx
-; AVX2-NEXT: subq $16, %rsp
-; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-NEXT: callq __truncdfhf2
-; AVX2-NEXT: movw %ax, %bx
-; AVX2-NEXT: shll $16, %ebx
-; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX2-NEXT: callq __truncdfhf2
-; AVX2-NEXT: movzwl %ax, %eax
-; AVX2-NEXT: orl %ebx, %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: addq $16, %rsp
-; AVX2-NEXT: popq %rbx
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: cvt_2f64_to_2i16:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: pushq %rbx
-; AVX512F-NEXT: subq $16, %rsp
-; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512F-NEXT: callq __truncdfhf2
-; AVX512F-NEXT: movw %ax, %bx
-; AVX512F-NEXT: shll $16, %ebx
-; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512F-NEXT: callq __truncdfhf2
-; AVX512F-NEXT: movzwl %ax, %eax
-; AVX512F-NEXT: orl %ebx, %eax
-; AVX512F-NEXT: vmovd %eax, %xmm0
-; AVX512F-NEXT: addq $16, %rsp
-; AVX512F-NEXT: popq %rbx
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: cvt_2f64_to_2i16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: pushq %rbx
-; AVX512VL-NEXT: subq $16, %rsp
-; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: callq __truncdfhf2
-; AVX512VL-NEXT: movw %ax, %bx
-; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512VL-NEXT: callq __truncdfhf2
-; AVX512VL-NEXT: movzwl %ax, %eax
-; AVX512VL-NEXT: orl %ebx, %eax
-; AVX512VL-NEXT: vmovd %eax, %xmm0
-; AVX512VL-NEXT: addq $16, %rsp
-; AVX512VL-NEXT: popq %rbx
-; AVX512VL-NEXT: retq
+; ALL-LABEL: cvt_2f64_to_2i16:
+; ALL: # BB#0:
+; ALL-NEXT: pushq %rbx
+; ALL-NEXT: subq $16, %rsp
+; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
+; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; ALL-NEXT: callq __truncdfhf2
+; ALL-NEXT: movw %ax, %bx
+; ALL-NEXT: shll $16, %ebx
+; ALL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; ALL-NEXT: callq __truncdfhf2
+; ALL-NEXT: movzwl %ax, %eax
+; ALL-NEXT: orl %ebx, %eax
+; ALL-NEXT: vmovd %eax, %xmm0
+; ALL-NEXT: addq $16, %rsp
+; ALL-NEXT: popq %rbx
+; ALL-NEXT: retq
%1 = fptrunc <2 x double> %a0 to <2 x half>
%2 = bitcast <2 x half> %1 to <2 x i16>
ret <2 x i16> %2
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $40, %rsp
-; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $40, %rsp
-; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $40, %rsp
-; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $96, %rsp
-; AVX512VL-NEXT: vmovdqu64 %zmm0, (%rsp) # 64-byte Spill
+; AVX512VL-NEXT: vmovupd %zmm0, (%rsp) # 64-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
}
define void @store_cvt_2f64_to_2i16(<2 x double> %a0, <2 x i16>* %a1) nounwind {
-; AVX1-LABEL: store_cvt_2f64_to_2i16:
-; AVX1: # BB#0:
-; AVX1-NEXT: pushq %rbp
-; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: subq $24, %rsp
-; AVX1-NEXT: movq %rdi, %rbx
-; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT: callq __truncdfhf2
-; AVX1-NEXT: movl %eax, %ebp
-; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2
-; AVX1-NEXT: movw %ax, (%rbx)
-; AVX1-NEXT: movw %bp, 2(%rbx)
-; AVX1-NEXT: addq $24, %rsp
-; AVX1-NEXT: popq %rbx
-; AVX1-NEXT: popq %rbp
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: store_cvt_2f64_to_2i16:
-; AVX2: # BB#0:
-; AVX2-NEXT: pushq %rbp
-; AVX2-NEXT: pushq %rbx
-; AVX2-NEXT: subq $24, %rsp
-; AVX2-NEXT: movq %rdi, %rbx
-; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-NEXT: callq __truncdfhf2
-; AVX2-NEXT: movl %eax, %ebp
-; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX2-NEXT: callq __truncdfhf2
-; AVX2-NEXT: movw %ax, (%rbx)
-; AVX2-NEXT: movw %bp, 2(%rbx)
-; AVX2-NEXT: addq $24, %rsp
-; AVX2-NEXT: popq %rbx
-; AVX2-NEXT: popq %rbp
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: store_cvt_2f64_to_2i16:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: pushq %rbx
-; AVX512F-NEXT: subq $24, %rsp
-; AVX512F-NEXT: movq %rdi, %rbx
-; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512F-NEXT: callq __truncdfhf2
-; AVX512F-NEXT: movl %eax, %ebp
-; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512F-NEXT: callq __truncdfhf2
-; AVX512F-NEXT: movw %ax, (%rbx)
-; AVX512F-NEXT: movw %bp, 2(%rbx)
-; AVX512F-NEXT: addq $24, %rsp
-; AVX512F-NEXT: popq %rbx
-; AVX512F-NEXT: popq %rbp
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: store_cvt_2f64_to_2i16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: pushq %rbp
-; AVX512VL-NEXT: pushq %rbx
-; AVX512VL-NEXT: subq $24, %rsp
-; AVX512VL-NEXT: movq %rdi, %rbx
-; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: callq __truncdfhf2
-; AVX512VL-NEXT: movl %eax, %ebp
-; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512VL-NEXT: callq __truncdfhf2
-; AVX512VL-NEXT: movw %ax, (%rbx)
-; AVX512VL-NEXT: movw %bp, 2(%rbx)
-; AVX512VL-NEXT: addq $24, %rsp
-; AVX512VL-NEXT: popq %rbx
-; AVX512VL-NEXT: popq %rbp
-; AVX512VL-NEXT: retq
+; ALL-LABEL: store_cvt_2f64_to_2i16:
+; ALL: # BB#0:
+; ALL-NEXT: pushq %rbp
+; ALL-NEXT: pushq %rbx
+; ALL-NEXT: subq $24, %rsp
+; ALL-NEXT: movq %rdi, %rbx
+; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
+; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; ALL-NEXT: callq __truncdfhf2
+; ALL-NEXT: movl %eax, %ebp
+; ALL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; ALL-NEXT: callq __truncdfhf2
+; ALL-NEXT: movw %ax, (%rbx)
+; ALL-NEXT: movw %bp, 2(%rbx)
+; ALL-NEXT: addq $24, %rsp
+; ALL-NEXT: popq %rbx
+; ALL-NEXT: popq %rbp
+; ALL-NEXT: retq
%1 = fptrunc <2 x double> %a0 to <2 x half>
%2 = bitcast <2 x half> %1 to <2 x i16>
store <2 x i16> %2, <2 x i16>* %a1
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $88, %rsp
; AVX512VL-NEXT: movq %rdi, %rbx
-; AVX512VL-NEXT: vmovdqu64 %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %r14d
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $32, %rsp
; AVX512VL-NEXT: movq %rdi, %r14
-; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bp
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $32, %rsp
; AVX512VL-NEXT: movq %rdi, %r14
-; AVX512VL-NEXT: vmovdqu64 %ymm0, (%rsp) # 32-byte Spill
+; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bp
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $200, %rsp
; AVX512VL-NEXT: movq %rdi, %rbx
-; AVX512VL-NEXT: vmovdqu64 %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill
+; AVX512VL-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
}
define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_3254:
-; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_3254:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_3254:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_3254:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_3276:
-; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_3276:
-; AVX2: # BB#0:
-; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_3276:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_3276:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
ret <4 x double> %shuffle
}
define <16 x float> @shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x float> %a) {
; ALL-LABEL: shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
; ALL: # BB#0:
-; ALL-NEXT: vmovdqa32 {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
+; ALL-NEXT: vmovaps {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
%c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00000010:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00000010:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00000200:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00000200:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00003000:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00003000:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00040000:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00040000:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00500000:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00500000:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_06000000:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_06000000:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00112233:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00112233:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_00001111:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00001111:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
;
; AVX512F-LABEL: shuffle_v8f64_00015444:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00015444:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
;
; AVX512F-LABEL: shuffle_v8f64_00204644:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00204644:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
;
; AVX512F-LABEL: shuffle_v8f64_03004474:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_03004474:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
;
; AVX512F-LABEL: shuffle_v8f64_10004444:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_10004444:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
;
; AVX512F-LABEL: shuffle_v8f64_22006446:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_22006446:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
;
; AVX512F-LABEL: shuffle_v8f64_33307474:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_33307474:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
;
; AVX512F-LABEL: shuffle_v8f64_32104567:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_32104567:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
;
; AVX512F-LABEL: shuffle_v8f64_00236744:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00236744:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
;
; AVX512F-LABEL: shuffle_v8f64_00226644:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00226644:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
;
; AVX512F-LABEL: shuffle_v8f64_002u6u44:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_002u6u44:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
;
; AVX512F-LABEL: shuffle_v8f64_00uu66uu:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
+; AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_00uu66uu:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
+; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
; CHECK-LABEL: combine_permvar_8f64_identity_mask:
; CHECK: # BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
+; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [7,6,5,4,3,2,1,0]
; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1}
-; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
+; CHECK-NEXT: vmovapd {{.*#+}} zmm0 = [7,14,5,12,3,10,1,8]
; CHECK-NEXT: vpermpd %zmm1, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
%res0 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %x1, i8 %m)
%res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %res0, <8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x double> %res0, i8 %m)
; CHECK: # BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4]
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: retq
%1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> <i64 3, i64 2, i64 1, i64 undef, i64 undef, i64 6, i64 5, i64 4>, <8 x double> %x1, i8 %m)
ret <8 x double> %1