// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
+ if (Subtarget.hasVLX()) {
+ // Extract to v4i1/v2i1.
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Load,
+ DAG.getIntPtrConstant(0, dl));
+ // Finally, extend to the desired register type using ExtOpcode.
+ return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract);
+ }
+
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load);
if (NumElts <= 8) {
// A subset, assume that we have only AVX-512F
- unsigned NumBitsToLoad = 8;
- MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad);
- SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(),
+ SDValue Load = DAG.getLoad(MVT::i8, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getMemOperand());
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
- MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad);
- SDValue BitVec = DAG.getBitcast(MaskVT, Load);
+ SDValue BitVec = DAG.getBitcast(MVT::v8i1, Load);
if (NumElts == 8)
return DAG.getNode(ExtOpcode, dl, VT, BitVec);
- // we should take care to v4i1 and v2i1
+ if (Subtarget.hasVLX()) {
+ // Extract to v4i1/v2i1.
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, BitVec,
+ DAG.getIntPtrConstant(0, dl));
+ // Finally, extend to the desired register type using ExtOpcode.
+ return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract);
+ }
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec);
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
-; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
-; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = bitcast i2 %a0 to <2 x i1>
%2 = sext <2 x i1> %1 to <2 x i64>
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = bitcast i4 %a0 to <4 x i1>
%2 = sext <4 x i1> %1 to <4 x i32>
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
-; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
+; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = bitcast i4 %a0 to <4 x i1>
%2 = sext <4 x i1> %1 to <4 x i64>
; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512VLBW-NEXT: kmovd %eax, %k1
-; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
-; AVX512VLBW-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
-; AVX512VLBW-NEXT: vzeroupper
+; AVX512VLBW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
%1 = bitcast i2 %a0 to <2 x i1>
%2 = zext <2 x i1> %1 to <2 x i64>
; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512VLBW-NEXT: kmovd %eax, %k1
-; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
-; AVX512VLBW-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512VLBW-NEXT: vzeroupper
+; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
%1 = bitcast i4 %a0 to <4 x i1>
%2 = zext <4 x i1> %1 to <4 x i32>
; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512VLBW-NEXT: kmovd %eax, %k1
-; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
-; AVX512VLBW-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
+; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
%1 = bitcast i4 %a0 to <4 x i1>
%2 = zext <4 x i1> %1 to <4 x i64>
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
-; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
-; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = bitcast i2 %a0 to <2 x i1>
ret <2 x i1> %1
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
-; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = bitcast i4 %a0 to <4 x i1>
ret <4 x i1> %1