const X86Subtarget &Subtarget) {
assert((Opcode == X86ISD::PACKSS || Opcode == X86ISD::PACKUS) &&
"Unexpected PACK opcode");
+ assert(DstVT.isVector() && "VT not a vector?");
// Requires SSE2 but AVX512 has fast vector truncate.
- if (!Subtarget.hasSSE2() || Subtarget.hasAVX512() || !DstVT.isVector())
+ if (!Subtarget.hasSSE2())
return SDValue();
EVT SrcVT = In.getValueType();
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
}
if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) &&
+ !Subtarget.hasAVX512() &&
(SVT == MVT::i8 || SVT == MVT::i16) &&
(InSVT == MVT::i16 || InSVT == MVT::i32)) {
if (auto USatVal = detectSSatPattern(In, VT, true)) {
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256-NEXT: vpmovdw %ymm1, %xmm1
-; AVX256-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
-; AVX256-NEXT: vpsrlw $8, %xmm0, %xmm0
-; AVX256-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX256-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
;
; AVX256VL-NEXT: kshiftrw $8, %k0, %k2
; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256VL-NEXT: vpmovdw %ymm1, %xmm1
-; AVX256VL-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256VL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX256VL-NEXT: vpsrlw $8, %xmm0, %xmm0
-; AVX256VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX256VL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX256VL-NEXT: vzeroupper
; AVX256VL-NEXT: retq
;
; AVX256VL-NEXT: kshiftrw $8, %k0, %k2
; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256VL-NEXT: vpmovdw %ymm1, %xmm1
-; AVX256VL-NEXT: vpsrlw $8, %xmm1, %xmm1
; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256VL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX256VL-NEXT: vpsrlw $8, %xmm0, %xmm0
-; AVX256VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX256VL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX256VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX256VL-NEXT: retq
;