From 6bab3515ebc5108e9f8a418940af58d3b553af4f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 5 Oct 2018 18:13:36 +0000 Subject: [PATCH] [X86] Don't promote i16 compares to i32 if the immediate will fit in 8 bits. The comments in this code say we were trying to avoid 16-bit immediates, but if the immediate fits in 8 bits this isn't an issue. This avoids creating a zero extend that probably won't go away. The movmskb-related changes are interesting. The movmskb instruction writes a 32-bit result, but fills the upper bits with 0. So the zero_extend we were previously emitting was free, but we turned a -1 immediate that would fit in 8 bits into a 32-bit immediate so it was still bad. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@343871 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 7 +++- .../CodeGen/X86/lack-of-signed-truncation-check.ll | 12 ++---- test/CodeGen/X86/movmsk-cmp.ll | 48 +++++++++++----------- test/CodeGen/X86/sat-add.ll | 3 +- test/CodeGen/X86/setcc.ll | 4 +- test/CodeGen/X86/signed-truncation-check.ll | 6 +-- test/CodeGen/X86/x86-shrink-wrapping.ll | 3 +- 7 files changed, 39 insertions(+), 44 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 91fa216c053..f63fd3e0463 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18435,8 +18435,11 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) { // Only promote the compare up to I32 if it is a 16 bit operation // with an immediate. 16 bit immediates are to be avoided. 
- if ((Op0.getValueType() == MVT::i16 && - (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1))) && + if (Op0.getValueType() == MVT::i16 && + ((isa<ConstantSDNode>(Op0) && + !cast<ConstantSDNode>(Op0)->getAPIntValue().isSignedIntN(8)) || + (isa<ConstantSDNode>(Op1) && + !cast<ConstantSDNode>(Op1)->getAPIntValue().isSignedIntN(8))) && !DAG.getMachineFunction().getFunction().optForMinSize() && !Subtarget.isAtom()) { unsigned ExtendOp = diff --git a/test/CodeGen/X86/lack-of-signed-truncation-check.ll b/test/CodeGen/X86/lack-of-signed-truncation-check.ll index 4e69e41c813..6509b76bb1d 100644 --- a/test/CodeGen/X86/lack-of-signed-truncation-check.ll +++ b/test/CodeGen/X86/lack-of-signed-truncation-check.ll @@ -508,16 +508,14 @@ define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl $128, %eax ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: cmpl $127, %eax +; X86-NEXT: cmpw $127, %ax ; X86-NEXT: seta %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_bad_i8_i16: ; X64: # %bb.0: ; X64-NEXT: subl $-128, %edi -; X64-NEXT: movzwl %di, %eax -; X64-NEXT: cmpl $127, %eax +; X64-NEXT: cmpw $127, %di ; X64-NEXT: seta %al ; X64-NEXT: retq %tmp0 = add i16 %x, 128 ; 1U << (8-1) @@ -600,16 +598,14 @@ define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl $8, %eax -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: cmpl $15, %eax +; X86-NEXT: cmpw $15, %ax ; X86-NEXT: seta %al ; X86-NEXT: retl ; ; X64-LABEL: add_ugecmp_bad_i16_i4: ; X64: # %bb.0: ; X64-NEXT: addl $8, %edi -; X64-NEXT: movzwl %di, %eax -; X64-NEXT: cmpl $15, %eax +; X64-NEXT: cmpw $15, %di ; X64-NEXT: seta %al ; X64-NEXT: retq %tmp0 = add i16 %x, 8 ; 1U << (4-1) diff --git a/test/CodeGen/X86/movmsk-cmp.ll b/test/CodeGen/X86/movmsk-cmp.ll index fac16dcdc59..44d321d4916 100644 --- a/test/CodeGen/X86/movmsk-cmp.ll +++ b/test/CodeGen/X86/movmsk-cmp.ll @@ -8,14 +8,14 @@ define i1 @allones_v16i8_sign(<16 x i8> %arg) { ; SSE2-LABEL: allones_v16i8_sign: ; SSE2: # %bb.0: ; SSE2-NEXT: pmovmskb %xmm0, 
%eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: cmpw $-1, %ax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; ; AVX-LABEL: allones_v16i8_sign: ; AVX: # %bb.0: ; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX-NEXT: cmpw $-1, %ax ; AVX-NEXT: sete %al ; AVX-NEXT: retq ; @@ -352,7 +352,7 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) { ; SSE2-NEXT: pcmpgtw %xmm0, %xmm2 ; SSE2-NEXT: packsswb %xmm3, %xmm2 ; SSE2-NEXT: pmovmskb %xmm2, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: cmpw $-1, %ax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -364,7 +364,7 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) { ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm2, %xmm0 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX1-NEXT: cmpw $-1, %ax ; AVX1-NEXT: sete %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -376,7 +376,7 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) { ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX2-NEXT: cmpw $-1, %ax ; AVX2-NEXT: sete %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -719,7 +719,7 @@ define i1 @allones_v16i32_sign(<16 x i32> %arg) { ; SSE2-NEXT: packssdw %xmm2, %xmm4 ; SSE2-NEXT: packsswb %xmm3, %xmm4 ; SSE2-NEXT: pmovmskb %xmm4, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: cmpw $-1, %ax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -736,7 +736,7 @@ define i1 @allones_v16i32_sign(<16 x i32> %arg) { ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX1-NEXT: cmpw $-1, %ax ; AVX1-NEXT: sete %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -751,7 +751,7 @@ define i1 @allones_v16i32_sign(<16 x i32> %arg) { ; AVX2-NEXT: vextracti128 $1, %ymm0, 
%xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX2-NEXT: cmpw $-1, %ax ; AVX2-NEXT: sete %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1126,7 +1126,7 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) { ; SSE2: # %bb.0: ; SSE2-NEXT: psllw $7, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: cmpw $-1, %ax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -1134,7 +1134,7 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) { ; AVX: # %bb.0: ; AVX-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX-NEXT: cmpw $-1, %ax ; AVX-NEXT: sete %al ; AVX-NEXT: retq ; @@ -1528,7 +1528,7 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) { ; SSE2-NEXT: pcmpeqw %xmm2, %xmm0 ; SSE2-NEXT: packsswb %xmm1, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: cmpw $-1, %ax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -1544,7 +1544,7 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) { ; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX1-NEXT: cmpw $-1, %ax ; AVX1-NEXT: sete %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1557,7 +1557,7 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) { ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX2-NEXT: cmpw $-1, %ax ; AVX2-NEXT: sete %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1973,7 +1973,7 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) { ; SSE2-NEXT: packssdw %xmm1, %xmm0 ; SSE2-NEXT: packsswb %xmm2, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: cmpw $-1, %ax ; SSE2-NEXT: sete %al ; SSE2-NEXT: 
retq ; @@ -1998,7 +1998,7 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) { ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX1-NEXT: cmpw $-1, %ax ; AVX1-NEXT: sete %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2015,7 +2015,7 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) { ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX2-NEXT: cmpw $-1, %ax ; AVX2-NEXT: sete %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2458,7 +2458,7 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) { ; SSE2: # %bb.0: ; SSE2-NEXT: psllw $5, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: cmpw $-1, %ax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -2466,7 +2466,7 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) { ; AVX: # %bb.0: ; AVX-NEXT: vpsllw $5, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX-NEXT: cmpw $-1, %ax ; AVX-NEXT: sete %al ; AVX-NEXT: retq ; @@ -2860,7 +2860,7 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) { ; SSE2-NEXT: pcmpeqw %xmm2, %xmm0 ; SSE2-NEXT: packsswb %xmm1, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: cmpw $-1, %ax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -2876,7 +2876,7 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) { ; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX1-NEXT: cmpw $-1, %ax ; AVX1-NEXT: sete %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2889,7 +2889,7 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) { ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: 
vpmovmskb %xmm0, %eax -; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX2-NEXT: cmpw $-1, %ax ; AVX2-NEXT: sete %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -3305,7 +3305,7 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) { ; SSE2-NEXT: packssdw %xmm1, %xmm0 ; SSE2-NEXT: packsswb %xmm2, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: cmpw $-1, %ax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -3330,7 +3330,7 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) { ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX1-NEXT: cmpw $-1, %ax ; AVX1-NEXT: sete %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -3347,7 +3347,7 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) { ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; AVX2-NEXT: cmpw $-1, %ax ; AVX2-NEXT: sete %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq diff --git a/test/CodeGen/X86/sat-add.ll b/test/CodeGen/X86/sat-add.ll index 1f2b69f8afe..27683532592 100644 --- a/test/CodeGen/X86/sat-add.ll +++ b/test/CodeGen/X86/sat-add.ll @@ -60,8 +60,7 @@ define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) { define i16 @unsigned_sat_constant_i16_using_min(i16 %x) { ; ANY-LABEL: unsigned_sat_constant_i16_using_min: ; ANY: # %bb.0: -; ANY-NEXT: movzwl %di, %eax -; ANY-NEXT: cmpl $65493, %eax # imm = 0xFFD5 +; ANY-NEXT: cmpw $-43, %di ; ANY-NEXT: movl $65493, %eax # imm = 0xFFD5 ; ANY-NEXT: cmovbl %edi, %eax ; ANY-NEXT: addl $42, %eax diff --git a/test/CodeGen/X86/setcc.ll b/test/CodeGen/X86/setcc.ll index a1d27d38fc5..648935dd08a 100644 --- a/test/CodeGen/X86/setcc.ll +++ b/test/CodeGen/X86/setcc.ll @@ -9,7 +9,7 @@ define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp { ; CHECK-LABEL: t1: ; CHECK: ## %bb.0: ; 
CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpl $26, %edi +; CHECK-NEXT: cmpw $26, %di ; CHECK-NEXT: seta %al ; CHECK-NEXT: shll $5, %eax ; CHECK-NEXT: retq @@ -22,7 +22,7 @@ define zeroext i16 @t2(i16 zeroext %x) nounwind readnone ssp { ; CHECK-LABEL: t2: ; CHECK: ## %bb.0: ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpl $26, %edi +; CHECK-NEXT: cmpw $26, %di ; CHECK-NEXT: setb %al ; CHECK-NEXT: shll $5, %eax ; CHECK-NEXT: retq diff --git a/test/CodeGen/X86/signed-truncation-check.ll b/test/CodeGen/X86/signed-truncation-check.ll index e8c0a8ca578..c455f3c4c8a 100644 --- a/test/CodeGen/X86/signed-truncation-check.ll +++ b/test/CodeGen/X86/signed-truncation-check.ll @@ -596,16 +596,14 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl $8, %eax -; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: cmpl $16, %eax +; X86-NEXT: cmpw $16, %ax ; X86-NEXT: setb %al ; X86-NEXT: retl ; ; X64-LABEL: add_ultcmp_bad_i16_i4: ; X64: # %bb.0: ; X64-NEXT: addl $8, %edi -; X64-NEXT: movzwl %di, %eax -; X64-NEXT: cmpl $16, %eax +; X64-NEXT: cmpw $16, %di ; X64-NEXT: setb %al ; X64-NEXT: retq %tmp0 = add i16 %x, 8 ; 1U << (4-1) diff --git a/test/CodeGen/X86/x86-shrink-wrapping.ll b/test/CodeGen/X86/x86-shrink-wrapping.ll index 2c278e7ec63..28e396c8cf4 100644 --- a/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -516,8 +516,7 @@ declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rt ; CHECK: testq %rdi, %rdi ; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]] ; -; CHECK: movzwl (%rdi), [[BF_LOAD:%e[a-z]+]] -; CHECK-NEXT: cmpl $66, [[BF_LOAD]] +; CHECK: cmpw $66, (%rdi) ; CHECK-NEXT: jne [[CLEANUP]] ; ; CHECK: movq 8(%rdi), %rdi -- 2.11.0