From: Craig Topper
Date: Mon, 5 Feb 2018 18:31:04 +0000 (+0000)
Subject: [X86] Artificially lower the complexity of the scalar ANDN patterns so that AND with...
X-Git-Tag: android-x86-7.1-r4~5440
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=73cb5b8a646d3e3a964745c357a85195c13b0615;p=android-x86%2Fexternal-llvm.git

[X86] Artificially lower the complexity of the scalar ANDN patterns so
that AND with immediate will match first.

This allows the immediate to be folded into the and instead of being
forced to move into a register. This can sometimes result in shorter
encodings since the and can sign extend an immediate.

This also allows us to match an and to a movzx after a not.

This can cause an extra move if the input to the now-separate NOT has an
additional user, since a copy is then required before the NOT.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@324260 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index d35b4338c72..e6219aa6fcb 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1285,12 +1285,13 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
              Sched<[WriteALULd, ReadAfterLd]>;
 }
 
-let Predicates = [HasBMI], Defs = [EFLAGS] in {
+// Complexity is reduced to give and with immediate a chance to match first.
+let Predicates = [HasBMI], Defs = [EFLAGS], AddedComplexity = -6 in {
   defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8PS, VEX_4V;
   defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8PS, VEX_4V, VEX_W;
 }
 
-let Predicates = [HasBMI] in {
+let Predicates = [HasBMI], AddedComplexity = -6 in {
   def : Pat<(and (not GR32:$src1), GR32:$src2),
             (ANDN32rr GR32:$src1, GR32:$src2)>;
   def : Pat<(and (not GR64:$src1), GR64:$src2),
diff --git a/test/CodeGen/X86/avx512-schedule.ll b/test/CodeGen/X86/avx512-schedule.ll
index 7c973af6a6d..886e4f2fbcb 100755
--- a/test/CodeGen/X86/avx512-schedule.ll
+++ b/test/CodeGen/X86/avx512-schedule.ll
@@ -6687,9 +6687,8 @@ define i32 @mask16_zext(i16 %x) {
 ;
 ; SKX-LABEL: mask16_zext:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; SKX-NEXT:    # sched: [1:0.25]
-; SKX-NEXT:    andnl %eax, %edi, %eax # sched: [1:0.50]
+; SKX-NEXT:    notl %edi # sched: [1:0.25]
+; SKX-NEXT:    movzwl %di, %eax # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %m0 = bitcast i16 %x to <16 x i1>
   %m1 = xor <16 x i1> %m0,
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index 4da10f6cffa..0e79a5a65c1 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -188,8 +188,8 @@ define i1 @and_cmp4(i32 %x, i32 %y) {
 define i1 @and_cmp_const(i32 %x) {
 ; CHECK-LABEL: and_cmp_const:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl $43, %eax
-; CHECK-NEXT:    andnl %eax, %edi, %eax
+; CHECK-NEXT:    notl %edi
+; CHECK-NEXT:    andl $43, %edi
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %and = and i32 %x, 43
diff --git a/test/CodeGen/X86/pr32282.ll b/test/CodeGen/X86/pr32282.ll
index 6da2ae0b564..7398974b354 100644
--- a/test/CodeGen/X86/pr32282.ll
+++ b/test/CodeGen/X86/pr32282.ll
@@ -12,24 +12,28 @@
 define void @foo() {
 ; X86-LABEL: foo:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    movl d, %eax
-; X86-NEXT:    movl d+4, %ecx
-; X86-NEXT:    movl $701685459, %edx # imm = 0x29D2DED3
-; X86-NEXT:    andnl %edx, %ecx, %ecx
-; X86-NEXT:    movl $-564453154, %edx # imm = 0xDE5B20DE
-; X86-NEXT:    andnl %edx, %eax, %edx
-; X86-NEXT:    shrdl $21, %ecx, %edx
-; X86-NEXT:    shrl $21, %ecx
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl d, %ecx
+; X86-NEXT:    notl %ecx
+; X86-NEXT:    movl d+4, %edx
+; X86-NEXT:    notl %edx
+; X86-NEXT:    andl $701685459, %edx # imm = 0x29D2DED3
+; X86-NEXT:    andl $-564453154, %ecx # imm = 0xDE5B20DE
+; X86-NEXT:    shrdl $21, %edx, %ecx
+; X86-NEXT:    shrl $21, %edx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    testb %al, %al
-; X86-NEXT:    cmovnel %ecx, %edx
-; X86-NEXT:    cmovnel %eax, %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    cmovnel %eax, %esi
+; X86-NEXT:    cmovel %ecx, %edx
 ; X86-NEXT:    andl $-2, %edx
 ; X86-NEXT:    addl $7, %edx
-; X86-NEXT:    adcxl %eax, %ecx
-; X86-NEXT:    pushl %ecx
+; X86-NEXT:    adcxl %eax, %esi
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl %edx
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
@@ -37,12 +41,13 @@ define void @foo() {
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
 ; X86-NEXT:    pushl $0
 ; X86-NEXT:    .cfi_adjust_cfa_offset 4
-; X86-NEXT:    calll __divdi3
+; X86-NEXT:    calll __divdi3@PLT
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:    .cfi_adjust_cfa_offset -16
 ; X86-NEXT:    orl %eax, %edx
 ; X86-NEXT:    setne {{[0-9]+}}(%esp)
-; X86-NEXT:    popl %eax
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: foo:
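
A minimal sketch of the effect, assuming BMI is enabled; the function name
@andn_imm is illustrative, and the assembly is distilled from the bmi.ll diff
above (with an extra copy for the i32 return value). SelectionDAG tries
patterns in decreasing complexity, and TableGen derives a pattern's base
complexity from its size, so the multi-node (and (not x), y) ANDN patterns
otherwise match ahead of the plain AND-with-immediate patterns; the
AddedComplexity = -6 offset drops them below, letting the immediate form win:

  define i32 @andn_imm(i32 %x) {
    %not = xor i32 %x, -1
    %and = and i32 %not, 43
    ret i32 %and
  }

Before this change, llc -mattr=+bmi had to materialize the immediate in a
register so ANDN could match:

  movl $43, %eax
  andnl %eax, %edi, %eax
  retq

After it, the AND keeps its (sign-extendable, often shorter) immediate
encoding and the NOT is emitted as a separate instruction:

  notl %edi
  andl $43, %edi
  movl %edi, %eax
  retq

The avx512-schedule.ll change above shows the related movzx win: an AND with
65535 whose input is a NOT now becomes notl + movzwl instead of materializing
0xFFFF for andnl.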