From: Simon Pilgrim
Date: Thu, 2 Aug 2018 10:53:53 +0000 (+0000)
Subject: [X86][SSE] Add more UDIV nonuniform-constant vector tests
X-Git-Tag: android-x86-9.0-r1~14780
X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=7cb2547b5f8e00ab658aff95128057900c1a6f72;p=android-x86%2Fexternal-llvm.git

[X86][SSE] Add more UDIV nonuniform-constant vector tests

Ensure we cover all paths for vector data as requested on D49248

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338698 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/X86/combine-udiv.ll b/test/CodeGen/X86/combine-udiv.ll
index dabf25d8206..a8b1be02518 100644
--- a/test/CodeGen/X86/combine-udiv.ll
+++ b/test/CodeGen/X86/combine-udiv.ll
@@ -450,3 +450,241 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
   %1 = udiv <8 x i16> %x,
   ret <8 x i16> %1
 }
+
+define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
+; SSE-LABEL: combine_vec_udiv_nonuniform2:
+; SSE: # %bb.0:
+; SSE-NEXT: pextrw $1, %xmm0, %eax
+; SSE-NEXT: imull $59919, %eax, %eax # imm = 0xEA0F
+; SSE-NEXT: shrl $21, %eax
+; SSE-NEXT: pextrw $0, %xmm0, %ecx
+; SSE-NEXT: shrl %ecx
+; SSE-NEXT: imull $16393, %ecx, %ecx # imm = 0x4009
+; SSE-NEXT: shrl $29, %ecx
+; SSE-NEXT: movd %ecx, %xmm1
+; SSE-NEXT: pinsrw $1, %eax, %xmm1
+; SSE-NEXT: pextrw $2, %xmm0, %eax
+; SSE-NEXT: imull $58255, %eax, %eax # imm = 0xE38F
+; SSE-NEXT: shrl $21, %eax
+; SSE-NEXT: pinsrw $2, %eax, %xmm1
+; SSE-NEXT: pextrw $3, %xmm0, %eax
+; SSE-NEXT: imull $32787, %eax, %eax # imm = 0x8013
+; SSE-NEXT: shrl $31, %eax
+; SSE-NEXT: pinsrw $3, %eax, %xmm1
+; SSE-NEXT: pextrw $4, %xmm0, %eax
+; SSE-NEXT: imull $55189, %eax, %eax # imm = 0xD795
+; SSE-NEXT: shrl $21, %eax
+; SSE-NEXT: pinsrw $4, %eax, %xmm1
+; SSE-NEXT: pextrw $5, %xmm0, %eax
+; SSE-NEXT: imull $8197, %eax, %eax # imm = 0x2005
+; SSE-NEXT: shrl $29, %eax
+; SSE-NEXT: pinsrw $5, %eax, %xmm1
+; SSE-NEXT: pextrw $6, %xmm0, %eax
+; SSE-NEXT: imull $52429, %eax, %eax # imm = 0xCCCD
+; SSE-NEXT: shrl $21, %eax
+; SSE-NEXT: pinsrw $6, %eax, %xmm1
+; SSE-NEXT: pextrw $7, %xmm0, %eax
+; SSE-NEXT: imull $32789, %eax, %eax # imm = 0x8015
+; SSE-NEXT: shrl $31, %eax
+; SSE-NEXT: pinsrw $7, %eax, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_vec_udiv_nonuniform2:
+; AVX: # %bb.0:
+; AVX-NEXT: vpextrw $1, %xmm0, %eax
+; AVX-NEXT: imull $59919, %eax, %eax # imm = 0xEA0F
+; AVX-NEXT: shrl $21, %eax
+; AVX-NEXT: vpextrw $0, %xmm0, %ecx
+; AVX-NEXT: shrl %ecx
+; AVX-NEXT: imull $16393, %ecx, %ecx # imm = 0x4009
+; AVX-NEXT: shrl $29, %ecx
+; AVX-NEXT: vmovd %ecx, %xmm1
+; AVX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $2, %xmm0, %eax
+; AVX-NEXT: imull $58255, %eax, %eax # imm = 0xE38F
+; AVX-NEXT: shrl $21, %eax
+; AVX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $3, %xmm0, %eax
+; AVX-NEXT: imull $32787, %eax, %eax # imm = 0x8013
+; AVX-NEXT: shrl $31, %eax
+; AVX-NEXT: vpinsrw $3, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $4, %xmm0, %eax
+; AVX-NEXT: imull $55189, %eax, %eax # imm = 0xD795
+; AVX-NEXT: shrl $21, %eax
+; AVX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $5, %xmm0, %eax
+; AVX-NEXT: imull $8197, %eax, %eax # imm = 0x2005
+; AVX-NEXT: shrl $29, %eax
+; AVX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $6, %xmm0, %eax
+; AVX-NEXT: imull $52429, %eax, %eax # imm = 0xCCCD
+; AVX-NEXT: shrl $21, %eax
+; AVX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $7, %xmm0, %eax
+; AVX-NEXT: imull $32789, %eax, %eax # imm = 0x8015
+; AVX-NEXT: shrl $31, %eax
+; AVX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
+  %1 = udiv <8 x i16> %x,
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
+; SSE-LABEL: combine_vec_udiv_nonuniform3:
+; SSE: # %bb.0:
+; SSE-NEXT: pextrw $1, %xmm0, %eax
+; SSE-NEXT: imull $25645, %eax, %ecx # imm = 0x642D
+; SSE-NEXT: shrl $16, %ecx
+; SSE-NEXT: subl %ecx, %eax
+; SSE-NEXT: movzwl %ax, %eax
+; SSE-NEXT: shrl %eax
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: shrl $4, %eax
+; SSE-NEXT: movd %xmm0, %ecx
+; SSE-NEXT: movzwl %cx, %edx
+; SSE-NEXT: imull $9363, %edx, %edx # imm = 0x2493
+; SSE-NEXT: shrl $16, %edx
+; SSE-NEXT: subl %edx, %ecx
+; SSE-NEXT: movzwl %cx, %ecx
+; SSE-NEXT: shrl %ecx
+; SSE-NEXT: addl %edx, %ecx
+; SSE-NEXT: shrl $2, %ecx
+; SSE-NEXT: movd %ecx, %xmm1
+; SSE-NEXT: pinsrw $1, %eax, %xmm1
+; SSE-NEXT: pextrw $2, %xmm0, %eax
+; SSE-NEXT: imull $18351, %eax, %ecx # imm = 0x47AF
+; SSE-NEXT: shrl $16, %ecx
+; SSE-NEXT: subl %ecx, %eax
+; SSE-NEXT: movzwl %ax, %eax
+; SSE-NEXT: shrl %eax
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: shrl $4, %eax
+; SSE-NEXT: pinsrw $2, %eax, %xmm1
+; SSE-NEXT: pextrw $3, %xmm0, %eax
+; SSE-NEXT: imull $12137, %eax, %ecx # imm = 0x2F69
+; SSE-NEXT: shrl $16, %ecx
+; SSE-NEXT: subl %ecx, %eax
+; SSE-NEXT: movzwl %ax, %eax
+; SSE-NEXT: shrl %eax
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: shrl $4, %eax
+; SSE-NEXT: pinsrw $3, %eax, %xmm1
+; SSE-NEXT: pextrw $4, %xmm0, %eax
+; SSE-NEXT: imull $2115, %eax, %ecx # imm = 0x843
+; SSE-NEXT: shrl $16, %ecx
+; SSE-NEXT: subl %ecx, %eax
+; SSE-NEXT: movzwl %ax, %eax
+; SSE-NEXT: shrl %eax
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: shrl $4, %eax
+; SSE-NEXT: pinsrw $4, %eax, %xmm1
+; SSE-NEXT: pextrw $5, %xmm0, %eax
+; SSE-NEXT: imull $23705, %eax, %ecx # imm = 0x5C99
+; SSE-NEXT: shrl $16, %ecx
+; SSE-NEXT: subl %ecx, %eax
+; SSE-NEXT: movzwl %ax, %eax
+; SSE-NEXT: shrl %eax
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: shrl $5, %eax
+; SSE-NEXT: pinsrw $5, %eax, %xmm1
+; SSE-NEXT: pextrw $6, %xmm0, %eax
+; SSE-NEXT: imull $1041, %eax, %ecx # imm = 0x411
+; SSE-NEXT: shrl $16, %ecx
+; SSE-NEXT: subl %ecx, %eax
+; SSE-NEXT: movzwl %ax, %eax
+; SSE-NEXT: shrl %eax
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: shrl $5, %eax
+; SSE-NEXT: pinsrw $6, %eax, %xmm1
+; SSE-NEXT: pextrw $7, %xmm0, %eax
+; SSE-NEXT: imull $517, %eax, %ecx # imm = 0x205
+; SSE-NEXT: shrl $16, %ecx
+; SSE-NEXT: subl %ecx, %eax
+; SSE-NEXT: movzwl %ax, %eax
+; SSE-NEXT: shrl %eax
+; SSE-NEXT: addl %ecx, %eax
+; SSE-NEXT: shrl $6, %eax
+; SSE-NEXT: pinsrw $7, %eax, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_vec_udiv_nonuniform3:
+; AVX: # %bb.0:
+; AVX-NEXT: vpextrw $1, %xmm0, %eax
+; AVX-NEXT: imull $25645, %eax, %ecx # imm = 0x642D
+; AVX-NEXT: shrl $16, %ecx
+; AVX-NEXT: subl %ecx, %eax
+; AVX-NEXT: movzwl %ax, %eax
+; AVX-NEXT: shrl %eax
+; AVX-NEXT: addl %ecx, %eax
+; AVX-NEXT: shrl $4, %eax
+; AVX-NEXT: vmovd %xmm0, %ecx
+; AVX-NEXT: movzwl %cx, %edx
+; AVX-NEXT: imull $9363, %edx, %edx # imm = 0x2493
+; AVX-NEXT: shrl $16, %edx
+; AVX-NEXT: subl %edx, %ecx
+; AVX-NEXT: movzwl %cx, %ecx
+; AVX-NEXT: shrl %ecx
+; AVX-NEXT: addl %edx, %ecx
+; AVX-NEXT: shrl $2, %ecx
+; AVX-NEXT: vmovd %ecx, %xmm1
+; AVX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $2, %xmm0, %eax
+; AVX-NEXT: imull $18351, %eax, %ecx # imm = 0x47AF
+; AVX-NEXT: shrl $16, %ecx
+; AVX-NEXT: subl %ecx, %eax
+; AVX-NEXT: movzwl %ax, %eax
+; AVX-NEXT: shrl %eax
+; AVX-NEXT: addl %ecx, %eax
+; AVX-NEXT: shrl $4, %eax
+; AVX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $3, %xmm0, %eax
+; AVX-NEXT: imull $12137, %eax, %ecx # imm = 0x2F69
+; AVX-NEXT: shrl $16, %ecx
+; AVX-NEXT: subl %ecx, %eax
+; AVX-NEXT: movzwl %ax, %eax
+; AVX-NEXT: shrl %eax
+; AVX-NEXT: addl %ecx, %eax
+; AVX-NEXT: shrl $4, %eax
+; AVX-NEXT: vpinsrw $3, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $4, %xmm0, %eax
+; AVX-NEXT: imull $2115, %eax, %ecx # imm = 0x843
+; AVX-NEXT: shrl $16, %ecx
+; AVX-NEXT: subl %ecx, %eax
+; AVX-NEXT: movzwl %ax, %eax
+; AVX-NEXT: shrl %eax
+; AVX-NEXT: addl %ecx, %eax
+; AVX-NEXT: shrl $4, %eax
+; AVX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $5, %xmm0, %eax
+; AVX-NEXT: imull $23705, %eax, %ecx # imm = 0x5C99
+; AVX-NEXT: shrl $16, %ecx
+; AVX-NEXT: subl %ecx, %eax
+; AVX-NEXT: movzwl %ax, %eax
+; AVX-NEXT: shrl %eax
+; AVX-NEXT: addl %ecx, %eax
+; AVX-NEXT: shrl $5, %eax
+; AVX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $6, %xmm0, %eax
+; AVX-NEXT: imull $1041, %eax, %ecx # imm = 0x411
+; AVX-NEXT: shrl $16, %ecx
+; AVX-NEXT: subl %ecx, %eax
+; AVX-NEXT: movzwl %ax, %eax
+; AVX-NEXT: shrl %eax
+; AVX-NEXT: addl %ecx, %eax
+; AVX-NEXT: shrl $5, %eax
+; AVX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpextrw $7, %xmm0, %eax
+; AVX-NEXT: imull $517, %eax, %ecx # imm = 0x205
+; AVX-NEXT: shrl $16, %ecx
+; AVX-NEXT: subl %ecx, %eax
+; AVX-NEXT: movzwl %ax, %eax
+; AVX-NEXT: shrl %eax
+; AVX-NEXT: addl %ecx, %eax
+; AVX-NEXT: shrl $6, %eax
+; AVX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
+  %1 = udiv <8 x i16> %x,
+  ret <8 x i16> %1
+}
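
For reference, each scalarized lane above is the usual unsigned magic-number division: a 16x16->32 multiply by a rounded-up reciprocal, an optional fix-up (sub/shift/add), and a final shift. Below is a minimal C sketch of the imull $9363 ... shrl $2 lane of combine_vec_udiv_nonuniform3. The constant divisor vectors after "%x," are not visible in this plain view, so the divisor 7 and the helper name udiv_by_7 are inferred from the magic constant and shift counts, not taken from the test file.

#include <assert.h>
#include <stdint.h>

/* One lane of the expansion above (imull $9363 / subl / shrl / addl / shrl $2):
 *   q = (x * 0x2493) >> 16;   high half of x times the truncated 17-bit magic
 *   t = ((x - q) >> 1) + q;   fold the magic's implicit top bit back in
 *   r = t >> 2;               remaining post-shift
 * The divisor (assumed to be 7) is inferred, not shown in the diff. */
static uint16_t udiv_by_7(uint16_t x) {
    uint32_t q = ((uint32_t)x * 9363u) >> 16;
    uint32_t t = (((uint32_t)x - q) >> 1) + q;
    return (uint16_t)(t >> 2);
}

int main(void) {
    /* Exhaustively check the sequence against plain division. */
    for (uint32_t x = 0; x <= 0xFFFF; ++x)
        assert(udiv_by_7((uint16_t)x) == (uint16_t)(x / 7));
    return 0;
}

Not every lane takes this exact shape: the 0x4009 lane of combine_vec_udiv_nonuniform2 pre-shifts the dividend (shrl %ecx) before the multiply, and the 0x8013/0x8015 lanes use only a single multiply followed by a 31-bit shift, which is the sort of path coverage the commit message asks for.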