From 4b4553340dded4e8ce8f56690c08561a39a4aa56 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Wed, 15 Mar 2017 11:19:35 -0700 Subject: [PATCH] SIMD cmpeq for x86/x86_64 Rationale: Break-out CL of ART Vectorizer. Enables fast all-ones optimization. Bug: 34083438 Test: assembler_x86[_64]_test Change-Id: I70bd71305f2ecc322ccada5471c197a578c0526e --- compiler/utils/x86/assembler_x86.cc | 37 ++++++++++++++++++++++++++ compiler/utils/x86/assembler_x86.h | 5 ++++ compiler/utils/x86/assembler_x86_test.cc | 16 +++++++++++ compiler/utils/x86_64/assembler_x86_64.cc | 37 ++++++++++++++++++++++++++ compiler/utils/x86_64/assembler_x86_64.h | 5 ++++ compiler/utils/x86_64/assembler_x86_64_test.cc | 16 +++++++++++ 6 files changed, 116 insertions(+) diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 0a6ceefe6..5307dc09d 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1221,6 +1221,43 @@ void X86Assembler::por(XmmRegister dst, XmmRegister src) { } +void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x74); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x75); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x76); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x29); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 9d7ca77a5..f52cf16c8 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -495,6 +495,11 @@ class X86Assembler FINAL : public Assembler { void orps(XmmRegister dst, XmmRegister src); void por(XmmRegister dst, XmmRegister src); + void pcmpeqb(XmmRegister dst, XmmRegister src); + void pcmpeqw(XmmRegister dst, XmmRegister src); + void pcmpeqd(XmmRegister dst, XmmRegister src); + void pcmpeqq(XmmRegister dst, XmmRegister src); + void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm); void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm); void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 52c7507bc..23049079e 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -605,6 +605,22 @@ TEST_F(AssemblerX86Test, POr) { DriverStr(RepeatFF(&x86::X86Assembler::por, "por %{reg2}, %{reg1}"), "por"); } +TEST_F(AssemblerX86Test, PCmpeqB) { + DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb"); +} + +TEST_F(AssemblerX86Test, PCmpeqW) { + DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqw, "pcmpeqw %{reg2}, %{reg1}"), "cmpeqw"); +} + +TEST_F(AssemblerX86Test, PCmpeqD) { + DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqd, "pcmpeqd %{reg2}, %{reg1}"), "cmpeqd"); +} + +TEST_F(AssemblerX86Test, PCmpeqQ) { + DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "cmpeqq"); +} + TEST_F(AssemblerX86Test, ShufPS) { DriverStr(RepeatFFI(&x86::X86Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps"); } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index d7fed5b7a..d20a6965c 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1427,6 +1427,43 @@ void X86_64Assembler::por(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst.LowBits(), src); } +void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x74); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x75); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x76); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x38); + EmitUint8(0x29); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 93c24b826..08e17e81e 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -523,6 +523,11 @@ class X86_64Assembler FINAL : public Assembler { void orps(XmmRegister dst, XmmRegister src); void por(XmmRegister dst, XmmRegister src); + void pcmpeqb(XmmRegister dst, XmmRegister src); + void pcmpeqw(XmmRegister dst, XmmRegister src); + void pcmpeqd(XmmRegister dst, XmmRegister src); + void pcmpeqq(XmmRegister dst, XmmRegister src); + void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm); void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm); void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 9d62fd1f6..20062fdb0 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1293,6 +1293,22 @@ TEST_F(AssemblerX86_64Test, Por) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::por, "por %{reg2}, %{reg1}"), "por"); } +TEST_F(AssemblerX86_64Test, PCmpeqb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb"); +} + +TEST_F(AssemblerX86_64Test, PCmpeqw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqw, "pcmpeqw %{reg2}, %{reg1}"), "pcmpeqw"); +} + +TEST_F(AssemblerX86_64Test, PCmpeqd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqd, "pcmpeqd %{reg2}, %{reg1}"), "pcmpeqd"); +} + +TEST_F(AssemblerX86_64Test, PCmpeqq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "pcmpeqq"); +} + TEST_F(AssemblerX86_64Test, Shufps) { DriverStr(RepeatFFI(&x86_64::X86_64Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps"); } -- 2.11.0