X86: Add patterns for the movbe instruction (mov + bswap, only available on Atom)

author Benjamin Kramer <benny.kra@googlemail.com>

Mon, 10 Oct 2011 18:34:56 +0000 (18:34 +0000)

committer Benjamin Kramer <benny.kra@googlemail.com>

Mon, 10 Oct 2011 18:34:56 +0000 (18:34 +0000)
author Benjamin Kramer <benny.kra@googlemail.com>
Mon, 10 Oct 2011 18:34:56 +0000 (18:34 +0000)
committer Benjamin Kramer <benny.kra@googlemail.com>
Mon, 10 Oct 2011 18:34:56 +0000 (18:34 +0000)
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt

index 8d9eaba..b98f5fb 100644 (file)
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -2,11 +2,6 @@
  // Random ideas for the X86 backend.
  //===---------------------------------------------------------------------===//
  
-We should add support for the "movbe" instruction, which does a byte-swapping
-copy (3-addr bswap + memory support?)  This is available on Atom processors.
-
-//===---------------------------------------------------------------------===//
-
  This should be one DIV/IDIV instruction, not a libcall:
  
  unsigned test(unsigned long long X, unsigned Y) {
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td

index b795119..4505daf 100644 (file)
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -133,7 +133,7 @@ def : Proc<"core2",           [FeatureSSSE3, FeatureCMPXCHG16B,
                                 FeatureSlowBTMem]>;
  def : Proc<"penryn",          [FeatureSSE41, FeatureCMPXCHG16B,
                                 FeatureSlowBTMem]>;
-def : Proc<"atom",            [FeatureSSE3,  FeatureCMPXCHG16B,
+def : Proc<"atom",            [FeatureSSE3, FeatureCMPXCHG16B, FeatureMOVBE,
                                 FeatureSlowBTMem]>;
  // "Arrandale" along with corei3 and corei5
  def : Proc<"corei7",          [FeatureSSE42, FeatureCMPXCHG16B,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td

index efae5fb..2640a90 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1308,17 +1308,23 @@ def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
  //
  let Predicates = [HasMOVBE] in {
    def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
-                    "movbe{w}\t{$src, $dst|$dst, $src}", []>, OpSize, T8;
+                    "movbe{w}\t{$src, $dst|$dst, $src}",
+                    [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>, OpSize, T8;
    def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                    "movbe{l}\t{$src, $dst|$dst, $src}", []>, T8;
+                    "movbe{l}\t{$src, $dst|$dst, $src}",
+                    [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>, T8;
    def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                     "movbe{q}\t{$src, $dst|$dst, $src}", []>, T8;
+                     "movbe{q}\t{$src, $dst|$dst, $src}",
+                     [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>, T8;
    def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
-                    "movbe{w}\t{$src, $dst|$dst, $src}", []>, OpSize, T8;
+                    "movbe{w}\t{$src, $dst|$dst, $src}",
+                    [(store (bswap GR16:$src), addr:$dst)]>, OpSize, T8;
    def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                    "movbe{l}\t{$src, $dst|$dst, $src}", []>, T8;
+                    "movbe{l}\t{$src, $dst|$dst, $src}",
+                    [(store (bswap GR32:$src), addr:$dst)]>, T8;
    def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                     "movbe{q}\t{$src, $dst|$dst, $src}", []>, T8;
+                     "movbe{q}\t{$src, $dst|$dst, $src}",
+                     [(store (bswap GR64:$src), addr:$dst)]>, T8;
  }
  
  //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/movbe.ll b/test/CodeGen/X86/movbe.ll

new file mode 100644 (file)

index 0000000..0ed8469
--- /dev/null
+++ b/test/CodeGen/X86/movbe.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=x86-64 -mcpu=atom < %s | FileCheck %s
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
+define void @test1(i32* nocapture %x, i32 %y) nounwind {
+  %bswap = call i32 @llvm.bswap.i32(i32 %y)
+  store i32 %bswap, i32* %x, align 4
+  ret void
+; CHECK: test1:
+; CHECK: movbel        %esi, (%rdi)
+}
+
+define i32 @test2(i32* %x) nounwind {
+  %load = load i32* %x, align 4
+  %bswap = call i32 @llvm.bswap.i32(i32 %load)
+  ret i32 %bswap
+; CHECK: test2:
+; CHECK: movbel        (%rdi), %eax
+}
+
+define void @test3(i64* %x, i64 %y) nounwind {
+  %bswap = call i64 @llvm.bswap.i64(i64 %y)
+  store i64 %bswap, i64* %x, align 8
+  ret void
+; CHECK: test3:
+; CHECK: movbeq        %rsi, (%rdi)
+}
+
+define i64 @test4(i64* %x) nounwind {
+  %load = load i64* %x, align 8
+  %bswap = call i64 @llvm.bswap.i64(i64 %load)
+  ret i64 %bswap
+; CHECK: test4:
+; CHECK: movbeq        (%rdi), %rax
+}
author	Benjamin Kramer <benny.kra@googlemail.com>
	Mon, 10 Oct 2011 18:34:56 +0000 (18:34 +0000)
committer	Benjamin Kramer <benny.kra@googlemail.com>
	Mon, 10 Oct 2011 18:34:56 +0000 (18:34 +0000)
lib/Target/X86/README.txt		patch | blob | history
lib/Target/X86/X86.td		patch | blob | history
lib/Target/X86/X86InstrInfo.td		patch | blob | history
test/CodeGen/X86/movbe.ll	[new file with mode: 0644]	patch | blob