def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
}
-multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, bit Commutable = 0,
+ string Ver = ""> {
+ let isCommutable = Commutable in
def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
(bitconvert (load_mmx addr:$src))))]>;
}
-defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", 1>;
defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
-defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
-defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd", 1>;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq", 1>;
defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
-defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul", 1>;
defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
-defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", 1>;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
// "3DNowA" instructions
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
-defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
-defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", 0, "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", 0, "a">;
defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X64
-; FIXME - missed commutation opportunities.
-
define void @commute_m_pfadd(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfadd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movq (%edx), %mm0
-; X32-NEXT: movq (%ecx), %mm1
; X32-NEXT: pfadd (%eax), %mm0
-; X32-NEXT: pfadd %mm0, %mm1
-; X32-NEXT: movq %mm1, (%ecx)
+; X32-NEXT: pfadd (%ecx), %mm0
+; X32-NEXT: movq %mm0, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: commute_m_pfadd:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: movq (%rdx), %mm1
; X64-NEXT: pfadd (%rsi), %mm0
-; X64-NEXT: pfadd %mm0, %mm1
-; X64-NEXT: movq %mm1, (%rdx)
+; X64-NEXT: pfadd (%rdx), %mm0
+; X64-NEXT: movq %mm0, (%rdx)
; X64-NEXT: retq
%1 = load x86_mmx, x86_mmx* %a0
%2 = load x86_mmx, x86_mmx* %a1
}
declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx)
+; FIXME - missed PFSUB commutation (PFSUB with swapped operands is PFSUBR).
define void @commute_m_pfsub(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsub:
; X32: # BB#0:
}
declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx)
+; FIXME - missed PFSUBR commutation (PFSUBR with swapped operands is PFSUB).
define void @commute_m_pfsubr(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsubr:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movq (%edx), %mm0
-; X32-NEXT: movq (%ecx), %mm1
; X32-NEXT: pfmul (%eax), %mm0
-; X32-NEXT: pfmul %mm0, %mm1
-; X32-NEXT: movq %mm1, (%ecx)
+; X32-NEXT: pfmul (%ecx), %mm0
+; X32-NEXT: movq %mm0, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: commute_m_pfmul:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: movq (%rdx), %mm1
; X64-NEXT: pfmul (%rsi), %mm0
-; X64-NEXT: pfmul %mm0, %mm1
-; X64-NEXT: movq %mm1, (%rdx)
+; X64-NEXT: pfmul (%rdx), %mm0
+; X64-NEXT: movq %mm0, (%rdx)
; X64-NEXT: retq
%1 = load x86_mmx, x86_mmx* %a0
%2 = load x86_mmx, x86_mmx* %a1
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movq (%edx), %mm0
-; X32-NEXT: movq (%ecx), %mm1
; X32-NEXT: pfcmpeq (%eax), %mm0
-; X32-NEXT: pfcmpeq %mm0, %mm1
-; X32-NEXT: movq %mm1, (%ecx)
+; X32-NEXT: pfcmpeq (%ecx), %mm0
+; X32-NEXT: movq %mm0, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: commute_m_pfcmpeq:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: movq (%rdx), %mm1
; X64-NEXT: pfcmpeq (%rsi), %mm0
-; X64-NEXT: pfcmpeq %mm0, %mm1
-; X64-NEXT: movq %mm1, (%rdx)
+; X64-NEXT: pfcmpeq (%rdx), %mm0
+; X64-NEXT: movq %mm0, (%rdx)
; X64-NEXT: retq
%1 = load x86_mmx, x86_mmx* %a0
%2 = load x86_mmx, x86_mmx* %a1
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movq (%edx), %mm0
-; X32-NEXT: movq (%ecx), %mm1
; X32-NEXT: pavgusb (%eax), %mm0
-; X32-NEXT: pavgusb %mm0, %mm1
-; X32-NEXT: movq %mm1, (%ecx)
+; X32-NEXT: pavgusb (%ecx), %mm0
+; X32-NEXT: movq %mm0, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: commute_m_pavgusb:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: movq (%rdx), %mm1
; X64-NEXT: pavgusb (%rsi), %mm0
-; X64-NEXT: pavgusb %mm0, %mm1
-; X64-NEXT: movq %mm1, (%rdx)
+; X64-NEXT: pavgusb (%rdx), %mm0
+; X64-NEXT: movq %mm0, (%rdx)
; X64-NEXT: retq
%1 = load x86_mmx, x86_mmx* %a0
%2 = load x86_mmx, x86_mmx* %a1
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movq (%edx), %mm0
-; X32-NEXT: movq (%ecx), %mm1
; X32-NEXT: pmulhrw (%eax), %mm0
-; X32-NEXT: pmulhrw %mm0, %mm1
-; X32-NEXT: movq %mm1, (%ecx)
+; X32-NEXT: pmulhrw (%ecx), %mm0
+; X32-NEXT: movq %mm0, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: commute_m_pmulhrw:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %mm0
-; X64-NEXT: movq (%rdx), %mm1
; X64-NEXT: pmulhrw (%rsi), %mm0
-; X64-NEXT: pmulhrw %mm0, %mm1
-; X64-NEXT: movq %mm1, (%rdx)
+; X64-NEXT: pmulhrw (%rdx), %mm0
+; X64-NEXT: movq %mm0, (%rdx)
; X64-NEXT: retq
%1 = load x86_mmx, x86_mmx* %a0
%2 = load x86_mmx, x86_mmx* %a1