// Typed scalar-FP load fragments: each matches a plain `load` whose result
// type is the given floating-point type, for use in the selection patterns
// below.
def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>;
+// f128 load whose alignment is at least the in-memory size of the loaded
+// type (16 bytes for f128), i.e. safe for the aligned MOVAPS form.
+def alignedloadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr)), [{
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ return Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize();
+}]>;
+// f128 load usable as a folded memory operand: either the load is naturally
+// aligned, or the subtarget tolerates unaligned SSE memory accesses.
+def memopf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr)), [{
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ return Subtarget->hasSSEUnalignedMem() ||
+ Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize();
+}]>;
// Sign-extending i8 loads producing i16/i32 results.
def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
// Extra selection patterns for FR128, f128, f128mem
// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
// This hunk splits the old unconditional MOVAPS store/load patterns into an
// aligned pair (MOVAPSmr/MOVAPSrm) and an unaligned fallback pair
// (MOVUPSmr/MOVUPSrm), keyed off the aligned* fragments defined above.
-def : Pat<(store (f128 FR128:$src), addr:$dst),
+def : Pat<(alignedstore (f128 FR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>;
+def : Pat<(store (f128 FR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>;
-def : Pat<(loadf128 addr:$src),
+def : Pat<(alignedloadf128 addr:$src),
(COPY_TO_REGCLASS (MOVAPSrm addr:$src), FR128)>;
+def : Pat<(loadf128 addr:$src),
+ (COPY_TO_REGCLASS (MOVUPSrm addr:$src), FR128)>;
// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
// Load-folding patterns for f128 logical ops: the load is folded into the
// *rm instruction only when memopf128 accepts it (aligned, or the subtarget
// permits unaligned SSE memory — see the memopf128 predicate above).
-def : Pat<(X86fand FR128:$src1, (loadf128 addr:$src2)),
+def : Pat<(X86fand FR128:$src1, (memopf128 addr:$src2)),
(COPY_TO_REGCLASS
(ANDPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
FR128)>;
// NOTE(review): the next two lines look like the truncated result half of a
// register-register X86fand pattern whose `def : Pat<` header is missing
// from this chunk — confirm against the full file before applying.
(ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
-def : Pat<(X86for FR128:$src1, (loadf128 addr:$src2)),
+def : Pat<(X86for FR128:$src1, (memopf128 addr:$src2)),
(COPY_TO_REGCLASS
(ORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
FR128)>;
// NOTE(review): likewise, these two ORPSrr lines appear to be an orphaned
// fragment of a register-register X86for pattern — header not visible here.
(ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
-def : Pat<(X86fxor FR128:$src1, (loadf128 addr:$src2)),
+def : Pat<(X86fxor FR128:$src1, (memopf128 addr:$src2)),
(COPY_TO_REGCLASS
(XORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
FR128)>;
;
; NOTE(review): truncated FileCheck test body — the enclosing `define` and
; terminator are not visible in this chunk. The -/+ lines record the expected
; codegen change: an align-1 fp128 store now selects movups instead of movaps.
; SSE-F128-LABEL: extract_f128_0:
; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: movaps %xmm0, (%rdi)
+; SSE-F128-NEXT: movups %xmm0, (%rdi)
; SSE-F128-NEXT: retq
%vecext = extractelement <2 x fp128> %foo, i32 0
store fp128 %vecext, fp128* %dst, align 1
;
; NOTE(review): truncated FileCheck test body (no visible `define`). Same
; expectation change as extract_f128_0, but for element 1 (in %xmm1): the
; align-1 fp128 store must use the unaligned movups form.
; SSE-F128-LABEL: extract_f128_1:
; SSE-F128: # %bb.0:
-; SSE-F128-NEXT: movaps %xmm1, (%rdi)
+; SSE-F128-NEXT: movups %xmm1, (%rdi)
; SSE-F128-NEXT: retq
%vecext = extractelement <2 x fp128> %foo, i32 1
store fp128 %vecext, fp128* %dst, align 1