Before revision 336728, the "mayLoad" flag for instruction (V)MOVLPSrm was
inferred directly from the "default" pattern associated with the instruction
definition.
r336728 removed the special node X86Movlps and all the patterns associated with it.
Now instruction (V)MOVLPSrm doesn't have a pattern associated with it, and the
'mayLoad' and 'hasSideEffects' flags are left unset.
When the instruction info is emitted by tablegen, method
CodeGenDAGPatterns::InferInstructionFlags() sees that (V)MOVLPSrm doesn't have a
pattern and that its flags are undefined, so it conservatively sets the
"hasSideEffects" flag for it.
As a consequence, we were losing the 'mayLoad' flag, and we were gaining a
'hasSideEffects' flag in its place.
This patch fixes the issue (originally reported by Michael Holmen).
The mca tests show the differences in the instruction info flags. Instructions
that were affected by this problem were: MOVLPSrm/VMOVLPSrm/VMOVLPSZ128rm.
Differential Revision: https://reviews.llvm.org/D49182
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@336818 91177308-0d34-0410-b5e6-96231b3b80d8
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in
+ let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, f64mem:$src2),
!strconcat(OpcodeStr,
multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
string base_opc, string asm_opr> {
+ let hasSideEffects = 0, mayLoad = 1 in
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
SSEPackedSingle>, PS,
Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
+ let hasSideEffects = 0, mayLoad = 1 in
def PDrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
# CHECK-NEXT: 1 1 1.00 * movhps %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * movhps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * movlps %xmm0, (%rax)
-# CHECK-NEXT: 1 1 1.00 U movlps (%rax), %xmm2
+# CHECK-NEXT: 1 1 1.00 * movlps (%rax), %xmm2
# CHECK-NEXT: 1 3 3.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * movntps %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * * U movntq %mm0, (%rax)
# CHECK-NEXT: 2 1 1.00 * vmovlpd %xmm0, (%rax)
# CHECK-NEXT: 2 6 1.00 * vmovlpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 * vmovlps %xmm0, (%rax)
-# CHECK-NEXT: 2 6 1.00 U vmovlps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 6 1.00 * vmovlps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vmovmskpd %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 vmovmskpd %ymm0, %ecx
# CHECK-NEXT: 1 3 1.00 vmovmskps %xmm0, %ecx
# CHECK-NEXT: 2 1 1.00 * movhps %xmm0, (%rax)
# CHECK-NEXT: 2 6 1.00 * movhps (%rax), %xmm2
# CHECK-NEXT: 2 1 1.00 * movlps %xmm0, (%rax)
-# CHECK-NEXT: 2 6 1.00 U movlps (%rax), %xmm2
+# CHECK-NEXT: 2 6 1.00 * movlps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 2 1 1.00 * movntps %xmm0, (%rax)
# CHECK-NEXT: 2 1 1.00 * * U movntq %mm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * vmovlpd %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * vmovlpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 * vmovlps %xmm0, (%rax)
-# CHECK-NEXT: 1 6 1.00 U vmovlps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 6 1.00 * vmovlps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vmovmskpd %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 vmovmskpd %ymm0, %ecx
# CHECK-NEXT: 1 3 1.00 vmovmskps %xmm0, %ecx
# CHECK-NEXT: 1 2 1.00 * movhps %xmm0, (%rax)
# CHECK-NEXT: 1 6 1.00 * movhps (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 * movlps %xmm0, (%rax)
-# CHECK-NEXT: 1 6 1.00 U movlps (%rax), %xmm2
+# CHECK-NEXT: 1 6 1.00 * movlps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 * movntps %xmm0, (%rax)
# CHECK-NEXT: 1 2 1.00 * * U movntq %mm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * vmovlpd %xmm0, (%rax)
# CHECK-NEXT: 2 7 1.00 * vmovlpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovlps %xmm0, (%rax)
-# CHECK-NEXT: 2 7 1.00 U vmovlps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vmovlps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vmovmskpd %xmm0, %ecx
# CHECK-NEXT: 1 2 1.00 vmovmskpd %ymm0, %ecx
# CHECK-NEXT: 1 2 1.00 vmovmskps %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * movhps %xmm0, (%rax)
# CHECK-NEXT: 2 7 1.00 * movhps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * movlps %xmm0, (%rax)
-# CHECK-NEXT: 2 7 1.00 U movlps (%rax), %xmm2
+# CHECK-NEXT: 2 7 1.00 * movlps (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * movntps %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * * U movntq %mm0, (%rax)
# CHECK-NEXT: 2 1 1.00 * vmovlpd %xmm0, (%rax)
# CHECK-NEXT: 2 6 1.00 * vmovlpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 * vmovlps %xmm0, (%rax)
-# CHECK-NEXT: 2 6 1.00 U vmovlps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 6 1.00 * vmovlps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vmovmskpd %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 vmovmskpd %ymm0, %ecx
# CHECK-NEXT: 1 3 1.00 vmovmskps %xmm0, %ecx
# CHECK-NEXT: 2 1 1.00 * movhps %xmm0, (%rax)
# CHECK-NEXT: 2 6 1.00 * movhps (%rax), %xmm2
# CHECK-NEXT: 2 1 1.00 * movlps %xmm0, (%rax)
-# CHECK-NEXT: 2 6 1.00 U movlps (%rax), %xmm2
+# CHECK-NEXT: 2 6 1.00 * movlps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 2 1 1.00 * movntps %xmm0, (%rax)
# CHECK-NEXT: 2 1 1.00 * * U movntq %mm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * movhps %xmm0, (%rax)
# CHECK-NEXT: 1 4 1.00 * movhps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * movlps %xmm0, (%rax)
-# CHECK-NEXT: 1 4 1.00 U movlps (%rax), %xmm2
+# CHECK-NEXT: 1 4 1.00 * movlps (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * movntps %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * * U movntq %mm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * vmovlpd %xmm0, (%rax)
# CHECK-NEXT: 2 7 1.00 * vmovlpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 * vmovlps %xmm0, (%rax)
-# CHECK-NEXT: 2 7 1.00 U vmovlps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 7 1.00 * vmovlps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vmovmskpd %xmm0, %ecx
# CHECK-NEXT: 1 2 1.00 vmovmskpd %ymm0, %ecx
# CHECK-NEXT: 1 2 1.00 vmovmskps %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * movhps %xmm0, (%rax)
# CHECK-NEXT: 2 7 1.00 * movhps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * movlps %xmm0, (%rax)
-# CHECK-NEXT: 2 7 1.00 U movlps (%rax), %xmm2
+# CHECK-NEXT: 2 7 1.00 * movlps (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * movntps %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * * U movntq %mm0, (%rax)
# CHECK-NEXT: 2 1 1.00 * vmovlpd %xmm0, (%rax)
# CHECK-NEXT: 2 6 1.00 * vmovlpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 * vmovlps %xmm0, (%rax)
-# CHECK-NEXT: 2 6 1.00 U vmovlps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 6 1.00 * vmovlps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vmovmskpd %xmm0, %ecx
# CHECK-NEXT: 1 2 1.00 vmovmskpd %ymm0, %ecx
# CHECK-NEXT: 1 2 1.00 vmovmskps %xmm0, %ecx
# CHECK-NEXT: 2 1 1.00 * movhps %xmm0, (%rax)
# CHECK-NEXT: 2 6 1.00 * movhps (%rax), %xmm2
# CHECK-NEXT: 2 1 1.00 * movlps %xmm0, (%rax)
-# CHECK-NEXT: 2 6 1.00 U movlps (%rax), %xmm2
+# CHECK-NEXT: 2 6 1.00 * movlps (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 2 1 1.00 * movntps %xmm0, (%rax)
# CHECK-NEXT: 2 1 1.00 * * U movntq %mm0, (%rax)
# CHECK-NEXT: 2 1 1.00 * vmovlpd %xmm0, (%rax)
# CHECK-NEXT: 2 6 1.00 * vmovlpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 1 1.00 * vmovlps %xmm0, (%rax)
-# CHECK-NEXT: 2 6 1.00 U vmovlps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 6 1.00 * vmovlps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vmovmskpd %xmm0, %ecx
# CHECK-NEXT: 1 2 1.00 vmovmskpd %ymm0, %ecx
# CHECK-NEXT: 1 2 1.00 vmovmskps %xmm0, %ecx
# CHECK-NEXT: 2 1 1.00 * movhps %xmm0, (%rax)
# CHECK-NEXT: 2 6 1.00 * movhps (%rax), %xmm2
# CHECK-NEXT: 2 1 1.00 * movlps %xmm0, (%rax)
-# CHECK-NEXT: 2 6 1.00 U movlps (%rax), %xmm2
+# CHECK-NEXT: 2 6 1.00 * movlps (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 2 1 1.00 * movntps %xmm0, (%rax)
# CHECK-NEXT: 2 1 1.00 * * U movntq %mm0, (%rax)
# CHECK-NEXT: 1 1 0.50 * vmovlpd %xmm0, (%rax)
# CHECK-NEXT: 1 8 0.50 * vmovlpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 * vmovlps %xmm0, (%rax)
-# CHECK-NEXT: 1 8 0.50 U vmovlps (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 8 0.50 * vmovlps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vmovmskpd %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 vmovmskpd %ymm0, %ecx
# CHECK-NEXT: 1 1 1.00 vmovmskps %xmm0, %ecx
# CHECK-NEXT: 1 1 0.50 * movhps %xmm0, (%rax)
# CHECK-NEXT: 1 8 0.50 * movhps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 * movlps %xmm0, (%rax)
-# CHECK-NEXT: 1 8 0.50 U movlps (%rax), %xmm2
+# CHECK-NEXT: 1 8 0.50 * movlps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 1 1 0.50 * movntps %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.50 * * U movntq %mm0, (%rax)