From 0e709703af5bbc9ea6e75e1f99c2dd0dae261869 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 11 Aug 2017 16:04:50 +0200
Subject: [PATCH] mm, locking/barriers: Clarify tlb_flush_pending() barriers

Better document the ordering around tlb_flush_pending().

Signed-off-by: Peter Zijlstra (Intel)
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Signed-off-by: Ingo Molnar
---
 include/linux/mm_types.h | 78 ++++++++++++++++++++++++++++--------------------
 1 file changed, 45 insertions(+), 33 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index dc1edec05a3f..57378c7cb5f8 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -526,30 +526,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 extern void tlb_finish_mmu(struct mmu_gather *tlb,
 		unsigned long start, unsigned long end);
 
-/*
- * Memory barriers to keep this state in sync are graciously provided by
- * the page table locks, outside of which no page table modifications happen.
- * The barriers are used to ensure the order between tlb_flush_pending updates,
- * which happen while the lock is not taken, and the PTE updates, which happen
- * while the lock is taken, are serialized.
- */
-static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
-{
-	/*
-	 * Must be called with PTL held; such that our PTL acquire will have
-	 * observed the store from set_tlb_flush_pending().
-	 */
-	return atomic_read(&mm->tlb_flush_pending) > 0;
-}
-
-/*
- * Returns true if there are two above TLB batching threads in parallel.
- */
-static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
-{
-	return atomic_read(&mm->tlb_flush_pending) > 1;
-}
-
 static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
 	atomic_set(&mm->tlb_flush_pending, 0);
@@ -558,7 +534,6 @@ static inline void init_tlb_flush_pending(struct mm_struct *mm)
 static inline void inc_tlb_flush_pending(struct mm_struct *mm)
 {
 	atomic_inc(&mm->tlb_flush_pending);
-
 	/*
 	 * The only time this value is relevant is when there are indeed pages
 	 * to flush. And we'll only flush pages after changing them, which
@@ -580,24 +555,61 @@ static inline void inc_tlb_flush_pending(struct mm_struct *mm)
 	 *	flush_tlb_range();
 	 *	atomic_dec(&mm->tlb_flush_pending);
 	 *
-	 * So the =true store is constrained by the PTL unlock, and the =false
-	 * store is constrained by the TLB invalidate.
+	 * Where the increment is constrained by the PTL unlock, it thus
+	 * ensures that the increment is visible if the PTE modification is
+	 * visible. After all, if there is no PTE modification, nobody cares
+	 * about TLB flushes either.
+	 *
+	 * This very much relies on users (mm_tlb_flush_pending() and
+	 * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and
+	 * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc
+	 * locks (PPC) the unlock of one doesn't order against the lock of
+	 * another PTL.
+	 *
+	 * The decrement is ordered by the flush_tlb_range(), such that
+	 * mm_tlb_flush_pending() will not return false unless all flushes have
+	 * completed.
 	 */
 }
 
-/* Clearing is done after a TLB flush, which also provides a barrier. */
 static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 {
 	/*
-	 * Guarantee that the tlb_flush_pending does not not leak into the
-	 * critical section, since we must order the PTE change and changes to
-	 * the pending TLB flush indication. We could have relied on TLB flush
-	 * as a memory barrier, but this behavior is not clearly documented.
+	 * See inc_tlb_flush_pending().
+	 *
+	 * This cannot be smp_mb__before_atomic() because smp_mb() simply does
+	 * not order against TLB invalidate completion, which is what we need.
+	 *
+	 * Therefore we must rely on tlb_flush_*() to guarantee order.
 	 */
-	smp_mb__before_atomic();
 	atomic_dec(&mm->tlb_flush_pending);
 }
 
+static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+{
+	/*
+	 * Must be called after having acquired the PTL; orders against that
+	 * PTL's release and therefore ensures that if we observe the modified
+	 * PTE we must also observe the increment from inc_tlb_flush_pending().
+	 *
+	 * That is, it only guarantees to return true if there is a flush
+	 * pending for _this_ PTL.
+	 */
+	return atomic_read(&mm->tlb_flush_pending);
+}
+
+static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
+{
+	/*
+	 * Similar to mm_tlb_flush_pending(), we must have acquired the PTL
+	 * for which there is a TLB flush pending in order to guarantee
+	 * we've seen both that PTE modification and the increment.
+	 *
+	 * (no requirement on actually still holding the PTL, that is irrelevant)
+	 */
+	return atomic_read(&mm->tlb_flush_pending) > 1;
+}
+
 struct vm_fault;
 
 struct vm_special_mapping {
-- 
2.11.0
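
The ordering argument documented above can be exercised outside the kernel. What follows is a minimal userspace sketch, not kernel code: it uses C11 atomics and a pthread mutex as a stand-in for the PTL, and the names fake_pte, flush_done, writer and reader are made up purely for illustration. It models the documented guarantee that a reader who observes the modified PTE under the lock must also observe either the elevated pending count or the already completed flush.

/*
 * Userspace sketch only: a pthread mutex models the PTL, a C11 atomic
 * models mm->tlb_flush_pending.  None of these names are kernel APIs.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;	/* stand-in for the PTL */
static atomic_int tlb_flush_pending;			/* mm->tlb_flush_pending */
static atomic_int flush_done;				/* "TLB flush completed" */
static int fake_pte;					/* PTE guarded by the PTL */

static void *writer(void *arg)
{
	(void)arg;

	atomic_fetch_add(&tlb_flush_pending, 1);	/* inc_tlb_flush_pending() */

	pthread_mutex_lock(&ptl);
	fake_pte = 1;					/* the PTE modification */
	pthread_mutex_unlock(&ptl);			/* release: makes the increment
							 * visible along with the PTE change */

	atomic_store(&flush_done, 1);			/* models flush_tlb_range() */
	atomic_fetch_sub(&tlb_flush_pending, 1);	/* dec_tlb_flush_pending(), only
							 * after the flush "completed" */
	return NULL;
}

static void *reader(void *arg)
{
	int pte, pending, done;

	(void)arg;

	pthread_mutex_lock(&ptl);			/* acquire: pairs with the
							 * writer's unlock */
	pte = fake_pte;
	pending = atomic_load(&tlb_flush_pending);	/* mm_tlb_flush_pending() */
	done = atomic_load(&flush_done);
	pthread_mutex_unlock(&ptl);

	/*
	 * Seeing the new PTE means we must also see the increment, or the
	 * decrement that is ordered after the completed flush.
	 */
	if (pte && !pending && !done)
		printf("ordering violated (should never happen)\n");
	return NULL;
}

int main(void)
{
	pthread_t w, r;

	pthread_create(&w, NULL, writer, NULL);
	pthread_create(&r, NULL, reader, NULL);
	pthread_join(w, NULL);
	pthread_join(r, NULL);
	return 0;
}

Built with cc -pthread, the "ordering violated" message should never appear. The interesting legal outcome is the reader seeing fake_pte set while the pending count is already back to zero; that is permitted only because the decrement is ordered after the (modelled) flush, which mirrors why dec_tlb_flush_pending() relies on tlb_flush_*() rather than a barrier of its own.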