l2t_seq_next should increase position index

[sagit-ice-cold/kernel_xiaomi_msm8998.git] / mm / rmap.c
diff --git a/mm/rmap.c b/mm/rmap.c

index b577fbb..cf733fa 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -408,7 +408,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
         list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
                 struct anon_vma *anon_vma = avc->anon_vma;
  
-               BUG_ON(anon_vma->degree);
+               VM_WARN_ON(anon_vma->degree);
                 put_anon_vma(anon_vma);
  
                 list_del(&avc->same_vma);
@@ -587,19 +587,6 @@ vma_address(struct page *page, struct vm_area_struct *vma)
  }
  
  #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-static void percpu_flush_tlb_batch_pages(void *data)
-{
-       /*
-        * All TLB entries are flushed on the assumption that it is
-        * cheaper to flush all TLBs and let them be refilled than
-        * flushing individual PFNs. Note that we do not track mm's
-        * to flush as that might simply be multiple full TLB flushes
-        * for no gain.
-        */
-       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-       flush_tlb_local();
-}
-
  /*
   * Flush TLB entries for recently unmapped pages from remote CPUs. It is
   * important if a PTE was dirty when it was unmapped that it's flushed
@@ -616,15 +603,14 @@ void try_to_unmap_flush(void)
  
         cpu = get_cpu();
  
-       trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, -1UL);
-
-       if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask))
-               percpu_flush_tlb_batch_pages(&tlb_ubc->cpumask);
-
-       if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids) {
-               smp_call_function_many(&tlb_ubc->cpumask,
-                       percpu_flush_tlb_batch_pages, (void *)tlb_ubc, true);
+       if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask)) {
+               count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+               local_flush_tlb();
+               trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
         }
+
+       if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids)
+               flush_tlb_others(&tlb_ubc->cpumask, NULL, 0, TLB_FLUSH_ALL);
         cpumask_clear(&tlb_ubc->cpumask);
         tlb_ubc->flush_required = false;
         tlb_ubc->writable = false;
@@ -649,6 +635,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
         tlb_ubc->flush_required = true;
  
         /*
+        * Ensure compiler does not re-order the setting of tlb_flush_batched
+        * before the PTE is cleared.
+        */
+       barrier();
+       mm->tlb_flush_batched = true;
+
+       /*
          * If the PTE was dirty then it's best to assume it's writable. The
          * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
          * before the page is queued for IO.
@@ -675,6 +668,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
  
         return should_defer;
  }
+
+/*
+ * Reclaim unmaps pages under the PTL but do not flush the TLB prior to
+ * releasing the PTL if TLB flushes are batched. It's possible for a parallel
+ * operation such as mprotect or munmap to race between reclaim unmapping
+ * the page and flushing the page. If this race occurs, it potentially allows
+ * access to data via a stale TLB entry. Tracking all mm's that have TLB
+ * batching in flight would be expensive during reclaim so instead track
+ * whether TLB batching occurred in the past and if so then do a flush here
+ * if required. This will cost one additional flush per reclaim cycle paid
+ * by the first operation at risk such as mprotect and mumap.
+ *
+ * This must be called under the PTL so that an access to tlb_flush_batched
+ * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
+ * via the PTL.
+ */
+void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+       if (mm->tlb_flush_batched) {
+               flush_tlb_mm(mm);
+
+               /*
+                * Do not allow the compiler to re-order the clearing of
+                * tlb_flush_batched before the tlb is flushed.
+                */
+               barrier();
+               mm->tlb_flush_batched = false;
+       }
+}
  #else
  static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
                 struct page *page, bool writable)
@@ -1302,12 +1324,41 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         pte_t pteval;
         spinlock_t *ptl;
         int ret = SWAP_AGAIN;
+       unsigned long sh_address;
+       bool pmd_sharing_possible = false;
+       unsigned long spmd_start, spmd_end;
         enum ttu_flags flags = (enum ttu_flags)arg;
  
         /* munlock has nothing to gain from examining un-locked vmas */
         if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
                 goto out;
  
+       /*
+        * Only use the range_start/end mmu notifiers if huge pmd sharing
+        * is possible.  In the normal case, mmu_notifier_invalidate_page
+        * is sufficient as we only unmap a page.  However, if we unshare
+        * a pmd, we will unmap a PUD_SIZE range.
+        */
+       if (PageHuge(page)) {
+               spmd_start = address;
+               spmd_end = spmd_start + vma_mmu_pagesize(vma);
+
+               /*
+                * Check if pmd sharing is possible.  If possible, we could
+                * unmap a PUD_SIZE range.  spmd_start/spmd_end will be
+                * modified if sharing is possible.
+                */
+               adjust_range_if_pmd_sharing_possible(vma, &spmd_start,
+                                                               &spmd_end);
+               if (spmd_end - spmd_start != vma_mmu_pagesize(vma)) {
+                       sh_address = address;
+
+                       pmd_sharing_possible = true;
+                       mmu_notifier_invalidate_range_start(vma->vm_mm,
+                                                       spmd_start, spmd_end);
+               }
+       }
+
         pte = page_check_address(page, mm, address, &ptl, 0);
         if (!pte)
                 goto out;
@@ -1334,6 +1385,30 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                 }
         }
  
+       /*
+        * Call huge_pmd_unshare to potentially unshare a huge pmd.  Pass
+        * sh_address as it will be modified if unsharing is successful.
+        */
+       if (PageHuge(page) && huge_pmd_unshare(mm, &sh_address, pte)) {
+               /*
+                * huge_pmd_unshare unmapped an entire PMD page.  There is
+                * no way of knowing exactly which PMDs may be cached for
+                * this mm, so flush them all.  spmd_start/spmd_end cover
+                * this PUD_SIZE range.
+                */
+               flush_cache_range(vma, spmd_start, spmd_end);
+               flush_tlb_range(vma, spmd_start, spmd_end);
+
+               /*
+                * The ref count of the PMD page was dropped which is part
+                * of the way map counting is done for shared PMDs.  When
+                * there is no other sharing, huge_pmd_unshare returns false
+                * and we will unmap the actual page and drop map count
+                * to zero.
+                */
+               goto out_unmap;
+       }
+
         /* Nuke the page table entry. */
         flush_cache_page(vma, address, page_to_pfn(page));
         if (should_defer_flush(mm, flags)) {
@@ -1428,6 +1503,9 @@ out_unmap:
         if (ret != SWAP_FAIL && ret != SWAP_MLOCK && !(flags & TTU_MUNLOCK))
                 mmu_notifier_invalidate_page(mm, address);
  out:
+       if (pmd_sharing_possible)
+               mmu_notifier_invalidate_range_end(vma->vm_mm,
+                                                       spmd_start, spmd_end);
         return ret;
  }