Merge tag 'asm-generic-5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/arnd...
diff --git a/mm/memory.c b/mm/memory.c
index c125c49..6666bc2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -735,9 +735,6 @@ static void restore_exclusive_pte(struct vm_area_struct *vma,
 
        set_pte_at(vma->vm_mm, address, ptep, pte);
 
-       if (vma->vm_flags & VM_LOCKED)
-               mlock_vma_page(page);
-
        /*
         * No need to invalidate - it was non-present before. However
         * secondary CPUs may have mappings that need invalidating.
@@ -1309,22 +1306,34 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
  * Parameter block passed down to zap_pte_range in exceptional cases.
  */
 struct zap_details {
-       struct address_space *zap_mapping;      /* Check page->mapping if set */
        struct folio *single_folio;     /* Locked folio to be unmapped */
+       bool even_cows;                 /* Zap COWed private pages too? */
 };
 
-/*
- * We set details->zap_mapping when we want to unmap shared but keep private
- * pages. Return true if skip zapping this page, false otherwise.
- */
-static inline bool
-zap_skip_check_mapping(struct zap_details *details, struct page *page)
+/* Whether we should zap all COWed (private) pages too */
+static inline bool should_zap_cows(struct zap_details *details)
 {
-       if (!details || !page)
-               return false;
+       /* By default, zap all pages */
+       if (!details)
+               return true;
+
+       /* Or, we zap COWed pages only if the caller wants to */
+       return details->even_cows;
+}
+
+/* Decides whether we should zap this page with the page pointer specified */
+static inline bool should_zap_page(struct zap_details *details, struct page *page)
+{
+       /* If we can make a decision without *page.. */
+       if (should_zap_cows(details))
+               return true;
+
+       /* E.g. the caller passes NULL for the case of a zero page */
+       if (!page)
+               return true;
 
-       return details->zap_mapping &&
-               (details->zap_mapping != page_rmapping(page));
+       /* Otherwise we should only zap non-anon pages */
+       return !PageAnon(page);
 }
 
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
@@ -1349,6 +1358,8 @@ again:
        arch_enter_lazy_mmu_mode();
        do {
                pte_t ptent = *pte;
+               struct page *page;
+
                if (pte_none(ptent))
                        continue;
 
@@ -1356,10 +1367,8 @@ again:
                        break;
 
                if (pte_present(ptent)) {
-                       struct page *page;
-
                        page = vm_normal_page(vma, addr, ptent);
-                       if (unlikely(zap_skip_check_mapping(details, page)))
+                       if (unlikely(!should_zap_page(details, page)))
                                continue;
                        ptent = ptep_get_and_clear_full(mm, addr, pte,
                                                        tlb->fullmm);
@@ -1377,7 +1386,7 @@ again:
                                        mark_page_accessed(page);
                        }
                        rss[mm_counter(page)]--;
-                       page_remove_rmap(page, false);
+                       page_remove_rmap(page, vma, false);
                        if (unlikely(page_mapcount(page) < 0))
                                print_bad_pte(vma, addr, ptent, page);
                        if (unlikely(__tlb_remove_page(tlb, page))) {
@@ -1391,34 +1400,32 @@ again:
                entry = pte_to_swp_entry(ptent);
                if (is_device_private_entry(entry) ||
                    is_device_exclusive_entry(entry)) {
-                       struct page *page = pfn_swap_entry_to_page(entry);
-
-                       if (unlikely(zap_skip_check_mapping(details, page)))
+                       page = pfn_swap_entry_to_page(entry);
+                       if (unlikely(!should_zap_page(details, page)))
                                continue;
-                       pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
                        rss[mm_counter(page)]--;
-
                        if (is_device_private_entry(entry))
-                               page_remove_rmap(page, false);
-
+                               page_remove_rmap(page, vma, false);
                        put_page(page);
-                       continue;
-               }
-
-               /* If details->check_mapping, we leave swap entries. */
-               if (unlikely(details))
-                       continue;
-
-               if (!non_swap_entry(entry))
+               } else if (!non_swap_entry(entry)) {
+                       /* Genuine swap entry, hence a private anon page */
+                       if (!should_zap_cows(details))
+                               continue;
                        rss[MM_SWAPENTS]--;
-               else if (is_migration_entry(entry)) {
-                       struct page *page;
-
+                       if (unlikely(!free_swap_and_cache(entry)))
+                               print_bad_pte(vma, addr, ptent, NULL);
+               } else if (is_migration_entry(entry)) {
                        page = pfn_swap_entry_to_page(entry);
+                       if (!should_zap_page(details, page))
+                               continue;
                        rss[mm_counter(page)]--;
+               } else if (is_hwpoison_entry(entry)) {
+                       if (!should_zap_cows(details))
+                               continue;
+               } else {
+                       /* We should have covered all the swap entry types */
+                       WARN_ON_ONCE(1);
                }
-               if (unlikely(!free_swap_and_cache(entry)))
-                       print_bad_pte(vma, addr, ptent, NULL);
                pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
        } while (pte++, addr += PAGE_SIZE, addr != end);
 
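
Taken together, the hunks above replace the old zap_mapping check with a two-level policy: should_zap_cows() answers "do we zap private/COWed state at all?", and should_zap_page() additionally spares anonymous pages when there is a struct page to inspect. Entries that carry a page (present PTEs, device private/exclusive entries, migration entries) go through should_zap_page(); genuine swap and hwpoison entries only consult should_zap_cows(). Below is a minimal standalone userspace sketch of that decision logic; the struct, the enum and the boolean arguments are simplified stand-ins for the kernel's zap_details, swap entry types and PageAnon(), and only the control flow is meant to match.

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	/* Simplified stand-in for the kernel's zap_details */
	struct zap_details {
		bool even_cows;		/* Zap COWed private pages too? */
	};

	/* Hypothetical entry classification, for illustration only */
	enum entry_kind {
		PRESENT_PAGE,
		DEVICE_PRIVATE_OR_EXCL,
		GENUINE_SWAP,
		MIGRATION,
		HWPOISON,
	};

	/* NULL details means "zap everything" */
	static bool should_zap_cows(const struct zap_details *details)
	{
		if (!details)
			return true;
		return details->even_cows;
	}

	/* have_page == false models the NULL-page (e.g. zero page) case */
	static bool should_zap_page(const struct zap_details *details,
				    bool have_page, bool page_is_anon)
	{
		if (should_zap_cows(details))
			return true;
		if (!have_page)
			return true;
		return !page_is_anon;	/* spare private anon pages */
	}

	static bool zap_this_entry(const struct zap_details *details,
				   enum entry_kind kind, bool have_page,
				   bool page_is_anon)
	{
		switch (kind) {
		case PRESENT_PAGE:
		case DEVICE_PRIVATE_OR_EXCL:
		case MIGRATION:
			return should_zap_page(details, have_page, page_is_anon);
		case GENUINE_SWAP:
		case HWPOISON:
			/* These kinds only consult the cows policy above */
			return should_zap_cows(details);
		}
		return true;
	}

	int main(void)
	{
		struct zap_details file_only = { .even_cows = false };

		/* Unmapping a file range (even_cows == false) spares anon
		 * pages and swap entries, but still zaps file-backed and
		 * zero pages; no details at all means zap everything. */
		printf("anon page:  %d\n",
		       zap_this_entry(&file_only, PRESENT_PAGE, true, true));   /* 0 */
		printf("file page:  %d\n",
		       zap_this_entry(&file_only, PRESENT_PAGE, true, false));  /* 1 */
		printf("zero page:  %d\n",
		       zap_this_entry(&file_only, PRESENT_PAGE, false, false)); /* 1 */
		printf("swap entry: %d\n",
		       zap_this_entry(&file_only, GENUINE_SWAP, false, false)); /* 0 */
		printf("no details: %d\n",
		       zap_this_entry(NULL, GENUINE_SWAP, false, false));       /* 1 */
		return 0;
	}
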
@@ -1705,7 +1712,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr
 void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
                unsigned long size)
 {
-       if (address < vma->vm_start || address + size > vma->vm_end ||
+       if (!range_in_vma(vma, address, address + size) ||
                        !(vma->vm_flags & VM_PFNMAP))
                return;
 
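
The hunk above swaps the open-coded bounds test for range_in_vma(), which checks that the whole [address, address + size) span lies inside the VMA, so the call should be equivalent to the removed condition. A rough standalone sketch of that check (hypothetical helper name, not the kernel's definition):

	#include <assert.h>
	#include <stdbool.h>

	/* Hypothetical standalone version of the bounds check used above:
	 * true when [start, end) lies entirely within [vm_start, vm_end). */
	static bool range_in_vma_sketch(unsigned long vm_start, unsigned long vm_end,
					unsigned long start, unsigned long end)
	{
		return start >= vm_start && end <= vm_end;
	}

	int main(void)
	{
		/* Same outcomes as the removed "address < vm_start || ..." test */
		assert(range_in_vma_sketch(0x1000, 0x5000, 0x1000, 0x3000));
		assert(!range_in_vma_sketch(0x1000, 0x5000, 0x4000, 0x6000));
		return 0;
	}
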
@@ -1753,16 +1760,16 @@ static int validate_page_before_insert(struct page *page)
        return 0;
 }
 
-static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte,
+static int insert_page_into_pte_locked(struct vm_area_struct *vma, pte_t *pte,
                        unsigned long addr, struct page *page, pgprot_t prot)
 {
        if (!pte_none(*pte))
                return -EBUSY;
        /* Ok, finally just insert the thing.. */
        get_page(page);
-       inc_mm_counter_fast(mm, mm_counter_file(page));
-       page_add_file_rmap(page, false);
-       set_pte_at(mm, addr, pte, mk_pte(page, prot));
+       inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
+       page_add_file_rmap(page, vma, false);
+       set_pte_at(vma->vm_mm, addr, pte, mk_pte(page, prot));
        return 0;
 }
 
@@ -1776,7 +1783,6 @@ static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte,
 static int insert_page(struct vm_area_struct *vma, unsigned long addr,
                        struct page *page, pgprot_t prot)
 {
-       struct mm_struct *mm = vma->vm_mm;
        int retval;
        pte_t *pte;
        spinlock_t *ptl;
@@ -1785,17 +1791,17 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
        if (retval)
                goto out;
        retval = -ENOMEM;
-       pte = get_locked_pte(mm, addr, &ptl);
+       pte = get_locked_pte(vma->vm_mm, addr, &ptl);
        if (!pte)
                goto out;
-       retval = insert_page_into_pte_locked(mm, pte, addr, page, prot);
+       retval = insert_page_into_pte_locked(vma, pte, addr, page, prot);
        pte_unmap_unlock(pte, ptl);
 out:
        return retval;
 }
 
 #ifdef pte_index
-static int insert_page_in_batch_locked(struct mm_struct *mm, pte_t *pte,
+static int insert_page_in_batch_locked(struct vm_area_struct *vma, pte_t *pte,
                        unsigned long addr, struct page *page, pgprot_t prot)
 {
        int err;
@@ -1805,7 +1811,7 @@ static int insert_page_in_batch_locked(struct mm_struct *mm, pte_t *pte,
        err = validate_page_before_insert(page);
        if (err)
                return err;
-       return insert_page_into_pte_locked(mm, pte, addr, page, prot);
+       return insert_page_into_pte_locked(vma, pte, addr, page, prot);
 }
 
 /* insert_pages() amortizes the cost of spinlock operations
@@ -1842,7 +1848,7 @@ more:
 
                start_pte = pte_offset_map_lock(mm, pmd, addr, &pte_lock);
                for (pte = start_pte; pte_idx < batch_size; ++pte, ++pte_idx) {
-                       int err = insert_page_in_batch_locked(mm, pte,
+                       int err = insert_page_in_batch_locked(vma, pte,
                                addr, pages[curr_page_idx], prot);
                        if (unlikely(err)) {
                                pte_unmap_unlock(start_pte, pte_lock);
@@ -3098,7 +3104,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
                         * mapcount is visible. So transitively, TLBs to
                         * old page will be flushed before it can be reused.
                         */
-                       page_remove_rmap(old_page, false);
+                       page_remove_rmap(old_page, vma, false);
                }
 
                /* Free the old page.. */
@@ -3118,16 +3124,6 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
         */
        mmu_notifier_invalidate_range_only_end(&range);
        if (old_page) {
-               /*
-                * Don't let another task, with possibly unlocked vma,
-                * keep the mlocked page.
-                */
-               if (page_copied && (vma->vm_flags & VM_LOCKED)) {
-                       lock_page(old_page);    /* LRU manipulation */
-                       if (PageMlocked(old_page))
-                               munlock_vma_page(old_page);
-                       unlock_page(old_page);
-               }
                if (page_copied)
                        free_swap_cache(old_page);
                put_page(old_page);
@@ -3340,12 +3336,8 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
        vma_interval_tree_foreach(vma, root, first_index, last_index) {
                vba = vma->vm_pgoff;
                vea = vba + vma_pages(vma) - 1;
-               zba = first_index;
-               if (zba < vba)
-                       zba = vba;
-               zea = last_index;
-               if (zea > vea)
-                       zea = vea;
+               zba = max(first_index, vba);
+               zea = min(last_index, vea);
 
                unmap_mapping_range_vma(vma,
                        ((zba - vba) << PAGE_SHIFT) + vma->vm_start,
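
The max()/min() pair above intersects the requested zap range [first_index, last_index] with the VMA's file range [vba, vea] before converting the clamped file offsets back to virtual addresses. A small worked example with hypothetical numbers (userspace sketch, not kernel code):

	#include <stdio.h>

	#define MAX(a, b) ((a) > (b) ? (a) : (b))
	#define MIN(a, b) ((a) < (b) ? (a) : (b))

	int main(void)
	{
		/* Hypothetical VMA mapping file pages 16..31 at 0x10000000 */
		unsigned long vba = 16, vea = 31;
		unsigned long vm_start = 0x10000000UL;
		unsigned long page_shift = 12;

		/* Caller asks to unmap file pages 20..40 */
		unsigned long first_index = 20, last_index = 40;

		unsigned long zba = MAX(first_index, vba);	/* 20 */
		unsigned long zea = MIN(last_index, vea);	/* 31 */

		printf("zap file pages %lu..%lu, starting at vaddr 0x%lx\n",
		       zba, zea, ((zba - vba) << page_shift) + vm_start);
		return 0;
	}
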
@@ -3377,7 +3369,7 @@ void unmap_mapping_folio(struct folio *folio)
        first_index = folio->index;
        last_index = folio->index + folio_nr_pages(folio) - 1;
 
-       details.zap_mapping = mapping;
+       details.even_cows = false;
        details.single_folio = folio;
 
        i_mmap_lock_write(mapping);
@@ -3406,7 +3398,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
        pgoff_t first_index = start;
        pgoff_t last_index = start + nr - 1;
 
-       details.zap_mapping = even_cows ? NULL : mapping;
+       details.even_cows = even_cows;
        if (last_index < first_index)
                last_index = ULONG_MAX;
 
@@ -3871,11 +3863,16 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
                return ret;
 
        if (unlikely(PageHWPoison(vmf->page))) {
-               if (ret & VM_FAULT_LOCKED)
+               vm_fault_t poisonret = VM_FAULT_HWPOISON;
+               if (ret & VM_FAULT_LOCKED) {
+                       /* Retry if a clean page was removed from the cache. */
+                       if (invalidate_inode_page(vmf->page))
+                               poisonret = 0;
                        unlock_page(vmf->page);
+               }
                put_page(vmf->page);
                vmf->page = NULL;
-               return VM_FAULT_HWPOISON;
+               return poisonret;
        }
 
        if (unlikely(!(ret & VM_FAULT_LOCKED)))
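
The hwpoison path above now distinguishes two outcomes: if the page came back locked and invalidate_inode_page() managed to drop it from the page cache (it was clean), the fault returns 0 so it is retried and re-reads fresh data; otherwise VM_FAULT_HWPOISON is still propagated. A minimal userspace mock of that decision, with a stand-in error value rather than the kernel's VM_FAULT_HWPOISON:

	#include <stdbool.h>
	#include <stdio.h>

	#define MOCK_VM_FAULT_HWPOISON 0x20	/* stand-in value only */

	/* Mirrors the control flow above: only a locked page can be dropped,
	 * and only a successful drop converts the error into a retry (0). */
	static unsigned int poison_fault_result(bool page_locked, bool dropped_clean)
	{
		unsigned int ret = MOCK_VM_FAULT_HWPOISON;

		if (page_locked && dropped_clean)
			ret = 0;
		return ret;
	}

	int main(void)
	{
		printf("locked, dropped: %#x\n", poison_fault_result(true, true));   /* 0 */
		printf("locked, dirty:   %#x\n", poison_fault_result(true, false));  /* poison */
		printf("not locked:      %#x\n", poison_fault_result(false, true));  /* poison */
		return 0;
	}
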
@@ -3947,7 +3944,8 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
                entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
        add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
-       page_add_file_rmap(page, true);
+       page_add_file_rmap(page, vma, true);
+
        /*
         * deposit and withdraw with pmd lock held
         */
@@ -3996,7 +3994,7 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
                lru_cache_add_inactive_or_unevictable(page, vma);
        } else {
                inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
-               page_add_file_rmap(page, false);
+               page_add_file_rmap(page, vma, false);
        }
        set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
 }
@@ -4622,6 +4620,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
        struct vm_fault vmf = {
                .vma = vma,
                .address = address & PAGE_MASK,
+               .real_address = address,
                .flags = flags,
                .pgoff = linear_page_index(vma, address),
                .gfp_mask = __get_fault_gfp_mask(vma),
@@ -5256,14 +5255,6 @@ void print_vma_addr(char *prefix, unsigned long ip)
 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)
 void __might_fault(const char *file, int line)
 {
-       /*
-        * Some code (nfs/sunrpc) uses socket ops on kernel memory while
-        * holding the mmap_lock, this is safe because kernel memory doesn't
-        * get paged out, therefore we'll never actually fault, and the
-        * below annotations will generate false positives.
-        */
-       if (uaccess_kernel())
-               return;
        if (pagefault_disabled())
                return;
        __might_sleep(file, line);
@@ -5444,6 +5435,8 @@ long copy_huge_page_from_user(struct page *dst_page,
                if (rc)
                        break;
 
+               flush_dcache_page(subpage);
+
                cond_resched();
        }
        return ret_val;