ARM: dts: at91: sama5d3: define clock rate range for tcb1

[sagit-ice-cold/kernel_xiaomi_msm8998.git] / mm / hugetlb.c
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index ef6963b..fd932e7 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1221,12 +1221,23 @@ void free_huge_page(struct page *page)
         ClearPagePrivate(page);
  
         /*
-        * A return code of zero implies that the subpool will be under its
-        * minimum size if the reservation is not restored after page is free.
-        * Therefore, force restore_reserve operation.
+        * If PagePrivate() was set on page, page allocation consumed a
+        * reservation.  If the page was associated with a subpool, there
+        * would have been a page reserved in the subpool before allocation
+        * via hugepage_subpool_get_pages().  Since we are 'restoring' the
+        * reservation, do not call hugepage_subpool_put_pages() as this will
+        * remove the reserved page from the subpool.
          */
-       if (hugepage_subpool_put_pages(spool, 1) == 0)
-               restore_reserve = true;
+       if (!restore_reserve) {
+               /*
+                * A return code of zero implies that the subpool will be
+                * under its minimum size if the reservation is not restored
+                * after page is free.  Therefore, force restore_reserve
+                * operation.
+                */
+               if (hugepage_subpool_put_pages(spool, 1) == 0)
+                       restore_reserve = true;
+       }
  
         spin_lock(&hugetlb_lock);
         clear_page_huge_active(page);
@@ -1416,12 +1427,13 @@ static void dissolve_free_huge_page(struct page *page)
  {
         spin_lock(&hugetlb_lock);
         if (PageHuge(page) && !page_count(page)) {
-               struct hstate *h = page_hstate(page);
-               int nid = page_to_nid(page);
-               list_del(&page->lru);
+               struct page *head = compound_head(page);
+               struct hstate *h = page_hstate(head);
+               int nid = page_to_nid(head);
+               list_del(&head->lru);
                 h->free_huge_pages--;
                 h->free_huge_pages_node[nid]--;
-               update_and_free_page(h, page);
+               update_and_free_page(h, head);
         }
         spin_unlock(&hugetlb_lock);
  }
@@ -1429,7 +1441,8 @@ static void dissolve_free_huge_page(struct page *page)
  /*
   * Dissolve free hugepages in a given pfn range. Used by memory hotplug to
   * make specified memory blocks removable from the system.
- * Note that start_pfn should aligned with (minimum) hugepage size.
+ * Note that this will dissolve a free gigantic hugepage completely, if any
+ * part of it lies within the given range.
   */
  void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
  {
@@ -1438,7 +1451,6 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
         if (!hugepages_supported())
                 return;
  
-       VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << minimum_order));
         for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order)
                 dissolve_free_huge_page(pfn_to_page(pfn));
  }
@@ -1722,23 +1734,32 @@ free:
  }
  
  /*
- * When releasing a hugetlb pool reservation, any surplus pages that were
- * allocated to satisfy the reservation must be explicitly freed if they were
- * never used.
- * Called with hugetlb_lock held.
+ * This routine has two main purposes:
+ * 1) Decrement the reservation count (resv_huge_pages) by the value passed
+ *    in unused_resv_pages.  This corresponds to the prior adjustments made
+ *    to the associated reservation map.
+ * 2) Free any unused surplus pages that may have been allocated to satisfy
+ *    the reservation.  As many as unused_resv_pages may be freed.
+ *
+ * Called with hugetlb_lock held.  However, the lock could be dropped (and
+ * reacquired) during calls to cond_resched_lock.  Whenever dropping the lock,
+ * we must make sure nobody else can claim pages we are in the process of
+ * freeing.  Do this by ensuring resv_huge_pages is always greater than the
+ * number of huge pages we plan to free when dropping the lock.
   */
  static void return_unused_surplus_pages(struct hstate *h,
                                         unsigned long unused_resv_pages)
  {
         unsigned long nr_pages;
  
-       /* Uncommit the reservation */
-       h->resv_huge_pages -= unused_resv_pages;
-
         /* Cannot return gigantic pages currently */
         if (hstate_is_gigantic(h))
-               return;
+               goto out;
  
+       /*
+        * Part (or even all) of the reservation could have been backed
+        * by pre-allocated pages. Only free surplus pages.
+        */
         nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
  
         /*
@@ -1748,12 +1769,22 @@ static void return_unused_surplus_pages(struct hstate *h,
          * when the nodes with surplus pages have no free pages.
          * free_pool_huge_page() will balance the the freed pages across the
          * on-line nodes with memory and will handle the hstate accounting.
+        *
+        * Note that we decrement resv_huge_pages as we free the pages.  If
+        * we drop the lock, resv_huge_pages will still be sufficiently large
+        * to cover subsequent pages we may free.
          */
         while (nr_pages--) {
+               h->resv_huge_pages--;
+               unused_resv_pages--;
                 if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
-                       break;
+                       goto out;
                 cond_resched_lock(&hugetlb_lock);
         }
+
+out:
+       /* Fully uncommit the reservation */
+       h->resv_huge_pages -= unused_resv_pages;
  }
  
  
@@ -2018,6 +2049,7 @@ static void __init gather_bootmem_prealloc(void)
                  */
                 if (hstate_is_gigantic(h))
                         adjust_managed_page_count(page, 1 << h->order);
+               cond_resched();
         }
  }
  
@@ -2170,6 +2202,10 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
                  * and reducing the surplus.
                  */
                 spin_unlock(&hugetlb_lock);
+
+               /* yield cpu to avoid soft lockup */
+               cond_resched();
+
                 if (hstate_is_gigantic(h))
                         ret = alloc_fresh_gigantic_page(h, nodes_allowed);
                 else
@@ -3078,7 +3114,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte)
  int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                             struct vm_area_struct *vma)
  {
-       pte_t *src_pte, *dst_pte, entry;
+       pte_t *src_pte, *dst_pte, entry, dst_entry;
         struct page *ptepage;
         unsigned long addr;
         int cow;
@@ -3106,15 +3142,30 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                         break;
                 }
  
-               /* If the pagetables are shared don't copy or take references */
-               if (dst_pte == src_pte)
+               /*
+                * If the pagetables are shared don't copy or take references.
+                * dst_pte == src_pte is the common case of src/dest sharing.
+                *
+                * However, src could have 'unshared' and dst shares with
+                * another vma.  If dst_pte !none, this implies sharing.
+                * Check here before taking page table lock, and once again
+                * after taking the lock below.
+                */
+               dst_entry = huge_ptep_get(dst_pte);
+               if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
                         continue;
  
                 dst_ptl = huge_pte_lock(h, dst, dst_pte);
                 src_ptl = huge_pte_lockptr(h, src, src_pte);
                 spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
                 entry = huge_ptep_get(src_pte);
-               if (huge_pte_none(entry)) { /* skip none entry */
+               dst_entry = huge_ptep_get(dst_pte);
+               if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
+                       /*
+                        * Skip if src entry none.  Also, skip in the
+                        * unlikely case dst entry !none as this implies
+                        * sharing with another vma.
+                        */
                         ;
                 } else if (unlikely(is_hugetlb_entry_migration(entry) ||
                                     is_hugetlb_entry_hwpoisoned(entry))) {
@@ -3432,7 +3483,6 @@ retry_avoidcopy:
         copy_user_huge_page(new_page, old_page, address, vma,
                             pages_per_huge_page(h));
         __SetPageUptodate(new_page);
-       set_page_huge_active(new_page);
  
         mmun_start = address & huge_page_mask(h);
         mmun_end = mmun_start + huge_page_size(h);
@@ -3454,6 +3504,7 @@ retry_avoidcopy:
                                 make_huge_pte(vma, new_page, 1));
                 page_remove_rmap(old_page);
                 hugepage_add_new_anon_rmap(new_page, vma, address);
+               set_page_huge_active(new_page);
                 /* Make the old page be freed below */
                 new_page = old_page;
         }
@@ -3512,6 +3563,12 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
                 return err;
         ClearPagePrivate(page);
  
+       /*
+        * set page dirty so that it will not be removed from cache/file
+        * by non-hugetlbfs specific code paths.
+        */
+       set_page_dirty(page);
+
         spin_lock(&inode->i_lock);
         inode->i_blocks += blocks_per_huge_page(h);
         spin_unlock(&inode->i_lock);
@@ -3529,6 +3586,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
         struct page *page;
         pte_t new_pte;
         spinlock_t *ptl;
+       bool new_page = false;
  
         /*
          * Currently, we are forced to kill the process in the event the
@@ -3562,7 +3620,7 @@ retry:
                 }
                 clear_huge_page(page, address, pages_per_huge_page(h));
                 __SetPageUptodate(page);
-               set_page_huge_active(page);
+               new_page = true;
  
                 if (vma->vm_flags & VM_MAYSHARE) {
                         int err = huge_add_to_page_cache(page, mapping, idx);
@@ -3634,6 +3692,15 @@ retry:
         }
  
         spin_unlock(ptl);
+
+       /*
+        * Only make newly allocated pages active.  Existing pages found
+        * in the pagecache could be !page_huge_active() if they have been
+        * isolated for migration.
+        */
+       if (new_page)
+               set_page_huge_active(page);
+
         unlock_page(page);
  out:
         return ret;
@@ -3647,21 +3714,14 @@ backout_unlocked:
  }
  
  #ifdef CONFIG_SMP
-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
-                           struct vm_area_struct *vma,
-                           struct address_space *mapping,
+u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
                             pgoff_t idx, unsigned long address)
  {
         unsigned long key[2];
         u32 hash;
  
-       if (vma->vm_flags & VM_SHARED) {
-               key[0] = (unsigned long) mapping;
-               key[1] = idx;
-       } else {
-               key[0] = (unsigned long) mm;
-               key[1] = address >> huge_page_shift(h);
-       }
+       key[0] = (unsigned long) mapping;
+       key[1] = idx;
  
         hash = jhash2((u32 *)&key, sizeof(key)/sizeof(u32), 0);
  
@@ -3672,9 +3732,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
   * For uniprocesor systems we always use a single mutex, so just
   * return 0 and avoid the hashing overhead.
   */
-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
-                           struct vm_area_struct *vma,
-                           struct address_space *mapping,
+u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
                             pgoff_t idx, unsigned long address)
  {
         return 0;
@@ -3720,7 +3778,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
          * get spurious allocation failures if two CPUs race to instantiate
          * the same page in the page cache.
          */
-       hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, address);
+       hash = hugetlb_fault_mutex_hash(h, mapping, idx, address);
         mutex_lock(&hugetlb_fault_mutex_table[hash]);
  
         entry = huge_ptep_get(ptep);
@@ -4007,6 +4065,14 @@ int hugetlb_reserve_pages(struct inode *inode,
         struct resv_map *resv_map;
         long gbl_reserve;
  
+       /* This should never happen */
+       if (from > to) {
+#ifdef CONFIG_DEBUG_VM
+               WARN(1, "%s called with a negative range\n", __func__);
+#endif
+               return -EINVAL;
+       }
+
         /*
          * Only apply hugepage reservation if asked. At fault time, an
          * attempt will be made for VM_NORESERVE to allocate a page
@@ -4096,7 +4162,9 @@ int hugetlb_reserve_pages(struct inode *inode,
         return 0;
  out_err:
         if (!vma || vma->vm_flags & VM_MAYSHARE)
-               region_abort(resv_map, from, to);
+               /* Don't call region_abort if region_chg failed */
+               if (chg >= 0)
+                       region_abort(resv_map, from, to);
         if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
                 kref_put(&resv_map->refs, resv_map_release);
         return ret;
@@ -4170,13 +4238,41 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
         /*
          * check on proper vm_flags and page table alignment
          */
-       if (vma->vm_flags & VM_MAYSHARE &&
-           vma->vm_start <= base && end <= vma->vm_end)
+       if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
                 return true;
         return false;
  }
  
  /*
+ * Determine if start,end range within vma could be mapped by shared pmd.
+ * If yes, adjust start and end to cover range associated with possible
+ * shared pmd mappings.
+ */
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                               unsigned long *start, unsigned long *end)
+{
+       unsigned long check_addr = *start;
+
+       if (!(vma->vm_flags & VM_MAYSHARE))
+               return;
+
+       for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
+               unsigned long a_start = check_addr & PUD_MASK;
+               unsigned long a_end = a_start + PUD_SIZE;
+
+               /*
+                * If sharing is possible, adjust start/end if necessary.
+                */
+               if (range_in_vma(vma, a_start, a_end)) {
+                       if (a_start < *start)
+                               *start = a_start;
+                       if (a_end > *end)
+                               *end = a_end;
+               }
+       }
+}
+
+/*
   * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
   * and returns the corresponding pte. While this is not necessary for the
   * !shared pmd case because we can allocate the pmd later as well, it makes the
@@ -4209,7 +4305,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
                 if (saddr) {
                         spte = huge_pte_offset(svma->vm_mm, saddr);
                         if (spte) {
-                               mm_inc_nr_pmds(mm);
                                 get_page(virt_to_page(spte));
                                 break;
                         }
@@ -4224,9 +4319,9 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
         if (pud_none(*pud)) {
                 pud_populate(mm, pud,
                                 (pmd_t *)((unsigned long)spte & PAGE_MASK));
+               mm_inc_nr_pmds(mm);
         } else {
                 put_page(virt_to_page(spte));
-               mm_inc_nr_pmds(mm);
         }
         spin_unlock(ptl);
  out:
@@ -4273,6 +4368,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
  {
         return 0;
  }
+
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                               unsigned long *start, unsigned long *end)
+{
+}
  #define want_pmd_share()       (0)
  #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
  
@@ -4339,6 +4439,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
  {
         struct page *page = NULL;
         spinlock_t *ptl;
+       pte_t pte;
  retry:
         ptl = pmd_lockptr(mm, pmd);
         spin_lock(ptl);
@@ -4348,12 +4449,13 @@ retry:
          */
         if (!pmd_huge(*pmd))
                 goto out;
-       if (pmd_present(*pmd)) {
+       pte = huge_ptep_get((pte_t *)pmd);
+       if (pte_present(pte)) {
                 page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
                 if (flags & FOLL_GET)
                         get_page(page);
         } else {
-               if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) {
+               if (is_hugetlb_entry_migration(pte)) {
                         spin_unlock(ptl);
                         __migration_entry_wait(mm, (pte_t *)pmd, ptl);
                         goto retry;