hugetlb: make free_huge_page irq safe

author Mike Kravetz <mike.kravetz@oracle.com>

Wed, 5 May 2021 01:35:07 +0000 (18:35 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 5 May 2021 18:27:22 +0000 (11:27 -0700)
author Mike Kravetz <mike.kravetz@oracle.com>
Wed, 5 May 2021 01:35:07 +0000 (18:35 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 5 May 2021 18:27:22 +0000 (11:27 -0700)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index d7db936..47f56e2 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -94,9 +94,10 @@ static inline bool subpool_is_free(struct hugepage_subpool *spool)
         return true;
  }
  
-static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
+static inline void unlock_or_release_subpool(struct hugepage_subpool *spool,
+                                               unsigned long irq_flags)
  {
-       spin_unlock(&spool->lock);
+       spin_unlock_irqrestore(&spool->lock, irq_flags);
  
         /* If no pages are used, and no other handles to the subpool
          * remain, give up any reservations based on minimum size and
@@ -135,10 +136,12 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
  
  void hugepage_put_subpool(struct hugepage_subpool *spool)
  {
-       spin_lock(&spool->lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&spool->lock, flags);
         BUG_ON(!spool->count);
         spool->count--;
-       unlock_or_release_subpool(spool);
+       unlock_or_release_subpool(spool, flags);
  }
  
  /*
@@ -157,7 +160,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
         if (!spool)
                 return ret;
  
-       spin_lock(&spool->lock);
+       spin_lock_irq(&spool->lock);
  
         if (spool->max_hpages != -1) {          /* maximum size accounting */
                 if ((spool->used_hpages + delta) <= spool->max_hpages)
@@ -184,7 +187,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool,
         }
  
  unlock_ret:
-       spin_unlock(&spool->lock);
+       spin_unlock_irq(&spool->lock);
         return ret;
  }
  
@@ -198,11 +201,12 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
                                        long delta)
  {
         long ret = delta;
+       unsigned long flags;
  
         if (!spool)
                 return delta;
  
-       spin_lock(&spool->lock);
+       spin_lock_irqsave(&spool->lock, flags);
  
         if (spool->max_hpages != -1)            /* maximum size accounting */
                 spool->used_hpages -= delta;
@@ -223,7 +227,7 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool,
          * If hugetlbfs_put_super couldn't free spool due to an outstanding
          * quota reference, free it now.
          */
-       unlock_or_release_subpool(spool);
+       unlock_or_release_subpool(spool, flags);
  
         return ret;
  }
@@ -1412,7 +1416,7 @@ struct hstate *size_to_hstate(unsigned long size)
         return NULL;
  }
  
-static void __free_huge_page(struct page *page)
+void free_huge_page(struct page *page)
  {
         /*
          * Can't pass hstate in here because it is called from the
@@ -1422,6 +1426,7 @@ static void __free_huge_page(struct page *page)
         int nid = page_to_nid(page);
         struct hugepage_subpool *spool = hugetlb_page_subpool(page);
         bool restore_reserve;
+       unsigned long flags;
  
         VM_BUG_ON_PAGE(page_count(page), page);
         VM_BUG_ON_PAGE(page_mapcount(page), page);
@@ -1450,7 +1455,7 @@ static void __free_huge_page(struct page *page)
                         restore_reserve = true;
         }
  
-       spin_lock(&hugetlb_lock);
+       spin_lock_irqsave(&hugetlb_lock, flags);
         ClearHPageMigratable(page);
         hugetlb_cgroup_uncharge_page(hstate_index(h),
                                      pages_per_huge_page(h), page);
@@ -1461,66 +1466,18 @@ static void __free_huge_page(struct page *page)
  
         if (HPageTemporary(page)) {
                 remove_hugetlb_page(h, page, false);
-               spin_unlock(&hugetlb_lock);
+               spin_unlock_irqrestore(&hugetlb_lock, flags);
                 update_and_free_page(h, page);
         } else if (h->surplus_huge_pages_node[nid]) {
                 /* remove the page from active list */
                 remove_hugetlb_page(h, page, true);
-               spin_unlock(&hugetlb_lock);
+               spin_unlock_irqrestore(&hugetlb_lock, flags);
                 update_and_free_page(h, page);
         } else {
                 arch_clear_hugepage_flags(page);
                 enqueue_huge_page(h, page);
-               spin_unlock(&hugetlb_lock);
-       }
-}
-
-/*
- * As free_huge_page() can be called from a non-task context, we have
- * to defer the actual freeing in a workqueue to prevent potential
- * hugetlb_lock deadlock.
- *
- * free_hpage_workfn() locklessly retrieves the linked list of pages to
- * be freed and frees them one-by-one. As the page->mapping pointer is
- * going to be cleared in __free_huge_page() anyway, it is reused as the
- * llist_node structure of a lockless linked list of huge pages to be freed.
- */
-static LLIST_HEAD(hpage_freelist);
-
-static void free_hpage_workfn(struct work_struct *work)
-{
-       struct llist_node *node;
-       struct page *page;
-
-       node = llist_del_all(&hpage_freelist);
-
-       while (node) {
-               page = container_of((struct address_space **)node,
-                                    struct page, mapping);
-               node = node->next;
-               __free_huge_page(page);
-       }
-}
-static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
-
-void free_huge_page(struct page *page)
-{
-       /*
-        * Defer freeing if in non-task context to avoid hugetlb_lock deadlock.
-        */
-       if (!in_task()) {
-               /*
-                * Only call schedule_work() if hpage_freelist is previously
-                * empty. Otherwise, schedule_work() had been called but the
-                * workfn hasn't retrieved the list yet.
-                */
-               if (llist_add((struct llist_node *)&page->mapping,
-                             &hpage_freelist))
-                       schedule_work(&free_hpage_work);
-               return;
+               spin_unlock_irqrestore(&hugetlb_lock, flags);
         }
-
-       __free_huge_page(page);
  }
  
  static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
@@ -1530,11 +1487,11 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
         hugetlb_set_page_subpool(page, NULL);
         set_hugetlb_cgroup(page, NULL);
         set_hugetlb_cgroup_rsvd(page, NULL);
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
         h->nr_huge_pages++;
         h->nr_huge_pages_node[nid]++;
         ClearHPageFreed(page);
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
  }
  
  static void prep_compound_gigantic_page(struct page *page, unsigned int order)
@@ -1780,7 +1737,7 @@ retry:
         if (!PageHuge(page))
                 return 0;
  
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
         if (!PageHuge(page)) {
                 rc = 0;
                 goto out;
@@ -1797,7 +1754,7 @@ retry:
                  * when it is dissolved.
                  */
                 if (unlikely(!HPageFreed(head))) {
-                       spin_unlock(&hugetlb_lock);
+                       spin_unlock_irq(&hugetlb_lock);
                         cond_resched();
  
                         /*
@@ -1821,12 +1778,12 @@ retry:
                 }
                 remove_hugetlb_page(h, page, false);
                 h->max_huge_pages--;
-               spin_unlock(&hugetlb_lock);
+               spin_unlock_irq(&hugetlb_lock);
                 update_and_free_page(h, head);
                 return 0;
         }
  out:
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
         return rc;
  }
  
@@ -1868,16 +1825,16 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
         if (hstate_is_gigantic(h))
                 return NULL;
  
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
         if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages)
                 goto out_unlock;
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
  
         page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
         if (!page)
                 return NULL;
  
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
         /*
          * We could have raced with the pool size change.
          * Double check that and simply deallocate the new page
@@ -1887,7 +1844,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
          */
         if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
                 SetHPageTemporary(page);
-               spin_unlock(&hugetlb_lock);
+               spin_unlock_irq(&hugetlb_lock);
                 put_page(page);
                 return NULL;
         } else {
@@ -1896,7 +1853,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
         }
  
  out_unlock:
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
  
         return page;
  }
@@ -1946,17 +1903,17 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
  struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
                 nodemask_t *nmask, gfp_t gfp_mask)
  {
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
         if (h->free_huge_pages - h->resv_huge_pages > 0) {
                 struct page *page;
  
                 page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
                 if (page) {
-                       spin_unlock(&hugetlb_lock);
+                       spin_unlock_irq(&hugetlb_lock);
                         return page;
                 }
         }
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
  
         return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
  }
@@ -2004,7 +1961,7 @@ static int gather_surplus_pages(struct hstate *h, long delta)
  
         ret = -ENOMEM;
  retry:
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
         for (i = 0; i < needed; i++) {
                 page = alloc_surplus_huge_page(h, htlb_alloc_mask(h),
                                 NUMA_NO_NODE, NULL);
@@ -2021,7 +1978,7 @@ retry:
          * After retaking hugetlb_lock, we need to recalculate 'needed'
          * because either resv_huge_pages or free_huge_pages may have changed.
          */
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
         needed = (h->resv_huge_pages + delta) -
                         (h->free_huge_pages + allocated);
         if (needed > 0) {
@@ -2061,12 +2018,12 @@ retry:
                 enqueue_huge_page(h, page);
         }
  free:
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
  
         /* Free unnecessary surplus pages to the buddy allocator */
         list_for_each_entry_safe(page, tmp, &surplus_list, lru)
                 put_page(page);
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
  
         return ret;
  }
@@ -2116,9 +2073,9 @@ static void return_unused_surplus_pages(struct hstate *h,
         }
  
  out:
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
         update_and_free_pages_bulk(h, &page_list);
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
  }
  
  
@@ -2352,7 +2309,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
         if (ret)
                 goto out_uncharge_cgroup_reservation;
  
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
         /*
          * glb_chg is passed to indicate whether or not a page must be taken
          * from the global free pool (global change).  gbl_chg == 0 indicates
@@ -2360,7 +2317,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
          */
         page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
         if (!page) {
-               spin_unlock(&hugetlb_lock);
+               spin_unlock_irq(&hugetlb_lock);
                 page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
                 if (!page)
                         goto out_uncharge_cgroup;
@@ -2368,7 +2325,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                         SetHPageRestoreReserve(page);
                         h->resv_huge_pages--;
                 }
-               spin_lock(&hugetlb_lock);
+               spin_lock_irq(&hugetlb_lock);
                 list_add(&page->lru, &h->hugepage_activelist);
                 /* Fall through */
         }
@@ -2381,7 +2338,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                                                   h_cg, page);
         }
  
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
  
         hugetlb_set_page_subpool(page, spool);
  
@@ -2593,9 +2550,9 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
         }
  
  out:
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
         update_and_free_pages_bulk(h, &page_list);
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
  }
  #else
  static inline void try_to_free_low(struct hstate *h, unsigned long count,
@@ -2660,7 +2617,7 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
          * pages in hstate via the proc/sysfs interfaces.
          */
         mutex_lock(&h->resize_lock);
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
  
         /*
          * Check for a node specific request.
@@ -2691,7 +2648,7 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
          */
         if (hstate_is_gigantic(h) && !IS_ENABLED(CONFIG_CONTIG_ALLOC)) {
                 if (count > persistent_huge_pages(h)) {
-                       spin_unlock(&hugetlb_lock);
+                       spin_unlock_irq(&hugetlb_lock);
                         mutex_unlock(&h->resize_lock);
                         NODEMASK_FREE(node_alloc_noretry);
                         return -EINVAL;
@@ -2721,14 +2678,14 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
                  * page, free_huge_page will handle it by freeing the page
                  * and reducing the surplus.
                  */
-               spin_unlock(&hugetlb_lock);
+               spin_unlock_irq(&hugetlb_lock);
  
                 /* yield cpu to avoid soft lockup */
                 cond_resched();
  
                 ret = alloc_pool_huge_page(h, nodes_allowed,
                                                 node_alloc_noretry);
-               spin_lock(&hugetlb_lock);
+               spin_lock_irq(&hugetlb_lock);
                 if (!ret)
                         goto out;
  
@@ -2767,9 +2724,9 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
                 list_add(&page->lru, &page_list);
         }
         /* free the pages after dropping lock */
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
         update_and_free_pages_bulk(h, &page_list);
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
  
         while (count < persistent_huge_pages(h)) {
                 if (!adjust_pool_surplus(h, nodes_allowed, 1))
@@ -2777,7 +2734,7 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
         }
  out:
         h->max_huge_pages = persistent_huge_pages(h);
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
         mutex_unlock(&h->resize_lock);
  
         NODEMASK_FREE(node_alloc_noretry);
@@ -2933,9 +2890,9 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
         if (err)
                 return err;
  
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
         h->nr_overcommit_huge_pages = input;
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
  
         return count;
  }
@@ -3522,9 +3479,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
                 goto out;
  
         if (write) {
-               spin_lock(&hugetlb_lock);
+               spin_lock_irq(&hugetlb_lock);
                 h->nr_overcommit_huge_pages = tmp;
-               spin_unlock(&hugetlb_lock);
+               spin_unlock_irq(&hugetlb_lock);
         }
  out:
         return ret;
@@ -3620,7 +3577,7 @@ static int hugetlb_acct_memory(struct hstate *h, long delta)
         if (!delta)
                 return 0;
  
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
         /*
          * When cpuset is configured, it breaks the strict hugetlb page
          * reservation as the accounting is done on a global variable. Such
@@ -3659,7 +3616,7 @@ static int hugetlb_acct_memory(struct hstate *h, long delta)
                 return_unused_surplus_pages(h, (unsigned long) -delta);
  
  out:
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
         return ret;
  }
  
@@ -5687,7 +5644,7 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
  {
         bool ret = true;
  
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
         if (!PageHeadHuge(page) ||
             !HPageMigratable(page) ||
             !get_page_unless_zero(page)) {
@@ -5697,16 +5654,16 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
         ClearHPageMigratable(page);
         list_move_tail(&page->lru, list);
  unlock:
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
         return ret;
  }
  
  void putback_active_hugepage(struct page *page)
  {
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
         SetHPageMigratable(page);
         list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
         put_page(page);
  }
  
@@ -5740,12 +5697,12 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
                  */
                 if (new_nid == old_nid)
                         return;
-               spin_lock(&hugetlb_lock);
+               spin_lock_irq(&hugetlb_lock);
                 if (h->surplus_huge_pages_node[old_nid]) {
                         h->surplus_huge_pages_node[old_nid]--;
                         h->surplus_huge_pages_node[new_nid]++;
                 }
-               spin_unlock(&hugetlb_lock);
+               spin_unlock_irq(&hugetlb_lock);
         }
  }
  
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c

index 726b85f..5383023 100644 (file)
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -204,11 +204,11 @@ static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
         do {
                 idx = 0;
                 for_each_hstate(h) {
-                       spin_lock(&hugetlb_lock);
+                       spin_lock_irq(&hugetlb_lock);
                         list_for_each_entry(page, &h->hugepage_activelist, lru)
                                 hugetlb_cgroup_move_parent(idx, h_cg, page);
  
-                       spin_unlock(&hugetlb_lock);
+                       spin_unlock_irq(&hugetlb_lock);
                         idx++;
                 }
                 cond_resched();
@@ -784,7 +784,7 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
         if (hugetlb_cgroup_disabled())
                 return;
  
-       spin_lock(&hugetlb_lock);
+       spin_lock_irq(&hugetlb_lock);
         h_cg = hugetlb_cgroup_from_page(oldhpage);
         h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
         set_hugetlb_cgroup(oldhpage, NULL);
@@ -794,7 +794,7 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
         set_hugetlb_cgroup(newhpage, h_cg);
         set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
         list_move(&newhpage->lru, &h->hugepage_activelist);
-       spin_unlock(&hugetlb_lock);
+       spin_unlock_irq(&hugetlb_lock);
         return;
  }
author	Mike Kravetz <mike.kravetz@oracle.com>
	Wed, 5 May 2021 01:35:07 +0000 (18:35 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 5 May 2021 18:27:22 +0000 (11:27 -0700)
mm/hugetlb.c		patch \| blob \| history
mm/hugetlb_cgroup.c		patch \| blob \| history