spin_unlock(&resv->lock);
trg = kmalloc(sizeof(*trg), GFP_KERNEL);
- if (!trg)
+ if (!trg) {
+ kfree(nrg);
return -ENOMEM;
+ }
spin_lock(&resv->lock);
list_add(&trg->link, &resv->region_cache);
retry:
spin_lock(&resv->lock);
list_for_each_entry_safe(rg, trg, head, link) {
- if (rg->to <= f)
+ /*
+ * Skip regions before the range to be deleted. file_region
+ * ranges are normally of the form [from, to). However, there
+ * may be a "placeholder" entry in the map which is of the form
+ * (from, to) with from == to. Check for placeholder entries
+ * at the beginning of the range to be deleted.
+ */
+ if (rg->to <= f && (rg->to != rg->from || rg->to != f))
continue;
+
if (rg->from >= t)
break;
{
spin_lock(&hugetlb_lock);
if (PageHuge(page) && !page_count(page)) {
- struct hstate *h = page_hstate(page);
- int nid = page_to_nid(page);
- list_del(&page->lru);
+ struct page *head = compound_head(page);
+ struct hstate *h = page_hstate(head);
+ int nid = page_to_nid(head);
+ list_del(&head->lru);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
- update_and_free_page(h, page);
+ update_and_free_page(h, head);
}
spin_unlock(&hugetlb_lock);
}
/*
* Dissolve free hugepages in a given pfn range. Used by memory hotplug to
* make specified memory blocks removable from the system.
- * Note that start_pfn should aligned with (minimum) hugepage size.
+ * Note that this will dissolve a free gigantic hugepage completely, if any
+ * part of it lies within the given range.
*/
void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
{
if (!hugepages_supported())
return;
- VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << minimum_order));
for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order)
dissolve_free_huge_page(pfn_to_page(pfn));
}
}
/*
- * When releasing a hugetlb pool reservation, any surplus pages that were
- * allocated to satisfy the reservation must be explicitly freed if they were
- * never used.
- * Called with hugetlb_lock held.
+ * This routine has two main purposes:
+ * 1) Decrement the reservation count (resv_huge_pages) by the value passed
+ * in unused_resv_pages. This corresponds to the prior adjustments made
+ * to the associated reservation map.
+ * 2) Free any unused surplus pages that may have been allocated to satisfy
+ * the reservation. As many as unused_resv_pages may be freed.
+ *
+ * Called with hugetlb_lock held. However, the lock could be dropped (and
+ * reacquired) during calls to cond_resched_lock. Whenever dropping the lock,
+ * we must make sure nobody else can claim pages we are in the process of
+ * freeing. Do this by ensuring resv_huge_page always is greater than the
+ * number of huge pages we plan to free when dropping the lock.
*/
static void return_unused_surplus_pages(struct hstate *h,
unsigned long unused_resv_pages)
{
unsigned long nr_pages;
- /* Uncommit the reservation */
- h->resv_huge_pages -= unused_resv_pages;
-
/* Cannot return gigantic pages currently */
if (hstate_is_gigantic(h))
- return;
+ goto out;
+ /*
+ * Part (or even all) of the reservation could have been backed
+ * by pre-allocated pages. Only free surplus pages.
+ */
nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
/*
* when the nodes with surplus pages have no free pages.
* free_pool_huge_page() will balance the the freed pages across the
* on-line nodes with memory and will handle the hstate accounting.
+ *
+ * Note that we decrement resv_huge_pages as we free the pages. If
+ * we drop the lock, resv_huge_pages will still be sufficiently large
+ * to cover subsequent pages we may free.
*/
while (nr_pages--) {
+ h->resv_huge_pages--;
+ unused_resv_pages--;
if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
- break;
+ goto out;
cond_resched_lock(&hugetlb_lock);
}
+
+out:
+ /* Fully uncommit the reservation */
+ h->resv_huge_pages -= unused_resv_pages;
}
*/
if (hstate_is_gigantic(h))
adjust_managed_page_count(page, 1 << h->order);
+ cond_resched();
}
}
* and reducing the surplus.
*/
spin_unlock(&hugetlb_lock);
+
+ /* yield cpu to avoid soft lockup */
+ cond_resched();
+
if (hstate_is_gigantic(h))
ret = alloc_fresh_gigantic_page(h, nodes_allowed);
else
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma)
{
- pte_t *src_pte, *dst_pte, entry;
+ pte_t *src_pte, *dst_pte, entry, dst_entry;
struct page *ptepage;
unsigned long addr;
int cow;
break;
}
- /* If the pagetables are shared don't copy or take references */
- if (dst_pte == src_pte)
+ /*
+ * If the pagetables are shared don't copy or take references.
+ * dst_pte == src_pte is the common case of src/dest sharing.
+ *
+ * However, src could have 'unshared' and dst shares with
+ * another vma. If dst_pte !none, this implies sharing.
+ * Check here before taking page table lock, and once again
+ * after taking the lock below.
+ */
+ dst_entry = huge_ptep_get(dst_pte);
+ if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
continue;
dst_ptl = huge_pte_lock(h, dst, dst_pte);
src_ptl = huge_pte_lockptr(h, src, src_pte);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
entry = huge_ptep_get(src_pte);
- if (huge_pte_none(entry)) { /* skip none entry */
+ dst_entry = huge_ptep_get(dst_pte);
+ if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
+ /*
+ * Skip if src entry none. Also, skip in the
+ * unlikely case dst entry !none as this implies
+ * sharing with another vma.
+ */
;
} else if (unlikely(is_hugetlb_entry_migration(entry) ||
is_hugetlb_entry_hwpoisoned(entry))) {
return err;
ClearPagePrivate(page);
+ /*
+ * set page dirty so that it will not be removed from cache/file
+ * by non-hugetlbfs specific code paths.
+ */
+ set_page_dirty(page);
+
spin_lock(&inode->i_lock);
inode->i_blocks += blocks_per_huge_page(h);
spin_unlock(&inode->i_lock);
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
+ } else {
+ ptep = huge_pte_alloc(mm, address, huge_page_size(h));
+ if (!ptep)
+ return VM_FAULT_OOM;
}
- ptep = huge_pte_alloc(mm, address, huge_page_size(h));
- if (!ptep)
- return VM_FAULT_OOM;
-
mapping = vma->vm_file->f_mapping;
idx = vma_hugecache_offset(h, vma, address);
if (saddr) {
spte = huge_pte_offset(svma->vm_mm, saddr);
if (spte) {
- mm_inc_nr_pmds(mm);
get_page(virt_to_page(spte));
break;
}
if (pud_none(*pud)) {
pud_populate(mm, pud,
(pmd_t *)((unsigned long)spte & PAGE_MASK));
+ mm_inc_nr_pmds(mm);
} else {
put_page(virt_to_page(spte));
- mm_inc_nr_pmds(mm);
}
spin_unlock(ptl);
out:
{
struct page *page = NULL;
spinlock_t *ptl;
+ pte_t pte;
retry:
ptl = pmd_lockptr(mm, pmd);
spin_lock(ptl);
*/
if (!pmd_huge(*pmd))
goto out;
- if (pmd_present(*pmd)) {
+ pte = huge_ptep_get((pte_t *)pmd);
+ if (pte_present(pte)) {
page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
if (flags & FOLL_GET)
get_page(page);
} else {
- if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) {
+ if (is_hugetlb_entry_migration(pte)) {
spin_unlock(ptl);
__migration_entry_wait(mm, (pte_t *)pmd, ptl);
goto retry;