
shmem: fix quota lock nesting in huge hole handling
author     Hugh Dickins <hughd@google.com>
           Tue, 25 Jul 2023 14:45:10 +0000 (16:45 +0200)
committer  Christian Brauner <brauner@kernel.org>
           Wed, 9 Aug 2023 07:15:40 +0000 (09:15 +0200)

i_pages lock nests inside i_lock, but shmem_charge() and shmem_uncharge()
were being called from THP splitting or collapsing while i_pages lock was
held, and they now go on to call dquot_alloc_block_nodirty(), which takes
i_lock to update i_blocks.
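
To illustrate the inversion, here is a minimal userspace analogue using
POSIX mutexes, purely for illustration: my_i_lock and my_i_pages are
invented names standing in for inode->i_lock and the i_pages xarray lock,
and this is not kernel code.

  #include <pthread.h>
  #include <stdio.h>

  static pthread_mutex_t my_i_lock  = PTHREAD_MUTEX_INITIALIZER;
  static pthread_mutex_t my_i_pages = PTHREAD_MUTEX_INITIALIZER;

  /* The established order: i_pages nests inside i_lock. */
  static void *quota_path(void *arg)
  {
          (void)arg;
          pthread_mutex_lock(&my_i_lock);
          pthread_mutex_lock(&my_i_pages);
          pthread_mutex_unlock(&my_i_pages);
          pthread_mutex_unlock(&my_i_lock);
          return NULL;
  }

  /* The old split/collapse path: i_pages already held when shmem_uncharge()
   * reaches dquot_alloc_block_nodirty(), which takes i_lock: reversed order. */
  static void *split_path(void *arg)
  {
          (void)arg;
          pthread_mutex_lock(&my_i_pages);
          pthread_mutex_lock(&my_i_lock);         /* AB/BA inversion */
          pthread_mutex_unlock(&my_i_lock);
          pthread_mutex_unlock(&my_i_pages);
          return NULL;
  }

  int main(void)
  {
          pthread_t a, b;

          /* With unlucky timing, each thread holds one lock and waits
           * forever on the other; lockdep flags exactly this pattern. */
          pthread_create(&a, NULL, quota_path, NULL);
          pthread_create(&b, NULL, split_path, NULL);
          pthread_join(a, NULL);
          pthread_join(b, NULL);
          puts("no deadlock this run; the window is timing-dependent");
          return 0;
  }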

We may well want to take i_lock out of this path later, in the non-quota
case even if it's left in the quota case (or perhaps use i_lock instead
of shmem's info->lock throughout); but don't get into that at this time.

Move the shmem_charge() and shmem_uncharge() calls out from under i_pages
lock, accounting the full batch of holes in a single call.
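
Schematically, the fix looks like this (a sketch rather than the code as
committed; for_each_tail_beyond_eof() is an invented placeholder for the
tail-page loop in __split_huge_page()):

  int nr_dropped = 0;

  xa_lock_irq(&mapping->i_pages);
  for_each_tail_beyond_eof(tail) {        /* hypothetical placeholder loop */
          if (shmem_mapping(mapping))
                  nr_dropped++;           /* only count: never take i_lock here */
  }
  xa_unlock_irq(&mapping->i_pages);

  if (nr_dropped)                         /* one batched call, lock dropped */
          shmem_uncharge(mapping->host, nr_dropped);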

Still pass the pages argument to shmem_uncharge(), though it now happens to
be unused: shmem_recalc_inode() is designed to account for clean pages
freed behind shmem's back, so it gets the accounting right by itself; the
later call to shmem_inode_unacct_blocks() then unaccounted the same pages a
second time, producing the imbalance behind the WARN_ON(inode->i_blocks) in
shmem_evict_inode().
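
For context, shmem_recalc_inode() at this point in mm/shmem.c has roughly
the following shape (paraphrased, so treat it as a sketch rather than the
authoritative source): pages freed behind shmem's back show up as the
difference computed below and are unaccounted in one place.

  static void shmem_recalc_inode(struct inode *inode)
  {
          struct shmem_inode_info *info = SHMEM_I(inode);
          long freed;

          /* pages charged to the inode but gone from the mapping and swap */
          freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
          if (freed > 0) {
                  info->alloced -= freed;
                  shmem_inode_unacct_blocks(inode, freed);
          }
  }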

Reported-by: syzbot+38ca19393fb3344f57e6@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/lkml/0000000000008e62f40600bfe080@google.com/
Reported-by: syzbot+440ff8cca06ee7a1d4db@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/lkml/00000000000076a7840600bfb6e8@google.com/
Signed-off-by: Hugh Dickins <hughd@google.com>
Tested-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Message-Id: <20230725144510.253763-8-cem@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
mm/huge_memory.c
mm/khugepaged.c
mm/shmem.c

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index eb36783..d301c32 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2521,7 +2521,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        struct address_space *swap_cache = NULL;
        unsigned long offset = 0;
        unsigned int nr = thp_nr_pages(head);
-       int i;
+       int i, nr_dropped = 0;
 
        /* complete memcg works before add pages to LRU */
        split_page_memcg(head, nr);
@@ -2546,7 +2546,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                        struct folio *tail = page_folio(head + i);
 
                        if (shmem_mapping(head->mapping))
-                               shmem_uncharge(head->mapping->host, 1);
+                               nr_dropped++;
                        else if (folio_test_clear_dirty(tail))
                                folio_account_cleaned(tail,
                                        inode_to_wb(folio->mapping->host));
@@ -2583,6 +2583,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        }
        local_irq_enable();
 
+       if (nr_dropped)
+               shmem_uncharge(head->mapping->host, nr_dropped);
        remap_page(folio, nr);
 
        if (PageSwapCache(head)) {
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 78c8d5d..47d1d32 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1955,10 +1955,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
                                                goto xa_locked;
                                        }
                                }
-                               if (!shmem_charge(mapping->host, 1)) {
-                                       result = SCAN_FAIL;
-                                       goto xa_locked;
-                               }
                                nr_none++;
                                continue;
                        }
@@ -2145,8 +2141,13 @@ xa_unlocked:
         */
        try_to_unmap_flush();
 
-       if (result != SCAN_SUCCEED)
+       if (result == SCAN_SUCCEED && nr_none &&
+           !shmem_charge(mapping->host, nr_none))
+               result = SCAN_FAIL;
+       if (result != SCAN_SUCCEED) {
+               nr_none = 0;
                goto rollback;
+       }
 
        /*
         * The old pages are locked, so they won't change anymore.
@@ -2283,8 +2284,8 @@ rollback:
        if (nr_none) {
                xas_lock_irq(&xas);
                mapping->nrpages -= nr_none;
-               shmem_uncharge(mapping->host, nr_none);
                xas_unlock_irq(&xas);
+               shmem_uncharge(mapping->host, nr_none);
        }
 
        list_for_each_entry_safe(page, tmp, &pagelist, lru) {
diff --git a/mm/shmem.c b/mm/shmem.c
index 5f2c9e2..72326ea 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -424,18 +424,20 @@ static void shmem_recalc_inode(struct inode *inode)
 bool shmem_charge(struct inode *inode, long pages)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
-       unsigned long flags;
+       struct address_space *mapping = inode->i_mapping;
 
        if (shmem_inode_acct_block(inode, pages))
                return false;
 
        /* nrpages adjustment first, then shmem_recalc_inode() when balanced */
-       inode->i_mapping->nrpages += pages;
+       xa_lock_irq(&mapping->i_pages);
+       mapping->nrpages += pages;
+       xa_unlock_irq(&mapping->i_pages);
 
-       spin_lock_irqsave(&info->lock, flags);
+       spin_lock_irq(&info->lock);
        info->alloced += pages;
        shmem_recalc_inode(inode);
-       spin_unlock_irqrestore(&info->lock, flags);
+       spin_unlock_irq(&info->lock);
 
        return true;
 }
@@ -443,16 +445,13 @@ bool shmem_charge(struct inode *inode, long pages)
 void shmem_uncharge(struct inode *inode, long pages)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
-       unsigned long flags;
 
        /* nrpages adjustment done by __filemap_remove_folio() or caller */
 
-       spin_lock_irqsave(&info->lock, flags);
-       info->alloced -= pages;
+       spin_lock_irq(&info->lock);
        shmem_recalc_inode(inode);
-       spin_unlock_irqrestore(&info->lock, flags);
-
-       shmem_inode_unacct_blocks(inode, pages);
+       /* which has called shmem_inode_unacct_blocks() if necessary */
+       spin_unlock_irq(&info->lock);
 }
 
 /*